src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  32
  33     def elaborate(self, platform):
  34         m = Module()
  35         m.submodules.rdport = self.rdport = self.mem.read_port()
  36         m.submodules.wrport = self.wrport = self.mem.write_port()
  37         return m
  38
  39
  40 class MemSim:
  41     def __init__(self, regwid, addrw):
  42         self.regwid = regwid
  43         self.ddepth = 1 # regwid//8
  44         depth = (1<<addrw) // self.ddepth
  45         self.mem = list(range(0, depth))
  46
  47     def ld(self, addr):
  48         return self.mem[addr>>self.ddepth]
  49
  50     def st(self, addr, data):
  51         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  52
  53
  54 class CompUnitsBase(Elaboratable):
  55     """ Computation Unit Base class.
  56
  57         Amazingly, this class works recursively.  It's supposed to just
  58         look after some ALUs (that can handle the same operations),
  59         grouping them together, however it turns out that the same code
  60         can also group *groups* of Computation Units together as well.
  61
  62         Basically it was intended just to concatenate the ALU's issue,
  63         go_rd etc. signals together, which start out as bits and become
  64         sequences.  Turns out that the same trick works just as well
  65         on Computation Units!
  66
  67         So this class may be used recursively to present a top-level
  68         sequential concatenation of all the signals in and out of
  69         ALUs, whilst at the same time making it convenient to group
  70         ALUs together.
  71
  72         At the lower level, the intent is that groups of (identical)
  73         ALUs may be passed the same operation.  Even beyond that,
  74         the intent is that that group of (identical) ALUs actually
  75         share the *same pipeline* and as such become a "Concurrent
  76         Computation Unit" as defined by Mitch Alsup (see section
  77         11.4.9.3)
  78     """
  79     def __init__(self, rwid, units, ldstmode=False):
  80         """ Inputs:
  81
  82             * :rwid:   bit width of register file(s) - both FP and INT
  83             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  84         """
  85         self.units = units
  86         self.ldstmode = ldstmode
  87         self.rwid = rwid
  88         self.rwid = rwid
  89         if units and isinstance(units[0], CompUnitsBase):
  90             self.n_units = 0
  91             for u in self.units:
  92                 self.n_units += u.n_units
  93         else:
  94             self.n_units = len(units)
  95
  96         n_units = self.n_units
  97
  98         # inputs
  99         self.issue_i = Signal(n_units, reset_less=True)
 100         self.go_rd_i = Signal(n_units, reset_less=True)
 101         self.go_wr_i = Signal(n_units, reset_less=True)
 102         self.shadown_i = Signal(n_units, reset_less=True)
 103         self.go_die_i = Signal(n_units, reset_less=True)
 104         if ldstmode:
 105             self.go_ad_i = Signal(n_units, reset_less=True)
 106             self.go_st_i = Signal(n_units, reset_less=True)
 107
 108         # outputs
 109         self.busy_o = Signal(n_units, reset_less=True)
 110         self.rd_rel_o = Signal(n_units, reset_less=True)
 111         self.req_rel_o = Signal(n_units, reset_less=True)
 112         if ldstmode:
 113             self.ld_o = Signal(n_units, reset_less=True) # op is LD
 114             self.st_o = Signal(n_units, reset_less=True) # op is ST
 115             self.adr_rel_o = Signal(n_units, reset_less=True)
 116             self.sto_rel_o = Signal(n_units, reset_less=True)
 117             self.req_rel_o = Signal(n_units, reset_less=True)
 118             self.load_mem_o = Signal(n_units, reset_less=True)
 119             self.stwd_mem_o = Signal(n_units, reset_less=True)
 120             self.addr_o = Signal(rwid, reset_less=True)
 121
 122         # in/out register data (note: not register#, actual data)
 123         self.data_o = Signal(rwid, reset_less=True)
 124         self.src1_i = Signal(rwid, reset_less=True)
 125         self.src2_i = Signal(rwid, reset_less=True)
 126         # input operand
 127
 128     def elaborate(self, platform):
 129         m = Module()
 130         comb = m.d.comb
 131
 132         for i, alu in enumerate(self.units):
 133             setattr(m.submodules, "comp%d" % i, alu)
 134
 135         go_rd_l = []
 136         go_wr_l = []
 137         issue_l = []
 138         busy_l = []
 139         req_rel_l = []
 140         rd_rel_l = []
 141         shadow_l = []
 142         godie_l = []
 143         for alu in self.units:
 144             req_rel_l.append(alu.req_rel_o)
 145             rd_rel_l.append(alu.rd_rel_o)
 146             shadow_l.append(alu.shadown_i)
 147             godie_l.append(alu.go_die_i)
 148             go_wr_l.append(alu.go_wr_i)
 149             go_rd_l.append(alu.go_rd_i)
 150             issue_l.append(alu.issue_i)
 151             busy_l.append(alu.busy_o)
 152         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 153         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 154         comb += self.busy_o.eq(Cat(*busy_l))
 155         comb += Cat(*godie_l).eq(self.go_die_i)
 156         comb += Cat(*shadow_l).eq(self.shadown_i)
 157         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 158         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 159         comb += Cat(*issue_l).eq(self.issue_i)
 160
 161         # connect data register input/output
 162
 163         # merge (OR) all integer FU / ALU outputs to a single value
 164         if self.units:
 165             data_o = treereduce(self.units, "data_o")
 166             comb += self.data_o.eq(data_o)
 167             if self.ldstmode:
 168                 addr_o = treereduce(self.units, "addr_o")
 169                 comb += self.addr_o.eq(addr_o)
 170
 171         for i, alu in enumerate(self.units):
 172             comb += alu.src1_i.eq(self.src1_i)
 173             comb += alu.src2_i.eq(self.src2_i)
 174
 175         if not self.ldstmode:
 176             return m
 177
 178         ldmem_l = []
 179         stmem_l = []
 180         go_ad_l = []
 181         go_st_l = []
 182         ld_l = []
 183         st_l = []
 184         adr_rel_l = []
 185         sto_rel_l = []
 186         for alu in self.units:
 187             ld_l.append(alu.ld_o)
 188             st_l.append(alu.st_o)
 189             adr_rel_l.append(alu.adr_rel_o)
 190             sto_rel_l.append(alu.sto_rel_o)
 191             ldmem_l.append(alu.load_mem_o)
 192             stmem_l.append(alu.stwd_mem_o)
 193             go_ad_l.append(alu.go_ad_i)
 194             go_st_l.append(alu.go_st_i)
 195         comb += self.ld_o.eq(Cat(*ld_l))
 196         comb += self.st_o.eq(Cat(*st_l))
 197         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 198         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 199         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 200         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 201         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 202         comb += Cat(*go_st_l).eq(self.go_st_i)
 203
 204         return m
 205
 206
 207 class CompUnitLDSTs(CompUnitsBase):
 208
 209     def __init__(self, rwid, opwid, n_ldsts, mem):
 210         """ Inputs:
 211
 212             * :rwid:   bit width of register file(s) - both FP and INT
 213             * :opwid:  operand bit width
 214         """
 215         self.opwid = opwid
 216
 217         # inputs
 218         self.oper_i = Signal(opwid, reset_less=True)
 219         self.imm_i = Signal(rwid, reset_less=True)
 220
 221         # Int ALUs
 222         self.alus = []
 223         for i in range(n_ldsts):
 224             self.alus.append(ALU(rwid))
 225
 226         units = []
 227         for alu in self.alus:
 228             aluopwid = 4 # see compldst.py for "internal" opcode
 229             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 230
 231         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 232
 233     def elaborate(self, platform):
 234         m = CompUnitsBase.elaborate(self, platform)
 235         comb = m.d.comb
 236
 237         # hand the same operation to all units, 4 lower bits though
 238         for alu in self.units:
 239             comb += alu.oper_i[0:4].eq(self.oper_i)
 240             comb += alu.imm_i.eq(self.imm_i)
 241             comb += alu.isalu_i.eq(0)
 242
 243         return m
 244
 245
 246 class CompUnitALUs(CompUnitsBase):
 247
 248     def __init__(self, rwid, opwid, n_alus):
 249         """ Inputs:
 250
 251             * :rwid:   bit width of register file(s) - both FP and INT
 252             * :opwid:  operand bit width
 253         """
 254         self.opwid = opwid
 255
 256         # inputs
 257         self.oper_i = Signal(opwid, reset_less=True)
 258         self.imm_i = Signal(rwid, reset_less=True)
 259
 260         # Int ALUs
 261         alus = []
 262         for i in range(n_alus):
 263             alus.append(ALU(rwid))
 264
 265         units = []
 266         for alu in alus:
 267             aluopwid = 3 # extra bit for immediate mode
 268             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 269
 270         CompUnitsBase.__init__(self, rwid, units)
 271
 272     def elaborate(self, platform):
 273         m = CompUnitsBase.elaborate(self, platform)
 274         comb = m.d.comb
 275
 276         # hand the same operation to all units, only lower 3 bits though
 277         for alu in self.units:
 278             comb += alu.oper_i[0:3].eq(self.oper_i)
 279             comb += alu.imm_i.eq(self.imm_i)
 280
 281         return m
 282
 283
 284 class CompUnitBR(CompUnitsBase):
 285
 286     def __init__(self, rwid, opwid):
 287         """ Inputs:
 288
 289             * :rwid:   bit width of register file(s) - both FP and INT
 290             * :opwid:  operand bit width
 291
 292             Note: bgt unit is returned so that a shadow unit can be created
 293             for it
 294         """
 295         self.opwid = opwid
 296
 297         # inputs
 298         self.oper_i = Signal(opwid, reset_less=True)
 299         self.imm_i = Signal(rwid, reset_less=True)
 300
 301         # Branch ALU and CU
 302         self.bgt = BranchALU(rwid)
 303         aluopwid = 3 # extra bit for immediate mode
 304         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 305         CompUnitsBase.__init__(self, rwid, [self.br1])
 306
 307     def elaborate(self, platform):
 308         m = CompUnitsBase.elaborate(self, platform)
 309         comb = m.d.comb
 310
 311         # hand the same operation to all units
 312         for alu in self.units:
 313             comb += alu.oper_i.eq(self.oper_i)
 314             comb += alu.imm_i.eq(self.imm_i)
 315
 316         return m
 317
 318
 319 class FunctionUnits(Elaboratable):
 320
 321     def __init__(self, n_regs, n_int_alus):
 322         self.n_regs = n_regs
 323         self.n_int_alus = n_int_alus
 324
 325         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 326         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 327         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 328
 329         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 330         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 331
 332         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 333         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 334         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 335
 336         self.readable_o = Signal(n_int_alus, reset_less=True)
 337         self.writable_o = Signal(n_int_alus, reset_less=True)
 338
 339         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 340         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 341         self.go_die_i = Signal(n_int_alus, reset_less=True)
 342         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 343
 344         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 345
 346     def elaborate(self, platform):
 347         m = Module()
 348         comb = m.d.comb
 349         sync = m.d.sync
 350
 351         n_intfus = self.n_int_alus
 352
 353         # Integer FU-FU Dep Matrix
 354         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 355         m.submodules.intfudeps = intfudeps
 356         # Integer FU-Reg Dep Matrix
 357         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 358         m.submodules.intregdeps = intregdeps
 359
 360         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 361         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 362
 363         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 364         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 365
 366         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 367         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 368         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 369
 370         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 371         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 372         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 373         comb += intfudeps.go_die_i.eq(self.go_die_i)
 374         comb += self.readable_o.eq(intfudeps.readable_o)
 375         comb += self.writable_o.eq(intfudeps.writable_o)
 376
 377         # Connect function issue / arrays, and dest/src1/src2
 378         comb += intregdeps.dest_i.eq(self.dest_i)
 379         comb += intregdeps.src_i[0].eq(self.src1_i)
 380         comb += intregdeps.src_i[1].eq(self.src2_i)
 381
 382         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 383         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 384         comb += intregdeps.go_die_i.eq(self.go_die_i)
 385         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 386
 387         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 388         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 389         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 390
 391         return m
 392
 393
 394 class Scoreboard(Elaboratable):
 395     def __init__(self, rwid, n_regs):
 396         """ Inputs:
 397
 398             * :rwid:   bit width of register file(s) - both FP and INT
 399             * :n_regs: depth of register file(s) - number of FP and INT regs
 400         """
 401         self.rwid = rwid
 402         self.n_regs = n_regs
 403
 404         # Register Files
 405         self.intregs = RegFileArray(rwid, n_regs)
 406         self.fpregs = RegFileArray(rwid, n_regs)
 407
 408         # Memory (test for now)
 409         self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
 410
 411         # issue q needs to get at these
 412         self.aluissue = IssueUnitGroup(2)
 413         self.lsissue = IssueUnitGroup(2)
 414         self.brissue = IssueUnitGroup(1)
 415         # and these
 416         self.alu_oper_i = Signal(4, reset_less=True)
 417         self.alu_imm_i = Signal(rwid, reset_less=True)
 418         self.br_oper_i = Signal(4, reset_less=True)
 419         self.br_imm_i = Signal(rwid, reset_less=True)
 420         self.ls_oper_i = Signal(4, reset_less=True)
 421         self.ls_imm_i = Signal(rwid, reset_less=True)
 422
 423         # inputs
 424         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 425         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 426         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 427         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 428
 429         # outputs
 430         self.issue_o = Signal(reset_less=True) # instruction was accepted
 431         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 432
 433         # for branch speculation experiment.  branch_direction = 0 if
 434         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 435         # branch_succ and branch_fail are requests to have the current
 436         # instruction be dependent on the branch unit "shadow" capability.
 437         self.branch_succ_i = Signal(reset_less=True)
 438         self.branch_fail_i = Signal(reset_less=True)
 439         self.branch_direction_o = Signal(2, reset_less=True)
 440
 441     def elaborate(self, platform):
 442         m = Module()
 443         comb = m.d.comb
 444         sync = m.d.sync
 445
 446         m.submodules.intregs = self.intregs
 447         m.submodules.fpregs = self.fpregs
 448         m.submodules.mem = mem = self.mem
 449
 450         # register ports
 451         int_dest = self.intregs.write_port("dest")
 452         int_src1 = self.intregs.read_port("src1")
 453         int_src2 = self.intregs.read_port("src2")
 454
 455         fp_dest = self.fpregs.write_port("dest")
 456         fp_src1 = self.fpregs.read_port("src1")
 457         fp_src2 = self.fpregs.read_port("src2")
 458
 459         # Int ALUs and BR ALUs
 460         n_int_alus = 5
 461         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 462         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 463
 464         # LDST Comp Units
 465         n_ldsts = 2
 466         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 467
 468         # Comp Units
 469         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 470         bgt = cub.bgt # get at the branch computation unit
 471         br1 = cub.br1
 472
 473         # Int FUs
 474         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 475
 476         # Memory FUs
 477         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 478
 479         # Memory Priority Picker 1: one gateway per memory port
 480         mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
 481         m.submodules.mempick1 = mempick1
 482
 483         # Count of number of FUs
 484         n_intfus = n_int_alus
 485         n_fp_fus = 0 # for now
 486
 487         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 488         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 489         m.submodules.intpick1 = intpick1
 490
 491         # INT/FP Issue Unit
 492         regdecode = RegDecode(self.n_regs)
 493         m.submodules.regdecode = regdecode
 494         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 495         m.submodules.issueunit = issueunit
 496
 497         # Shadow Matrix.  currently n_intfus shadows, to be used for
 498         # write-after-write hazards.  NOTE: there is one extra for branches,
 499         # so the shadow width is increased by 1
 500         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 501         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 502
 503         # record previous instruction to cast shadow on current instruction
 504         prev_shadow = Signal(n_intfus)
 505
 506         # Branch Speculation recorder.  tracks the success/fail state as
 507         # each instruction is issued, so that when the branch occurs the
 508         # allow/cancel can be issued as appropriate.
 509         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 510
 511         #---------
 512         # ok start wiring things together...
 513         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 514         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 515         #---------
 516
 517         #---------
 518         # Issue Unit is where it starts.  set up some in/outs for this module
 519         #---------
 520         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 521                      regdecode.src1_i.eq(self.int_src1_i),
 522                      regdecode.src2_i.eq(self.int_src2_i),
 523                      regdecode.enable_i.eq(self.reg_enable_i),
 524                      self.issue_o.eq(issueunit.issue_o)
 525                     ]
 526
 527         # take these to outside (issue needs them)
 528         comb += cua.oper_i.eq(self.alu_oper_i)
 529         comb += cua.imm_i.eq(self.alu_imm_i)
 530         comb += cub.oper_i.eq(self.br_oper_i)
 531         comb += cub.imm_i.eq(self.br_imm_i)
 532         comb += cul.oper_i.eq(self.ls_oper_i)
 533         comb += cul.imm_i.eq(self.ls_imm_i)
 534
 535         # TODO: issueunit.f (FP)
 536
 537         # and int function issue / busy arrays, and dest/src1/src2
 538         comb += intfus.dest_i.eq(regdecode.dest_o)
 539         comb += intfus.src1_i.eq(regdecode.src1_o)
 540         comb += intfus.src2_i.eq(regdecode.src2_o)
 541
 542         fn_issue_o = issueunit.fn_issue_o
 543
 544         comb += intfus.fn_issue_i.eq(fn_issue_o)
 545         comb += issueunit.busy_i.eq(cu.busy_o)
 546         comb += self.busy_o.eq(cu.busy_o.bool())
 547
 548         #---------
 549         # Memory Function Unit
 550         #---------
 551         reset_b = Signal(cul.n_units, reset_less=True)
 552         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 553
 554         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 555         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 556         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 557
 558         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 559         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 560         # issue_i.  multi-issue gets a bit more complex but not a lot.
 561         prior_ldsts = Signal(cul.n_units, reset_less=True)
 562         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 563         with m.If(self.ls_oper_i[2]): # LD bit of operand
 564             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 565         with m.If(self.ls_oper_i[3]): # ST bit of operand
 566             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 567
 568         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 569         # just immediately activate go_adr
 570         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 571
 572         # connect up address data
 573         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 574         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 575
 576         # connect loadable / storable to go_ld/go_st.
 577         # XXX should only be done when the memory ld/st has actually happened!
 578         go_st_i = Signal(cul.n_units, reset_less=True)
 579         go_ld_i = Signal(cul.n_units, reset_less=True)
 580         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
 581                                   cul.req_rel_o & cul.ld_o)
 582         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 583                                   cul.sto_rel_o & cul.st_o)
 584         comb += memfus.go_ld_i.eq(go_ld_i)
 585         comb += memfus.go_st_i.eq(go_st_i)
 586         #comb += cul.go_wr_i.eq(go_ld_i)
 587         comb += cul.go_st_i.eq(go_st_i)
 588
 589         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 590         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 591         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 592
 593         #---------
 594         # merge shadow matrices outputs
 595         #---------
 596
 597         # these are explained in ShadowMatrix docstring, and are to be
 598         # connected to the FUReg and FUFU Matrices, to get them to reset
 599         anydie = Signal(n_intfus, reset_less=True)
 600         allshadown = Signal(n_intfus, reset_less=True)
 601         shreset = Signal(n_intfus, reset_less=True)
 602         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 603         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 604         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 605
 606         #---------
 607         # connect fu-fu matrix
 608         #---------
 609
 610         # Group Picker... done manually for now.
 611         go_rd_o = intpick1.go_rd_o
 612         go_wr_o = intpick1.go_wr_o
 613         go_rd_i = intfus.go_rd_i
 614         go_wr_i = intfus.go_wr_i
 615         go_die_i = intfus.go_die_i
 616         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 617         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 618         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 619         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 620
 621         # Connect Picker
 622         #---------
 623         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 624         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 625         int_rd_o = intfus.readable_o
 626         int_wr_o = intfus.writable_o
 627         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 628         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 629
 630         #---------
 631         # Shadow Matrix
 632         #---------
 633
 634         comb += shadows.issue_i.eq(fn_issue_o)
 635         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 636         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 637         #---------
 638         # NOTE; this setup is for the instruction order preservation...
 639
 640         # connect shadows / go_dies to Computation Units
 641         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 642         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 643
 644         # ok connect first n_int_fu shadows to busy lines, to create an
 645         # instruction-order linked-list-like arrangement, using a bit-matrix
 646         # (instead of e.g. a ring buffer).
 647
 648         # when written, the shadow can be cancelled (and was good)
 649         for i in range(n_intfus):
 650             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 651
 652         # *previous* instruction shadows *current* instruction, and, obviously,
 653         # if the previous is completed (!busy) don't cast the shadow!
 654         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 655         for i in range(n_intfus):
 656             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 657
 658         #---------
 659         # ... and this is for branch speculation.  it uses the extra bit
 660         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 661         # only needs to set shadow_i, s_fail_i and s_good_i
 662
 663         # issue captures shadow_i (if enabled)
 664         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 665
 666         bactive = Signal(reset_less=True)
 667         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 668
 669         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 670         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 671             comb += bshadow.issue_i.eq(fn_issue_o)
 672             for i in range(n_intfus):
 673                 with m.If(fn_issue_o & (Const(1<<i))):
 674                     comb += bshadow.shadow_i[i][0].eq(1)
 675
 676         # finally, we need an indicator to the test infrastructure as to
 677         # whether the branch succeeded or failed, plus, link up to the
 678         # "recorder" of whether the instruction was under shadow or not
 679
 680         with m.If(br1.issue_i):
 681             sync += bspec.active_i.eq(1)
 682         with m.If(self.branch_succ_i):
 683             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 684         with m.If(self.branch_fail_i):
 685             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 686
 687         # branch is active (TODO: a better signal: this is over-using the
 688         # go_write signal - actually the branch should not be "writing")
 689         with m.If(br1.go_wr_i):
 690             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 691             sync += bspec.active_i.eq(0)
 692             comb += bspec.br_i.eq(1)
 693             # branch occurs if data == 1, failed if data == 0
 694             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 695             for i in range(n_intfus):
 696                 # *expected* direction of the branch matched against *actual*
 697                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 698                 # ... or it didn't
 699                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 700
 701         #---------
 702         # Connect Register File(s)
 703         #---------
 704         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 705         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 706         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 707
 708         # connect ALUs to regfule
 709         comb += int_dest.data_i.eq(cu.data_o)
 710         comb += cu.src1_i.eq(int_src1.data_o)
 711         comb += cu.src2_i.eq(int_src2.data_o)
 712
 713         # connect ALU Computation Units
 714         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 715         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 716         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 717
 718         return m
 719
 720     def __iter__(self):
 721         yield from self.intregs
 722         yield from self.fpregs
 723         yield self.int_dest_i
 724         yield self.int_src1_i
 725         yield self.int_src2_i
 726         yield self.issue_o
 727         yield self.branch_succ_i
 728         yield self.branch_fail_i
 729         yield self.branch_direction_o
 730
 731     def ports(self):
 732         return list(self)
 733
 734
 735 class IssueToScoreboard(Elaboratable):
 736
 737     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 738         self.qlen = qlen
 739         self.n_in = n_in
 740         self.n_out = n_out
 741         self.rwid = rwid
 742         self.opw = opwid
 743         self.n_regs = n_regs
 744
 745         mqbits = (int(log(qlen) / log(2))+2, False)
 746         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 747         self.p_ready_o = Signal() # instructions were added
 748         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 749
 750         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 751         self.qlen_o = Signal(mqbits, reset_less=True)
 752
 753     def elaborate(self, platform):
 754         m = Module()
 755         comb = m.d.comb
 756         sync = m.d.sync
 757
 758         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 759         sc = Scoreboard(self.rwid, self.n_regs)
 760         m.submodules.iq = iq
 761         m.submodules.sc = sc
 762
 763         # get at the regfile for testing
 764         self.intregs = sc.intregs
 765
 766         # and the "busy" signal and instruction queue length
 767         comb += self.busy_o.eq(sc.busy_o)
 768         comb += self.qlen_o.eq(iq.qlen_o)
 769
 770         # link up instruction queue
 771         comb += iq.p_add_i.eq(self.p_add_i)
 772         comb += self.p_ready_o.eq(iq.p_ready_o)
 773         for i in range(self.n_in):
 774             comb += eq(iq.data_i[i], self.data_i[i])
 775
 776         # take instruction and process it.  note that it's possible to
 777         # "inspect" the queue contents *without* actually removing the
 778         # items.  items are only removed when the
 779
 780         # in "waiting" state
 781         wait_issue_br = Signal()
 782         wait_issue_alu = Signal()
 783         wait_issue_ls = Signal()
 784
 785         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 786             # set instruction pop length to 1 if the unit accepted
 787             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 788                 with m.If(iq.qlen_o != 0):
 789                     comb += iq.n_sub_i.eq(1)
 790             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 791                 with m.If(iq.qlen_o != 0):
 792                     comb += iq.n_sub_i.eq(1)
 793             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 794                 with m.If(iq.qlen_o != 0):
 795                     comb += iq.n_sub_i.eq(1)
 796
 797         # see if some instruction(s) are here.  note that this is
 798         # "inspecting" the in-place queue.  note also that on the
 799         # cycle following "waiting" for fn_issue_o to be set, the
 800         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 801         with m.If(iq.qlen_o != 0):
 802             # get the operands and operation
 803             imm = iq.data_o[0].imm_i
 804             dest = iq.data_o[0].dest_i
 805             src1 = iq.data_o[0].src1_i
 806             src2 = iq.data_o[0].src2_i
 807             op = iq.data_o[0].oper_i
 808             opi = iq.data_o[0].opim_i # immediate set
 809
 810             # set the src/dest regs
 811             comb += sc.int_dest_i.eq(dest)
 812             comb += sc.int_src1_i.eq(src1)
 813             comb += sc.int_src2_i.eq(src2)
 814             comb += sc.reg_enable_i.eq(1) # enable the regfile
 815
 816             # choose a Function-Unit-Group
 817             with m.If((op & (0x3<<2)) != 0): # branch
 818                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 819                 comb += sc.br_imm_i.eq(imm)
 820                 comb += sc.brissue.insn_i.eq(1)
 821                 comb += wait_issue_br.eq(1)
 822             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 823                 # see compldst.py
 824                 # bit 0: ADD/SUB
 825                 # bit 1: immed
 826                 # bit 4: LD
 827                 # bit 5: ST
 828                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 829                 comb += sc.ls_imm_i.eq(imm)
 830                 comb += sc.lsissue.insn_i.eq(1)
 831                 comb += wait_issue_ls.eq(1)
 832             with m.Else(): # alu
 833                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 834                 comb += sc.alu_imm_i.eq(imm)
 835                 comb += sc.aluissue.insn_i.eq(1)
 836                 comb += wait_issue_alu.eq(1)
 837
 838             # XXX TODO
 839             # these indicate that the instruction is to be made
 840             # shadow-dependent on
 841             # (either) branch success or branch fail
 842             #yield sc.branch_fail_i.eq(branch_fail)
 843             #yield sc.branch_succ_i.eq(branch_success)
 844
 845         return m
 846
 847     def __iter__(self):
 848         yield self.p_ready_o
 849         for o in self.data_i:
 850             yield from list(o)
 851         yield self.p_add_i
 852
 853     def ports(self):
 854         return list(self)
 855
 856
 857 IADD = 0
 858 ISUB = 1
 859 IMUL = 2
 860 ISHF = 3
 861 IBGT = 4
 862 IBLT = 5
 863 IBEQ = 6
 864 IBNE = 7
 865
 866
 867 class RegSim:
 868     def __init__(self, rwidth, nregs):
 869         self.rwidth = rwidth
 870         self.regs = [0] * nregs
 871
 872     def op(self, op, op_imm, imm, src1, src2, dest):
 873         maxbits = (1 << self.rwidth) - 1
 874         src1 = self.regs[src1] & maxbits
 875         if op_imm:
 876             src2 = imm
 877         else:
 878             src2 = self.regs[src2] & maxbits
 879         if op == IADD:
 880             val = src1 + src2
 881         elif op == ISUB:
 882             val = src1 - src2
 883         elif op == IMUL:
 884             val = src1 * src2
 885         elif op == ISHF:
 886             val = src1 >> (src2 & maxbits)
 887         elif op == IBGT:
 888             val = int(src1 > src2)
 889         elif op == IBLT:
 890             val = int(src1 < src2)
 891         elif op == IBEQ:
 892             val = int(src1 == src2)
 893         elif op == IBNE:
 894             val = int(src1 != src2)
 895         else:
 896             return 0 # LD/ST TODO
 897         val &= maxbits
 898         self.setval(dest, val)
 899         return val
 900
 901     def setval(self, dest, val):
 902         print ("sim setval", dest, hex(val))
 903         self.regs[dest] = val
 904
 905     def dump(self, dut):
 906         for i, val in enumerate(self.regs):
 907             reg = yield dut.intregs.regs[i].reg
 908             okstr = "OK" if reg == val else "!ok"
 909             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 910
 911     def check(self, dut):
 912         for i, val in enumerate(self.regs):
 913             reg = yield dut.intregs.regs[i].reg
 914             if reg != val:
 915                 print("reg %d expected %x received %x\n" % (i, val, reg))
 916                 yield from self.dump(dut)
 917                 assert False
 918
 919 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 920             branch_success, branch_fail):
 921     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 922                'src1_i': src1, 'src2_i': src2}]
 923
 924     sendlen = 1
 925     for idx in range(sendlen):
 926         yield from eq(dut.data_i[idx], instrs[idx])
 927         di = yield dut.data_i[idx]
 928         print ("senddata %d %x" % (idx, di))
 929     yield dut.p_add_i.eq(sendlen)
 930     yield
 931     o_p_ready = yield dut.p_ready_o
 932     while not o_p_ready:
 933         yield
 934         o_p_ready = yield dut.p_ready_o
 935
 936     yield dut.p_add_i.eq(0)
 937
 938
 939 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 940     yield from disable_issue(dut)
 941     yield dut.int_dest_i.eq(dest)
 942     yield dut.int_src1_i.eq(src1)
 943     yield dut.int_src2_i.eq(src2)
 944     if (op & (0x3<<2)) != 0: # branch
 945         yield dut.brissue.insn_i.eq(1)
 946         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 947         yield dut.br_imm_i.eq(imm)
 948         dut_issue = dut.brissue
 949     else:
 950         yield dut.aluissue.insn_i.eq(1)
 951         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 952         yield dut.alu_imm_i.eq(imm)
 953         dut_issue = dut.aluissue
 954     yield dut.reg_enable_i.eq(1)
 955
 956     # these indicate that the instruction is to be made shadow-dependent on
 957     # (either) branch success or branch fail
 958     yield dut.branch_fail_i.eq(branch_fail)
 959     yield dut.branch_succ_i.eq(branch_success)
 960
 961     yield
 962     yield from wait_for_issue(dut, dut_issue)
 963
 964
 965 def print_reg(dut, rnums):
 966     rs = []
 967     for rnum in rnums:
 968         reg = yield dut.intregs.regs[rnum].reg
 969         rs.append("%x" % reg)
 970     rnums = map(str, rnums)
 971     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 972
 973
 974 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 975     insts = []
 976     for i in range(n_ops):
 977         src1 = randint(1, dut.n_regs-1)
 978         src2 = randint(1, dut.n_regs-1)
 979         imm = randint(1, (1<<dut.rwid)-1)
 980         dest = randint(1, dut.n_regs-1)
 981         op = randint(0, max_opnums)
 982         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 983
 984         if shadowing:
 985             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 986         else:
 987             insts.append((src1, src2, dest, op, opi, imm))
 988     return insts
 989
 990
 991 def wait_for_busy_clear(dut):
 992     while True:
 993         busy_o = yield dut.busy_o
 994         if not busy_o:
 995             break
 996         print ("busy",)
 997         yield
 998
 999 def disable_issue(dut):
1000     yield dut.aluissue.insn_i.eq(0)
1001     yield dut.brissue.insn_i.eq(0)
1002     yield dut.lsissue.insn_i.eq(0)
1003
1004
def wait_for_issue(dut, dut_issue):
    """Simulation process: wait until dut_issue.fn_issue_o is non-zero.

    While the issue group has not accepted the instruction, "busy" is
    printed and one clock is waited.  Once it has, every issue request
    is withdrawn and reg_enable_i is cleared.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1016
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a branch with shadowed instructions.

    Instructions are 5-field tuples
        (src1, src2, dest, op, (shadow_on, shadow_off))
    For a branch (op >= 4) the dest slot instead holds a pair of lists
    (branch_ok, branch_fail) of instructions (or None) to queue behind
    the branch, marked as dependent on it succeeding / failing.

    The instructions are issued through int_instr() (so dut must provide
    the ports that function drives), then replayed on the software model
    alusim, and finally the DUT registers are checked against the model.

    The tuples carry no immediate, so 0 is passed for the immediate
    (and for the "use immediate" flag of the model) in both places.
    """
    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # NOTE(review): create_random_ops(..., shadowing=True) returns
            # 7-field tuples, which the 5-field unpacking in the issue
            # loop below cannot handle - adapt before enabling.
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # untouched copy for the software model: the issue loop below
        # consumes (and extends) insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes the immediate as its third argument
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the software model, following only the branch
        # direction that the model itself computes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # the model's op() takes (op, op_imm, imm, src1, src2, dest)
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1129
1130
def _pad_instr(instr):
    """Expand an instruction tuple to the full 7-field form.

    The full form is (src1, src2, dest, op, opi, imm, (br_ok, br_fail)).
    Shorter tuples are accepted too: a trailing tuple, if present, is
    taken as the (br_ok, br_fail) pair, otherwise (0, 0) is used; any
    missing fields before it (opi, imm) are filled with 0.
    """
    fields = list(instr)
    shadow = (0, 0)
    if fields and isinstance(fields[-1], tuple):
        shadow = fields.pop()
    fields += [0] * (6 - len(fields))
    return tuple(fields) + (shadow,)


def scoreboard_sim(dut, alusim):
    """Simulation process: queue instructions and check the registers.

    Registers 1..dut.n_regs-1 of the DUT and of the software model
    alusim are loaded with the same random values (seed 0).  Each
    selected instruction is applied to the model and pushed into the
    DUT with instr_q().  Once the queue has drained and the DUT is no
    longer busy, the DUT registers are compared against the model.

    The "if False:" sections are regression cases kept for reference;
    switch one to "if True:" to run it.  Several of them were written
    with fewer tuple fields than the issue loop expects, so every
    instruction is normalised with _pad_instr() before use.
    """
    seed(0)

    for _ in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+regnum*3
            #val = regnum
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            (src1, src2, dest, op, opi, imm,
             (br_ok, br_fail)) = _pad_instr(instr)

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for the instruction queue to drain ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... allow a few more clocks, then wait for the DUT to go idle
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1277
1278
def test_scoreboard():
    """Build the DUT, write its RTLIL to disk and run the simulation.

    The RTLIL goes to test_scoreboard6600.il and the simulation trace
    to test_scoreboard6600.vcd, both in the current directory.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(rwidth=16, nregs=8)
    memsim = MemSim(regwid=16, addrw=16) # created but not used yet

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                        vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1292
1293
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()