src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117             self.go_st_i = Signal(n_units, reset_less=True)
 118
 119         # outputs
 120         self.busy_o = Signal(n_units, reset_less=True)
 121         self.rd_rel_o = Signal(n_units, reset_less=True)
 122         self.req_rel_o = Signal(n_units, reset_less=True)
 123         if ldstmode:
 124             self.ld_o = Signal(n_units, reset_less=True) # op is LD
 125             self.st_o = Signal(n_units, reset_less=True) # op is ST
 126             self.adr_rel_o = Signal(n_units, reset_less=True)
 127             self.sto_rel_o = Signal(n_units, reset_less=True)
 128             self.req_rel_o = Signal(n_units, reset_less=True)
 129             self.load_mem_o = Signal(n_units, reset_less=True)
 130             self.stwd_mem_o = Signal(n_units, reset_less=True)
 131             self.addr_o = Signal(rwid, reset_less=True)
 132
 133         # in/out register data (note: not register#, actual data)
 134         self.data_o = Signal(rwid, reset_less=True)
 135         self.src1_i = Signal(rwid, reset_less=True)
 136         self.src2_i = Signal(rwid, reset_less=True)
 137         # input operand
 138
 139     def elaborate(self, platform):
 140         m = Module()
 141         comb = m.d.comb
 142
 143         for i, alu in enumerate(self.units):
 144             setattr(m.submodules, "comp%d" % i, alu)
 145
 146         go_rd_l = []
 147         go_wr_l = []
 148         issue_l = []
 149         busy_l = []
 150         req_rel_l = []
 151         rd_rel_l = []
 152         shadow_l = []
 153         godie_l = []
 154         for alu in self.units:
 155             req_rel_l.append(alu.req_rel_o)
 156             rd_rel_l.append(alu.rd_rel_o)
 157             shadow_l.append(alu.shadown_i)
 158             godie_l.append(alu.go_die_i)
 159             go_wr_l.append(alu.go_wr_i)
 160             go_rd_l.append(alu.go_rd_i)
 161             issue_l.append(alu.issue_i)
 162             busy_l.append(alu.busy_o)
 163         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 164         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 165         comb += self.busy_o.eq(Cat(*busy_l))
 166         comb += Cat(*godie_l).eq(self.go_die_i)
 167         comb += Cat(*shadow_l).eq(self.shadown_i)
 168         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 169         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 170         comb += Cat(*issue_l).eq(self.issue_i)
 171
 172         # connect data register input/output
 173
 174         # merge (OR) all integer FU / ALU outputs to a single value
 175         if self.units:
 176             data_o = treereduce(self.units, "data_o")
 177             comb += self.data_o.eq(data_o)
 178             if self.ldstmode:
 179                 addr_o = treereduce(self.units, "addr_o")
 180                 comb += self.addr_o.eq(addr_o)
 181
 182         for i, alu in enumerate(self.units):
 183             comb += alu.src1_i.eq(self.src1_i)
 184             comb += alu.src2_i.eq(self.src2_i)
 185
 186         if not self.ldstmode:
 187             return m
 188
 189         ldmem_l = []
 190         stmem_l = []
 191         go_ad_l = []
 192         go_st_l = []
 193         ld_l = []
 194         st_l = []
 195         adr_rel_l = []
 196         sto_rel_l = []
 197         for alu in self.units:
 198             ld_l.append(alu.ld_o)
 199             st_l.append(alu.st_o)
 200             adr_rel_l.append(alu.adr_rel_o)
 201             sto_rel_l.append(alu.sto_rel_o)
 202             ldmem_l.append(alu.load_mem_o)
 203             stmem_l.append(alu.stwd_mem_o)
 204             go_ad_l.append(alu.go_ad_i)
 205             go_st_l.append(alu.go_st_i)
 206         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 207         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 208         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 209         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 210         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 211         comb += Cat(*go_st_l).eq(self.go_st_i)
 212
 213         return m
 214
 215
 216 class CompUnitLDSTs(CompUnitsBase):
 217
 218     def __init__(self, rwid, opwid, n_ldsts, mem):
 219         """ Inputs:
 220
 221             * :rwid:   bit width of register file(s) - both FP and INT
 222             * :opwid:  operand bit width
 223         """
 224         self.opwid = opwid
 225
 226         # inputs
 227         self.oper_i = Signal(opwid, reset_less=True)
 228         self.imm_i = Signal(rwid, reset_less=True)
 229
 230         # Int ALUs
 231         self.alus = []
 232         for i in range(n_ldsts):
 233             self.alus.append(ALU(rwid))
 234
 235         units = []
 236         for alu in self.alus:
 237             aluopwid = 4 # see compldst.py for "internal" opcode
 238             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 239
 240         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 241
 242     def elaborate(self, platform):
 243         m = CompUnitsBase.elaborate(self, platform)
 244         comb = m.d.comb
 245
 246         # hand the same operation to all units, 4 lower bits though
 247         for alu in self.units:
 248             comb += alu.oper_i[0:4].eq(self.oper_i)
 249             comb += alu.imm_i.eq(self.imm_i)
 250             comb += alu.isalu_i.eq(0)
 251
 252         return m
 253
 254
 255 class CompUnitALUs(CompUnitsBase):
 256
 257     def __init__(self, rwid, opwid, n_alus):
 258         """ Inputs:
 259
 260             * :rwid:   bit width of register file(s) - both FP and INT
 261             * :opwid:  operand bit width
 262         """
 263         self.opwid = opwid
 264
 265         # inputs
 266         self.oper_i = Signal(opwid, reset_less=True)
 267         self.imm_i = Signal(rwid, reset_less=True)
 268
 269         # Int ALUs
 270         alus = []
 271         for i in range(n_alus):
 272             alus.append(ALU(rwid))
 273
 274         units = []
 275         for alu in alus:
 276             aluopwid = 3 # extra bit for immediate mode
 277             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 278
 279         CompUnitsBase.__init__(self, rwid, units)
 280
 281     def elaborate(self, platform):
 282         m = CompUnitsBase.elaborate(self, platform)
 283         comb = m.d.comb
 284
 285         # hand the same operation to all units, only lower 3 bits though
 286         for alu in self.units:
 287             comb += alu.oper_i[0:3].eq(self.oper_i)
 288             comb += alu.imm_i.eq(self.imm_i)
 289
 290         return m
 291
 292
 293 class CompUnitBR(CompUnitsBase):
 294
 295     def __init__(self, rwid, opwid):
 296         """ Inputs:
 297
 298             * :rwid:   bit width of register file(s) - both FP and INT
 299             * :opwid:  operand bit width
 300
 301             Note: bgt unit is returned so that a shadow unit can be created
 302             for it
 303         """
 304         self.opwid = opwid
 305
 306         # inputs
 307         self.oper_i = Signal(opwid, reset_less=True)
 308         self.imm_i = Signal(rwid, reset_less=True)
 309
 310         # Branch ALU and CU
 311         self.bgt = BranchALU(rwid)
 312         aluopwid = 3 # extra bit for immediate mode
 313         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 314         CompUnitsBase.__init__(self, rwid, [self.br1])
 315
 316     def elaborate(self, platform):
 317         m = CompUnitsBase.elaborate(self, platform)
 318         comb = m.d.comb
 319
 320         # hand the same operation to all units
 321         for alu in self.units:
 322             comb += alu.oper_i.eq(self.oper_i)
 323             comb += alu.imm_i.eq(self.imm_i)
 324
 325         return m
 326
 327
 328 class FunctionUnits(Elaboratable):
 329
 330     def __init__(self, n_regs, n_int_alus):
 331         self.n_regs = n_regs
 332         self.n_int_alus = n_int_alus
 333
 334         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 335         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 336         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 337
 338         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 339         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 340
 341         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 342         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 343         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 344
 345         self.readable_o = Signal(n_int_alus, reset_less=True)
 346         self.writable_o = Signal(n_int_alus, reset_less=True)
 347
 348         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 349         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 350         self.go_die_i = Signal(n_int_alus, reset_less=True)
 351         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 352
 353         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 354
 355     def elaborate(self, platform):
 356         m = Module()
 357         comb = m.d.comb
 358         sync = m.d.sync
 359
 360         n_intfus = self.n_int_alus
 361
 362         # Integer FU-FU Dep Matrix
 363         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 364         m.submodules.intfudeps = intfudeps
 365         # Integer FU-Reg Dep Matrix
 366         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 367         m.submodules.intregdeps = intregdeps
 368
 369         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 370         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 371
 372         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 373         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 374
 375         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 376         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 377         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 378
 379         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 380         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 381         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 382         comb += intfudeps.go_die_i.eq(self.go_die_i)
 383         comb += self.readable_o.eq(intfudeps.readable_o)
 384         comb += self.writable_o.eq(intfudeps.writable_o)
 385
 386         # Connect function issue / arrays, and dest/src1/src2
 387         comb += intregdeps.dest_i.eq(self.dest_i)
 388         comb += intregdeps.src_i[0].eq(self.src1_i)
 389         comb += intregdeps.src_i[1].eq(self.src2_i)
 390
 391         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 392         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 393         comb += intregdeps.go_die_i.eq(self.go_die_i)
 394         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 395
 396         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 397         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 398         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 399
 400         return m
 401
 402
 403 class Scoreboard(Elaboratable):
 404     def __init__(self, rwid, n_regs):
 405         """ Inputs:
 406
 407             * :rwid:   bit width of register file(s) - both FP and INT
 408             * :n_regs: depth of register file(s) - number of FP and INT regs
 409         """
 410         self.rwid = rwid
 411         self.n_regs = n_regs
 412
 413         # Register Files
 414         self.intregs = RegFileArray(rwid, n_regs)
 415         self.fpregs = RegFileArray(rwid, n_regs)
 416
 417         # issue q needs to get at these
 418         self.aluissue = IssueUnitGroup(2)
 419         self.lsissue = IssueUnitGroup(2)
 420         self.brissue = IssueUnitGroup(1)
 421         # and these
 422         self.alu_oper_i = Signal(4, reset_less=True)
 423         self.alu_imm_i = Signal(rwid, reset_less=True)
 424         self.br_oper_i = Signal(4, reset_less=True)
 425         self.br_imm_i = Signal(rwid, reset_less=True)
 426         self.ls_oper_i = Signal(4, reset_less=True)
 427         self.ls_imm_i = Signal(rwid, reset_less=True)
 428
 429         # inputs
 430         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 431         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 432         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 433         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 434
 435         # outputs
 436         self.issue_o = Signal(reset_less=True) # instruction was accepted
 437         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 438
 439         # for branch speculation experiment.  branch_direction = 0 if
 440         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 441         # branch_succ and branch_fail are requests to have the current
 442         # instruction be dependent on the branch unit "shadow" capability.
 443         self.branch_succ_i = Signal(reset_less=True)
 444         self.branch_fail_i = Signal(reset_less=True)
 445         self.branch_direction_o = Signal(2, reset_less=True)
 446
 447     def elaborate(self, platform):
 448         m = Module()
 449         comb = m.d.comb
 450         sync = m.d.sync
 451
 452         m.submodules.intregs = self.intregs
 453         m.submodules.fpregs = self.fpregs
 454
 455         # register ports
 456         int_dest = self.intregs.write_port("dest")
 457         int_src1 = self.intregs.read_port("src1")
 458         int_src2 = self.intregs.read_port("src2")
 459
 460         fp_dest = self.fpregs.write_port("dest")
 461         fp_src1 = self.fpregs.read_port("src1")
 462         fp_src2 = self.fpregs.read_port("src2")
 463
 464         # Int ALUs and BR ALUs
 465         n_int_alus = 5
 466         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 467         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 468
 469         # LDST Comp Units
 470         n_ldsts = 2
 471         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 472
 473         # Comp Units
 474         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 475         bgt = cub.bgt # get at the branch computation unit
 476         br1 = cub.br1
 477
 478         # Int FUs
 479         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 480
 481         # Memory FUs
 482         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 483
 484         # Count of number of FUs
 485         n_intfus = n_int_alus
 486         n_fp_fus = 0 # for now
 487
 488         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 489         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 490         m.submodules.intpick1 = intpick1
 491
 492         # INT/FP Issue Unit
 493         regdecode = RegDecode(self.n_regs)
 494         m.submodules.regdecode = regdecode
 495         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 496         m.submodules.issueunit = issueunit
 497
 498         # Shadow Matrix.  currently n_intfus shadows, to be used for
 499         # write-after-write hazards.  NOTE: there is one extra for branches,
 500         # so the shadow width is increased by 1
 501         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 502         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 503
 504         # record previous instruction to cast shadow on current instruction
 505         prev_shadow = Signal(n_intfus)
 506
 507         # Branch Speculation recorder.  tracks the success/fail state as
 508         # each instruction is issued, so that when the branch occurs the
 509         # allow/cancel can be issued as appropriate.
 510         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 511
 512         #---------
 513         # ok start wiring things together...
 514         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 515         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 516         #---------
 517
 518         #---------
 519         # Issue Unit is where it starts.  set up some in/outs for this module
 520         #---------
 521         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 522                      regdecode.src1_i.eq(self.int_src1_i),
 523                      regdecode.src2_i.eq(self.int_src2_i),
 524                      regdecode.enable_i.eq(self.reg_enable_i),
 525                      self.issue_o.eq(issueunit.issue_o)
 526                     ]
 527
 528         # take these to outside (issue needs them)
 529         comb += cua.oper_i.eq(self.alu_oper_i)
 530         comb += cua.imm_i.eq(self.alu_imm_i)
 531         comb += cub.oper_i.eq(self.br_oper_i)
 532         comb += cub.imm_i.eq(self.br_imm_i)
 533         comb += cul.oper_i.eq(self.ls_oper_i)
 534         comb += cul.imm_i.eq(self.ls_imm_i)
 535
 536         # TODO: issueunit.f (FP)
 537
 538         # and int function issue / busy arrays, and dest/src1/src2
 539         comb += intfus.dest_i.eq(regdecode.dest_o)
 540         comb += intfus.src1_i.eq(regdecode.src1_o)
 541         comb += intfus.src2_i.eq(regdecode.src2_o)
 542
 543         fn_issue_o = issueunit.fn_issue_o
 544
 545         comb += intfus.fn_issue_i.eq(fn_issue_o)
 546         comb += issueunit.busy_i.eq(cu.busy_o)
 547         comb += self.busy_o.eq(cu.busy_o.bool())
 548
 549         #---------
 550         # Memory Function Unit
 551         #---------
 552         reset_b = Signal(cul.n_units, reset_less=True)
 553         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 554
 555         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 556         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 557         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 558
 559         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 560         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 561         # issue_i.  multi-issue gets a bit more complex but not a lot.
 562         prior_ldsts = Signal(cul.n_units, reset_less=True)
 563         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 564         with m.If(self.ls_oper_i[2]): # LD bit of operand
 565             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 566         with m.If(self.ls_oper_i[3]): # ST bit of operand
 567             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 568
 569         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 570         # just immediately activate go_adr
 571         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 572
 573         # connect up address data
 574         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 575         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 576
 577         # connect loadable / storable to go_ld/go_st.
 578         # XXX should only be done when the memory ld/st has actually happened!
 579         go_st_i = Signal(cul.n_units, reset_less=True)
 580         go_ld_i = Signal(cul.n_units, reset_less=True)
 581         comb += go_ld_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 582                                   cul.req_rel_o & cul.ld_o)
 583         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 584                                   cul.sto_rel_o & cul.st_o)
 585         comb += memfus.go_ld_i.eq(go_ld_i)
 586         comb += memfus.go_st_i.eq(go_st_i)
 587         #comb += cul.go_wr_i.eq(go_ld_i)
 588         comb += cul.go_st_i.eq(go_st_i)
 589
 590         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 591         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 592         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 593
 594         #---------
 595         # merge shadow matrices outputs
 596         #---------
 597
 598         # these are explained in ShadowMatrix docstring, and are to be
 599         # connected to the FUReg and FUFU Matrices, to get them to reset
 600         anydie = Signal(n_intfus, reset_less=True)
 601         allshadown = Signal(n_intfus, reset_less=True)
 602         shreset = Signal(n_intfus, reset_less=True)
 603         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 604         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 605         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 606
 607         #---------
 608         # connect fu-fu matrix
 609         #---------
 610
 611         # Group Picker... done manually for now.
 612         go_rd_o = intpick1.go_rd_o
 613         go_wr_o = intpick1.go_wr_o
 614         go_rd_i = intfus.go_rd_i
 615         go_wr_i = intfus.go_wr_i
 616         go_die_i = intfus.go_die_i
 617         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 618         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 619         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 620         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 621
 622         # Connect Picker
 623         #---------
 624         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 625         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 626         int_rd_o = intfus.readable_o
 627         int_wr_o = intfus.writable_o
 628         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 629         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 630
 631         #---------
 632         # Shadow Matrix
 633         #---------
 634
 635         comb += shadows.issue_i.eq(fn_issue_o)
 636         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 637         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 638         #---------
 639         # NOTE; this setup is for the instruction order preservation...
 640
 641         # connect shadows / go_dies to Computation Units
 642         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 643         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 644
 645         # ok connect first n_int_fu shadows to busy lines, to create an
 646         # instruction-order linked-list-like arrangement, using a bit-matrix
 647         # (instead of e.g. a ring buffer).
 648
 649         # when written, the shadow can be cancelled (and was good)
 650         for i in range(n_intfus):
 651             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 652
 653         # *previous* instruction shadows *current* instruction, and, obviously,
 654         # if the previous is completed (!busy) don't cast the shadow!
 655         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 656         for i in range(n_intfus):
 657             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 658
 659         #---------
 660         # ... and this is for branch speculation.  it uses the extra bit
 661         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 662         # only needs to set shadow_i, s_fail_i and s_good_i
 663
 664         # issue captures shadow_i (if enabled)
 665         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 666
 667         bactive = Signal(reset_less=True)
 668         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 669
 670         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 671         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 672             comb += bshadow.issue_i.eq(fn_issue_o)
 673             for i in range(n_intfus):
 674                 with m.If(fn_issue_o & (Const(1<<i))):
 675                     comb += bshadow.shadow_i[i][0].eq(1)
 676
 677         # finally, we need an indicator to the test infrastructure as to
 678         # whether the branch succeeded or failed, plus, link up to the
 679         # "recorder" of whether the instruction was under shadow or not
 680
 681         with m.If(br1.issue_i):
 682             sync += bspec.active_i.eq(1)
 683         with m.If(self.branch_succ_i):
 684             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 685         with m.If(self.branch_fail_i):
 686             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 687
 688         # branch is active (TODO: a better signal: this is over-using the
 689         # go_write signal - actually the branch should not be "writing")
 690         with m.If(br1.go_wr_i):
 691             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 692             sync += bspec.active_i.eq(0)
 693             comb += bspec.br_i.eq(1)
 694             # branch occurs if data == 1, failed if data == 0
 695             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 696             for i in range(n_intfus):
 697                 # *expected* direction of the branch matched against *actual*
 698                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 699                 # ... or it didn't
 700                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 701
 702         #---------
 703         # Connect Register File(s)
 704         #---------
 705         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 706         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 707         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 708
 709         # connect ALUs to regfule
 710         comb += int_dest.data_i.eq(cu.data_o)
 711         comb += cu.src1_i.eq(int_src1.data_o)
 712         comb += cu.src2_i.eq(int_src2.data_o)
 713
 714         # connect ALU Computation Units
 715         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 716         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 717         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 718
 719         return m
 720
 721     def __iter__(self):
 722         yield from self.intregs
 723         yield from self.fpregs
 724         yield self.int_dest_i
 725         yield self.int_src1_i
 726         yield self.int_src2_i
 727         yield self.issue_o
 728         yield self.branch_succ_i
 729         yield self.branch_fail_i
 730         yield self.branch_direction_o
 731
 732     def ports(self):
 733         return list(self)
 734
 735
 736 class IssueToScoreboard(Elaboratable):
 737
 738     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 739         self.qlen = qlen
 740         self.n_in = n_in
 741         self.n_out = n_out
 742         self.rwid = rwid
 743         self.opw = opwid
 744         self.n_regs = n_regs
 745
 746         mqbits = (int(log(qlen) / log(2))+2, False)
 747         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 748         self.p_ready_o = Signal() # instructions were added
 749         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 750
 751         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 752         self.qlen_o = Signal(mqbits, reset_less=True)
 753
 754     def elaborate(self, platform):
 755         m = Module()
 756         comb = m.d.comb
 757         sync = m.d.sync
 758
 759         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 760         sc = Scoreboard(self.rwid, self.n_regs)
 761         mem = TestMemory(self.rwid, 8) # not too big, takes too long
 762         m.submodules.iq = iq
 763         m.submodules.sc = sc
 764         m.submodules.mem = mem
 765
 766         # get at the regfile for testing
 767         self.intregs = sc.intregs
 768
 769         # and the "busy" signal and instruction queue length
 770         comb += self.busy_o.eq(sc.busy_o)
 771         comb += self.qlen_o.eq(iq.qlen_o)
 772
 773         # link up instruction queue
 774         comb += iq.p_add_i.eq(self.p_add_i)
 775         comb += self.p_ready_o.eq(iq.p_ready_o)
 776         for i in range(self.n_in):
 777             comb += eq(iq.data_i[i], self.data_i[i])
 778
 779         # take instruction and process it.  note that it's possible to
 780         # "inspect" the queue contents *without* actually removing the
 781         # items.  items are only removed when the
 782
 783         # in "waiting" state
 784         wait_issue_br = Signal()
 785         wait_issue_alu = Signal()
 786         wait_issue_ls = Signal()
 787
 788         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 789             # set instruction pop length to 1 if the unit accepted
 790             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 791                 with m.If(iq.qlen_o != 0):
 792                     comb += iq.n_sub_i.eq(1)
 793             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 794                 with m.If(iq.qlen_o != 0):
 795                     comb += iq.n_sub_i.eq(1)
 796             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 797                 with m.If(iq.qlen_o != 0):
 798                     comb += iq.n_sub_i.eq(1)
 799
 800         # see if some instruction(s) are here.  note that this is
 801         # "inspecting" the in-place queue.  note also that on the
 802         # cycle following "waiting" for fn_issue_o to be set, the
 803         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 804         with m.If(iq.qlen_o != 0):
 805             # get the operands and operation
 806             imm = iq.data_o[0].imm_i
 807             dest = iq.data_o[0].dest_i
 808             src1 = iq.data_o[0].src1_i
 809             src2 = iq.data_o[0].src2_i
 810             op = iq.data_o[0].oper_i
 811             opi = iq.data_o[0].opim_i # immediate set
 812
 813             # set the src/dest regs
 814             comb += sc.int_dest_i.eq(dest)
 815             comb += sc.int_src1_i.eq(src1)
 816             comb += sc.int_src2_i.eq(src2)
 817             comb += sc.reg_enable_i.eq(1) # enable the regfile
 818
 819             # choose a Function-Unit-Group
 820             with m.If((op & (0x3<<2)) != 0): # branch
 821                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 822                 comb += sc.br_imm_i.eq(imm)
 823                 comb += sc.brissue.insn_i.eq(1)
 824                 comb += wait_issue_br.eq(1)
 825             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 826                 # see compldst.py
 827                 # bit 0: ADD/SUB
 828                 # bit 1: immed
 829                 # bit 4: LD
 830                 # bit 5: ST
 831                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 832                 comb += sc.ls_imm_i.eq(imm)
 833                 comb += sc.lsissue.insn_i.eq(1)
 834                 comb += wait_issue_ls.eq(1)
 835             with m.Else(): # alu
 836                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 837                 comb += sc.alu_imm_i.eq(imm)
 838                 comb += sc.aluissue.insn_i.eq(1)
 839                 comb += wait_issue_alu.eq(1)
 840
 841             # XXX TODO
 842             # these indicate that the instruction is to be made
 843             # shadow-dependent on
 844             # (either) branch success or branch fail
 845             #yield sc.branch_fail_i.eq(branch_fail)
 846             #yield sc.branch_succ_i.eq(branch_success)
 847
 848         return m
 849
 850     def __iter__(self):
 851         yield self.p_ready_o
 852         for o in self.data_i:
 853             yield from list(o)
 854         yield self.p_add_i
 855
 856     def ports(self):
 857         return list(self)
 858
 859
 860 IADD = 0
 861 ISUB = 1
 862 IMUL = 2
 863 ISHF = 3
 864 IBGT = 4
 865 IBLT = 5
 866 IBEQ = 6
 867 IBNE = 7
 868
 869
 870 class RegSim:
 871     def __init__(self, rwidth, nregs):
 872         self.rwidth = rwidth
 873         self.regs = [0] * nregs
 874
 875     def op(self, op, op_imm, imm, src1, src2, dest):
 876         maxbits = (1 << self.rwidth) - 1
 877         src1 = self.regs[src1] & maxbits
 878         if op_imm:
 879             src2 = imm
 880         else:
 881             src2 = self.regs[src2] & maxbits
 882         if op == IADD:
 883             val = src1 + src2
 884         elif op == ISUB:
 885             val = src1 - src2
 886         elif op == IMUL:
 887             val = src1 * src2
 888         elif op == ISHF:
 889             val = src1 >> (src2 & maxbits)
 890         elif op == IBGT:
 891             val = int(src1 > src2)
 892         elif op == IBLT:
 893             val = int(src1 < src2)
 894         elif op == IBEQ:
 895             val = int(src1 == src2)
 896         elif op == IBNE:
 897             val = int(src1 != src2)
 898         else:
 899             return 0 # LD/ST TODO
 900         val &= maxbits
 901         self.setval(dest, val)
 902         return val
 903
 904     def setval(self, dest, val):
 905         print ("sim setval", dest, hex(val))
 906         self.regs[dest] = val
 907
 908     def dump(self, dut):
 909         for i, val in enumerate(self.regs):
 910             reg = yield dut.intregs.regs[i].reg
 911             okstr = "OK" if reg == val else "!ok"
 912             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 913
 914     def check(self, dut):
 915         for i, val in enumerate(self.regs):
 916             reg = yield dut.intregs.regs[i].reg
 917             if reg != val:
 918                 print("reg %d expected %x received %x\n" % (i, val, reg))
 919                 yield from self.dump(dut)
 920                 assert False
 921
 922 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 923             branch_success, branch_fail):
 924     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 925                'src1_i': src1, 'src2_i': src2}]
 926
 927     sendlen = 1
 928     for idx in range(sendlen):
 929         yield from eq(dut.data_i[idx], instrs[idx])
 930         di = yield dut.data_i[idx]
 931         print ("senddata %d %x" % (idx, di))
 932     yield dut.p_add_i.eq(sendlen)
 933     yield
 934     o_p_ready = yield dut.p_ready_o
 935     while not o_p_ready:
 936         yield
 937         o_p_ready = yield dut.p_ready_o
 938
 939     yield dut.p_add_i.eq(0)
 940
 941
 942 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 943     yield from disable_issue(dut)
 944     yield dut.int_dest_i.eq(dest)
 945     yield dut.int_src1_i.eq(src1)
 946     yield dut.int_src2_i.eq(src2)
 947     if (op & (0x3<<2)) != 0: # branch
 948         yield dut.brissue.insn_i.eq(1)
 949         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 950         yield dut.br_imm_i.eq(imm)
 951         dut_issue = dut.brissue
 952     else:
 953         yield dut.aluissue.insn_i.eq(1)
 954         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 955         yield dut.alu_imm_i.eq(imm)
 956         dut_issue = dut.aluissue
 957     yield dut.reg_enable_i.eq(1)
 958
 959     # these indicate that the instruction is to be made shadow-dependent on
 960     # (either) branch success or branch fail
 961     yield dut.branch_fail_i.eq(branch_fail)
 962     yield dut.branch_succ_i.eq(branch_success)
 963
 964     yield
 965     yield from wait_for_issue(dut, dut_issue)
 966
 967
 968 def print_reg(dut, rnums):
 969     rs = []
 970     for rnum in rnums:
 971         reg = yield dut.intregs.regs[rnum].reg
 972         rs.append("%x" % reg)
 973     rnums = map(str, rnums)
 974     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 975
 976
 977 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 978     insts = []
 979     for i in range(n_ops):
 980         src1 = randint(1, dut.n_regs-1)
 981         src2 = randint(1, dut.n_regs-1)
 982         imm = randint(1, (1<<dut.rwid)-1)
 983         dest = randint(1, dut.n_regs-1)
 984         op = randint(0, max_opnums)
 985         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 986
 987         if shadowing:
 988             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 989         else:
 990             insts.append((src1, src2, dest, op, opi, imm))
 991     return insts
 992
 993
 994 def wait_for_busy_clear(dut):
 995     while True:
 996         busy_o = yield dut.busy_o
 997         if not busy_o:
 998             break
 999         print ("busy",)
1000         yield
1001
def disable_issue(dut):
    """Simulation generator: clear insn_i on the ALU, branch and load/store
    issue units of dut, in that order."""
    for unit_attr in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_attr).insn_i.eq(0)
1006
1007
def wait_for_issue(dut, dut_issue):
    """Simulation generator: step the clock until dut_issue.fn_issue_o
    reads non-zero, printing "busy" for every cycle spent waiting, then
    clear every insn_i request and the regfile enable."""
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        #yield from print_reg(dut, [1,2,3])
        yield
    # accepted: withdraw the request(s) and disable the regfile
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    #yield from print_reg(dut, [1,2,3])
1019
def scoreboard_branch_sim(dut, alusim):
    """Simulation process exercising branches and shadowed instructions.

    Instructions are 5-tuples (src1, src2, dest, op, (shadow_on,
    shadow_off)).  For a branch (op >= 4) the dest slot instead holds a
    pair (branch_ok, branch_fail) of instruction lists; entries of those
    lists may be None.  Instructions are issued to dut through int_instr
    with no immediate (imm=0), and the same program is afterwards replayed
    on alusim (again without immediate) so that the registers can be
    compared with alusim.check.
    """

    def drop_imm(ops):
        # create_random_ops (with shadowing) returns 7-field tuples that
        # include opi and imm: reduce them to the 5-field form used here
        return [(s1, s2, d, o, shadow)
                for (s1, s2, d, o, _opi, _imm, shadow) in ops]

    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers (hardware and model alike)
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            insts = drop_imm(create_random_ops(dut, 1, True, 1))
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = drop_imm(create_random_ops(dut, 1, True, 1))
            branch_fail = drop_imm(create_random_ops(dut, 1, True, 1))

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # untouched copy of the program, replayed on the model further down
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes the immediate right after op: none here, so 0
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # the model takes (op, op_imm, imm, ...): no immediate here
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow only the side of the branch that the model took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1132
1133
def scoreboard_sim(dut, alusim):
    """Simulation process: queue instructions on dut and check the result.

    Registers 1..n_regs-1 are given identical random values in dut and in
    alusim.  Each selected instruction is then applied to alusim and pushed
    into dut through instr_q.  Once the queue has drained and dut is no
    longer busy, the registers are compared with alusim.check.

    Instructions are 7-tuples:
    (src1, src2, dest, op, opi, imm, (br_ok, br_fail)).
    """

    seed(0)

    for _ in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+regnum*3
            #val = regnum
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests).
        # the cases are switched on and off by hand.
        # NOTE: disabled cases with fewer than seven fields do not match
        # the tuple layout unpacked by the issue loop below, and will fail
        # there if enabled as they stand.
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs += [(1, 2, 2, 0x10, 1, 1, (0, 0))]
            #instrs += [(1, 2, 7, 0x10, 1, 1, (0, 0))]

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [(7, 3, 2, 4, (0, 0)),
                       (7, 6, 6, 2, (0, 0)),
                       (1, 7, 2, 2, (0, 0))]

        if False:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            #instrs += [(2, 2, 3, 1)]

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s): the model is updated first, then the
        # instruction is queued (instr_q waits until it has been accepted)
        for idx, fields in enumerate(instrs):
            src1, src2, dest, op, opi, imm, shadow = fields
            br_ok, br_fail = shadow

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (idx, src1, src2, dest, op, opi, imm))
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking: first for the
        # queue to empty, then a few extra cycles, then for busy to clear
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1280
1281
def test_scoreboard():
    """Build the instruction-queue/scoreboard test harness and simulate it.

    The design is converted to RTLIL and written to test_scoreboard6600.il,
    then scoreboard_sim is run against it with a VCD trace written to
    test_scoreboard6600.vcd.
    """
    # qlen=2, n_in=1, n_out=1, rwid=16, opwid=8, n_regs=8
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    # register model matching the design: 16-bit wide, 8 registers
    alusim = RegSim(16, 8)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        f.write(il_text)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                        vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1295
1296
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()