src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117             self.go_st_i = Signal(n_units, reset_less=True)
 118
 119         # outputs
 120         self.busy_o = Signal(n_units, reset_less=True)
 121         self.rd_rel_o = Signal(n_units, reset_less=True)
 122         self.req_rel_o = Signal(n_units, reset_less=True)
 123         if ldstmode:
 124             self.ld_o = Signal(n_units, reset_less=True) # op is LD
 125             self.st_o = Signal(n_units, reset_less=True) # op is ST
 126             self.adr_rel_o = Signal(n_units, reset_less=True)
 127             self.sto_rel_o = Signal(n_units, reset_less=True)
 128             self.req_rel_o = Signal(n_units, reset_less=True)
 129             self.load_mem_o = Signal(n_units, reset_less=True)
 130             self.stwd_mem_o = Signal(n_units, reset_less=True)
 131             self.addr_o = Signal(rwid, reset_less=True)
 132
 133         # in/out register data (note: not register#, actual data)
 134         self.data_o = Signal(rwid, reset_less=True)
 135         self.src1_i = Signal(rwid, reset_less=True)
 136         self.src2_i = Signal(rwid, reset_less=True)
 137         # input operand
 138
 139     def elaborate(self, platform):
 140         m = Module()
 141         comb = m.d.comb
 142
 143         for i, alu in enumerate(self.units):
 144             setattr(m.submodules, "comp%d" % i, alu)
 145
 146         go_rd_l = []
 147         go_wr_l = []
 148         issue_l = []
 149         busy_l = []
 150         req_rel_l = []
 151         rd_rel_l = []
 152         shadow_l = []
 153         godie_l = []
 154         for alu in self.units:
 155             req_rel_l.append(alu.req_rel_o)
 156             rd_rel_l.append(alu.rd_rel_o)
 157             shadow_l.append(alu.shadown_i)
 158             godie_l.append(alu.go_die_i)
 159             go_wr_l.append(alu.go_wr_i)
 160             go_rd_l.append(alu.go_rd_i)
 161             issue_l.append(alu.issue_i)
 162             busy_l.append(alu.busy_o)
 163         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 164         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 165         comb += self.busy_o.eq(Cat(*busy_l))
 166         comb += Cat(*godie_l).eq(self.go_die_i)
 167         comb += Cat(*shadow_l).eq(self.shadown_i)
 168         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 169         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 170         comb += Cat(*issue_l).eq(self.issue_i)
 171
 172         # connect data register input/output
 173
 174         # merge (OR) all integer FU / ALU outputs to a single value
 175         if self.units:
 176             data_o = treereduce(self.units, "data_o")
 177             comb += self.data_o.eq(data_o)
 178             if self.ldstmode:
 179                 addr_o = treereduce(self.units, "addr_o")
 180                 comb += self.addr_o.eq(addr_o)
 181
 182         for i, alu in enumerate(self.units):
 183             comb += alu.src1_i.eq(self.src1_i)
 184             comb += alu.src2_i.eq(self.src2_i)
 185
 186         if not self.ldstmode:
 187             return m
 188
 189         ldmem_l = []
 190         stmem_l = []
 191         go_ad_l = []
 192         go_st_l = []
 193         ld_l = []
 194         st_l = []
 195         adr_rel_l = []
 196         sto_rel_l = []
 197         for alu in self.units:
 198             ld_l.append(alu.ld_o)
 199             st_l.append(alu.st_o)
 200             adr_rel_l.append(alu.adr_rel_o)
 201             sto_rel_l.append(alu.sto_rel_o)
 202             ldmem_l.append(alu.load_mem_o)
 203             stmem_l.append(alu.stwd_mem_o)
 204             go_ad_l.append(alu.go_ad_i)
 205             go_st_l.append(alu.go_st_i)
 206         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 207         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 208         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 209         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 210         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 211         comb += Cat(*go_st_l).eq(self.go_st_i)
 212
 213         return m
 214
 215
 216 class CompUnitLDSTs(CompUnitsBase):
 217
 218     def __init__(self, rwid, opwid, n_ldsts, mem):
 219         """ Inputs:
 220
 221             * :rwid:   bit width of register file(s) - both FP and INT
 222             * :opwid:  operand bit width
 223         """
 224         self.opwid = opwid
 225
 226         # inputs
 227         self.oper_i = Signal(opwid, reset_less=True)
 228         self.imm_i = Signal(rwid, reset_less=True)
 229
 230         # Int ALUs
 231         self.alus = []
 232         for i in range(n_ldsts):
 233             self.alus.append(ALU(rwid))
 234
 235         units = []
 236         for alu in self.alus:
 237             aluopwid = 4 # see compldst.py for "internal" opcode
 238             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 239
 240         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 241
 242     def elaborate(self, platform):
 243         m = CompUnitsBase.elaborate(self, platform)
 244         comb = m.d.comb
 245
 246         # hand the same operation to all units, 4 lower bits though
 247         for alu in self.units:
 248             comb += alu.oper_i[0:4].eq(self.oper_i)
 249             comb += alu.imm_i.eq(self.imm_i)
 250             comb += alu.isalu_i.eq(0)
 251
 252         return m
 253
 254
 255 class CompUnitALUs(CompUnitsBase):
 256
 257     def __init__(self, rwid, opwid, n_alus):
 258         """ Inputs:
 259
 260             * :rwid:   bit width of register file(s) - both FP and INT
 261             * :opwid:  operand bit width
 262         """
 263         self.opwid = opwid
 264
 265         # inputs
 266         self.oper_i = Signal(opwid, reset_less=True)
 267         self.imm_i = Signal(rwid, reset_less=True)
 268
 269         # Int ALUs
 270         alus = []
 271         for i in range(n_alus):
 272             alus.append(ALU(rwid))
 273
 274         units = []
 275         for alu in alus:
 276             aluopwid = 3 # extra bit for immediate mode
 277             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 278
 279         CompUnitsBase.__init__(self, rwid, units)
 280
 281     def elaborate(self, platform):
 282         m = CompUnitsBase.elaborate(self, platform)
 283         comb = m.d.comb
 284
 285         # hand the same operation to all units, only lower 3 bits though
 286         for alu in self.units:
 287             comb += alu.oper_i[0:3].eq(self.oper_i)
 288             comb += alu.imm_i.eq(self.imm_i)
 289
 290         return m
 291
 292
 293 class CompUnitBR(CompUnitsBase):
 294
 295     def __init__(self, rwid, opwid):
 296         """ Inputs:
 297
 298             * :rwid:   bit width of register file(s) - both FP and INT
 299             * :opwid:  operand bit width
 300
 301             Note: bgt unit is returned so that a shadow unit can be created
 302             for it
 303         """
 304         self.opwid = opwid
 305
 306         # inputs
 307         self.oper_i = Signal(opwid, reset_less=True)
 308         self.imm_i = Signal(rwid, reset_less=True)
 309
 310         # Branch ALU and CU
 311         self.bgt = BranchALU(rwid)
 312         aluopwid = 3 # extra bit for immediate mode
 313         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 314         CompUnitsBase.__init__(self, rwid, [self.br1])
 315
 316     def elaborate(self, platform):
 317         m = CompUnitsBase.elaborate(self, platform)
 318         comb = m.d.comb
 319
 320         # hand the same operation to all units
 321         for alu in self.units:
 322             comb += alu.oper_i.eq(self.oper_i)
 323             comb += alu.imm_i.eq(self.imm_i)
 324
 325         return m
 326
 327
 328 class FunctionUnits(Elaboratable):
 329
 330     def __init__(self, n_regs, n_int_alus):
 331         self.n_regs = n_regs
 332         self.n_int_alus = n_int_alus
 333
 334         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 335         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 336         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 337
 338         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 339         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 340
 341         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 342         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 343         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 344
 345         self.readable_o = Signal(n_int_alus, reset_less=True)
 346         self.writable_o = Signal(n_int_alus, reset_less=True)
 347
 348         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 349         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 350         self.go_die_i = Signal(n_int_alus, reset_less=True)
 351         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 352
 353         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 354
 355     def elaborate(self, platform):
 356         m = Module()
 357         comb = m.d.comb
 358         sync = m.d.sync
 359
 360         n_intfus = self.n_int_alus
 361
 362         # Integer FU-FU Dep Matrix
 363         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 364         m.submodules.intfudeps = intfudeps
 365         # Integer FU-Reg Dep Matrix
 366         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 367         m.submodules.intregdeps = intregdeps
 368
 369         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 370         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 371
 372         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 373         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 374
 375         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 376         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 377         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 378
 379         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 380         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 381         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 382         comb += intfudeps.go_die_i.eq(self.go_die_i)
 383         comb += self.readable_o.eq(intfudeps.readable_o)
 384         comb += self.writable_o.eq(intfudeps.writable_o)
 385
 386         # Connect function issue / arrays, and dest/src1/src2
 387         comb += intregdeps.dest_i.eq(self.dest_i)
 388         comb += intregdeps.src_i[0].eq(self.src1_i)
 389         comb += intregdeps.src_i[1].eq(self.src2_i)
 390
 391         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 392         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 393         comb += intregdeps.go_die_i.eq(self.go_die_i)
 394         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 395
 396         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 397         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 398         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 399
 400         return m
 401
 402
 403 class Scoreboard(Elaboratable):
 404     def __init__(self, rwid, n_regs):
 405         """ Inputs:
 406
 407             * :rwid:   bit width of register file(s) - both FP and INT
 408             * :n_regs: depth of register file(s) - number of FP and INT regs
 409         """
 410         self.rwid = rwid
 411         self.n_regs = n_regs
 412
 413         # Register Files
 414         self.intregs = RegFileArray(rwid, n_regs)
 415         self.fpregs = RegFileArray(rwid, n_regs)
 416
 417         # issue q needs to get at these
 418         self.aluissue = IssueUnitGroup(2)
 419         self.lsissue = IssueUnitGroup(2)
 420         self.brissue = IssueUnitGroup(1)
 421         # and these
 422         self.alu_oper_i = Signal(4, reset_less=True)
 423         self.alu_imm_i = Signal(rwid, reset_less=True)
 424         self.br_oper_i = Signal(4, reset_less=True)
 425         self.br_imm_i = Signal(rwid, reset_less=True)
 426         self.ls_oper_i = Signal(4, reset_less=True)
 427         self.ls_imm_i = Signal(rwid, reset_less=True)
 428
 429         # inputs
 430         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 431         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 432         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 433         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 434
 435         # outputs
 436         self.issue_o = Signal(reset_less=True) # instruction was accepted
 437         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 438
 439         # for branch speculation experiment.  branch_direction = 0 if
 440         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 441         # branch_succ and branch_fail are requests to have the current
 442         # instruction be dependent on the branch unit "shadow" capability.
 443         self.branch_succ_i = Signal(reset_less=True)
 444         self.branch_fail_i = Signal(reset_less=True)
 445         self.branch_direction_o = Signal(2, reset_less=True)
 446
 447     def elaborate(self, platform):
 448         m = Module()
 449         comb = m.d.comb
 450         sync = m.d.sync
 451
 452         m.submodules.intregs = self.intregs
 453         m.submodules.fpregs = self.fpregs
 454
 455         # register ports
 456         int_dest = self.intregs.write_port("dest")
 457         int_src1 = self.intregs.read_port("src1")
 458         int_src2 = self.intregs.read_port("src2")
 459
 460         fp_dest = self.fpregs.write_port("dest")
 461         fp_src1 = self.fpregs.read_port("src1")
 462         fp_src2 = self.fpregs.read_port("src2")
 463
 464         # Int ALUs and BR ALUs
 465         n_int_alus = 5
 466         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 467         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 468
 469         # LDST Comp Units
 470         n_ldsts = 2
 471         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 472
 473         # Comp Units
 474         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 475         bgt = cub.bgt # get at the branch computation unit
 476         br1 = cub.br1
 477
 478         # Int FUs
 479         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 480
 481         # Memory FUs
 482         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 483
 484         # Count of number of FUs
 485         n_intfus = n_int_alus
 486         n_fp_fus = 0 # for now
 487
 488         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 489         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 490         m.submodules.intpick1 = intpick1
 491
 492         # INT/FP Issue Unit
 493         regdecode = RegDecode(self.n_regs)
 494         m.submodules.regdecode = regdecode
 495         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 496         m.submodules.issueunit = issueunit
 497
 498         # Shadow Matrix.  currently n_intfus shadows, to be used for
 499         # write-after-write hazards.  NOTE: there is one extra for branches,
 500         # so the shadow width is increased by 1
 501         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 502         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 503
 504         # record previous instruction to cast shadow on current instruction
 505         prev_shadow = Signal(n_intfus)
 506
 507         # Branch Speculation recorder.  tracks the success/fail state as
 508         # each instruction is issued, so that when the branch occurs the
 509         # allow/cancel can be issued as appropriate.
 510         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 511
 512         #---------
 513         # ok start wiring things together...
 514         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 515         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 516         #---------
 517
 518         #---------
 519         # Issue Unit is where it starts.  set up some in/outs for this module
 520         #---------
 521         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 522                      regdecode.src1_i.eq(self.int_src1_i),
 523                      regdecode.src2_i.eq(self.int_src2_i),
 524                      regdecode.enable_i.eq(self.reg_enable_i),
 525                      self.issue_o.eq(issueunit.issue_o)
 526                     ]
 527
 528         # take these to outside (issue needs them)
 529         comb += cua.oper_i.eq(self.alu_oper_i)
 530         comb += cua.imm_i.eq(self.alu_imm_i)
 531         comb += cub.oper_i.eq(self.br_oper_i)
 532         comb += cub.imm_i.eq(self.br_imm_i)
 533         comb += cul.oper_i.eq(self.ls_oper_i)
 534         comb += cul.imm_i.eq(self.ls_imm_i)
 535
 536         # TODO: issueunit.f (FP)
 537
 538         # and int function issue / busy arrays, and dest/src1/src2
 539         comb += intfus.dest_i.eq(regdecode.dest_o)
 540         comb += intfus.src1_i.eq(regdecode.src1_o)
 541         comb += intfus.src2_i.eq(regdecode.src2_o)
 542
 543         fn_issue_o = issueunit.fn_issue_o
 544
 545         comb += intfus.fn_issue_i.eq(fn_issue_o)
 546         comb += issueunit.busy_i.eq(cu.busy_o)
 547         comb += self.busy_o.eq(cu.busy_o.bool())
 548
 549         #---------
 550         # Memory Function Unit
 551         #---------
 552         reset_b = Signal(cul.n_units, reset_less=True)
 553         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 554
 555
 556         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 557         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 558         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 559
 560         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 561         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 562         # issue_i.  multi-issue gets a bit more complex but not a lot.
 563         prior_ldsts = Signal(cul.n_units, reset_less=True)
 564         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 565         with m.If(self.ls_oper_i[2]): # LD bit of operand
 566             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 567         with m.If(self.ls_oper_i[3]): # ST bit of operand
 568             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 569
 570         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 571         # just immediately activate go_adr
 572         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 573
 574         # connect up address data
 575         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 576         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 577
 578         # connect loadable / storable to go_ld/go_st.
 579         # XXX should only be done when the memory ld/st has actually happened!
 580         go_st_i = Signal(cul.n_units, reset_less=True)
 581         go_ld_i = Signal(cul.n_units, reset_less=True)
 582         comb += go_ld_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 583                                   cul.req_rel_o & cul.ld_o)
 584         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 585                                   cul.sto_rel_o & cul.st_o)
 586         comb += memfus.go_ld_i.eq(go_ld_i)
 587         comb += memfus.go_st_i.eq(go_st_i)
 588         #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
 589         comb += cul.go_st_i.eq(go_st_i)
 590
 591         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 592         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 593         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 594
 595         #---------
 596         # merge shadow matrices outputs
 597         #---------
 598
 599         # these are explained in ShadowMatrix docstring, and are to be
 600         # connected to the FUReg and FUFU Matrices, to get them to reset
 601         anydie = Signal(n_intfus, reset_less=True)
 602         allshadown = Signal(n_intfus, reset_less=True)
 603         shreset = Signal(n_intfus, reset_less=True)
 604         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 605         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 606         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 607
 608         #---------
 609         # connect fu-fu matrix
 610         #---------
 611
 612         # Group Picker... done manually for now.
 613         go_rd_o = intpick1.go_rd_o
 614         go_wr_o = intpick1.go_wr_o
 615         go_rd_i = intfus.go_rd_i
 616         go_wr_i = intfus.go_wr_i
 617         go_die_i = intfus.go_die_i
 618         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 619         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 620         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 621         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 622
 623         # Connect Picker
 624         #---------
 625         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 626         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 627         int_rd_o = intfus.readable_o
 628         int_wr_o = intfus.writable_o
 629         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 630         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 631
 632         #---------
 633         # Shadow Matrix
 634         #---------
 635
 636         comb += shadows.issue_i.eq(fn_issue_o)
 637         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 638         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 639         #---------
 640         # NOTE; this setup is for the instruction order preservation...
 641
 642         # connect shadows / go_dies to Computation Units
 643         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 644         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 645
 646         # ok connect first n_int_fu shadows to busy lines, to create an
 647         # instruction-order linked-list-like arrangement, using a bit-matrix
 648         # (instead of e.g. a ring buffer).
 649
 650         # when written, the shadow can be cancelled (and was good)
 651         for i in range(n_intfus):
 652             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 653
 654         # *previous* instruction shadows *current* instruction, and, obviously,
 655         # if the previous is completed (!busy) don't cast the shadow!
 656         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 657         for i in range(n_intfus):
 658             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 659
 660         #---------
 661         # ... and this is for branch speculation.  it uses the extra bit
 662         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 663         # only needs to set shadow_i, s_fail_i and s_good_i
 664
 665         # issue captures shadow_i (if enabled)
 666         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 667
 668         bactive = Signal(reset_less=True)
 669         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 670
 671         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 672         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 673             comb += bshadow.issue_i.eq(fn_issue_o)
 674             for i in range(n_intfus):
 675                 with m.If(fn_issue_o & (Const(1<<i))):
 676                     comb += bshadow.shadow_i[i][0].eq(1)
 677
 678         # finally, we need an indicator to the test infrastructure as to
 679         # whether the branch succeeded or failed, plus, link up to the
 680         # "recorder" of whether the instruction was under shadow or not
 681
 682         with m.If(br1.issue_i):
 683             sync += bspec.active_i.eq(1)
 684         with m.If(self.branch_succ_i):
 685             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 686         with m.If(self.branch_fail_i):
 687             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 688
 689         # branch is active (TODO: a better signal: this is over-using the
 690         # go_write signal - actually the branch should not be "writing")
 691         with m.If(br1.go_wr_i):
 692             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 693             sync += bspec.active_i.eq(0)
 694             comb += bspec.br_i.eq(1)
 695             # branch occurs if data == 1, failed if data == 0
 696             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 697             for i in range(n_intfus):
 698                 # *expected* direction of the branch matched against *actual*
 699                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 700                 # ... or it didn't
 701                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 702
 703         #---------
 704         # Connect Register File(s)
 705         #---------
 706         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 707         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 708         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 709
 710         # connect ALUs to regfule
 711         comb += int_dest.data_i.eq(cu.data_o)
 712         comb += cu.src1_i.eq(int_src1.data_o)
 713         comb += cu.src2_i.eq(int_src2.data_o)
 714
 715         # connect ALU Computation Units
 716         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 717         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 718         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 719
 720         return m
 721
 722     def __iter__(self):
 723         yield from self.intregs
 724         yield from self.fpregs
 725         yield self.int_dest_i
 726         yield self.int_src1_i
 727         yield self.int_src2_i
 728         yield self.issue_o
 729         yield self.branch_succ_i
 730         yield self.branch_fail_i
 731         yield self.branch_direction_o
 732
 733     def ports(self):
 734         return list(self)
 735
 736
 737 class IssueToScoreboard(Elaboratable):
 738
 739     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 740         self.qlen = qlen
 741         self.n_in = n_in
 742         self.n_out = n_out
 743         self.rwid = rwid
 744         self.opw = opwid
 745         self.n_regs = n_regs
 746
 747         mqbits = (int(log(qlen) / log(2))+2, False)
 748         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 749         self.p_ready_o = Signal() # instructions were added
 750         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 751
 752         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 753         self.qlen_o = Signal(mqbits, reset_less=True)
 754
 755     def elaborate(self, platform):
 756         m = Module()
 757         comb = m.d.comb
 758         sync = m.d.sync
 759
 760         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 761         sc = Scoreboard(self.rwid, self.n_regs)
 762         mem = TestMemory(self.rwid, 8) # not too big, takes too long
 763         m.submodules.iq = iq
 764         m.submodules.sc = sc
 765         m.submodules.mem = mem
 766
 767         # get at the regfile for testing
 768         self.intregs = sc.intregs
 769
 770         # and the "busy" signal and instruction queue length
 771         comb += self.busy_o.eq(sc.busy_o)
 772         comb += self.qlen_o.eq(iq.qlen_o)
 773
 774         # link up instruction queue
 775         comb += iq.p_add_i.eq(self.p_add_i)
 776         comb += self.p_ready_o.eq(iq.p_ready_o)
 777         for i in range(self.n_in):
 778             comb += eq(iq.data_i[i], self.data_i[i])
 779
 780         # take instruction and process it.  note that it's possible to
 781         # "inspect" the queue contents *without* actually removing the
 782         # items.  items are only removed when the
 783
 784         # in "waiting" state
 785         wait_issue_br = Signal()
 786         wait_issue_alu = Signal()
 787         wait_issue_ls = Signal()
 788
 789         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 790             # set instruction pop length to 1 if the unit accepted
 791             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 792                 with m.If(iq.qlen_o != 0):
 793                     comb += iq.n_sub_i.eq(1)
 794             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 795                 with m.If(iq.qlen_o != 0):
 796                     comb += iq.n_sub_i.eq(1)
 797             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 798                 with m.If(iq.qlen_o != 0):
 799                     comb += iq.n_sub_i.eq(1)
 800
 801         # see if some instruction(s) are here.  note that this is
 802         # "inspecting" the in-place queue.  note also that on the
 803         # cycle following "waiting" for fn_issue_o to be set, the
 804         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 805         with m.If(iq.qlen_o != 0):
 806             # get the operands and operation
 807             imm = iq.data_o[0].imm_i
 808             dest = iq.data_o[0].dest_i
 809             src1 = iq.data_o[0].src1_i
 810             src2 = iq.data_o[0].src2_i
 811             op = iq.data_o[0].oper_i
 812             opi = iq.data_o[0].opim_i # immediate set
 813
 814             # set the src/dest regs
 815             comb += sc.int_dest_i.eq(dest)
 816             comb += sc.int_src1_i.eq(src1)
 817             comb += sc.int_src2_i.eq(src2)
 818             comb += sc.reg_enable_i.eq(1) # enable the regfile
 819
 820             # choose a Function-Unit-Group
 821             with m.If((op & (0x3<<2)) != 0): # branch
 822                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 823                 comb += sc.br_imm_i.eq(imm)
 824                 comb += sc.brissue.insn_i.eq(1)
 825                 comb += wait_issue_br.eq(1)
 826             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 827                 # see compldst.py
 828                 # bit 0: ADD/SUB
 829                 # bit 1: immed
 830                 # bit 4: LD
 831                 # bit 5: ST
 832                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 833                 comb += sc.ls_imm_i.eq(imm)
 834                 comb += sc.lsissue.insn_i.eq(1)
 835                 comb += wait_issue_ls.eq(1)
 836             with m.Else(): # alu
 837                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 838                 comb += sc.alu_imm_i.eq(imm)
 839                 comb += sc.aluissue.insn_i.eq(1)
 840                 comb += wait_issue_alu.eq(1)
 841
 842             # XXX TODO
 843             # these indicate that the instruction is to be made
 844             # shadow-dependent on
 845             # (either) branch success or branch fail
 846             #yield sc.branch_fail_i.eq(branch_fail)
 847             #yield sc.branch_succ_i.eq(branch_success)
 848
 849         return m
 850
 851     def __iter__(self):
 852         yield self.p_ready_o
 853         for o in self.data_i:
 854             yield from list(o)
 855         yield self.p_add_i
 856
 857     def ports(self):
 858         return list(self)
 859
 860
 861 IADD = 0
 862 ISUB = 1
 863 IMUL = 2
 864 ISHF = 3
 865 IBGT = 4
 866 IBLT = 5
 867 IBEQ = 6
 868 IBNE = 7
 869
 870
 871 class RegSim:
 872     def __init__(self, rwidth, nregs):
 873         self.rwidth = rwidth
 874         self.regs = [0] * nregs
 875
 876     def op(self, op, op_imm, imm, src1, src2, dest):
 877         maxbits = (1 << self.rwidth) - 1
 878         src1 = self.regs[src1] & maxbits
 879         if op_imm:
 880             src2 = imm
 881         else:
 882             src2 = self.regs[src2] & maxbits
 883         if op == IADD:
 884             val = src1 + src2
 885         elif op == ISUB:
 886             val = src1 - src2
 887         elif op == IMUL:
 888             val = src1 * src2
 889         elif op == ISHF:
 890             val = src1 >> (src2 & maxbits)
 891         elif op == IBGT:
 892             val = int(src1 > src2)
 893         elif op == IBLT:
 894             val = int(src1 < src2)
 895         elif op == IBEQ:
 896             val = int(src1 == src2)
 897         elif op == IBNE:
 898             val = int(src1 != src2)
 899         else:
 900             return 0 # LD/ST TODO
 901         val &= maxbits
 902         self.setval(dest, val)
 903         return val
 904
 905     def setval(self, dest, val):
 906         print ("sim setval", dest, hex(val))
 907         self.regs[dest] = val
 908
 909     def dump(self, dut):
 910         for i, val in enumerate(self.regs):
 911             reg = yield dut.intregs.regs[i].reg
 912             okstr = "OK" if reg == val else "!ok"
 913             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 914
 915     def check(self, dut):
 916         for i, val in enumerate(self.regs):
 917             reg = yield dut.intregs.regs[i].reg
 918             if reg != val:
 919                 print("reg %d expected %x received %x\n" % (i, val, reg))
 920                 yield from self.dump(dut)
 921                 assert False
 922
 923 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 924             branch_success, branch_fail):
 925     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 926                'src1_i': src1, 'src2_i': src2}]
 927
 928     sendlen = 1
 929     for idx in range(sendlen):
 930         yield from eq(dut.data_i[idx], instrs[idx])
 931         di = yield dut.data_i[idx]
 932         print ("senddata %d %x" % (idx, di))
 933     yield dut.p_add_i.eq(sendlen)
 934     yield
 935     o_p_ready = yield dut.p_ready_o
 936     while not o_p_ready:
 937         yield
 938         o_p_ready = yield dut.p_ready_o
 939
 940     yield dut.p_add_i.eq(0)
 941
 942
 943 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 944     yield from disable_issue(dut)
 945     yield dut.int_dest_i.eq(dest)
 946     yield dut.int_src1_i.eq(src1)
 947     yield dut.int_src2_i.eq(src2)
 948     if (op & (0x3<<2)) != 0: # branch
 949         yield dut.brissue.insn_i.eq(1)
 950         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 951         yield dut.br_imm_i.eq(imm)
 952         dut_issue = dut.brissue
 953     else:
 954         yield dut.aluissue.insn_i.eq(1)
 955         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 956         yield dut.alu_imm_i.eq(imm)
 957         dut_issue = dut.aluissue
 958     yield dut.reg_enable_i.eq(1)
 959
 960     # these indicate that the instruction is to be made shadow-dependent on
 961     # (either) branch success or branch fail
 962     yield dut.branch_fail_i.eq(branch_fail)
 963     yield dut.branch_succ_i.eq(branch_success)
 964
 965     yield
 966     yield from wait_for_issue(dut, dut_issue)
 967
 968
 969 def print_reg(dut, rnums):
 970     rs = []
 971     for rnum in rnums:
 972         reg = yield dut.intregs.regs[rnum].reg
 973         rs.append("%x" % reg)
 974     rnums = map(str, rnums)
 975     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 976
 977
 978 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 979     insts = []
 980     for i in range(n_ops):
 981         src1 = randint(1, dut.n_regs-1)
 982         src2 = randint(1, dut.n_regs-1)
 983         imm = randint(1, (1<<dut.rwid)-1)
 984         dest = randint(1, dut.n_regs-1)
 985         op = randint(0, max_opnums)
 986         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 987
 988         if shadowing:
 989             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 990         else:
 991             insts.append((src1, src2, dest, op, opi, imm))
 992     return insts
 993
 994
 995 def wait_for_busy_clear(dut):
 996     while True:
 997         busy_o = yield dut.busy_o
 998         if not busy_o:
 999             break
1000         print ("busy",)
1001         yield
1002
def disable_issue(dut):
    """Simulation generator: clear insn_i on the ALU, branch and LD/ST
    issue units, in that order."""
    for unit in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit).insn_i.eq(0)
1007
1008
def wait_for_issue(dut, dut_issue):
    """Simulation generator: clock until *dut_issue* raises fn_issue_o.

    Once the issue unit has accepted, all insn_i requests are withdrawn
    (disable_issue) and the register file enable is dropped.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1020
def scoreboard_branch_sim(dut, alusim):
    """Simulation generator exercising branch shadowing.

    *dut* has to provide ``branch_direction_o``, ``intregs``, ``n_regs``
    and the issue-unit inputs driven by int_instr(); of the classes
    visible in this file only the Scoreboard's port list mentions
    ``branch_direction_o`` (IssueToScoreboard does not define it).
    *alusim* is the RegSim reference model.

    Instructions here are 5-field tuples
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  For a branch
    (op >= 4) the *dest* field instead holds a pair of instruction lists
    ``(branch_ok, branch_fail)`` which get queued behind the branch, and
    the branch's destination register is taken to be *src2*.

    Neither immediates nor the immediate flag exist in this tuple format,
    so int_instr() and alusim.op() are called with them set to zero.
    """

    iseed = 3

    for _run in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        if False:
            # NOTE: disabled.  create_random_ops() with shadowing=True
            # produces 7-field tuples (including opi and imm), which do
            # not match the 5-field unpacking used below; this path needs
            # adapting before it can be re-enabled.
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # untouched copy, replayed through the software model afterwards
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument; this
            # tuple format has none, so pass zero
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the software model, following the branch it computes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op(op, op_imm, imm, src1, src2, dest): no immediate
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1133
1134
def scoreboard_sim(dut, alusim):
    """Simulation generator: queue a set of instructions and check results.

    Registers 1..n_regs-1 are loaded with (seeded) random values in both
    the DUT and the *alusim* reference model.  Each instruction is applied
    to the model and pushed into the DUT's queue with instr_q().  When the
    queue has drained and the DUT is no longer busy the register files are
    compared.

    Instructions are 7-field tuples
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.  Exactly one
    group of test vectors is enabled; the others are kept as switchable
    regression cases.  Several of the disabled groups still use shorter
    tuple layouts and would not unpack in the issue loop as they stand.
    """

    seed(0)

    for _run in range(1):

        # set random values in the registers
        reg_max = (1<<alusim.rwidth)-1
        for rnum in range(1, dut.n_regs):
            val = randint(0, reg_max)
            #val = 31+rnum*3
            #val = rnum
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs += [
                (1, 2, 2, 0x10, 1, 1, (0, 0)),
                (1, 2, 7, 0x12, 1, 1, (0, 0)),
            ]

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [
                (7, 3, 2, 4, (0, 0)),
                (7, 6, 6, 2, (0, 0)),
                (1, 7, 2, 2, (0, 0)),
            ]

        if False:
            instrs += [
                (2, 3, 3, 0, 0, 0, (0, 0)),
                (5, 3, 3, 1, 0, 0, (0, 0)),
                (3, 5, 5, 2, 0, 0, (0, 0)),
                (5, 3, 3, 3, 0, 0, (0, 0)),
                (3, 5, 5, 0, 0, 0, (0, 0)),
            ]

        if False:
            instrs += [
                (3, 3, 4, 0, 0, 13979, (0, 0)),
                (6, 4, 1, 2, 0, 40976, (0, 0)),
                (1, 4, 7, 4, 1, 23652, (0, 0)),
            ]

        if False:
            instrs += [
                (5, 6, 2, 1),
                (2, 2, 4, 0),
                #(2, 2, 3, 1),
            ]

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [
                (2, 6, 2, 1),
                (2, 1, 2, 0),
            ]

        if False:
            instrs += [
                (1, 2, 7, 2),
                (7, 1, 5, 0),
                (4, 4, 1, 1),
            ]

        if False:
            instrs += [
                (5, 6, 2, 2),
                (1, 1, 4, 1),
                (6, 5, 3, 0),
            ]

        if False:
            # Write-after-Write Hazard
            instrs += [
                (3, 6, 7, 2),
                (4, 4, 7, 1),
            ]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
            ]

        if False:
            # self-read-write sandwich
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            # very weird failure
            instrs += [
                (5, 2, 5, 2),
                (2, 6, 3, 0),
                (4, 2, 2, 1),
            ]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (0, 1)),
            ]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (1, 0)),
            ]

        if False:
            instrs += [
                (4, 3, 5, 1, 0, (0, 0)),
                (5, 2, 3, 1, 0, (0, 0)),
                (7, 1, 5, 2, 0, (0, 0)),
                (5, 6, 6, 4, 0, (0, 0)),
                (7, 5, 2, 2, 0, (1, 0)),
                (1, 7, 5, 0, 0, (0, 1)),
                (1, 6, 1, 2, 0, (1, 0)),
                (1, 6, 7, 3, 0, (0, 0)),
                (6, 7, 7, 0, 0, (0, 0)),
            ]

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (idx, src1, src2, dest, op, opi, imm))
            # model first, then hand the same instruction to the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first for the queue to drain ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... then a few extra clocks, then for busy to drop
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1281
1282
def test_scoreboard():
    """Build the IssueToScoreboard harness, write its RTLIL to
    test_scoreboard6600.il and run scoreboard_sim against it (with a VCD
    trace in test_scoreboard6600.vcd)."""
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(rwidth=16, nregs=8)
    memsim = MemSim(16, 16)

    # dump the design as RTLIL
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1296
1297
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()