src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117             self.go_st_i = Signal(n_units, reset_less=True)
 118
 119         # outputs
 120         self.busy_o = Signal(n_units, reset_less=True)
 121         self.rd_rel_o = Signal(n_units, reset_less=True)
 122         self.req_rel_o = Signal(n_units, reset_less=True)
 123         if ldstmode:
 124             self.adr_rel_o = Signal(n_units, reset_less=True)
 125             self.sto_rel_o = Signal(n_units, reset_less=True)
 126             self.req_rel_o = Signal(n_units, reset_less=True)
 127             self.load_mem_o = Signal(n_units, reset_less=True)
 128             self.stwd_mem_o = Signal(n_units, reset_less=True)
 129
 130         # in/out register data (note: not register#, actual data)
 131         self.data_o = Signal(rwid, reset_less=True)
 132         self.src1_i = Signal(rwid, reset_less=True)
 133         self.src2_i = Signal(rwid, reset_less=True)
 134         # input operand
 135
 136     def elaborate(self, platform):
 137         m = Module()
 138         comb = m.d.comb
 139
 140         for i, alu in enumerate(self.units):
 141             setattr(m.submodules, "comp%d" % i, alu)
 142
 143         go_rd_l = []
 144         go_wr_l = []
 145         issue_l = []
 146         busy_l = []
 147         req_rel_l = []
 148         rd_rel_l = []
 149         shadow_l = []
 150         godie_l = []
 151         for alu in self.units:
 152             req_rel_l.append(alu.req_rel_o)
 153             rd_rel_l.append(alu.rd_rel_o)
 154             shadow_l.append(alu.shadown_i)
 155             godie_l.append(alu.go_die_i)
 156             go_wr_l.append(alu.go_wr_i)
 157             go_rd_l.append(alu.go_rd_i)
 158             issue_l.append(alu.issue_i)
 159             busy_l.append(alu.busy_o)
 160         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 161         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 162         comb += self.busy_o.eq(Cat(*busy_l))
 163         comb += Cat(*godie_l).eq(self.go_die_i)
 164         comb += Cat(*shadow_l).eq(self.shadown_i)
 165         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 166         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 167         comb += Cat(*issue_l).eq(self.issue_i)
 168
 169         # connect data register input/output
 170
 171         # merge (OR) all integer FU / ALU outputs to a single value
 172         # bit of a hack: treereduce needs a list with an item named "data_o"
 173         if self.units:
 174             data_o = treereduce(self.units)
 175             comb += self.data_o.eq(data_o)
 176
 177         for i, alu in enumerate(self.units):
 178             comb += alu.src1_i.eq(self.src1_i)
 179             comb += alu.src2_i.eq(self.src2_i)
 180
 181         if not self.ldstmode:
 182             return m
 183
 184         ldmem_l = []
 185         stmem_l = []
 186         go_ad_l = []
 187         go_st_l = []
 188         adr_rel_l = []
 189         sto_rel_l = []
 190         for alu in self.units:
 191             adr_rel_l.append(alu.adr_rel_o)
 192             sto_rel_l.append(alu.sto_rel_o)
 193             ldmem_l.append(alu.load_mem_o)
 194             stmem_l.append(alu.stwd_mem_o)
 195             go_ad_l.append(alu.go_ad_i)
 196             go_st_l.append(alu.go_st_i)
 197         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 198         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 199         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 200         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 201         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 202         comb += Cat(*go_st_l).eq(self.go_st_i)
 203
 204         return m
 205
 206
 207 class CompUnitLDSTs(CompUnitsBase):
 208
 209     def __init__(self, rwid, opwid, n_ldsts, mem):
 210         """ Inputs:
 211
 212             * :rwid:   bit width of register file(s) - both FP and INT
 213             * :opwid:  operand bit width
 214         """
 215         self.opwid = opwid
 216
 217         # inputs
 218         self.oper_i = Signal(opwid, reset_less=True)
 219         self.imm_i = Signal(rwid, reset_less=True)
 220
 221         # Int ALUs
 222         self.alus = []
 223         for i in range(n_ldsts):
 224             self.alus.append(ALU(rwid))
 225
 226         units = []
 227         for alu in self.alus:
 228             aluopwid = 4 # see compldst.py for "internal" opcode
 229             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 230
 231         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 232
 233     def elaborate(self, platform):
 234         m = CompUnitsBase.elaborate(self, platform)
 235         comb = m.d.comb
 236
 237         # hand the same operation to all units, 4 lower bits though
 238         for alu in self.units:
 239             comb += alu.oper_i[0:4].eq(self.oper_i)
 240             comb += alu.imm_i.eq(self.imm_i)
 241             comb += alu.isalu_i.eq(0)
 242
 243         return m
 244
 245
 246 class CompUnitALUs(CompUnitsBase):
 247
 248     def __init__(self, rwid, opwid, n_alus):
 249         """ Inputs:
 250
 251             * :rwid:   bit width of register file(s) - both FP and INT
 252             * :opwid:  operand bit width
 253         """
 254         self.opwid = opwid
 255
 256         # inputs
 257         self.oper_i = Signal(opwid, reset_less=True)
 258         self.imm_i = Signal(rwid, reset_less=True)
 259
 260         # Int ALUs
 261         alus = []
 262         for i in range(n_alus):
 263             alus.append(ALU(rwid))
 264
 265         units = []
 266         for alu in alus:
 267             aluopwid = 3 # extra bit for immediate mode
 268             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 269
 270         CompUnitsBase.__init__(self, rwid, units)
 271
 272     def elaborate(self, platform):
 273         m = CompUnitsBase.elaborate(self, platform)
 274         comb = m.d.comb
 275
 276         # hand the same operation to all units, only lower 3 bits though
 277         for alu in self.units:
 278             comb += alu.oper_i[0:3].eq(self.oper_i)
 279             comb += alu.imm_i.eq(self.imm_i)
 280
 281         return m
 282
 283
 284 class CompUnitBR(CompUnitsBase):
 285
 286     def __init__(self, rwid, opwid):
 287         """ Inputs:
 288
 289             * :rwid:   bit width of register file(s) - both FP and INT
 290             * :opwid:  operand bit width
 291
 292             Note: bgt unit is returned so that a shadow unit can be created
 293             for it
 294         """
 295         self.opwid = opwid
 296
 297         # inputs
 298         self.oper_i = Signal(opwid, reset_less=True)
 299         self.imm_i = Signal(rwid, reset_less=True)
 300
 301         # Branch ALU and CU
 302         self.bgt = BranchALU(rwid)
 303         aluopwid = 3 # extra bit for immediate mode
 304         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 305         CompUnitsBase.__init__(self, rwid, [self.br1])
 306
 307     def elaborate(self, platform):
 308         m = CompUnitsBase.elaborate(self, platform)
 309         comb = m.d.comb
 310
 311         # hand the same operation to all units
 312         for alu in self.units:
 313             comb += alu.oper_i.eq(self.oper_i)
 314             comb += alu.imm_i.eq(self.imm_i)
 315
 316         return m
 317
 318
 319 class FunctionUnits(Elaboratable):
 320
 321     def __init__(self, n_regs, n_int_alus):
 322         self.n_regs = n_regs
 323         self.n_int_alus = n_int_alus
 324
 325         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 326         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 327         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 328
 329         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 330         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 331
 332         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 333         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 334         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 335
 336         self.readable_o = Signal(n_int_alus, reset_less=True)
 337         self.writable_o = Signal(n_int_alus, reset_less=True)
 338
 339         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 340         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 341         self.go_die_i = Signal(n_int_alus, reset_less=True)
 342         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 343
 344         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 345
 346     def elaborate(self, platform):
 347         m = Module()
 348         comb = m.d.comb
 349         sync = m.d.sync
 350
 351         n_intfus = self.n_int_alus
 352
 353         # Integer FU-FU Dep Matrix
 354         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 355         m.submodules.intfudeps = intfudeps
 356         # Integer FU-Reg Dep Matrix
 357         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 358         m.submodules.intregdeps = intregdeps
 359
 360         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 361         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 362
 363         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 364         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 365
 366         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 367         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 368         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 369
 370         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 371         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 372         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 373         comb += intfudeps.go_die_i.eq(self.go_die_i)
 374         comb += self.readable_o.eq(intfudeps.readable_o)
 375         comb += self.writable_o.eq(intfudeps.writable_o)
 376
 377         # Connect function issue / arrays, and dest/src1/src2
 378         comb += intregdeps.dest_i.eq(self.dest_i)
 379         comb += intregdeps.src_i[0].eq(self.src1_i)
 380         comb += intregdeps.src_i[1].eq(self.src2_i)
 381
 382         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 383         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 384         comb += intregdeps.go_die_i.eq(self.go_die_i)
 385         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 386
 387         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 388         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 389         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 390
 391         return m
 392
 393
 394 class Scoreboard(Elaboratable):
 395     def __init__(self, rwid, n_regs):
 396         """ Inputs:
 397
 398             * :rwid:   bit width of register file(s) - both FP and INT
 399             * :n_regs: depth of register file(s) - number of FP and INT regs
 400         """
 401         self.rwid = rwid
 402         self.n_regs = n_regs
 403
 404         # Register Files
 405         self.intregs = RegFileArray(rwid, n_regs)
 406         self.fpregs = RegFileArray(rwid, n_regs)
 407
 408         # issue q needs to get at these
 409         self.aluissue = IssueUnitGroup(2)
 410         self.lsissue = IssueUnitGroup(2)
 411         self.brissue = IssueUnitGroup(1)
 412         # and these
 413         self.alu_oper_i = Signal(4, reset_less=True)
 414         self.alu_imm_i = Signal(rwid, reset_less=True)
 415         self.br_oper_i = Signal(4, reset_less=True)
 416         self.br_imm_i = Signal(rwid, reset_less=True)
 417         self.ls_oper_i = Signal(4, reset_less=True)
 418         self.ls_imm_i = Signal(rwid, reset_less=True)
 419
 420         # inputs
 421         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 422         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 423         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 424         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 425
 426         # outputs
 427         self.issue_o = Signal(reset_less=True) # instruction was accepted
 428         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 429
 430         # for branch speculation experiment.  branch_direction = 0 if
 431         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 432         # branch_succ and branch_fail are requests to have the current
 433         # instruction be dependent on the branch unit "shadow" capability.
 434         self.branch_succ_i = Signal(reset_less=True)
 435         self.branch_fail_i = Signal(reset_less=True)
 436         self.branch_direction_o = Signal(2, reset_less=True)
 437
 438     def elaborate(self, platform):
 439         m = Module()
 440         comb = m.d.comb
 441         sync = m.d.sync
 442
 443         m.submodules.intregs = self.intregs
 444         m.submodules.fpregs = self.fpregs
 445
 446         # register ports
 447         int_dest = self.intregs.write_port("dest")
 448         int_src1 = self.intregs.read_port("src1")
 449         int_src2 = self.intregs.read_port("src2")
 450
 451         fp_dest = self.fpregs.write_port("dest")
 452         fp_src1 = self.fpregs.read_port("src1")
 453         fp_src2 = self.fpregs.read_port("src2")
 454
 455         # Int ALUs and BR ALUs
 456         n_int_alus = 5
 457         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 458         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 459
 460         # LDST Comp Units
 461         n_ldsts = 2
 462         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 463
 464         # Comp Units
 465         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 466         bgt = cub.bgt # get at the branch computation unit
 467         br1 = cub.br1
 468
 469         # Int FUs
 470         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 471
 472         # Memory FUs
 473         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 474
 475         # Count of number of FUs
 476         n_intfus = n_int_alus
 477         n_fp_fus = 0 # for now
 478
 479         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 480         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 481         m.submodules.intpick1 = intpick1
 482
 483         # INT/FP Issue Unit
 484         regdecode = RegDecode(self.n_regs)
 485         m.submodules.regdecode = regdecode
 486         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 487         m.submodules.issueunit = issueunit
 488
 489         # Shadow Matrix.  currently n_intfus shadows, to be used for
 490         # write-after-write hazards.  NOTE: there is one extra for branches,
 491         # so the shadow width is increased by 1
 492         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 493         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 494
 495         # record previous instruction to cast shadow on current instruction
 496         prev_shadow = Signal(n_intfus)
 497
 498         # Branch Speculation recorder.  tracks the success/fail state as
 499         # each instruction is issued, so that when the branch occurs the
 500         # allow/cancel can be issued as appropriate.
 501         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 502
 503         #---------
 504         # ok start wiring things together...
 505         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 506         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 507         #---------
 508
 509         #---------
 510         # Issue Unit is where it starts.  set up some in/outs for this module
 511         #---------
 512         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 513                      regdecode.src1_i.eq(self.int_src1_i),
 514                      regdecode.src2_i.eq(self.int_src2_i),
 515                      regdecode.enable_i.eq(self.reg_enable_i),
 516                      self.issue_o.eq(issueunit.issue_o)
 517                     ]
 518
 519         # take these to outside (issue needs them)
 520         comb += cua.oper_i.eq(self.alu_oper_i)
 521         comb += cua.imm_i.eq(self.alu_imm_i)
 522         comb += cub.oper_i.eq(self.br_oper_i)
 523         comb += cub.imm_i.eq(self.br_imm_i)
 524         comb += cul.oper_i.eq(self.ls_oper_i)
 525         comb += cul.imm_i.eq(self.ls_imm_i)
 526
 527         # TODO: issueunit.f (FP)
 528
 529         # and int function issue / busy arrays, and dest/src1/src2
 530         comb += intfus.dest_i.eq(regdecode.dest_o)
 531         comb += intfus.src1_i.eq(regdecode.src1_o)
 532         comb += intfus.src2_i.eq(regdecode.src2_o)
 533
 534         fn_issue_o = issueunit.fn_issue_o
 535
 536         comb += intfus.fn_issue_i.eq(fn_issue_o)
 537         comb += issueunit.busy_i.eq(cu.busy_o)
 538         comb += self.busy_o.eq(cu.busy_o.bool())
 539
 540         #---------
 541         # Memory Function Unit
 542         #---------
 543         reset_b = Signal(cul.n_units, reset_less=True)
 544         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 545
 546
 547         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 548         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 549         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 550
 551         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 552         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 553         # issue_i.  multi-issue gets a bit more complex but not a lot.
 554         prior_ldsts = Signal(cul.n_units, reset_less=True)
 555         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 556         with m.If(self.ls_oper_i[2]): # LD bit of operand
 557             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 558         with m.If(self.ls_oper_i[3]): # ST bit of operand
 559             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 560
 561         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 562         # just immediately activate go_adr
 563         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 564
 565         # connect up address data
 566         comb += memfus.addrs_i[0].eq(cul.units[0].data_o)
 567         comb += memfus.addrs_i[1].eq(cul.units[1].data_o)
 568
 569         # connect loadable / storable to go_ld/go_st.
 570         # XXX should only be done when the memory ld/st has actually happened!
 571
 572         comb += memfus.go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
 573         comb += memfus.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)
 574         #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
 575         comb += cul.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)
 576
 577         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 578         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 579         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 580
 581         #---------
 582         # merge shadow matrices outputs
 583         #---------
 584
 585         # these are explained in ShadowMatrix docstring, and are to be
 586         # connected to the FUReg and FUFU Matrices, to get them to reset
 587         anydie = Signal(n_intfus, reset_less=True)
 588         allshadown = Signal(n_intfus, reset_less=True)
 589         shreset = Signal(n_intfus, reset_less=True)
 590         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 591         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 592         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 593
 594         #---------
 595         # connect fu-fu matrix
 596         #---------
 597
 598         # Group Picker... done manually for now.
 599         go_rd_o = intpick1.go_rd_o
 600         go_wr_o = intpick1.go_wr_o
 601         go_rd_i = intfus.go_rd_i
 602         go_wr_i = intfus.go_wr_i
 603         go_die_i = intfus.go_die_i
 604         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 605         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 606         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 607         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 608
 609         # Connect Picker
 610         #---------
 611         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 612         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 613         int_rd_o = intfus.readable_o
 614         int_wr_o = intfus.writable_o
 615         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 616         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 617
 618         #---------
 619         # Shadow Matrix
 620         #---------
 621
 622         comb += shadows.issue_i.eq(fn_issue_o)
 623         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 624         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 625         #---------
 626         # NOTE; this setup is for the instruction order preservation...
 627
 628         # connect shadows / go_dies to Computation Units
 629         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 630         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 631
 632         # ok connect first n_int_fu shadows to busy lines, to create an
 633         # instruction-order linked-list-like arrangement, using a bit-matrix
 634         # (instead of e.g. a ring buffer).
 635
 636         # when written, the shadow can be cancelled (and was good)
 637         for i in range(n_intfus):
 638             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 639
 640         # *previous* instruction shadows *current* instruction, and, obviously,
 641         # if the previous is completed (!busy) don't cast the shadow!
 642         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 643         for i in range(n_intfus):
 644             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 645
 646         #---------
 647         # ... and this is for branch speculation.  it uses the extra bit
 648         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 649         # only needs to set shadow_i, s_fail_i and s_good_i
 650
 651         # issue captures shadow_i (if enabled)
 652         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 653
 654         bactive = Signal(reset_less=True)
 655         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 656
 657         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 658         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 659             comb += bshadow.issue_i.eq(fn_issue_o)
 660             for i in range(n_intfus):
 661                 with m.If(fn_issue_o & (Const(1<<i))):
 662                     comb += bshadow.shadow_i[i][0].eq(1)
 663
 664         # finally, we need an indicator to the test infrastructure as to
 665         # whether the branch succeeded or failed, plus, link up to the
 666         # "recorder" of whether the instruction was under shadow or not
 667
 668         with m.If(br1.issue_i):
 669             sync += bspec.active_i.eq(1)
 670         with m.If(self.branch_succ_i):
 671             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 672         with m.If(self.branch_fail_i):
 673             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 674
 675         # branch is active (TODO: a better signal: this is over-using the
 676         # go_write signal - actually the branch should not be "writing")
 677         with m.If(br1.go_wr_i):
 678             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 679             sync += bspec.active_i.eq(0)
 680             comb += bspec.br_i.eq(1)
 681             # branch occurs if data == 1, failed if data == 0
 682             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 683             for i in range(n_intfus):
 684                 # *expected* direction of the branch matched against *actual*
 685                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 686                 # ... or it didn't
 687                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 688
 689         #---------
 690         # Connect Register File(s)
 691         #---------
 692         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 693         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 694         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 695
 696         # connect ALUs to regfule
 697         comb += int_dest.data_i.eq(cu.data_o)
 698         comb += cu.src1_i.eq(int_src1.data_o)
 699         comb += cu.src2_i.eq(int_src2.data_o)
 700
 701         # connect ALU Computation Units
 702         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 703         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 704         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 705
 706         return m
 707
 708     def __iter__(self):
 709         yield from self.intregs
 710         yield from self.fpregs
 711         yield self.int_dest_i
 712         yield self.int_src1_i
 713         yield self.int_src2_i
 714         yield self.issue_o
 715         yield self.branch_succ_i
 716         yield self.branch_fail_i
 717         yield self.branch_direction_o
 718
 719     def ports(self):
 720         return list(self)
 721
 722
class IssueToScoreboard(Elaboratable):
    """Glue between an InstructionQ and a Scoreboard.

    Instructions are pushed in through data_i / p_add_i.  The entry at the
    head of the queue is inspected in place, routed to the branch, LD/ST or
    ALU issue group according to its opcode bits, and popped from the queue
    once the selected issue group reports a non-zero fn_issue_o.

    Constructor arguments:
    * qlen:   passed to InstructionQ as the queue length; also sizes
              p_add_i and qlen_o
    * n_in:   number of data_i instruction records (InstructionQ n_in)
    * n_out:  passed to InstructionQ as n_out
    * rwid:   register width, passed to InstructionQ, Scoreboard and
              TestMemory
    * opwid:  operation width, passed to InstructionQ / Instruction.nq
    * n_regs: number of registers, passed to Scoreboard
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # shape tuple (width, signed): unsigned, int(log2(qlen))+2 bits
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Build the module: instruction queue, scoreboard and test memory
        as submodules, plus the combinatorial decode/issue logic that
        links the head of the queue to the scoreboard.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync # NOTE(review): not used anywhere in this method

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        # NOTE(review): mem is added as a submodule but nothing in this
        # method connects to it
        mem = TestMemory(self.rwid, 8) # not too big, takes too long
        m.submodules.iq = iq
        m.submodules.sc = sc
        m.submodules.mem = mem

        # get at the regfile for testing
        # (only available on the instance after elaborate() has run)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set (below),
        # which is done once the selected issue group asserts fn_issue_o.

        # in "waiting" state
        # (each of these is driven combinatorially by the opcode decode
        #  further down, for whichever group the head instruction targets)
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            # opcode bits 2-3 select branch, else bits 4-5 select LD/ST,
            # else the instruction goes to the ALU group
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3<<4)) != 0): # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            with m.Else(): # alu
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield p_ready_o, then every item of each data_i record, then
        p_add_i.  busy_o and qlen_o are not included.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return the items produced by __iter__ as a list."""
        return list(self)
 845
 846
 847 IADD = 0
 848 ISUB = 1
 849 IMUL = 2
 850 ISHF = 3
 851 IBGT = 4
 852 IBLT = 5
 853 IBEQ = 6
 854 IBNE = 7
 855
 856
 857 class RegSim:
 858     def __init__(self, rwidth, nregs):
 859         self.rwidth = rwidth
 860         self.regs = [0] * nregs
 861
 862     def op(self, op, op_imm, imm, src1, src2, dest):
 863         maxbits = (1 << self.rwidth) - 1
 864         src1 = self.regs[src1] & maxbits
 865         if op_imm:
 866             src2 = imm
 867         else:
 868             src2 = self.regs[src2] & maxbits
 869         if op == IADD:
 870             val = src1 + src2
 871         elif op == ISUB:
 872             val = src1 - src2
 873         elif op == IMUL:
 874             val = src1 * src2
 875         elif op == ISHF:
 876             val = src1 >> (src2 & maxbits)
 877         elif op == IBGT:
 878             val = int(src1 > src2)
 879         elif op == IBLT:
 880             val = int(src1 < src2)
 881         elif op == IBEQ:
 882             val = int(src1 == src2)
 883         elif op == IBNE:
 884             val = int(src1 != src2)
 885         else:
 886             return 0 # LD/ST TODO
 887         val &= maxbits
 888         self.setval(dest, val)
 889         return val
 890
 891     def setval(self, dest, val):
 892         print ("sim setval", dest, hex(val))
 893         self.regs[dest] = val
 894
 895     def dump(self, dut):
 896         for i, val in enumerate(self.regs):
 897             reg = yield dut.intregs.regs[i].reg
 898             okstr = "OK" if reg == val else "!ok"
 899             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 900
 901     def check(self, dut):
 902         for i, val in enumerate(self.regs):
 903             reg = yield dut.intregs.regs[i].reg
 904             if reg != val:
 905                 print("reg %d expected %x received %x\n" % (i, val, reg))
 906                 yield from self.dump(dut)
 907                 assert False
 908
 909 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 910             branch_success, branch_fail):
 911     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 912                'src1_i': src1, 'src2_i': src2}]
 913
 914     sendlen = 1
 915     for idx in range(sendlen):
 916         yield from eq(dut.data_i[idx], instrs[idx])
 917         di = yield dut.data_i[idx]
 918         print ("senddata %d %x" % (idx, di))
 919     yield dut.p_add_i.eq(sendlen)
 920     yield
 921     o_p_ready = yield dut.p_ready_o
 922     while not o_p_ready:
 923         yield
 924         o_p_ready = yield dut.p_ready_o
 925
 926     yield dut.p_add_i.eq(0)
 927
 928
 929 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 930     yield from disable_issue(dut)
 931     yield dut.int_dest_i.eq(dest)
 932     yield dut.int_src1_i.eq(src1)
 933     yield dut.int_src2_i.eq(src2)
 934     if (op & (0x3<<2)) != 0: # branch
 935         yield dut.brissue.insn_i.eq(1)
 936         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 937         yield dut.br_imm_i.eq(imm)
 938         dut_issue = dut.brissue
 939     else:
 940         yield dut.aluissue.insn_i.eq(1)
 941         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 942         yield dut.alu_imm_i.eq(imm)
 943         dut_issue = dut.aluissue
 944     yield dut.reg_enable_i.eq(1)
 945
 946     # these indicate that the instruction is to be made shadow-dependent on
 947     # (either) branch success or branch fail
 948     yield dut.branch_fail_i.eq(branch_fail)
 949     yield dut.branch_succ_i.eq(branch_success)
 950
 951     yield
 952     yield from wait_for_issue(dut, dut_issue)
 953
 954
 955 def print_reg(dut, rnums):
 956     rs = []
 957     for rnum in rnums:
 958         reg = yield dut.intregs.regs[rnum].reg
 959         rs.append("%x" % reg)
 960     rnums = map(str, rnums)
 961     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 962
 963
 964 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 965     insts = []
 966     for i in range(n_ops):
 967         src1 = randint(1, dut.n_regs-1)
 968         src2 = randint(1, dut.n_regs-1)
 969         imm = randint(1, (1<<dut.rwid)-1)
 970         dest = randint(1, dut.n_regs-1)
 971         op = randint(0, max_opnums)
 972         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 973
 974         if shadowing:
 975             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 976         else:
 977             insts.append((src1, src2, dest, op, opi, imm))
 978     return insts
 979
 980
 981 def wait_for_busy_clear(dut):
 982     while True:
 983         busy_o = yield dut.busy_o
 984         if not busy_o:
 985             break
 986         print ("busy",)
 987         yield
 988
 989 def disable_issue(dut):
 990     yield dut.aluissue.insn_i.eq(0)
 991     yield dut.brissue.insn_i.eq(0)
 992     yield dut.lsissue.insn_i.eq(0)
 993
 994
 995 def wait_for_issue(dut, dut_issue):
 996     while True:
 997         issue_o = yield dut_issue.fn_issue_o
 998         if issue_o:
 999             yield from disable_issue(dut)
1000             yield dut.reg_enable_i.eq(0)
1001             break
1002         print ("busy",)
1003         #yield from print_reg(dut, [1,2,3])
1004         yield
1005     #yield from print_reg(dut, [1,2,3])
1006
def scoreboard_branch_sim(dut, alusim):
    """Simulation generator: run a small branch-shadowing program on dut
    and compare the resulting register file against alusim.

    Instructions in this test are 5-tuples
    (src1, src2, dest, op, (shadow_on, shadow_off)).  For a branch
    (op >= 4) the dest slot instead holds a (branch_ok, branch_fail) pair
    of instruction lists: one instruction from each list is queued,
    shadowed on the branch succeeding / failing respectively, and
    whichever ends up on the wrong path is skipped once the branch
    direction is known.

    * dut:    must expose the issue inputs used by int_instr() as well as
              n_regs, intregs, busy_o and branch_direction_o
              NOTE(review): that looks like a Scoreboard rather than an
              IssueToScoreboard -- confirm against the caller.
    * alusim: RegSim reference model

    The instruction tuples carry no immediate, so imm=0 (and op_imm=0 for
    the reference model) is passed on to int_instr() and alusim.op().
    """

    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        if False:
            # NOTE(review): disabled.  create_random_ops() with
            # shadowing=True returns 7-tuples, which do not match the
            # 5-tuple format unpacked below; this needs updating before
            # it is re-enabled.
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the reference model: the issue loop below
        # consumes (and appends to) the original list
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadowing needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr() takes an immediate between op and src1: these
            # instructions have none, so pass 0
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the reference model, following whichever
        # side of each branch the model says was taken
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op() takes (op, op_imm, imm, src1, src2, dest):
            # register-register form, no immediate
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1119
1120
def scoreboard_sim(dut, alusim):
    """Simulation generator: queue a list of instructions into dut via
    instr_q(), wait for everything to drain, then compare dut's register
    file against the alusim reference model.

    Instructions are 7-tuples
    (src1, src2, dest, op, opi, imm, (br_ok, br_fail)).  The blocks of
    "if False:" below are regression vectors kept for manual re-enabling.
    NOTE(review): several of the disabled blocks still use shorter
    (4-, 5- or 6-element) tuples, which would fail to unpack in the issue
    loop if switched on as they stand.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        # (register 0 is left untouched; note this loop re-uses "i")
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            # op 0x10 has bit 4 set (LD in the issue decode); RegSim.op()
            # does not model it yet and leaves the registers unchanged
            instrs.append( (1, 2, 2, 0x10, 1, 1, (0, 0)) )
            #instrs.append( (1, 2, 7, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the reference model first, then queue the same
            # instruction into the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first until the instruction queue reports empty ...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (... then four more clock cycles before polling busy_o)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1267
1268
def test_scoreboard():
    """Build an IssueToScoreboard, write it out as RTLIL and run the
    scoreboard_sim test-bench against it (with a VCD trace).
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # created but not passed to the test-bench

    # dump the design to test_scoreboard6600.il
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = scoreboard_sim(dut, alusim)
    run_simulation(dut, stimulus, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1282
1283
# run the scoreboard test-bench when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()