src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117             self.go_st_i = Signal(n_units, reset_less=True)
 118
 119         # outputs
 120         self.busy_o = Signal(n_units, reset_less=True)
 121         self.rd_rel_o = Signal(n_units, reset_less=True)
 122         self.req_rel_o = Signal(n_units, reset_less=True)
 123         if ldstmode:
 124             self.adr_rel_o = Signal(n_units, reset_less=True)
 125             self.sto_rel_o = Signal(n_units, reset_less=True)
 126             self.req_rel_o = Signal(n_units, reset_less=True)
 127             self.load_mem_o = Signal(n_units, reset_less=True)
 128             self.stwd_mem_o = Signal(n_units, reset_less=True)
 129
 130         # in/out register data (note: not register#, actual data)
 131         self.data_o = Signal(rwid, reset_less=True)
 132         self.src1_i = Signal(rwid, reset_less=True)
 133         self.src2_i = Signal(rwid, reset_less=True)
 134         # input operand
 135
 136     def elaborate(self, platform):
 137         m = Module()
 138         comb = m.d.comb
 139
 140         for i, alu in enumerate(self.units):
 141             setattr(m.submodules, "comp%d" % i, alu)
 142
 143         go_rd_l = []
 144         go_wr_l = []
 145         issue_l = []
 146         busy_l = []
 147         req_rel_l = []
 148         rd_rel_l = []
 149         shadow_l = []
 150         godie_l = []
 151         for alu in self.units:
 152             req_rel_l.append(alu.req_rel_o)
 153             rd_rel_l.append(alu.rd_rel_o)
 154             shadow_l.append(alu.shadown_i)
 155             godie_l.append(alu.go_die_i)
 156             go_wr_l.append(alu.go_wr_i)
 157             go_rd_l.append(alu.go_rd_i)
 158             issue_l.append(alu.issue_i)
 159             busy_l.append(alu.busy_o)
 160         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 161         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 162         comb += self.busy_o.eq(Cat(*busy_l))
 163         comb += Cat(*godie_l).eq(self.go_die_i)
 164         comb += Cat(*shadow_l).eq(self.shadown_i)
 165         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 166         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 167         comb += Cat(*issue_l).eq(self.issue_i)
 168
 169         # connect data register input/output
 170
 171         # merge (OR) all integer FU / ALU outputs to a single value
 172         # bit of a hack: treereduce needs a list with an item named "data_o"
 173         if self.units:
 174             data_o = treereduce(self.units)
 175             comb += self.data_o.eq(data_o)
 176
 177         for i, alu in enumerate(self.units):
 178             comb += alu.src1_i.eq(self.src1_i)
 179             comb += alu.src2_i.eq(self.src2_i)
 180
 181         if not self.ldstmode:
 182             return m
 183
 184         ldmem_l = []
 185         stmem_l = []
 186         go_ad_l = []
 187         go_st_l = []
 188         adr_rel_l = []
 189         sto_rel_l = []
 190         for alu in self.units:
 191             adr_rel_l.append(alu.adr_rel_o)
 192             sto_rel_l.append(alu.sto_rel_o)
 193             ldmem_l.append(alu.load_mem_o)
 194             stmem_l.append(alu.stwd_mem_o)
 195             go_ad_l.append(alu.go_ad_i)
 196             go_st_l.append(alu.go_st_i)
 197         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 198         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 199         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 200         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 201         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 202         comb += Cat(*go_st_l).eq(self.go_st_i)
 203
 204         return m
 205
 206
 207 class CompUnitLDSTs(CompUnitsBase):
 208
 209     def __init__(self, rwid, opwid, n_ldsts, mem):
 210         """ Inputs:
 211
 212             * :rwid:   bit width of register file(s) - both FP and INT
 213             * :opwid:  operand bit width
 214         """
 215         self.opwid = opwid
 216
 217         # inputs
 218         self.oper_i = Signal(opwid, reset_less=True)
 219         self.imm_i = Signal(rwid, reset_less=True)
 220
 221         # Int ALUs
 222         self.alus = []
 223         for i in range(n_ldsts):
 224             self.alus.append(ALU(rwid))
 225
 226         units = []
 227         for alu in self.alus:
 228             aluopwid = 4 # see compldst.py for "internal" opcode
 229             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 230
 231         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 232
 233     def elaborate(self, platform):
 234         m = CompUnitsBase.elaborate(self, platform)
 235         comb = m.d.comb
 236
 237         # hand the same operation to all units, 4 lower bits though
 238         for alu in self.units:
 239             comb += alu.oper_i[0:4].eq(self.oper_i)
 240             comb += alu.imm_i.eq(self.imm_i)
 241             comb += alu.isalu_i.eq(0)
 242
 243         return m
 244
 245
 246 class CompUnitALUs(CompUnitsBase):
 247
 248     def __init__(self, rwid, opwid, n_alus):
 249         """ Inputs:
 250
 251             * :rwid:   bit width of register file(s) - both FP and INT
 252             * :opwid:  operand bit width
 253         """
 254         self.opwid = opwid
 255
 256         # inputs
 257         self.oper_i = Signal(opwid, reset_less=True)
 258         self.imm_i = Signal(rwid, reset_less=True)
 259
 260         # Int ALUs
 261         alus = []
 262         for i in range(n_alus):
 263             alus.append(ALU(rwid))
 264
 265         units = []
 266         for alu in alus:
 267             aluopwid = 3 # extra bit for immediate mode
 268             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 269
 270         CompUnitsBase.__init__(self, rwid, units)
 271
 272     def elaborate(self, platform):
 273         m = CompUnitsBase.elaborate(self, platform)
 274         comb = m.d.comb
 275
 276         # hand the same operation to all units, only lower 3 bits though
 277         for alu in self.units:
 278             comb += alu.oper_i[0:3].eq(self.oper_i)
 279             comb += alu.imm_i.eq(self.imm_i)
 280
 281         return m
 282
 283
 284 class CompUnitBR(CompUnitsBase):
 285
 286     def __init__(self, rwid, opwid):
 287         """ Inputs:
 288
 289             * :rwid:   bit width of register file(s) - both FP and INT
 290             * :opwid:  operand bit width
 291
 292             Note: bgt unit is returned so that a shadow unit can be created
 293             for it
 294         """
 295         self.opwid = opwid
 296
 297         # inputs
 298         self.oper_i = Signal(opwid, reset_less=True)
 299         self.imm_i = Signal(rwid, reset_less=True)
 300
 301         # Branch ALU and CU
 302         self.bgt = BranchALU(rwid)
 303         aluopwid = 3 # extra bit for immediate mode
 304         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 305         CompUnitsBase.__init__(self, rwid, [self.br1])
 306
 307     def elaborate(self, platform):
 308         m = CompUnitsBase.elaborate(self, platform)
 309         comb = m.d.comb
 310
 311         # hand the same operation to all units
 312         for alu in self.units:
 313             comb += alu.oper_i.eq(self.oper_i)
 314             comb += alu.imm_i.eq(self.imm_i)
 315
 316         return m
 317
 318
 319 class FunctionUnits(Elaboratable):
 320
 321     def __init__(self, n_regs, n_int_alus):
 322         self.n_regs = n_regs
 323         self.n_int_alus = n_int_alus
 324
 325         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 326         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 327         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 328
 329         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 330         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 331
 332         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 333         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 334         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 335
 336         self.readable_o = Signal(n_int_alus, reset_less=True)
 337         self.writable_o = Signal(n_int_alus, reset_less=True)
 338
 339         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 340         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 341         self.go_die_i = Signal(n_int_alus, reset_less=True)
 342         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 343
 344         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 345
 346     def elaborate(self, platform):
 347         m = Module()
 348         comb = m.d.comb
 349         sync = m.d.sync
 350
 351         n_intfus = self.n_int_alus
 352
 353         # Integer FU-FU Dep Matrix
 354         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 355         m.submodules.intfudeps = intfudeps
 356         # Integer FU-Reg Dep Matrix
 357         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 358         m.submodules.intregdeps = intregdeps
 359
 360         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 361         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 362
 363         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 364         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 365
 366         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 367         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 368         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 369
 370         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 371         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 372         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 373         comb += intfudeps.go_die_i.eq(self.go_die_i)
 374         comb += self.readable_o.eq(intfudeps.readable_o)
 375         comb += self.writable_o.eq(intfudeps.writable_o)
 376
 377         # Connect function issue / arrays, and dest/src1/src2
 378         comb += intregdeps.dest_i.eq(self.dest_i)
 379         comb += intregdeps.src_i[0].eq(self.src1_i)
 380         comb += intregdeps.src_i[1].eq(self.src2_i)
 381
 382         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 383         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 384         comb += intregdeps.go_die_i.eq(self.go_die_i)
 385         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 386
 387         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 388         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 389         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 390
 391         return m
 392
 393
 394 class Scoreboard(Elaboratable):
 395     def __init__(self, rwid, n_regs):
 396         """ Inputs:
 397
 398             * :rwid:   bit width of register file(s) - both FP and INT
 399             * :n_regs: depth of register file(s) - number of FP and INT regs
 400         """
 401         self.rwid = rwid
 402         self.n_regs = n_regs
 403
 404         # Register Files
 405         self.intregs = RegFileArray(rwid, n_regs)
 406         self.fpregs = RegFileArray(rwid, n_regs)
 407
 408         # issue q needs to get at these
 409         self.aluissue = IssueUnitGroup(2)
 410         self.lsissue = IssueUnitGroup(2)
 411         self.brissue = IssueUnitGroup(1)
 412         # and these
 413         self.alu_oper_i = Signal(4, reset_less=True)
 414         self.alu_imm_i = Signal(rwid, reset_less=True)
 415         self.br_oper_i = Signal(4, reset_less=True)
 416         self.br_imm_i = Signal(rwid, reset_less=True)
 417         self.ls_oper_i = Signal(4, reset_less=True)
 418         self.ls_imm_i = Signal(rwid, reset_less=True)
 419
 420         # inputs
 421         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 422         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 423         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 424         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 425
 426         # outputs
 427         self.issue_o = Signal(reset_less=True) # instruction was accepted
 428         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 429
 430         # for branch speculation experiment.  branch_direction = 0 if
 431         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 432         # branch_succ and branch_fail are requests to have the current
 433         # instruction be dependent on the branch unit "shadow" capability.
 434         self.branch_succ_i = Signal(reset_less=True)
 435         self.branch_fail_i = Signal(reset_less=True)
 436         self.branch_direction_o = Signal(2, reset_less=True)
 437
 438     def elaborate(self, platform):
 439         m = Module()
 440         comb = m.d.comb
 441         sync = m.d.sync
 442
 443         m.submodules.intregs = self.intregs
 444         m.submodules.fpregs = self.fpregs
 445
 446         # register ports
 447         int_dest = self.intregs.write_port("dest")
 448         int_src1 = self.intregs.read_port("src1")
 449         int_src2 = self.intregs.read_port("src2")
 450
 451         fp_dest = self.fpregs.write_port("dest")
 452         fp_src1 = self.fpregs.read_port("src1")
 453         fp_src2 = self.fpregs.read_port("src2")
 454
 455         # Int ALUs and BR ALUs
 456         n_int_alus = 5
 457         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 458         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 459
 460         # LDST Comp Units
 461         n_ldsts = 2
 462         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 463
 464         # Comp Units
 465         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 466         bgt = cub.bgt # get at the branch computation unit
 467         br1 = cub.br1
 468
 469         # Int FUs
 470         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 471
 472         # Memory FUs
 473         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 474
 475         # Count of number of FUs
 476         n_intfus = n_int_alus
 477         n_fp_fus = 0 # for now
 478
 479         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 480         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 481         m.submodules.intpick1 = intpick1
 482
 483         # INT/FP Issue Unit
 484         regdecode = RegDecode(self.n_regs)
 485         m.submodules.regdecode = regdecode
 486         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 487         m.submodules.issueunit = issueunit
 488
 489         # Shadow Matrix.  currently n_intfus shadows, to be used for
 490         # write-after-write hazards.  NOTE: there is one extra for branches,
 491         # so the shadow width is increased by 1
 492         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 493         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 494
 495         # record previous instruction to cast shadow on current instruction
 496         prev_shadow = Signal(n_intfus)
 497
 498         # Branch Speculation recorder.  tracks the success/fail state as
 499         # each instruction is issued, so that when the branch occurs the
 500         # allow/cancel can be issued as appropriate.
 501         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 502
 503         #---------
 504         # ok start wiring things together...
 505         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 506         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 507         #---------
 508
 509         #---------
 510         # Issue Unit is where it starts.  set up some in/outs for this module
 511         #---------
 512         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 513                      regdecode.src1_i.eq(self.int_src1_i),
 514                      regdecode.src2_i.eq(self.int_src2_i),
 515                      regdecode.enable_i.eq(self.reg_enable_i),
 516                      self.issue_o.eq(issueunit.issue_o)
 517                     ]
 518
 519         # take these to outside (issue needs them)
 520         comb += cua.oper_i.eq(self.alu_oper_i)
 521         comb += cua.imm_i.eq(self.alu_imm_i)
 522         comb += cub.oper_i.eq(self.br_oper_i)
 523         comb += cub.imm_i.eq(self.br_imm_i)
 524         comb += cul.oper_i.eq(self.ls_oper_i)
 525         comb += cul.imm_i.eq(self.ls_imm_i)
 526
 527         # TODO: issueunit.f (FP)
 528
 529         # and int function issue / busy arrays, and dest/src1/src2
 530         comb += intfus.dest_i.eq(regdecode.dest_o)
 531         comb += intfus.src1_i.eq(regdecode.src1_o)
 532         comb += intfus.src2_i.eq(regdecode.src2_o)
 533
 534         fn_issue_o = issueunit.fn_issue_o
 535
 536         comb += intfus.fn_issue_i.eq(fn_issue_o)
 537         comb += issueunit.busy_i.eq(cu.busy_o)
 538         comb += self.busy_o.eq(cu.busy_o.bool())
 539
 540         #---------
 541         # Memory Function Unit
 542         #---------
 543         reset_b = Signal(cul.n_units, reset_less=True)
 544         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 545
 546
 547         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 548         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 549         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 550
 551         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 552         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 553         # issue_i.  multi-issue gets a bit more complex but not a lot.
 554         prior_ldsts = Signal(cul.n_units, reset_less=True)
 555         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 556         with m.If(self.ls_oper_i[2]): # LD bit of operand
 557             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 558         with m.If(self.ls_oper_i[3]): # ST bit of operand
 559             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 560
 561         # connect up address data
 562         comb += memfus.addrs_i[0].eq(cul.units[0].data_o)
 563         comb += memfus.addrs_i[1].eq(cul.units[1].data_o)
 564
 565         # connect loadable / storable to go_ld/go_st.
 566         # XXX should only be done when the memory ld/st has actually happened!
 567
 568         comb += memfus.go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
 569         comb += memfus.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)
 570         #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
 571         comb += cul.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)
 572
 573         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 574         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 575         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 576
 577         #---------
 578         # merge shadow matrices outputs
 579         #---------
 580
 581         # these are explained in ShadowMatrix docstring, and are to be
 582         # connected to the FUReg and FUFU Matrices, to get them to reset
 583         anydie = Signal(n_intfus, reset_less=True)
 584         allshadown = Signal(n_intfus, reset_less=True)
 585         shreset = Signal(n_intfus, reset_less=True)
 586         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 587         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 588         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 589
 590         #---------
 591         # connect fu-fu matrix
 592         #---------
 593
 594         # Group Picker... done manually for now.
 595         go_rd_o = intpick1.go_rd_o
 596         go_wr_o = intpick1.go_wr_o
 597         go_rd_i = intfus.go_rd_i
 598         go_wr_i = intfus.go_wr_i
 599         go_die_i = intfus.go_die_i
 600         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 601         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 602         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 603         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 604
 605         # Connect Picker
 606         #---------
 607         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 608         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 609         int_rd_o = intfus.readable_o
 610         int_wr_o = intfus.writable_o
 611         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 612         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 613
 614         #---------
 615         # Shadow Matrix
 616         #---------
 617
 618         comb += shadows.issue_i.eq(fn_issue_o)
 619         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 620         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 621         #---------
 622         # NOTE; this setup is for the instruction order preservation...
 623
 624         # connect shadows / go_dies to Computation Units
 625         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 626         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 627
 628         # ok connect first n_int_fu shadows to busy lines, to create an
 629         # instruction-order linked-list-like arrangement, using a bit-matrix
 630         # (instead of e.g. a ring buffer).
 631
 632         # when written, the shadow can be cancelled (and was good)
 633         for i in range(n_intfus):
 634             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 635
 636         # *previous* instruction shadows *current* instruction, and, obviously,
 637         # if the previous is completed (!busy) don't cast the shadow!
 638         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 639         for i in range(n_intfus):
 640             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 641
 642         #---------
 643         # ... and this is for branch speculation.  it uses the extra bit
 644         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 645         # only needs to set shadow_i, s_fail_i and s_good_i
 646
 647         # issue captures shadow_i (if enabled)
 648         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 649
 650         bactive = Signal(reset_less=True)
 651         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 652
 653         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 654         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 655             comb += bshadow.issue_i.eq(fn_issue_o)
 656             for i in range(n_intfus):
 657                 with m.If(fn_issue_o & (Const(1<<i))):
 658                     comb += bshadow.shadow_i[i][0].eq(1)
 659
 660         # finally, we need an indicator to the test infrastructure as to
 661         # whether the branch succeeded or failed, plus, link up to the
 662         # "recorder" of whether the instruction was under shadow or not
 663
 664         with m.If(br1.issue_i):
 665             sync += bspec.active_i.eq(1)
 666         with m.If(self.branch_succ_i):
 667             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 668         with m.If(self.branch_fail_i):
 669             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 670
 671         # branch is active (TODO: a better signal: this is over-using the
 672         # go_write signal - actually the branch should not be "writing")
 673         with m.If(br1.go_wr_i):
 674             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 675             sync += bspec.active_i.eq(0)
 676             comb += bspec.br_i.eq(1)
 677             # branch occurs if data == 1, failed if data == 0
 678             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 679             for i in range(n_intfus):
 680                 # *expected* direction of the branch matched against *actual*
 681                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 682                 # ... or it didn't
 683                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 684
 685         #---------
 686         # Connect Register File(s)
 687         #---------
 688         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 689         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 690         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 691
 692         # connect ALUs to regfule
 693         comb += int_dest.data_i.eq(cu.data_o)
 694         comb += cu.src1_i.eq(int_src1.data_o)
 695         comb += cu.src2_i.eq(int_src2.data_o)
 696
 697         # connect ALU Computation Units
 698         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 699         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 700         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 701
 702         return m
 703
 704     def __iter__(self):
 705         yield from self.intregs
 706         yield from self.fpregs
 707         yield self.int_dest_i
 708         yield self.int_src1_i
 709         yield self.int_src2_i
 710         yield self.issue_o
 711         yield self.branch_succ_i
 712         yield self.branch_fail_i
 713         yield self.branch_direction_o
 714
 715     def ports(self):
 716         return list(self)
 717
 718
class IssueToScoreboard(Elaboratable):
    """Test harness joining an InstructionQ to a Scoreboard.

    Instructions presented on ``data_i`` are handed to the queue.  The
    entry at the head of the queue (``iq.data_o[0]``) is decoded from its
    opcode bits and offered to one of the scoreboard's three issue groups
    (branch, LD/ST or ALU).  When the chosen group raises ``fn_issue_o``
    the queue is told to drop one entry (``iq.n_sub_i`` is set to 1).
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """Record the configuration and create the externally visible signals.

        All six arguments are stored on the instance and used in
        elaborate(): qlen, n_in, n_out and opwid go to InstructionQ,
        rwid goes to InstructionQ, Scoreboard and TestMemory, and
        n_regs goes to Scoreboard.
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # shape shared by the two queue-length style signals below:
        # int(log2(qlen))+2 bits, with the second tuple element False
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Build the module: queue, scoreboard, memory and the issue glue.

        Side effect: sets ``self.intregs`` to the scoreboard's integer
        register file so that test code can reach it; the attribute does
        not exist until this method has run.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync # not used anywhere below

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        # the memory is added as a submodule but none of its signals are
        # connected in this method
        mem = TestMemory(self.rwid, 8) # not too big, takes too long
        m.submodules.iq = iq
        m.submodules.sc = sc
        m.submodules.mem = mem

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set, which
        # the block below does once the issue group that was offered the
        # instruction raises fn_issue_o.

        # in "waiting" state: each flag is driven combinatorially further
        # down, by whichever decode branch offered the head instruction
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        # NOTE(review): no explicit insn_i=0 assignment is visible in this
        # method; presumably this refers to insn_i falling back to its
        # default when the If below is not taken - confirm.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group.  the tests are prioritised:
            # opcode bits 2-3 are checked first (branch), then bits 4-5
            # (ld/st); anything else goes to the ALU group.
            with m.If((op & (0x3<<2)) != 0): # branch
                # operation = low two opcode bits followed by the
                # immediate flag
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3<<4)) != 0): # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            with m.Else(): # alu
                # same operation encoding as the branch case
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield p_ready_o, the items of every data_i entry, then p_add_i.

        busy_o and qlen_o are not yielded here, so they are not part of
        the list returned by ports().
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return everything __iter__ yields, as a list."""
        return list(self)
 841
 842
 843 IADD = 0
 844 ISUB = 1
 845 IMUL = 2
 846 ISHF = 3
 847 IBGT = 4
 848 IBLT = 5
 849 IBEQ = 6
 850 IBNE = 7
 851
 852
 853 class RegSim:
 854     def __init__(self, rwidth, nregs):
 855         self.rwidth = rwidth
 856         self.regs = [0] * nregs
 857
 858     def op(self, op, op_imm, imm, src1, src2, dest):
 859         maxbits = (1 << self.rwidth) - 1
 860         src1 = self.regs[src1] & maxbits
 861         if op_imm:
 862             src2 = imm
 863         else:
 864             src2 = self.regs[src2] & maxbits
 865         if op == IADD:
 866             val = src1 + src2
 867         elif op == ISUB:
 868             val = src1 - src2
 869         elif op == IMUL:
 870             val = src1 * src2
 871         elif op == ISHF:
 872             val = src1 >> (src2 & maxbits)
 873         elif op == IBGT:
 874             val = int(src1 > src2)
 875         elif op == IBLT:
 876             val = int(src1 < src2)
 877         elif op == IBEQ:
 878             val = int(src1 == src2)
 879         elif op == IBNE:
 880             val = int(src1 != src2)
 881         else:
 882             return 0 # LD/ST TODO
 883         val &= maxbits
 884         self.setval(dest, val)
 885         return val
 886
 887     def setval(self, dest, val):
 888         print ("sim setval", dest, hex(val))
 889         self.regs[dest] = val
 890
 891     def dump(self, dut):
 892         for i, val in enumerate(self.regs):
 893             reg = yield dut.intregs.regs[i].reg
 894             okstr = "OK" if reg == val else "!ok"
 895             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 896
 897     def check(self, dut):
 898         for i, val in enumerate(self.regs):
 899             reg = yield dut.intregs.regs[i].reg
 900             if reg != val:
 901                 print("reg %d expected %x received %x\n" % (i, val, reg))
 902                 yield from self.dump(dut)
 903                 assert False
 904
 905 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 906             branch_success, branch_fail):
 907     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 908                'src1_i': src1, 'src2_i': src2}]
 909
 910     sendlen = 1
 911     for idx in range(sendlen):
 912         yield from eq(dut.data_i[idx], instrs[idx])
 913         di = yield dut.data_i[idx]
 914         print ("senddata %d %x" % (idx, di))
 915     yield dut.p_add_i.eq(sendlen)
 916     yield
 917     o_p_ready = yield dut.p_ready_o
 918     while not o_p_ready:
 919         yield
 920         o_p_ready = yield dut.p_ready_o
 921
 922     yield dut.p_add_i.eq(0)
 923
 924
 925 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 926     yield from disable_issue(dut)
 927     yield dut.int_dest_i.eq(dest)
 928     yield dut.int_src1_i.eq(src1)
 929     yield dut.int_src2_i.eq(src2)
 930     if (op & (0x3<<2)) != 0: # branch
 931         yield dut.brissue.insn_i.eq(1)
 932         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 933         yield dut.br_imm_i.eq(imm)
 934         dut_issue = dut.brissue
 935     else:
 936         yield dut.aluissue.insn_i.eq(1)
 937         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 938         yield dut.alu_imm_i.eq(imm)
 939         dut_issue = dut.aluissue
 940     yield dut.reg_enable_i.eq(1)
 941
 942     # these indicate that the instruction is to be made shadow-dependent on
 943     # (either) branch success or branch fail
 944     yield dut.branch_fail_i.eq(branch_fail)
 945     yield dut.branch_succ_i.eq(branch_success)
 946
 947     yield
 948     yield from wait_for_issue(dut, dut_issue)
 949
 950
 951 def print_reg(dut, rnums):
 952     rs = []
 953     for rnum in rnums:
 954         reg = yield dut.intregs.regs[rnum].reg
 955         rs.append("%x" % reg)
 956     rnums = map(str, rnums)
 957     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 958
 959
 960 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 961     insts = []
 962     for i in range(n_ops):
 963         src1 = randint(1, dut.n_regs-1)
 964         src2 = randint(1, dut.n_regs-1)
 965         imm = randint(1, (1<<dut.rwid)-1)
 966         dest = randint(1, dut.n_regs-1)
 967         op = randint(0, max_opnums)
 968         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 969
 970         if shadowing:
 971             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 972         else:
 973             insts.append((src1, src2, dest, op, opi, imm))
 974     return insts
 975
 976
 977 def wait_for_busy_clear(dut):
 978     while True:
 979         busy_o = yield dut.busy_o
 980         if not busy_o:
 981             break
 982         print ("busy",)
 983         yield
 984
 985 def disable_issue(dut):
 986     yield dut.aluissue.insn_i.eq(0)
 987     yield dut.brissue.insn_i.eq(0)
 988     yield dut.lsissue.insn_i.eq(0)
 989
 990
 991 def wait_for_issue(dut, dut_issue):
 992     while True:
 993         issue_o = yield dut_issue.fn_issue_o
 994         if issue_o:
 995             yield from disable_issue(dut)
 996             yield dut.reg_enable_i.eq(0)
 997             break
 998         print ("busy",)
 999         #yield from print_reg(dut, [1,2,3])
1000         yield
1001     #yield from print_reg(dut, [1,2,3])
1002
def scoreboard_branch_sim(dut, alusim):
    """Simulation generator: issue a short branch test and check registers.

    ``dut`` must provide ``branch_direction_o``, ``intregs``, ``n_regs``
    and the signals that int_instr() drives; ``alusim`` is the RegSim
    model that the DUT registers are compared against.

    Instructions in this test are 5-tuples
    ``(src1, src2, dest, op, (shadow_on, shadow_off))`` with no immediate.
    For a branch (op >= 4) the ``dest`` slot instead holds a pair of
    lists ``(branch_ok, branch_fail)`` with the instructions to run on
    either outcome; a ``None`` entry means "nothing on that path".

    Because these tuples carry no immediate, int_instr() is given imm=0
    and RegSim.op() is given op_imm=0, imm=0.  (Both calls previously
    omitted those arguments and so raised TypeError.)
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers (register 0 is left alone)
        for i in range(1, dut.n_regs):
            #val = 31+i*3
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., True, ...) returns
            # 7-tuples, which do not fit the 5-tuple unpacking used
            # below; this disabled block needs updating before use.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the software model: the issue loop below
        # consumes (and extends) insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts # same list object: appended to via instrs, popped via insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without shadowing
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr(dut, op, imm, src1, src2, dest, succ, fail):
            # these instructions have no immediate, so imm is 0
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the same program on the software model, following only
        # the branch path that the model itself takes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op(op, op_imm, imm, src1, src2, dest): register
            # operands only, so op_imm and imm are both 0
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1115
1116
def scoreboard_sim(dut, alusim):
    """Simulation generator: queue a list of instructions and check registers.

    The random generator is seeded with 0, registers 1..n_regs-1 of both
    the DUT and the ``alusim`` model are loaded with the same random
    values, and a list of instructions is built from whichever of the
    ``if True:`` blocks below are enabled.  Each instruction is applied to
    the model and then pushed into the DUT with instr_q().  Once the
    DUT's queue is empty and it is no longer busy, the DUT registers are
    compared against the model.

    Instructions must be 7-tuples
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))`` - that is what
    the issue loop unpacks.
    NOTE(review): many of the disabled (``if False:``) blocks append
    4-, 5- or 6-element tuples, which would fail to unpack if enabled;
    they appear to predate the current tuple layout.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        # the only block currently enabled: opcode 0x10 (bit 4 set) with
        # the immediate flag set and imm=1
        if True: # LD/ST test (with immediate)
            instrs.append( (1, 2, 2, 0x10, 1, 1, (0, 0)) )
            #instrs.append( (1, 2, 7, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the software model first, then hand the same
            # instruction to the DUT's queue
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # four further clock cycles before polling the busy flag
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1263
1264
def test_scoreboard():
    """Build the issue/scoreboard harness, write its RTLIL, then simulate.

    The RTLIL text goes to ``test_scoreboard6600.il`` and the simulation
    trace to ``test_scoreboard6600.vcd``.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(rwidth=16, nregs=8)
    memsim = MemSim(16, 16) # NOTE(review): not used below

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (currently disabled) branch test:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1278
1279
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()