src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12
  13 from compalu import ComputationUnitNoDelay
  14
  15 from alu_hier import ALU, BranchALU
  16 from nmutil.latch import SRLatch
  17
  18 from random import randint, seed
  19 from copy import deepcopy
  20
  21
  22 class CompUnitsBase(Elaboratable):
  23     """ Computation Unit Base class.
  24
  25         Amazingly, this class works recursively.  It's supposed to just
  26         look after some ALUs (that can handle the same operations),
  27         grouping them together, however it turns out that the same code
  28         can also group *groups* of Computation Units together as well.
  29
  30         Basically it was intended just to concatenate the ALU's issue,
  31         go_rd etc. signals together, which start out as bits and become
  32         sequences.  Turns out that the same trick works just as well
  33         on Computation Units!
  34
  35         So this class may be used recursively to present a top-level
  36         sequential concatenation of all the signals in and out of
  37         ALUs, whilst at the same time making it convenient to group
  38         ALUs together.
  39
  40         At the lower level, the intent is that groups of (identical)
  41         ALUs may be passed the same operation.  Even beyond that,
  42         the intent is that that group of (identical) ALUs actually
  43         share the *same pipeline* and as such become a "Concurrent
  44         Computation Unit" as defined by Mitch Alsup (see section
  45         11.4.9.3)
  46     """
  47     def __init__(self, rwid, units):
  48         """ Inputs:
  49
  50             * :rwid:   bit width of register file(s) - both FP and INT
  51             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  52         """
  53         self.units = units
  54         self.rwid = rwid
  55         self.rwid = rwid
  56         if units and isinstance(units[0], CompUnitsBase):
  57             self.n_units = 0
  58             for u in self.units:
  59                 self.n_units += u.n_units
  60         else:
  61             self.n_units = len(units)
  62
  63         n_units = self.n_units
  64
  65         # inputs
  66         self.issue_i = Signal(n_units, reset_less=True)
  67         self.go_rd_i = Signal(n_units, reset_less=True)
  68         self.go_wr_i = Signal(n_units, reset_less=True)
  69         self.shadown_i = Signal(n_units, reset_less=True)
  70         self.go_die_i = Signal(n_units, reset_less=True)
  71
  72         # outputs
  73         self.busy_o = Signal(n_units, reset_less=True)
  74         self.rd_rel_o = Signal(n_units, reset_less=True)
  75         self.req_rel_o = Signal(n_units, reset_less=True)
  76
  77         # in/out register data (note: not register#, actual data)
  78         self.data_o = Signal(rwid, reset_less=True)
  79         self.src1_i = Signal(rwid, reset_less=True)
  80         self.src2_i = Signal(rwid, reset_less=True)
  81         # input operand
  82
  83     def elaborate(self, platform):
  84         m = Module()
  85         comb = m.d.comb
  86
  87         for i, alu in enumerate(self.units):
  88             setattr(m.submodules, "comp%d" % i, alu)
  89
  90         go_rd_l = []
  91         go_wr_l = []
  92         issue_l = []
  93         busy_l = []
  94         req_rel_l = []
  95         rd_rel_l = []
  96         shadow_l = []
  97         godie_l = []
  98         for alu in self.units:
  99             req_rel_l.append(alu.req_rel_o)
 100             rd_rel_l.append(alu.rd_rel_o)
 101             shadow_l.append(alu.shadown_i)
 102             godie_l.append(alu.go_die_i)
 103             go_wr_l.append(alu.go_wr_i)
 104             go_rd_l.append(alu.go_rd_i)
 105             issue_l.append(alu.issue_i)
 106             busy_l.append(alu.busy_o)
 107         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 108         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 109         comb += self.busy_o.eq(Cat(*busy_l))
 110         comb += Cat(*godie_l).eq(self.go_die_i)
 111         comb += Cat(*shadow_l).eq(self.shadown_i)
 112         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 113         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 114         comb += Cat(*issue_l).eq(self.issue_i)
 115
 116         # connect data register input/output
 117
 118         # merge (OR) all integer FU / ALU outputs to a single value
 119         # bit of a hack: treereduce needs a list with an item named "data_o"
 120         if self.units:
 121             data_o = treereduce(self.units)
 122             comb += self.data_o.eq(data_o)
 123
 124         for i, alu in enumerate(self.units):
 125             comb += alu.src1_i.eq(self.src1_i)
 126             comb += alu.src2_i.eq(self.src2_i)
 127
 128         return m
 129
 130
 131 class CompUnitALUs(CompUnitsBase):
 132
 133     def __init__(self, rwid, opwid):
 134         """ Inputs:
 135
 136             * :rwid:   bit width of register file(s) - both FP and INT
 137             * :opwid:  operand bit width
 138         """
 139         self.opwid = opwid
 140
 141         # inputs
 142         self.oper_i = Signal(opwid, reset_less=True)
 143
 144         # Int ALUs
 145         add = ALU(rwid)
 146         sub = ALU(rwid)
 147         mul = ALU(rwid)
 148         shf = ALU(rwid)
 149
 150         units = []
 151         for alu in [add, sub, mul, shf]:
 152             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 153
 154         CompUnitsBase.__init__(self, rwid, units)
 155
 156     def elaborate(self, platform):
 157         m = CompUnitsBase.elaborate(self, platform)
 158         comb = m.d.comb
 159
 160         # hand the same operation to all units
 161         for alu in self.units:
 162             comb += alu.oper_i.eq(self.oper_i)
 163         #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
 164         #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
 165         #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
 166         #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
 167
 168         return m
 169
 170
 171 class CompUnitBR(CompUnitsBase):
 172
 173     def __init__(self, rwid, opwid):
 174         """ Inputs:
 175
 176             * :rwid:   bit width of register file(s) - both FP and INT
 177             * :opwid:  operand bit width
 178
 179             Note: bgt unit is returned so that a shadow unit can be created
 180             for it
 181         """
 182         self.opwid = opwid
 183
 184         # inputs
 185         self.oper_i = Signal(opwid, reset_less=True)
 186
 187         # Branch ALU and CU
 188         self.bgt = BranchALU(rwid)
 189         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 190         CompUnitsBase.__init__(self, rwid, [self.br1])
 191
 192     def elaborate(self, platform):
 193         m = CompUnitsBase.elaborate(self, platform)
 194         comb = m.d.comb
 195
 196         # hand the same operation to all units
 197         for alu in self.units:
 198             comb += alu.oper_i.eq(self.oper_i)
 199         #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
 200
 201         return m
 202
 203
 204 class FunctionUnits(Elaboratable):
 205
 206     def __init__(self, n_regs, n_int_alus):
 207         self.n_regs = n_regs
 208         self.n_int_alus = n_int_alus
 209
 210         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 211         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 212         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 213
 214         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 215         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 216
 217         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 218         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 219         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 220
 221         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 222         self.readable_o = Signal(n_int_alus, reset_less=True)
 223         self.writable_o = Signal(n_int_alus, reset_less=True)
 224
 225         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 226         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 227         self.go_die_i = Signal(n_int_alus, reset_less=True)
 228         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 229         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 230
 231         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 232
 233     def elaborate(self, platform):
 234         m = Module()
 235         comb = m.d.comb
 236         sync = m.d.sync
 237
 238         n_intfus = self.n_int_alus
 239
 240         # Integer FU-FU Dep Matrix
 241         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 242         m.submodules.intfudeps = intfudeps
 243         # Integer FU-Reg Dep Matrix
 244         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 245         m.submodules.intregdeps = intregdeps
 246
 247         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 248         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 249
 250         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 251         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 252
 253         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 254         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 255         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 256
 257         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 258         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 259         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 260         comb += intfudeps.go_die_i.eq(self.go_die_i)
 261         comb += self.readable_o.eq(intfudeps.readable_o)
 262         comb += self.writable_o.eq(intfudeps.writable_o)
 263
 264         # Connect function issue / arrays, and dest/src1/src2
 265         comb += intregdeps.dest_i.eq(self.dest_i)
 266         comb += intregdeps.src1_i.eq(self.src1_i)
 267         comb += intregdeps.src2_i.eq(self.src2_i)
 268
 269         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 270         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 271         comb += intregdeps.go_die_i.eq(self.go_die_i)
 272         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 273
 274         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 275         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 276         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 277
 278         return m
 279
 280
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment: two register files, a group of
        integer + branch Computation Units, dependency matrices, a group
        picker, an issue unit and shadow matrices, all wired together in
        elaborate().

        A number of attributes used by the test code (alu_insn_i,
        br_insn_i, alu_oper_i, br_oper_i) are only created by elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() sets branch_direction_o to the branch
        # unit's data_o + 1, and treats data_o == 1 as "branch ok" - confirm
        # that the 1 / 2 meanings above match that encoding.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds all submodules and connects them.

            Side effect: publishes alu_insn_i, br_insn_i, alu_oper_i and
            br_oper_i on self, for use by the test infrastructure.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # (FP ports are created but not connected to anything below)
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four units of CompUnitALUs plus the one of CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        aluissue = IssueUnitGroup(4)
        brissue = IssueUnitGroup(1)
        issueunit = IssueUnitArray([aluissue, brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" has n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: branches get a separate,
        # single-shadow matrix of their own ("bshadow").
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_intfus)
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (for testing)
        self.alu_insn_i = aluissue.insn_i # enabled by instruction decode
        self.br_insn_i = brissue.insn_i # enabled by instruction decode
        self.alu_oper_i = cua.oper_i
        self.br_oper_i = cub.oper_i

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy_o is the OR of all per-unit busy bits
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the instruction-order shadows are reset by the branch shadow's go_die
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # work out the current-activated busy unit (by recording the old one)
        # NOTE(review): fn_issue_prev is recorded here but not read anywhere
        # else in this method.
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # single-shadow "bshadow" matrix, and
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is "active" from the cycle it is issued (or once recorded
        # in bspec.active_i) until the branch unit's go_wr_i fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the unit actually being issued gets the branch shadow
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f is a 5-bit mask, i.e. it matches n_int_alus = 5
        # above; it would need updating if the number of units changes.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # direction output is the branch result plus one, so that zero
            # is left to mean "no branch result yet"
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m


    def __iter__(self):
        """ Yields whatever both register files yield, followed by this
            module's own signals.

            NOTE(review): reg_enable_i and busy_o are not included.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything __iter__ yields, as a list. """
        return list(self)
 553
 554 IADD = 0
 555 ISUB = 1
 556 IMUL = 2
 557 ISHF = 3
 558 IBGT = 4
 559 IBLT = 5
 560 IBEQ = 6
 561 IBNE = 7
 562
 563 class RegSim:
 564     def __init__(self, rwidth, nregs):
 565         self.rwidth = rwidth
 566         self.regs = [0] * nregs
 567
 568     def op(self, op, src1, src2, dest):
 569         maxbits = (1 << self.rwidth) - 1
 570         src1 = self.regs[src1] & maxbits
 571         src2 = self.regs[src2] & maxbits
 572         if op == IADD:
 573             val = src1 + src2
 574         elif op == ISUB:
 575             val = src1 - src2
 576         elif op == IMUL:
 577             val = src1 * src2
 578         elif op == ISHF:
 579             val = src1 >> (src2 & maxbits)
 580         elif op == IBGT:
 581             val = int(src1 > src2)
 582         elif op == IBLT:
 583             val = int(src1 < src2)
 584         elif op == IBEQ:
 585             val = int(src1 == src2)
 586         elif op == IBNE:
 587             val = int(src1 != src2)
 588         val &= maxbits
 589         self.setval(dest, val)
 590         return val
 591
 592     def setval(self, dest, val):
 593         print ("sim setval", dest, hex(val))
 594         self.regs[dest] = val
 595
 596     def dump(self, dut):
 597         for i, val in enumerate(self.regs):
 598             reg = yield dut.intregs.regs[i].reg
 599             okstr = "OK" if reg == val else "!ok"
 600             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 601
 602     def check(self, dut):
 603         for i, val in enumerate(self.regs):
 604             reg = yield dut.intregs.regs[i].reg
 605             if reg != val:
 606                 print("reg %d expected %x received %x\n" % (i, val, reg))
 607                 yield from self.dump(dut)
 608                 assert False
 609
 610 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 611     yield from disable_issue(dut)
 612     yield dut.int_dest_i.eq(dest)
 613     yield dut.int_src1_i.eq(src1)
 614     yield dut.int_src2_i.eq(src2)
 615     if (op & (0x3<<2)) != 0: # branch
 616         yield dut.br_insn_i.eq(1)
 617         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 618     else:
 619         yield dut.alu_insn_i.eq(1)
 620         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 621     yield dut.reg_enable_i.eq(1)
 622
 623     # these indicate that the instruction is to be made shadow-dependent on
 624     # (either) branch success or branch fail
 625     yield dut.branch_fail_i.eq(branch_fail)
 626     yield dut.branch_succ_i.eq(branch_success)
 627
 628
 629 def print_reg(dut, rnums):
 630     rs = []
 631     for rnum in rnums:
 632         reg = yield dut.intregs.regs[rnum].reg
 633         rs.append("%x" % reg)
 634     rnums = map(str, rnums)
 635     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 636
 637
 638 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 639     insts = []
 640     for i in range(n_ops):
 641         src1 = randint(1, dut.n_regs-1)
 642         src2 = randint(1, dut.n_regs-1)
 643         dest = randint(1, dut.n_regs-1)
 644         op = randint(0, max_opnums)
 645
 646         if shadowing:
 647             insts.append((src1, src2, dest, op, (0, 0)))
 648         else:
 649             insts.append((src1, src2, dest, op))
 650     return insts
 651
 652
 653 def wait_for_busy_clear(dut):
 654     while True:
 655         busy_o = yield dut.busy_o
 656         if not busy_o:
 657             break
 658         print ("busy",)
 659         yield
 660
 661 def disable_issue(dut):
 662     yield dut.alu_insn_i.eq(0)
 663     yield dut.br_insn_i.eq(0)
 664
 665
 666 def wait_for_issue(dut):
 667     while True:
 668         issue_o = yield dut.issue_o
 669         if issue_o:
 670             yield from disable_issue(dut)
 671             yield dut.reg_enable_i.eq(0)
 672             break
 673         #print ("busy",)
 674         #yield from print_reg(dut, [1,2,3])
 675         yield
 676     #yield from print_reg(dut, [1,2,3])
 677
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator for the branch-speculation experiment.

        Loads random values into the hardware registers and into the
        python model (alusim), issues one branch together with its
        "branch ok" and "branch fail" follow-on instructions, waits for
        the hardware to go idle, replays the same program on alusim and
        finally compares the two register files.

        Instruction format: (src1, src2, dest, op, (shadow_on, shadow_off)).
        For a branch (op >= 4), "dest" is instead a pair of instruction
        lists: (branch_ok, branch_fail).
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (note: this inner loop re-uses the outer loop's variable "i")
        for i in range(1, dut.n_regs):
            val = 31+i*3
            # the fixed pattern above is immediately replaced by a random one
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated program
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: fixed program of one branch with one instruction on
        # each of its two paths
        if True:
            insts = []
            #insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            #branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # untouched copy of the program, replayed on alusim further down
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        # NOTE: "instrs" and "insts" are the *same* list object, so the
        # pop(0) below shortens the queue that the while loop tests, and
        # the appends made for a branch land on the queue being popped.
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): the two skip conditions pair direction 1 with
            # shadow_on and direction 2 with shadow_off; check this against
            # the meaning of branch_direction_o in Scoreboard.
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            # branch already resolved: issue without any shadow request
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                # a branch has no real destination: src2 is passed instead
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)
            yield
            yield from wait_for_issue(dut)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the python model, following only the side
        # of each branch that the model itself computes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # note: for a branch this also stores the 0/1 result in "dest"
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
 792
 793
 794 def scoreboard_sim(dut, alusim):
 795
 796     seed(0)
 797
 798     for i in range(20):
 799
 800         # set random values in the registers
 801         for i in range(1, dut.n_regs):
 802             val = randint(0, (1<<alusim.rwidth)-1)
 803             #val = 31+i*3
 804             val = i
 805             yield dut.intregs.regs[i].reg.eq(val)
 806             alusim.setval(i, val)
 807
 808         # create some instructions (some random, some regression tests)
 809         instrs = []
 810         if True:
 811             instrs = create_random_ops(dut, 10, True, 4)
 812
 813         if False:
 814             instrs.append((2, 3, 3, 0, (0, 0)))
 815             instrs.append((5, 3, 3, 1, (0, 0)))
 816             instrs.append((3, 5, 5, 2, (0, 0)))
 817             instrs.append((5, 3, 3, 3, (0, 0)))
 818             instrs.append((3, 5, 5, 0, (0, 0)))
 819
 820         if False:
 821             instrs.append((5, 6, 2, 1))
 822             instrs.append((2, 2, 4, 0))
 823             #instrs.append((2, 2, 3, 1))
 824
 825         if False:
 826             instrs.append((2, 1, 2, 3))
 827
 828         if False:
 829             instrs.append((2, 6, 2, 1))
 830             instrs.append((2, 1, 2, 0))
 831
 832         if False:
 833             instrs.append((1, 2, 7, 2))
 834             instrs.append((7, 1, 5, 0))
 835             instrs.append((4, 4, 1, 1))
 836
 837         if False:
 838             instrs.append((5, 6, 2, 2))
 839             instrs.append((1, 1, 4, 1))
 840             instrs.append((6, 5, 3, 0))
 841
 842         if False:
 843             # Write-after-Write Hazard
 844             instrs.append( (3, 6, 7, 2) )
 845             instrs.append( (4, 4, 7, 1) )
 846
 847         if False:
 848             # self-read/write-after-write followed by Read-after-Write
 849             instrs.append((1, 1, 1, 1))
 850             instrs.append((1, 5, 3, 0))
 851
 852         if False:
 853             # Read-after-Write followed by self-read-after-write
 854             instrs.append((5, 6, 1, 2))
 855             instrs.append((1, 1, 1, 1))
 856
 857         if False:
 858             # self-read-write sandwich
 859             instrs.append((5, 6, 1, 2))
 860             instrs.append((1, 1, 1, 1))
 861             instrs.append((1, 5, 3, 0))
 862
 863         if False:
 864             # very weird failure
 865             instrs.append( (5, 2, 5, 2) )
 866             instrs.append( (2, 6, 3, 0) )
 867             instrs.append( (4, 2, 2, 1) )
 868
 869         if False:
 870             v1 = 4
 871             yield dut.intregs.regs[5].reg.eq(v1)
 872             alusim.setval(5, v1)
 873             yield dut.intregs.regs[3].reg.eq(5)
 874             alusim.setval(3, 5)
 875             instrs.append((5, 3, 3, 4, (0, 0)))
 876             instrs.append((4, 2, 1, 2, (0, 1)))
 877
 878         if False:
 879             v1 = 6
 880             yield dut.intregs.regs[5].reg.eq(v1)
 881             alusim.setval(5, v1)
 882             yield dut.intregs.regs[3].reg.eq(5)
 883             alusim.setval(3, 5)
 884             instrs.append((5, 3, 3, 4, (0, 0)))
 885             instrs.append((4, 2, 1, 2, (1, 0)))
 886
 887         if False:
 888             instrs.append( (4, 3, 5, 1, (0, 0)) )
 889             instrs.append( (5, 2, 3, 1, (0, 0)) )
 890             instrs.append( (7, 1, 5, 2, (0, 0)) )
 891             instrs.append( (5, 6, 6, 4, (0, 0)) )
 892             instrs.append( (7, 5, 2, 2, (1, 0)) )
 893             instrs.append( (1, 7, 5, 0, (0, 1)) )
 894             instrs.append( (1, 6, 1, 2, (1, 0)) )
 895             instrs.append( (1, 6, 7, 3, (0, 0)) )
 896             instrs.append( (6, 7, 7, 0, (0, 0)) )
 897
 898         # issue instruction(s), wait for issue to be free before proceeding
 899         for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):
 900
 901             print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
 902             alusim.op(op, src1, src2, dest)
 903             yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)
 904             yield
 905             yield from wait_for_issue(dut)
 906
 907         # wait for all instructions to stop before checking
 908         yield
 909         yield from wait_for_busy_clear(dut)
 910
 911         # check status
 912         yield from alusim.check(dut)
 913         yield from alusim.dump(dut)
 914
 915
 916 def test_scoreboard():
 917     dut = Scoreboard(16, 8)
 918     alusim = RegSim(16, 8)
 919     vl = rtlil.convert(dut, ports=dut.ports())
 920     with open("test_scoreboard6600.il", "w") as f:
 921         f.write(vl)
 922
 923     run_simulation(dut, scoreboard_sim(dut, alusim),
 924                         vcd_name='test_scoreboard6600.vcd')
 925
 926     #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
 927     #                    vcd_name='test_scoreboard6600.vcd')
 928
 929
 930 if __name__ == '__main__':
 931     test_scoreboard()