src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12
  13 from compalu import ComputationUnitNoDelay
  14
  15 from alu_hier import ALU, BranchALU
  16 from nmutil.latch import SRLatch
  17
  18 from random import randint, seed
  19 from copy import deepcopy
  20
  21
  22 class CompUnitsBase(Elaboratable):
  23     """ Computation Unit Base class.
  24
  25         Amazingly, this class works recursively.  It's supposed to just
  26         look after some ALUs (that can handle the same operations),
  27         grouping them together, however it turns out that the same code
  28         can also group *groups* of Computation Units together as well.
  29
  30         Basically it was intended just to concatenate the ALU's issue,
  31         go_rd etc. signals together, which start out as bits and become
  32         sequences.  Turns out that the same trick works just as well
  33         on Computation Units!
  34
  35         So this class may be used recursively to present a top-level
  36         sequential concatenation of all the signals in and out of
  37         ALUs, whilst at the same time making it convenient to group
  38         ALUs together.
  39
  40         At the lower level, the intent is that groups of (identical)
  41         ALUs may be passed the same operation.  Even beyond that,
  42         the intent is that that group of (identical) ALUs actually
  43         share the *same pipeline* and as such become a "Concurrent
  44         Computation Unit" as defined by Mitch Alsup (see section
  45         11.4.9.3)
  46     """
  47     def __init__(self, rwid, units):
  48         """ Inputs:
  49
  50             * :rwid:   bit width of register file(s) - both FP and INT
  51             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  52         """
  53         self.units = units
  54         self.rwid = rwid
  55         self.rwid = rwid
  56         if units and isinstance(units[0], CompUnitsBase):
  57             self.n_units = 0
  58             for u in self.units:
  59                 self.n_units += u.n_units
  60         else:
  61             self.n_units = len(units)
  62
  63         n_units = self.n_units
  64
  65         # inputs
  66         self.issue_i = Signal(n_units, reset_less=True)
  67         self.go_rd_i = Signal(n_units, reset_less=True)
  68         self.go_wr_i = Signal(n_units, reset_less=True)
  69         self.shadown_i = Signal(n_units, reset_less=True)
  70         self.go_die_i = Signal(n_units, reset_less=True)
  71
  72         # outputs
  73         self.busy_o = Signal(n_units, reset_less=True)
  74         self.rd_rel_o = Signal(n_units, reset_less=True)
  75         self.req_rel_o = Signal(n_units, reset_less=True)
  76
  77         # in/out register data (note: not register#, actual data)
  78         self.data_o = Signal(rwid, reset_less=True)
  79         self.src1_i = Signal(rwid, reset_less=True)
  80         self.src2_i = Signal(rwid, reset_less=True)
  81         # input operand
  82
  83     def elaborate(self, platform):
  84         m = Module()
  85         comb = m.d.comb
  86
  87         for i, alu in enumerate(self.units):
  88             setattr(m.submodules, "comp%d" % i, alu)
  89
  90         go_rd_l = []
  91         go_wr_l = []
  92         issue_l = []
  93         busy_l = []
  94         req_rel_l = []
  95         rd_rel_l = []
  96         shadow_l = []
  97         godie_l = []
  98         for alu in self.units:
  99             req_rel_l.append(alu.req_rel_o)
 100             rd_rel_l.append(alu.rd_rel_o)
 101             shadow_l.append(alu.shadown_i)
 102             godie_l.append(alu.go_die_i)
 103             go_wr_l.append(alu.go_wr_i)
 104             go_rd_l.append(alu.go_rd_i)
 105             issue_l.append(alu.issue_i)
 106             busy_l.append(alu.busy_o)
 107         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 108         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 109         comb += self.busy_o.eq(Cat(*busy_l))
 110         comb += Cat(*godie_l).eq(self.go_die_i)
 111         comb += Cat(*shadow_l).eq(self.shadown_i)
 112         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 113         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 114         comb += Cat(*issue_l).eq(self.issue_i)
 115
 116         # connect data register input/output
 117
 118         # merge (OR) all integer FU / ALU outputs to a single value
 119         # bit of a hack: treereduce needs a list with an item named "data_o"
 120         if self.units:
 121             data_o = treereduce(self.units)
 122             comb += self.data_o.eq(data_o)
 123
 124         for i, alu in enumerate(self.units):
 125             comb += alu.src1_i.eq(self.src1_i)
 126             comb += alu.src2_i.eq(self.src2_i)
 127
 128         return m
 129
 130
 131 class CompUnitALUs(CompUnitsBase):
 132
 133     def __init__(self, rwid, opwid):
 134         """ Inputs:
 135
 136             * :rwid:   bit width of register file(s) - both FP and INT
 137             * :opwid:  operand bit width
 138         """
 139         self.opwid = opwid
 140
 141         # inputs
 142         self.oper_i = Signal(opwid, reset_less=True)
 143
 144         # Int ALUs
 145         add = ALU(rwid)
 146         sub = ALU(rwid)
 147         mul = ALU(rwid)
 148         shf = ALU(rwid)
 149
 150         units = []
 151         for alu in [add, sub, mul, shf]:
 152             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 153
 154         CompUnitsBase.__init__(self, rwid, units)
 155
 156     def elaborate(self, platform):
 157         m = CompUnitsBase.elaborate(self, platform)
 158         comb = m.d.comb
 159
 160         # hand the same operation to all units
 161         for alu in self.units:
 162             comb += alu.oper_i.eq(self.oper_i)
 163         #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
 164         #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
 165         #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
 166         #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
 167
 168         return m
 169
 170
 171 class CompUnitBR(CompUnitsBase):
 172
 173     def __init__(self, rwid, opwid):
 174         """ Inputs:
 175
 176             * :rwid:   bit width of register file(s) - both FP and INT
 177             * :opwid:  operand bit width
 178
 179             Note: bgt unit is returned so that a shadow unit can be created
 180             for it
 181         """
 182         self.opwid = opwid
 183
 184         # inputs
 185         self.oper_i = Signal(opwid, reset_less=True)
 186
 187         # Branch ALU and CU
 188         self.bgt = BranchALU(rwid)
 189         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 190         CompUnitsBase.__init__(self, rwid, [self.br1])
 191
 192     def elaborate(self, platform):
 193         m = CompUnitsBase.elaborate(self, platform)
 194         comb = m.d.comb
 195
 196         # hand the same operation to all units
 197         for alu in self.units:
 198             comb += alu.oper_i.eq(self.oper_i)
 199         #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
 200
 201         return m
 202
 203
 204 class FunctionUnits(Elaboratable):
 205
 206     def __init__(self, n_regs, n_int_alus):
 207         self.n_regs = n_regs
 208         self.n_int_alus = n_int_alus
 209
 210         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 211         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 212         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 213
 214         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 215         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 216
 217         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 218         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 219         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 220
 221         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 222         self.readable_o = Signal(n_int_alus, reset_less=True)
 223         self.writable_o = Signal(n_int_alus, reset_less=True)
 224
 225         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 226         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 227         self.go_die_i = Signal(n_int_alus, reset_less=True)
 228         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 229         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 230
 231         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 232
 233     def elaborate(self, platform):
 234         m = Module()
 235         comb = m.d.comb
 236         sync = m.d.sync
 237
 238         n_intfus = self.n_int_alus
 239
 240         # Integer FU-FU Dep Matrix
 241         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 242         m.submodules.intfudeps = intfudeps
 243         # Integer FU-Reg Dep Matrix
 244         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 245         m.submodules.intregdeps = intregdeps
 246
 247         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 248         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 249
 250         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 251         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 252
 253         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 254         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 255         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 256
 257         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 258         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 259         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 260         comb += intfudeps.go_die_i.eq(self.go_die_i)
 261         comb += self.readable_o.eq(intfudeps.readable_o)
 262         comb += self.writable_o.eq(intfudeps.writable_o)
 263
 264         # Connect function issue / arrays, and dest/src1/src2
 265         comb += intregdeps.dest_i.eq(self.dest_i)
 266         comb += intregdeps.src1_i.eq(self.src1_i)
 267         comb += intregdeps.src2_i.eq(self.src2_i)
 268
 269         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 270         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 271         comb += intregdeps.go_die_i.eq(self.go_die_i)
 272         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 273
 274         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 275         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 276         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 277
 278         return m
 279
 280
 281 class Scoreboard(Elaboratable):
 282     def __init__(self, rwid, n_regs):
 283         """ Inputs:
 284
 285             * :rwid:   bit width of register file(s) - both FP and INT
 286             * :n_regs: depth of register file(s) - number of FP and INT regs
 287         """
 288         self.rwid = rwid
 289         self.n_regs = n_regs
 290
 291         # Register Files
 292         self.intregs = RegFileArray(rwid, n_regs)
 293         self.fpregs = RegFileArray(rwid, n_regs)
 294
 295         # inputs
 296         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 297         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 298         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 299         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 300
 301         # outputs
 302         self.issue_o = Signal(reset_less=True) # instruction was accepted
 303         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 304
 305         # for branch speculation experiment.  branch_direction = 0 if
 306         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 307         # branch_succ and branch_fail are requests to have the current
 308         # instruction be dependent on the branch unit "shadow" capability.
 309         self.branch_succ_i = Signal(reset_less=True)
 310         self.branch_fail_i = Signal(reset_less=True)
 311         self.branch_direction_o = Signal(2, reset_less=True)
 312
 313     def elaborate(self, platform):
 314         m = Module()
 315         comb = m.d.comb
 316         sync = m.d.sync
 317
 318         m.submodules.intregs = self.intregs
 319         m.submodules.fpregs = self.fpregs
 320
 321         # register ports
 322         int_dest = self.intregs.write_port("dest")
 323         int_src1 = self.intregs.read_port("src1")
 324         int_src2 = self.intregs.read_port("src2")
 325
 326         fp_dest = self.fpregs.write_port("dest")
 327         fp_src1 = self.fpregs.read_port("src1")
 328         fp_src2 = self.fpregs.read_port("src2")
 329
 330         # Int ALUs and Comp Units
 331         n_int_alus = 5
 332         cua = CompUnitALUs(self.rwid, 2)
 333         cub = CompUnitBR(self.rwid, 2)
 334         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
 335         bgt = cub.bgt # get at the branch computation unit
 336         br1 = cub.br1
 337
 338         # Int FUs
 339         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 340
 341         # Count of number of FUs
 342         n_intfus = n_int_alus
 343         n_fp_fus = 0 # for now
 344
 345         # Integer Priority Picker 1: Adder + Subtractor
 346         intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
 347         m.submodules.intpick1 = intpick1
 348
 349         # INT/FP Issue Unit
 350         regdecode = RegDecode(self.n_regs)
 351         m.submodules.regdecode = regdecode
 352         aluissue = IssueUnitGroup(4)
 353         brissue = IssueUnitGroup(1)
 354         issueunit = IssueUnitArray([aluissue, brissue])
 355         m.submodules.issueunit = issueunit
 356
 357         # Shadow Matrix.  currently n_intfus shadows, to be used for
 358         # write-after-write hazards.  NOTE: there is one extra for branches,
 359         # so the shadow width is increased by 1
 360         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 361         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 362
 363         # record previous instruction to cast shadow on current instruction
 364         fn_issue_prev = Signal(n_intfus)
 365         prev_shadow = Signal(n_intfus)
 366
 367         # Branch Speculation recorder.  tracks the success/fail state as
 368         # each instruction is issued, so that when the branch occurs the
 369         # allow/cancel can be issued as appropriate.
 370         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 371
 372         #---------
 373         # ok start wiring things together...
 374         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 375         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 376         #---------
 377
 378         #---------
 379         # Issue Unit is where it starts.  set up some in/outs for this module
 380         #---------
 381         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 382                      regdecode.src1_i.eq(self.int_src1_i),
 383                      regdecode.src2_i.eq(self.int_src2_i),
 384                      regdecode.enable_i.eq(self.reg_enable_i),
 385                      self.issue_o.eq(issueunit.issue_o)
 386                     ]
 387
 388         # take these to outside (for testing)
 389         self.aluissue = aluissue
 390         self.brissue = brissue
 391         self.alu_oper_i = cua.oper_i
 392         self.br_oper_i = cub.oper_i
 393
 394         # TODO: issueunit.f (FP)
 395
 396         # and int function issue / busy arrays, and dest/src1/src2
 397         comb += intfus.dest_i.eq(regdecode.dest_o)
 398         comb += intfus.src1_i.eq(regdecode.src1_o)
 399         comb += intfus.src2_i.eq(regdecode.src2_o)
 400
 401         fn_issue_o = issueunit.fn_issue_o
 402
 403         comb += intfus.fn_issue_i.eq(fn_issue_o)
 404         comb += issueunit.busy_i.eq(cu.busy_o)
 405         comb += self.busy_o.eq(cu.busy_o.bool())
 406
 407         #---------
 408         # merge shadow matrices outputs
 409         #---------
 410
 411         # these are explained in ShadowMatrix docstring, and are to be
 412         # connected to the FUReg and FUFU Matrices, to get them to reset
 413         anydie = Signal(n_intfus, reset_less=True)
 414         allshadown = Signal(n_intfus, reset_less=True)
 415         shreset = Signal(n_intfus, reset_less=True)
 416         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 417         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 418         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 419
 420         #---------
 421         # connect fu-fu matrix
 422         #---------
 423
 424         # Group Picker... done manually for now.
 425         go_rd_o = intpick1.go_rd_o
 426         go_wr_o = intpick1.go_wr_o
 427         go_rd_i = intfus.go_rd_i
 428         go_wr_i = intfus.go_wr_i
 429         go_die_i = intfus.go_die_i
 430         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 431         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 432         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 433         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 434
 435         # Connect Picker
 436         #---------
 437         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 438         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 439         int_rd_o = intfus.readable_o
 440         int_wr_o = intfus.writable_o
 441         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 442         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 443
 444         #---------
 445         # Shadow Matrix
 446         #---------
 447
 448         comb += shadows.issue_i.eq(fn_issue_o)
 449         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 450         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 451         #---------
 452         # NOTE; this setup is for the instruction order preservation...
 453
 454         # connect shadows / go_dies to Computation Units
 455         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 456         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 457
 458         # ok connect first n_int_fu shadows to busy lines, to create an
 459         # instruction-order linked-list-like arrangement, using a bit-matrix
 460         # (instead of e.g. a ring buffer).
 461         # XXX TODO
 462
 463         # when written, the shadow can be cancelled (and was good)
 464         for i in range(n_intfus):
 465             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 466
 467         # work out the current-activated busy unit (by recording the old one)
 468         with m.If(fn_issue_o): # only update prev bit if instruction issued
 469             sync += fn_issue_prev.eq(fn_issue_o)
 470
 471         # *previous* instruction shadows *current* instruction, and, obviously,
 472         # if the previous is completed (!busy) don't cast the shadow!
 473         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 474         for i in range(n_intfus):
 475             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 476
 477         #---------
 478         # ... and this is for branch speculation.  it uses the extra bit
 479         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 480         # only needs to set shadow_i, s_fail_i and s_good_i
 481
 482         # issue captures shadow_i (if enabled)
 483         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 484
 485         bactive = Signal(reset_less=True)
 486         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 487
 488         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 489         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 490             comb += bshadow.issue_i.eq(fn_issue_o)
 491             for i in range(n_intfus):
 492                 with m.If(fn_issue_o & (Const(1<<i))):
 493                     comb += bshadow.shadow_i[i][0].eq(1)
 494
 495         # finally, we need an indicator to the test infrastructure as to
 496         # whether the branch succeeded or failed, plus, link up to the
 497         # "recorder" of whether the instruction was under shadow or not
 498
 499         with m.If(br1.issue_i):
 500             sync += bspec.active_i.eq(1)
 501         with m.If(self.branch_succ_i):
 502             comb += bspec.good_i.eq(fn_issue_o & 0x1f)
 503         with m.If(self.branch_fail_i):
 504             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)
 505
 506         # branch is active (TODO: a better signal: this is over-using the
 507         # go_write signal - actually the branch should not be "writing")
 508         with m.If(br1.go_wr_i):
 509             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 510             sync += bspec.active_i.eq(0)
 511             comb += bspec.br_i.eq(1)
 512             # branch occurs if data == 1, failed if data == 0
 513             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 514             for i in range(n_intfus):
 515                 # *expected* direction of the branch matched against *actual*
 516                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 517                 # ... or it didn't
 518                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 519
 520         #---------
 521         # Connect Register File(s)
 522         #---------
 523         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 524         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 525         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 526
 527         # connect ALUs to regfule
 528         comb += int_dest.data_i.eq(cu.data_o)
 529         comb += cu.src1_i.eq(int_src1.data_o)
 530         comb += cu.src2_i.eq(int_src2.data_o)
 531
 532         # connect ALU Computation Units
 533         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 534         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 535         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 536
 537         return m
 538
 539
 540     def __iter__(self):
 541         yield from self.intregs
 542         yield from self.fpregs
 543         yield self.int_dest_i
 544         yield self.int_src1_i
 545         yield self.int_src2_i
 546         yield self.issue_o
 547         yield self.branch_succ_i
 548         yield self.branch_fail_i
 549         yield self.branch_direction_o
 550
 551     def ports(self):
 552         return list(self)
 553
 554 IADD = 0
 555 ISUB = 1
 556 IMUL = 2
 557 ISHF = 3
 558 IBGT = 4
 559 IBLT = 5
 560 IBEQ = 6
 561 IBNE = 7
 562
 563 class RegSim:
 564     def __init__(self, rwidth, nregs):
 565         self.rwidth = rwidth
 566         self.regs = [0] * nregs
 567
 568     def op(self, op, src1, src2, dest):
 569         maxbits = (1 << self.rwidth) - 1
 570         src1 = self.regs[src1] & maxbits
 571         src2 = self.regs[src2] & maxbits
 572         if op == IADD:
 573             val = src1 + src2
 574         elif op == ISUB:
 575             val = src1 - src2
 576         elif op == IMUL:
 577             val = src1 * src2
 578         elif op == ISHF:
 579             val = src1 >> (src2 & maxbits)
 580         elif op == IBGT:
 581             val = int(src1 > src2)
 582         elif op == IBLT:
 583             val = int(src1 < src2)
 584         elif op == IBEQ:
 585             val = int(src1 == src2)
 586         elif op == IBNE:
 587             val = int(src1 != src2)
 588         val &= maxbits
 589         self.setval(dest, val)
 590         return val
 591
 592     def setval(self, dest, val):
 593         print ("sim setval", dest, hex(val))
 594         self.regs[dest] = val
 595
 596     def dump(self, dut):
 597         for i, val in enumerate(self.regs):
 598             reg = yield dut.intregs.regs[i].reg
 599             okstr = "OK" if reg == val else "!ok"
 600             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 601
 602     def check(self, dut):
 603         for i, val in enumerate(self.regs):
 604             reg = yield dut.intregs.regs[i].reg
 605             if reg != val:
 606                 print("reg %d expected %x received %x\n" % (i, val, reg))
 607                 yield from self.dump(dut)
 608                 assert False
 609
 610 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 611     yield from disable_issue(dut)
 612     yield dut.int_dest_i.eq(dest)
 613     yield dut.int_src1_i.eq(src1)
 614     yield dut.int_src2_i.eq(src2)
 615     if (op & (0x3<<2)) != 0: # branch
 616         yield dut.brissue.insn_i.eq(1)
 617         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 618         dut_issue = dut.brissue
 619     else:
 620         yield dut.aluissue.insn_i.eq(1)
 621         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 622         dut_issue = dut.aluissue
 623     yield dut.reg_enable_i.eq(1)
 624
 625     # these indicate that the instruction is to be made shadow-dependent on
 626     # (either) branch success or branch fail
 627     yield dut.branch_fail_i.eq(branch_fail)
 628     yield dut.branch_succ_i.eq(branch_success)
 629
 630     yield
 631     yield from wait_for_issue(dut, dut_issue)
 632
 633
 634 def print_reg(dut, rnums):
 635     rs = []
 636     for rnum in rnums:
 637         reg = yield dut.intregs.regs[rnum].reg
 638         rs.append("%x" % reg)
 639     rnums = map(str, rnums)
 640     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 641
 642
 643 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 644     insts = []
 645     for i in range(n_ops):
 646         src1 = randint(1, dut.n_regs-1)
 647         src2 = randint(1, dut.n_regs-1)
 648         dest = randint(1, dut.n_regs-1)
 649         op = randint(0, max_opnums)
 650
 651         if shadowing:
 652             insts.append((src1, src2, dest, op, (0, 0)))
 653         else:
 654             insts.append((src1, src2, dest, op))
 655     return insts
 656
 657
 658 def wait_for_busy_clear(dut):
 659     while True:
 660         busy_o = yield dut.busy_o
 661         if not busy_o:
 662             break
 663         print ("busy",)
 664         yield
 665
 666 def disable_issue(dut):
 667     yield dut.aluissue.insn_i.eq(0)
 668     yield dut.brissue.insn_i.eq(0)
 669
 670
 671 def wait_for_issue(dut, dut_issue):
 672     while True:
 673         issue_o = yield dut_issue.fn_issue_o
 674         if issue_o:
 675             yield from disable_issue(dut)
 676             yield dut.reg_enable_i.eq(0)
 677             break
 678         print ("busy",)
 679         #yield from print_reg(dut, [1,2,3])
 680         yield
 681     #yield from print_reg(dut, [1,2,3])
 682
 683 def scoreboard_branch_sim(dut, alusim):
 684
 685     iseed = 3
 686
 687     for i in range(1):
 688
 689         print ("rseed", iseed)
 690         seed(iseed)
 691         iseed += 1
 692
 693         yield dut.branch_direction_o.eq(0)
 694
 695         # set random values in the registers
 696         for i in range(1, dut.n_regs):
 697             val = 31+i*3
 698             val = randint(0, (1<<alusim.rwidth)-1)
 699             yield dut.intregs.regs[i].reg.eq(val)
 700             alusim.setval(i, val)
 701
 702         if False:
 703             # create some instructions: branches create a tree
 704             insts = create_random_ops(dut, 1, True, 1)
 705             #insts.append((6, 6, 1, 2, (0, 0)))
 706             #insts.append((4, 3, 3, 0, (0, 0)))
 707
 708             src1 = randint(1, dut.n_regs-1)
 709             src2 = randint(1, dut.n_regs-1)
 710             #op = randint(4, 7)
 711             op = 4 # only BGT at the moment
 712
 713             branch_ok = create_random_ops(dut, 1, True, 1)
 714             branch_fail = create_random_ops(dut, 1, True, 1)
 715
 716             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 717
 718         if True:
 719             insts = []
 720             insts.append( (3, 5, 2, 0, (0, 0)) )
 721             branch_ok = []
 722             branch_fail = []
 723             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 724             branch_ok.append( None )
 725             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 726             #branch_fail.append( None )
 727             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 728
 729         siminsts = deepcopy(insts)
 730
 731         # issue instruction(s)
 732         i = -1
 733         instrs = insts
 734         branch_direction = 0
 735         while instrs:
 736             yield
 737             yield
 738             i += 1
 739             branch_direction = yield dut.branch_direction_o # way branch went
 740             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 741             if branch_direction == 1 and shadow_on:
 742                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 743                 continue # branch was "success" and this is a "failed"... skip
 744             if branch_direction == 2 and shadow_off:
 745                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 746                 continue # branch was "fail" and this is a "success"... skip
 747             if branch_direction != 0:
 748                 shadow_on = 0
 749                 shadow_off = 0
 750             is_branch = op >= 4
 751             if is_branch:
 752                 branch_ok, branch_fail = dest
 753                 dest = src2
 754                 # ok zip up the branch success / fail instructions and
 755                 # drop them into the queue, one marked "to have branch success"
 756                 # the other to be marked shadow branch "fail".
 757                 # one out of each of these will be cancelled
 758                 for ok, fl in zip(branch_ok, branch_fail):
 759                     if ok:
 760                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
 761                     if fl:
 762                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
 763             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
 764                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 765             yield from int_instr(dut, op, src1, src2, dest,
 766                                  shadow_on, shadow_off)
 767
 768         # wait for all instructions to stop before checking
 769         yield
 770         yield from wait_for_busy_clear(dut)
 771
 772         i = -1
 773         while siminsts:
 774             instr = siminsts.pop(0)
 775             if instr is None:
 776                 continue
 777             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
 778             i += 1
 779             is_branch = op >= 4
 780             if is_branch:
 781                 branch_ok, branch_fail = dest
 782                 dest = src2
 783             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
 784                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 785             branch_res = alusim.op(op, src1, src2, dest)
 786             if is_branch:
 787                 if branch_res:
 788                     siminsts += branch_ok
 789                 else:
 790                     siminsts += branch_fail
 791
 792         # check status
 793         yield from alusim.check(dut)
 794         yield from alusim.dump(dut)
 795
 796
 797 def scoreboard_sim(dut, alusim):
 798
 799     seed(0)
 800
 801     for i in range(20):
 802
 803         # set random values in the registers
 804         for i in range(1, dut.n_regs):
 805             val = randint(0, (1<<alusim.rwidth)-1)
 806             #val = 31+i*3
 807             #val = i
 808             yield dut.intregs.regs[i].reg.eq(val)
 809             alusim.setval(i, val)
 810
 811         # create some instructions (some random, some regression tests)
 812         instrs = []
 813         if True:
 814             instrs = create_random_ops(dut, 10, True, 4)
 815
 816         if False:
 817             instrs.append( (7, 3, 2, 4, (0, 0)) )
 818             instrs.append( (7, 6, 6, 2, (0, 0)) )
 819             instrs.append( (1, 7, 2, 2, (0, 0)) )
 820
 821
 822         if False:
 823             instrs.append((2, 3, 3, 0, (0, 0)))
 824             instrs.append((5, 3, 3, 1, (0, 0)))
 825             instrs.append((3, 5, 5, 2, (0, 0)))
 826             instrs.append((5, 3, 3, 3, (0, 0)))
 827             instrs.append((3, 5, 5, 0, (0, 0)))
 828
 829         if False:
 830             instrs.append((5, 6, 2, 1))
 831             instrs.append((2, 2, 4, 0))
 832             #instrs.append((2, 2, 3, 1))
 833
 834         if False:
 835             instrs.append((2, 1, 2, 3))
 836
 837         if False:
 838             instrs.append((2, 6, 2, 1))
 839             instrs.append((2, 1, 2, 0))
 840
 841         if False:
 842             instrs.append((1, 2, 7, 2))
 843             instrs.append((7, 1, 5, 0))
 844             instrs.append((4, 4, 1, 1))
 845
 846         if False:
 847             instrs.append((5, 6, 2, 2))
 848             instrs.append((1, 1, 4, 1))
 849             instrs.append((6, 5, 3, 0))
 850
 851         if False:
 852             # Write-after-Write Hazard
 853             instrs.append( (3, 6, 7, 2) )
 854             instrs.append( (4, 4, 7, 1) )
 855
 856         if False:
 857             # self-read/write-after-write followed by Read-after-Write
 858             instrs.append((1, 1, 1, 1))
 859             instrs.append((1, 5, 3, 0))
 860
 861         if False:
 862             # Read-after-Write followed by self-read-after-write
 863             instrs.append((5, 6, 1, 2))
 864             instrs.append((1, 1, 1, 1))
 865
 866         if False:
 867             # self-read-write sandwich
 868             instrs.append((5, 6, 1, 2))
 869             instrs.append((1, 1, 1, 1))
 870             instrs.append((1, 5, 3, 0))
 871
 872         if False:
 873             # very weird failure
 874             instrs.append( (5, 2, 5, 2) )
 875             instrs.append( (2, 6, 3, 0) )
 876             instrs.append( (4, 2, 2, 1) )
 877
 878         if False:
 879             v1 = 4
 880             yield dut.intregs.regs[5].reg.eq(v1)
 881             alusim.setval(5, v1)
 882             yield dut.intregs.regs[3].reg.eq(5)
 883             alusim.setval(3, 5)
 884             instrs.append((5, 3, 3, 4, (0, 0)))
 885             instrs.append((4, 2, 1, 2, (0, 1)))
 886
 887         if False:
 888             v1 = 6
 889             yield dut.intregs.regs[5].reg.eq(v1)
 890             alusim.setval(5, v1)
 891             yield dut.intregs.regs[3].reg.eq(5)
 892             alusim.setval(3, 5)
 893             instrs.append((5, 3, 3, 4, (0, 0)))
 894             instrs.append((4, 2, 1, 2, (1, 0)))
 895
 896         if False:
 897             instrs.append( (4, 3, 5, 1, (0, 0)) )
 898             instrs.append( (5, 2, 3, 1, (0, 0)) )
 899             instrs.append( (7, 1, 5, 2, (0, 0)) )
 900             instrs.append( (5, 6, 6, 4, (0, 0)) )
 901             instrs.append( (7, 5, 2, 2, (1, 0)) )
 902             instrs.append( (1, 7, 5, 0, (0, 1)) )
 903             instrs.append( (1, 6, 1, 2, (1, 0)) )
 904             instrs.append( (1, 6, 7, 3, (0, 0)) )
 905             instrs.append( (6, 7, 7, 0, (0, 0)) )
 906
 907         # issue instruction(s), wait for issue to be free before proceeding
 908         for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):
 909
 910             print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
 911             alusim.op(op, src1, src2, dest)
 912             yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)
 913
 914         # wait for all instructions to stop before checking
 915         yield
 916         yield from wait_for_busy_clear(dut)
 917
 918         # check status
 919         yield from alusim.check(dut)
 920         yield from alusim.dump(dut)
 921
 922
 923 def test_scoreboard():
 924     dut = Scoreboard(16, 8)
 925     alusim = RegSim(16, 8)
 926     vl = rtlil.convert(dut, ports=dut.ports())
 927     with open("test_scoreboard6600.il", "w") as f:
 928         f.write(vl)
 929
 930     run_simulation(dut, scoreboard_sim(dut, alusim),
 931                         vcd_name='test_scoreboard6600.vcd')
 932
 933     #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
 934     #                    vcd_name='test_scoreboard6600.vcd')
 935
 936
 937 if __name__ == '__main__':
 938     test_scoreboard()