src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12
  13 from compalu import ComputationUnitNoDelay
  14
  15 from alu_hier import ALU, BranchALU
  16 from nmutil.latch import SRLatch
  17
  18 from random import randint, seed
  19 from copy import deepcopy
  20
  21
  22 class CompUnitsBase(Elaboratable):
  23     """ Computation Unit Base class.
  24
  25         Amazingly, this class works recursively.  It's supposed to just
  26         look after some ALUs (that can handle the same operations),
  27         grouping them together, however it turns out that the same code
  28         can also group *groups* of Computation Units together as well.
  29     """
  30     def __init__(self, rwid, units):
  31         """ Inputs:
  32
  33             * :rwid:   bit width of register file(s) - both FP and INT
  34             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  35         """
  36         self.units = units
  37         self.rwid = rwid
  38         if units and isinstance(units[0], CompUnitsBase):
  39             self.n_units = 0
  40             for u in self.units:
  41                 self.n_units += u.n_units
  42         else:
  43             self.n_units = len(units)
  44
  45         n_units = self.n_units
  46
  47         # inputs
  48         self.issue_i = Signal(n_units, reset_less=True)
  49         self.go_rd_i = Signal(n_units, reset_less=True)
  50         self.go_wr_i = Signal(n_units, reset_less=True)
  51         self.shadown_i = Signal(n_units, reset_less=True)
  52         self.go_die_i = Signal(n_units, reset_less=True)
  53
  54         # outputs
  55         self.busy_o = Signal(n_units, reset_less=True)
  56         self.rd_rel_o = Signal(n_units, reset_less=True)
  57         self.req_rel_o = Signal(n_units, reset_less=True)
  58
  59         # in/out register data (note: not register#, actual data)
  60         self.data_o = Signal(rwid, reset_less=True)
  61         self.src1_i = Signal(rwid, reset_less=True)
  62         self.src2_i = Signal(rwid, reset_less=True)
  63
  64     def elaborate(self, platform):
  65         m = Module()
  66         comb = m.d.comb
  67
  68         for i, alu in enumerate(self.units):
  69             print ("elaborate comp%d" % i, self, alu)
  70             setattr(m.submodules, "comp%d" % i, alu)
  71
  72         go_rd_l = []
  73         go_wr_l = []
  74         issue_l = []
  75         busy_l = []
  76         req_rel_l = []
  77         rd_rel_l = []
  78         shadow_l = []
  79         godie_l = []
  80         for alu in self.units:
  81             req_rel_l.append(alu.req_rel_o)
  82             rd_rel_l.append(alu.rd_rel_o)
  83             shadow_l.append(alu.shadown_i)
  84             godie_l.append(alu.go_die_i)
  85             go_wr_l.append(alu.go_wr_i)
  86             go_rd_l.append(alu.go_rd_i)
  87             issue_l.append(alu.issue_i)
  88             busy_l.append(alu.busy_o)
  89         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
  90         comb += self.req_rel_o.eq(Cat(*req_rel_l))
  91         comb += self.busy_o.eq(Cat(*busy_l))
  92         comb += Cat(*godie_l).eq(self.go_die_i)
  93         comb += Cat(*shadow_l).eq(self.shadown_i)
  94         comb += Cat(*go_wr_l).eq(self.go_wr_i)
  95         comb += Cat(*go_rd_l).eq(self.go_rd_i)
  96         comb += Cat(*issue_l).eq(self.issue_i)
  97
  98         # connect data register input/output
  99
 100         # merge (OR) all integer FU / ALU outputs to a single value
 101         # bit of a hack: treereduce needs a list with an item named "data_o"
 102         if self.units:
 103             data_o = treereduce(self.units)
 104             comb += self.data_o.eq(data_o)
 105
 106         for i, alu in enumerate(self.units):
 107             comb += alu.src1_i.eq(self.src1_i)
 108             comb += alu.src2_i.eq(self.src2_i)
 109
 110         return m
 111
 112
 113 class CompUnitALUs(CompUnitsBase):
 114
 115     def __init__(self, rwid):
 116         """ Inputs:
 117
 118             * :rwid:   bit width of register file(s) - both FP and INT
 119         """
 120
 121         # Int ALUs
 122         add = ALU(rwid)
 123         sub = ALU(rwid)
 124         mul = ALU(rwid)
 125         shf = ALU(rwid)
 126
 127         units = []
 128         for alu in [add, sub, mul, shf]:
 129             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 130
 131         print ("alu units", units)
 132         CompUnitsBase.__init__(self, rwid, units)
 133         print ("alu base init done")
 134
 135     def elaborate(self, platform):
 136         print ("alu elaborate start")
 137         m = CompUnitsBase.elaborate(self, platform)
 138         print ("alu elaborate done")
 139         comb = m.d.comb
 140
 141         comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
 142         comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
 143         comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
 144         comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
 145
 146         return m
 147
 148
 149 class CompUnitBR(CompUnitsBase):
 150
 151     def __init__(self, rwid):
 152         """ Inputs:
 153
 154             * :rwid:   bit width of register file(s) - both FP and INT
 155
 156             Note: bgt unit is returned so that a shadow unit can be created
 157             for it
 158
 159         """
 160
 161         # Branch ALU and CU
 162         self.bgt = BranchALU(rwid)
 163         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 164         print ("br units", [self.br1])
 165         CompUnitsBase.__init__(self, rwid, [self.br1])
 166         print ("br base init done")
 167
 168     def elaborate(self, platform):
 169         print ("br elaborate start")
 170         m = CompUnitsBase.elaborate(self, platform)
 171         print ("br elaborate done")
 172         comb = m.d.comb
 173
 174         comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
 175
 176         return m
 177
 178
 179 class FunctionUnits(Elaboratable):
 180
 181     def __init__(self, n_regs, n_int_alus):
 182         self.n_regs = n_regs
 183         self.n_int_alus = n_int_alus
 184
 185         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 186         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 187         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 188
 189         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 190         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 191
 192         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 193         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 194         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 195
 196         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 197         self.readable_o = Signal(n_int_alus, reset_less=True)
 198         self.writable_o = Signal(n_int_alus, reset_less=True)
 199
 200         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 201         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 202         self.go_die_i = Signal(n_int_alus, reset_less=True)
 203         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 204         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 205
 206         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 207
 208     def elaborate(self, platform):
 209         m = Module()
 210         comb = m.d.comb
 211         sync = m.d.sync
 212
 213         n_int_fus = self.n_int_alus
 214
 215         # Integer FU-FU Dep Matrix
 216         intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
 217         m.submodules.intfudeps = intfudeps
 218         # Integer FU-Reg Dep Matrix
 219         intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
 220         m.submodules.intregdeps = intregdeps
 221
 222         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 223         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 224
 225         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 226         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 227
 228         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 229         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 230         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 231
 232         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 233         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 234         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 235         comb += intfudeps.go_die_i.eq(self.go_die_i)
 236         comb += self.readable_o.eq(intfudeps.readable_o)
 237         comb += self.writable_o.eq(intfudeps.writable_o)
 238
 239         # Connect function issue / arrays, and dest/src1/src2
 240         comb += intregdeps.dest_i.eq(self.dest_i)
 241         comb += intregdeps.src1_i.eq(self.src1_i)
 242         comb += intregdeps.src2_i.eq(self.src2_i)
 243
 244         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 245         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 246         comb += intregdeps.go_die_i.eq(self.go_die_i)
 247         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 248
 249         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 250         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 251         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 252
 253         return m
 254
 255
 256 class Scoreboard(Elaboratable):
 257     def __init__(self, rwid, n_regs):
 258         """ Inputs:
 259
 260             * :rwid:   bit width of register file(s) - both FP and INT
 261             * :n_regs: depth of register file(s) - number of FP and INT regs
 262         """
 263         self.rwid = rwid
 264         self.n_regs = n_regs
 265
 266         # Register Files
 267         self.intregs = RegFileArray(rwid, n_regs)
 268         self.fpregs = RegFileArray(rwid, n_regs)
 269
 270         # inputs
 271         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 272         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 273         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 274         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 275
 276         # outputs
 277         self.issue_o = Signal(reset_less=True) # instruction was accepted
 278         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 279
 280         # for branch speculation experiment.  branch_direction = 0 if
 281         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 282         # branch_succ and branch_fail are requests to have the current
 283         # instruction be dependent on the branch unit "shadow" capability.
 284         self.branch_succ_i = Signal(reset_less=True)
 285         self.branch_fail_i = Signal(reset_less=True)
 286         self.branch_direction_o = Signal(2, reset_less=True)
 287
 288     def elaborate(self, platform):
 289         m = Module()
 290         comb = m.d.comb
 291         sync = m.d.sync
 292
 293         m.submodules.intregs = self.intregs
 294         m.submodules.fpregs = self.fpregs
 295
 296         # register ports
 297         int_dest = self.intregs.write_port("dest")
 298         int_src1 = self.intregs.read_port("src1")
 299         int_src2 = self.intregs.read_port("src2")
 300
 301         fp_dest = self.fpregs.write_port("dest")
 302         fp_src1 = self.fpregs.read_port("src1")
 303         fp_src2 = self.fpregs.read_port("src2")
 304
 305         # Int ALUs and Comp Units
 306         n_int_alus = 5
 307         cua = CompUnitALUs(self.rwid)
 308         cub = CompUnitBR(self.rwid)
 309         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
 310         bgt = cub.bgt # get at the branch computation unit
 311         br1 = cub.br1
 312
 313         # Int FUs
 314         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 315
 316         # Count of number of FUs
 317         n_int_fus = n_int_alus
 318         n_fp_fus = 0 # for now
 319
 320         # Integer Priority Picker 1: Adder + Subtractor
 321         intpick1 = GroupPicker(n_int_fus) # picks between add, sub, mul and shf
 322         m.submodules.intpick1 = intpick1
 323
 324         # INT/FP Issue Unit
 325         regdecode = RegDecode(self.n_regs)
 326         m.submodules.regdecode = regdecode
 327         issueunit = IntFPIssueUnit(n_int_fus, n_fp_fus)
 328         m.submodules.issueunit = issueunit
 329
 330         # Shadow Matrix.  currently n_int_fus shadows, to be used for
 331         # write-after-write hazards.  NOTE: there is one extra for branches,
 332         # so the shadow width is increased by 1
 333         m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus, True)
 334         m.submodules.bshadow = bshadow = ShadowMatrix(n_int_fus, 1, False)
 335
 336         # record previous instruction to cast shadow on current instruction
 337         fn_issue_prev = Signal(n_int_fus)
 338         prev_shadow = Signal(n_int_fus)
 339
 340         # Branch Speculation recorder.  tracks the success/fail state as
 341         # each instruction is issued, so that when the branch occurs the
 342         # allow/cancel can be issued as appropriate.
 343         m.submodules.specrec = bspec = BranchSpeculationRecord(n_int_fus)
 344
 345         #---------
 346         # ok start wiring things together...
 347         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 348         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 349         #---------
 350
 351         #---------
 352         # Issue Unit is where it starts.  set up some in/outs for this module
 353         #---------
 354         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 355                      regdecode.src1_i.eq(self.int_src1_i),
 356                      regdecode.src2_i.eq(self.int_src2_i),
 357                      regdecode.enable_i.eq(self.reg_enable_i),
 358                      self.issue_o.eq(issueunit.issue_o)
 359                     ]
 360         self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode
 361
 362         # TODO: issueunit.f (FP)
 363
 364         # and int function issue / busy arrays, and dest/src1/src2
 365         comb += intfus.dest_i.eq(regdecode.dest_o)
 366         comb += intfus.src1_i.eq(regdecode.src1_o)
 367         comb += intfus.src2_i.eq(regdecode.src2_o)
 368
 369         fn_issue_o = issueunit.i.fn_issue_o
 370
 371         comb += intfus.fn_issue_i.eq(fn_issue_o)
 372         comb += issueunit.i.busy_i.eq(cu.busy_o)
 373         comb += self.busy_o.eq(cu.busy_o.bool())
 374
 375         #---------
 376         # merge shadow matrices outputs
 377         #---------
 378
 379         # these are explained in ShadowMatrix docstring, and are to be
 380         # connected to the FUReg and FUFU Matrices, to get them to reset
 381         anydie = Signal(n_int_fus, reset_less=True)
 382         allshadown = Signal(n_int_fus, reset_less=True)
 383         shreset = Signal(n_int_fus, reset_less=True)
 384         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 385         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 386         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 387
 388         #---------
 389         # connect fu-fu matrix
 390         #---------
 391
 392         # Group Picker... done manually for now.
 393         go_rd_o = intpick1.go_rd_o
 394         go_wr_o = intpick1.go_wr_o
 395         go_rd_i = intfus.go_rd_i
 396         go_wr_i = intfus.go_wr_i
 397         go_die_i = intfus.go_die_i
 398         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 399         comb += go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus]) # rd
 400         comb += go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus]) # wr
 401         comb += go_die_i[0:n_int_fus].eq(anydie[0:n_int_fus]) # die
 402
 403         # Connect Picker
 404         #---------
 405         comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
 406         comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus])
 407         int_rd_o = intfus.readable_o
 408         int_wr_o = intfus.writable_o
 409         comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
 410         comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])
 411
 412         #---------
 413         # Shadow Matrix
 414         #---------
 415
 416         comb += shadows.issue_i.eq(fn_issue_o)
 417         #comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
 418         comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
 419         #---------
 420         # NOTE; this setup is for the instruction order preservation...
 421
 422         # connect shadows / go_dies to Computation Units
 423         comb += cu.shadown_i[0:n_int_fus].eq(allshadown)
 424         comb += cu.go_die_i[0:n_int_fus].eq(anydie)
 425
 426         # ok connect first n_int_fu shadows to busy lines, to create an
 427         # instruction-order linked-list-like arrangement, using a bit-matrix
 428         # (instead of e.g. a ring buffer).
 429         # XXX TODO
 430
 431         # when written, the shadow can be cancelled (and was good)
 432         for i in range(n_int_fus):
 433             comb += shadows.s_good_i[i][0:n_int_fus].eq(go_wr_o[0:n_int_fus])
 434
 435         # work out the current-activated busy unit (by recording the old one)
 436         with m.If(fn_issue_o): # only update prev bit if instruction issued
 437             sync += fn_issue_prev.eq(fn_issue_o)
 438
 439         # *previous* instruction shadows *current* instruction, and, obviously,
 440         # if the previous is completed (!busy) don't cast the shadow!
 441         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 442         for i in range(n_int_fus):
 443             comb += shadows.shadow_i[i][0:n_int_fus].eq(prev_shadow)
 444
 445         #---------
 446         # ... and this is for branch speculation.  it uses the extra bit
 447         # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
 448         # only needs to set shadow_i, s_fail_i and s_good_i
 449
 450         # issue captures shadow_i (if enabled)
 451         comb += bshadow.reset_i[0:n_int_fus].eq(shreset[0:n_int_fus])
 452
 453         bactive = Signal(reset_less=True)
 454         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 455
 456         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 457         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 458             comb += bshadow.issue_i.eq(fn_issue_o)
 459             for i in range(n_int_fus):
 460                 with m.If(fn_issue_o & (Const(1<<i))):
 461                     comb += bshadow.shadow_i[i][0].eq(1)
 462
 463         # finally, we need an indicator to the test infrastructure as to
 464         # whether the branch succeeded or failed, plus, link up to the
 465         # "recorder" of whether the instruction was under shadow or not
 466
 467         with m.If(br1.issue_i):
 468             sync += bspec.active_i.eq(1)
 469         with m.If(self.branch_succ_i):
 470             comb += bspec.good_i.eq(fn_issue_o & 0x1f)
 471         with m.If(self.branch_fail_i):
 472             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)
 473
 474         # branch is active (TODO: a better signal: this is over-using the
 475         # go_write signal - actually the branch should not be "writing")
 476         with m.If(br1.go_wr_i):
 477             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 478             sync += bspec.active_i.eq(0)
 479             comb += bspec.br_i.eq(1)
 480             # branch occurs if data == 1, failed if data == 0
 481             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 482             for i in range(n_int_fus):
 483                 # *expected* direction of the branch matched against *actual*
 484                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 485                 # ... or it didn't
 486                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 487
 488         #---------
 489         # Connect Register File(s)
 490         #---------
 491         print ("intregdeps wen len", len(intfus.dest_rsel_o))
 492         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 493         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 494         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 495
 496         # connect ALUs to regfule
 497         comb += int_dest.data_i.eq(cu.data_o)
 498         comb += cu.src1_i.eq(int_src1.data_o)
 499         comb += cu.src2_i.eq(int_src2.data_o)
 500
 501         # connect ALU Computation Units
 502         comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
 503         comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
 504         comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])
 505
 506         return m
 507
 508
 509     def __iter__(self):
 510         yield from self.intregs
 511         yield from self.fpregs
 512         yield self.int_dest_i
 513         yield self.int_src1_i
 514         yield self.int_src2_i
 515         yield self.issue_o
 516         yield self.branch_succ_i
 517         yield self.branch_fail_i
 518         yield self.branch_direction_o
 519
 520     def ports(self):
 521         return list(self)
 522
 523 IADD = 0
 524 ISUB = 1
 525 IMUL = 2
 526 ISHF = 3
 527 IBGT = 4
 528 IBLT = 5
 529 IBEQ = 6
 530 IBNE = 7
 531
 532 class RegSim:
 533     def __init__(self, rwidth, nregs):
 534         self.rwidth = rwidth
 535         self.regs = [0] * nregs
 536
 537     def op(self, op, src1, src2, dest):
 538         maxbits = (1 << self.rwidth) - 1
 539         src1 = self.regs[src1] & maxbits
 540         src2 = self.regs[src2] & maxbits
 541         if op == IADD:
 542             val = src1 + src2
 543         elif op == ISUB:
 544             val = src1 - src2
 545         elif op == IMUL:
 546             val = src1 * src2
 547         elif op == ISHF:
 548             val = src1 >> (src2 & maxbits)
 549         elif op == IBGT:
 550             val = int(src1 > src2)
 551         elif op == IBLT:
 552             val = int(src1 < src2)
 553         elif op == IBEQ:
 554             val = int(src1 == src2)
 555         elif op == IBNE:
 556             val = int(src1 != src2)
 557         val &= maxbits
 558         self.setval(dest, val)
 559         return val
 560
 561     def setval(self, dest, val):
 562         print ("sim setval", dest, hex(val))
 563         self.regs[dest] = val
 564
 565     def dump(self, dut):
 566         for i, val in enumerate(self.regs):
 567             reg = yield dut.intregs.regs[i].reg
 568             okstr = "OK" if reg == val else "!ok"
 569             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 570
 571     def check(self, dut):
 572         for i, val in enumerate(self.regs):
 573             reg = yield dut.intregs.regs[i].reg
 574             if reg != val:
 575                 print("reg %d expected %x received %x\n" % (i, val, reg))
 576                 yield from self.dump(dut)
 577                 assert False
 578
 579 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 580     for i in range(len(dut.int_insn_i)):
 581         yield dut.int_insn_i[i].eq(0)
 582     yield dut.int_dest_i.eq(dest)
 583     yield dut.int_src1_i.eq(src1)
 584     yield dut.int_src2_i.eq(src2)
 585     yield dut.int_insn_i[op].eq(1)
 586     yield dut.reg_enable_i.eq(1)
 587
 588     # these indicate that the instruction is to be made shadow-dependent on
 589     # (either) branch success or branch fail
 590     yield dut.branch_fail_i.eq(branch_fail)
 591     yield dut.branch_succ_i.eq(branch_success)
 592
 593
 594 def print_reg(dut, rnums):
 595     rs = []
 596     for rnum in rnums:
 597         reg = yield dut.intregs.regs[rnum].reg
 598         rs.append("%x" % reg)
 599     rnums = map(str, rnums)
 600     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 601
 602
 603 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 604     insts = []
 605     for i in range(n_ops):
 606         src1 = randint(1, dut.n_regs-1)
 607         src2 = randint(1, dut.n_regs-1)
 608         dest = randint(1, dut.n_regs-1)
 609         op = randint(0, max_opnums)
 610
 611         if shadowing:
 612             insts.append((src1, src2, dest, op, (0, 0)))
 613         else:
 614             insts.append((src1, src2, dest, op))
 615     return insts
 616
 617
 618 def wait_for_busy_clear(dut):
 619     while True:
 620         busy_o = yield dut.busy_o
 621         if not busy_o:
 622             break
 623         print ("busy",)
 624         yield
 625
 626
 627 def wait_for_issue(dut):
 628     while True:
 629         issue_o = yield dut.issue_o
 630         if issue_o:
 631             for i in range(len(dut.int_insn_i)):
 632                 yield dut.int_insn_i[i].eq(0)
 633                 yield dut.reg_enable_i.eq(0)
 634             break
 635         #print ("busy",)
 636         #yield from print_reg(dut, [1,2,3])
 637         yield
 638     #yield from print_reg(dut, [1,2,3])
 639
def scoreboard_branch_sim(dut, alusim):
    """ Simulation process exercising branch speculation.

        Loads random register values into both the dut and alusim, issues
        a branch followed by one "branch ok" and one "branch fail"
        instruction to the dut, waits for the dut to go idle, then replays
        the branch plus only the arm it selects through alusim and
        compares the register files.

        * :dut:    Scoreboard instance under simulation
        * :alusim: RegSim reference model
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (note: this loop re-uses the outer loop's variable name "i")
        for i in range(1, dut.n_regs):
            val = 31+i*3 # immediately overwritten by the random value below
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled alternative: randomly-generated branch test
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # fixed test: a single branch (op 4) whose "dest" slot carries the
        # pair of instruction lists for the two possible outcomes
        if True:
            insts = []
            #insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            #branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the reference-model replay further down:
        # the issue loop below consumes (and extends) "insts" itself
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts # same list object: appends below extend the queue
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): shadow_on is passed to int_instr as the
            # "depends on branch success" flag and shadow_off as "depends
            # on branch fail"; the dut sets branch_direction_o to the
            # branch unit's data_o + 1.  the success/fail wording in the
            # two trailing comments below may therefore be swapped -
            # confirm against the Scoreboard wiring.
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            # branch already resolved: issue without any shadow request
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                # for a branch, "dest" holds the two outcome lists; the
                # register number used as destination is src2
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)
            yield
            yield from wait_for_issue(dut)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay through the reference model: only the arm selected by
        # the model's own branch result is appended and executed
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
 754
 755
 756 def scoreboard_sim(dut, alusim):
 757
 758     seed(0)
 759
 760     for i in range(20):
 761
 762         # set random values in the registers
 763         for i in range(1, dut.n_regs):
 764             val = 31+i*3
 765             val = randint(0, (1<<alusim.rwidth)-1)
 766             yield dut.intregs.regs[i].reg.eq(val)
 767             alusim.setval(i, val)
 768
 769         # create some instructions (some random, some regression tests)
 770         instrs = []
 771         if True:
 772             instrs = create_random_ops(dut, 10, True, 4)
 773
 774         if False:
 775             instrs.append((2, 3, 3, 0))
 776             instrs.append((5, 3, 3, 1))
 777
 778         if False:
 779             instrs.append((5, 6, 2, 1))
 780             instrs.append((2, 2, 4, 0))
 781             #instrs.append((2, 2, 3, 1))
 782
 783         if False:
 784             instrs.append((2, 1, 2, 3))
 785
 786         if False:
 787             instrs.append((2, 6, 2, 1))
 788             instrs.append((2, 1, 2, 0))
 789
 790         if False:
 791             instrs.append((1, 2, 7, 2))
 792             instrs.append((7, 1, 5, 0))
 793             instrs.append((4, 4, 1, 1))
 794
 795         if False:
 796             instrs.append((5, 6, 2, 2))
 797             instrs.append((1, 1, 4, 1))
 798             instrs.append((6, 5, 3, 0))
 799
 800         if False:
 801             # Write-after-Write Hazard
 802             instrs.append( (3, 6, 7, 2) )
 803             instrs.append( (4, 4, 7, 1) )
 804
 805         if False:
 806             # self-read/write-after-write followed by Read-after-Write
 807             instrs.append((1, 1, 1, 1))
 808             instrs.append((1, 5, 3, 0))
 809
 810         if False:
 811             # Read-after-Write followed by self-read-after-write
 812             instrs.append((5, 6, 1, 2))
 813             instrs.append((1, 1, 1, 1))
 814
 815         if False:
 816             # self-read-write sandwich
 817             instrs.append((5, 6, 1, 2))
 818             instrs.append((1, 1, 1, 1))
 819             instrs.append((1, 5, 3, 0))
 820
 821         if False:
 822             # very weird failure
 823             instrs.append( (5, 2, 5, 2) )
 824             instrs.append( (2, 6, 3, 0) )
 825             instrs.append( (4, 2, 2, 1) )
 826
 827         if False:
 828             v1 = 4
 829             yield dut.intregs.regs[5].reg.eq(v1)
 830             alusim.setval(5, v1)
 831             yield dut.intregs.regs[3].reg.eq(5)
 832             alusim.setval(3, 5)
 833             instrs.append((5, 3, 3, 4, (0, 0)))
 834             instrs.append((4, 2, 1, 2, (0, 1)))
 835
 836         if False:
 837             v1 = 6
 838             yield dut.intregs.regs[5].reg.eq(v1)
 839             alusim.setval(5, v1)
 840             yield dut.intregs.regs[3].reg.eq(5)
 841             alusim.setval(3, 5)
 842             instrs.append((5, 3, 3, 4, (0, 0)))
 843             instrs.append((4, 2, 1, 2, (1, 0)))
 844
 845         if False:
 846             instrs.append( (4, 3, 5, 1, (0, 0)) )
 847             instrs.append( (5, 2, 3, 1, (0, 0)) )
 848             instrs.append( (7, 1, 5, 2, (0, 0)) )
 849             instrs.append( (5, 6, 6, 4, (0, 0)) )
 850             instrs.append( (7, 5, 2, 2, (1, 0)) )
 851             instrs.append( (1, 7, 5, 0, (0, 1)) )
 852             instrs.append( (1, 6, 1, 2, (1, 0)) )
 853             instrs.append( (1, 6, 7, 3, (0, 0)) )
 854             instrs.append( (6, 7, 7, 0, (0, 0)) )
 855
 856         # issue instruction(s), wait for issue to be free before proceeding
 857         for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):
 858
 859             print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
 860             alusim.op(op, src1, src2, dest)
 861             yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)
 862             yield
 863             yield from wait_for_issue(dut)
 864
 865         # wait for all instructions to stop before checking
 866         yield
 867         yield from wait_for_busy_clear(dut)
 868
 869         # check status
 870         yield from alusim.check(dut)
 871         yield from alusim.dump(dut)
 872
 873
 874 def test_scoreboard():
 875     dut = Scoreboard(16, 8)
 876     alusim = RegSim(16, 8)
 877     vl = rtlil.convert(dut, ports=dut.ports())
 878     with open("test_scoreboard6600.il", "w") as f:
 879         f.write(vl)
 880
 881     run_simulation(dut, scoreboard_sim(dut, alusim),
 882                         vcd_name='test_scoreboard6600.vcd')
 883
 884     #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
 885     #                    vcd_name='test_scoreboard6600.vcd')
 886
 887
 888 if __name__ == '__main__':
 889     test_scoreboard()