src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12
  13 from compalu import ComputationUnitNoDelay
  14
  15 from alu_hier import ALU, BranchALU
  16 from nmutil.latch import SRLatch
  17
  18 from random import randint, seed
  19 from copy import deepcopy
  20
  21
  22 class CompUnitsBase(Elaboratable):
  23     """ Computation Unit Base class.
  24
  25         Amazingly, this class works recursively.  It's supposed to just
  26         look after some ALUs (that can handle the same operations),
  27         grouping them together, however it turns out that the same code
  28         can also group *groups* of Computation Units together as well.
  29
  30         Basically it was intended just to concatenate the ALU's issue,
  31         go_rd etc. signals together, which start out as bits and become
  32         sequences.  Turns out that the same trick works just as well
  33         on Computation Units!
  34
  35         So this class may be used recursively to present a top-level
  36         sequential concatenation of all the signals in and out of
  37         ALUs, whilst at the same time making it convenient to group
  38         ALUs together.
  39
  40         At the lower level, the intent is that groups of (identical)
  41         ALUs may be passed the same operation.  Even beyond that,
  42         the intent is that that group of (identical) ALUs actually
  43         share the *same pipeline* and as such become a "Concurrent
  44         Computation Unit" as defined by Mitch Alsup (see section
  45         11.4.9.3)
  46     """
  47     def __init__(self, rwid, units):
  48         """ Inputs:
  49
  50             * :rwid:   bit width of register file(s) - both FP and INT
  51             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  52         """
  53         self.units = units
  54         self.rwid = rwid
  55         if units and isinstance(units[0], CompUnitsBase):
  56             self.n_units = 0
  57             for u in self.units:
  58                 self.n_units += u.n_units
  59         else:
  60             self.n_units = len(units)
  61
  62         n_units = self.n_units
  63
  64         # inputs
  65         self.issue_i = Signal(n_units, reset_less=True)
  66         self.go_rd_i = Signal(n_units, reset_less=True)
  67         self.go_wr_i = Signal(n_units, reset_less=True)
  68         self.shadown_i = Signal(n_units, reset_less=True)
  69         self.go_die_i = Signal(n_units, reset_less=True)
  70
  71         # outputs
  72         self.busy_o = Signal(n_units, reset_less=True)
  73         self.rd_rel_o = Signal(n_units, reset_less=True)
  74         self.req_rel_o = Signal(n_units, reset_less=True)
  75
  76         # in/out register data (note: not register#, actual data)
  77         self.data_o = Signal(rwid, reset_less=True)
  78         self.src1_i = Signal(rwid, reset_less=True)
  79         self.src2_i = Signal(rwid, reset_less=True)
  80
  81     def elaborate(self, platform):
  82         m = Module()
  83         comb = m.d.comb
  84
  85         for i, alu in enumerate(self.units):
  86             print ("elaborate comp%d" % i, self, alu)
  87             setattr(m.submodules, "comp%d" % i, alu)
  88
  89         go_rd_l = []
  90         go_wr_l = []
  91         issue_l = []
  92         busy_l = []
  93         req_rel_l = []
  94         rd_rel_l = []
  95         shadow_l = []
  96         godie_l = []
  97         for alu in self.units:
  98             req_rel_l.append(alu.req_rel_o)
  99             rd_rel_l.append(alu.rd_rel_o)
 100             shadow_l.append(alu.shadown_i)
 101             godie_l.append(alu.go_die_i)
 102             go_wr_l.append(alu.go_wr_i)
 103             go_rd_l.append(alu.go_rd_i)
 104             issue_l.append(alu.issue_i)
 105             busy_l.append(alu.busy_o)
 106         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 107         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 108         comb += self.busy_o.eq(Cat(*busy_l))
 109         comb += Cat(*godie_l).eq(self.go_die_i)
 110         comb += Cat(*shadow_l).eq(self.shadown_i)
 111         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 112         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 113         comb += Cat(*issue_l).eq(self.issue_i)
 114
 115         # connect data register input/output
 116
 117         # merge (OR) all integer FU / ALU outputs to a single value
 118         # bit of a hack: treereduce needs a list with an item named "data_o"
 119         if self.units:
 120             data_o = treereduce(self.units)
 121             comb += self.data_o.eq(data_o)
 122
 123         for i, alu in enumerate(self.units):
 124             comb += alu.src1_i.eq(self.src1_i)
 125             comb += alu.src2_i.eq(self.src2_i)
 126
 127         return m
 128
 129
 130 class CompUnitALUs(CompUnitsBase):
 131
 132     def __init__(self, rwid):
 133         """ Inputs:
 134
 135             * :rwid:   bit width of register file(s) - both FP and INT
 136         """
 137
 138         # Int ALUs
 139         add = ALU(rwid)
 140         sub = ALU(rwid)
 141         mul = ALU(rwid)
 142         shf = ALU(rwid)
 143
 144         units = []
 145         for alu in [add, sub, mul, shf]:
 146             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 147
 148         print ("alu units", units)
 149         CompUnitsBase.__init__(self, rwid, units)
 150         print ("alu base init done")
 151
 152     def elaborate(self, platform):
 153         print ("alu elaborate start")
 154         m = CompUnitsBase.elaborate(self, platform)
 155         print ("alu elaborate done")
 156         comb = m.d.comb
 157
 158         comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
 159         comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
 160         comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
 161         comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
 162
 163         return m
 164
 165
 166 class CompUnitBR(CompUnitsBase):
 167
 168     def __init__(self, rwid):
 169         """ Inputs:
 170
 171             * :rwid:   bit width of register file(s) - both FP and INT
 172
 173             Note: bgt unit is returned so that a shadow unit can be created
 174             for it
 175
 176         """
 177
 178         # Branch ALU and CU
 179         self.bgt = BranchALU(rwid)
 180         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 181         print ("br units", [self.br1])
 182         CompUnitsBase.__init__(self, rwid, [self.br1])
 183         print ("br base init done")
 184
 185     def elaborate(self, platform):
 186         print ("br elaborate start")
 187         m = CompUnitsBase.elaborate(self, platform)
 188         print ("br elaborate done")
 189         comb = m.d.comb
 190
 191         comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
 192
 193         return m
 194
 195
 196 class FunctionUnits(Elaboratable):
 197
 198     def __init__(self, n_regs, n_int_alus):
 199         self.n_regs = n_regs
 200         self.n_int_alus = n_int_alus
 201
 202         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 203         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 204         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 205
 206         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 207         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 208
 209         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 210         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 211         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 212
 213         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 214         self.readable_o = Signal(n_int_alus, reset_less=True)
 215         self.writable_o = Signal(n_int_alus, reset_less=True)
 216
 217         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 218         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 219         self.go_die_i = Signal(n_int_alus, reset_less=True)
 220         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 221         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 222
 223         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 224
 225     def elaborate(self, platform):
 226         m = Module()
 227         comb = m.d.comb
 228         sync = m.d.sync
 229
 230         n_int_fus = self.n_int_alus
 231
 232         # Integer FU-FU Dep Matrix
 233         intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
 234         m.submodules.intfudeps = intfudeps
 235         # Integer FU-Reg Dep Matrix
 236         intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
 237         m.submodules.intregdeps = intregdeps
 238
 239         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 240         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 241
 242         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 243         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 244
 245         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 246         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 247         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 248
 249         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 250         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 251         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 252         comb += intfudeps.go_die_i.eq(self.go_die_i)
 253         comb += self.readable_o.eq(intfudeps.readable_o)
 254         comb += self.writable_o.eq(intfudeps.writable_o)
 255
 256         # Connect function issue / arrays, and dest/src1/src2
 257         comb += intregdeps.dest_i.eq(self.dest_i)
 258         comb += intregdeps.src1_i.eq(self.src1_i)
 259         comb += intregdeps.src2_i.eq(self.src2_i)
 260
 261         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 262         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 263         comb += intregdeps.go_die_i.eq(self.go_die_i)
 264         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 265
 266         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 267         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 268         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 269
 270         return m
 271
 272
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment.

        Ties together two register files, the integer Computation Units
        (four ALUs plus one branch unit), the Function Unit dependency
        matrices, a group picker, the issue unit / register decoder, two
        shadow matrices and a branch speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() loads branch_direction_o with
        # br1.data_o+1, i.e. 1 when the branch ALU result is 0 and 2 when
        # it is 1 - confirm that this matches the success/fail naming above.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates all submodules and wires them together.

            Side effect: creates the attribute self.int_insn_i (the issue
            unit's instruction-select input).  It does not exist until
            this method has been called.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four units of CompUnitALUs plus the one of CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid)
        cub = CompUnitBR(self.rwid)
        # ALU group first, branch group second: the branch unit is
        # therefore the last (highest) bit of the cu.* signals
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: one entry per integer Function Unit
        # (add, sub, mul, shf and the branch unit)
        intpick1 = GroupPicker(n_int_fus)
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" is n_int_fus x n_int_fus and is used
        # for instruction order preservation (write-after-write hazards).
        # Branch speculation uses a second, separate matrix ("bshadow")
        # which is only one shadow wide.
        m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_int_fus, 1, False)

        # record previous instruction to cast shadow on current instruction
        # (fn_issue_prev is written below but not read anywhere in this
        # method)
        fn_issue_prev = Signal(n_int_fus)
        prev_shadow = Signal(n_int_fus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_int_fus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.i.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_int_fus, reset_less=True)
        allshadown = Signal(n_int_fus, reset_less=True)
        shreset = Signal(n_int_fus, reset_less=True)
        # a unit is out of shadow only if *both* matrices say so; it dies
        # if *either* matrix says so
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus]) # rd
        comb += go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus]) # wr
        comb += go_die_i[0:n_int_fus].eq(anydie[0:n_int_fus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the order-preservation matrix is reset by the branch matrix's
        # go_die output
        comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_int_fus].eq(allshadown)
        comb += cu.go_die_i[0:n_int_fus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_int_fus):
            comb += shadows.s_good_i[i][0:n_int_fus].eq(go_wr_o[0:n_int_fus])

        # work out the current-activated busy unit (by recording the old one)
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        # (as written: every busy unit other than the one being issued to)
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_int_fus):
            comb += shadows.shadow_i[i][0:n_int_fus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate,
        # one-shadow-wide "bshadow" matrix created above.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_int_fus].eq(shreset[0:n_int_fus])

        # a branch is "active" from the cycle it is issued (or while the
        # recorder's active flag is set) until the branch unit gets go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_int_fus):
                # select bit i of fn_issue_o
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f keeps the low 5 bits, which matches n_int_fus == 5 above
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            # this later assignment takes priority over the "set to 1"
            # above should both conditions hold in the same cycle
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_int_fus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m


    def __iter__(self):
        """ Yields whatever the two register files yield, then this
            module's own signals.

            NOTE(review): reg_enable_i, busy_o and int_insn_i are not
            included - confirm whether that is intentional.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything produced by __iter__, as a list. """
        return list(self)
 539
 540 IADD = 0
 541 ISUB = 1
 542 IMUL = 2
 543 ISHF = 3
 544 IBGT = 4
 545 IBLT = 5
 546 IBEQ = 6
 547 IBNE = 7
 548
 549 class RegSim:
 550     def __init__(self, rwidth, nregs):
 551         self.rwidth = rwidth
 552         self.regs = [0] * nregs
 553
 554     def op(self, op, src1, src2, dest):
 555         maxbits = (1 << self.rwidth) - 1
 556         src1 = self.regs[src1] & maxbits
 557         src2 = self.regs[src2] & maxbits
 558         if op == IADD:
 559             val = src1 + src2
 560         elif op == ISUB:
 561             val = src1 - src2
 562         elif op == IMUL:
 563             val = src1 * src2
 564         elif op == ISHF:
 565             val = src1 >> (src2 & maxbits)
 566         elif op == IBGT:
 567             val = int(src1 > src2)
 568         elif op == IBLT:
 569             val = int(src1 < src2)
 570         elif op == IBEQ:
 571             val = int(src1 == src2)
 572         elif op == IBNE:
 573             val = int(src1 != src2)
 574         val &= maxbits
 575         self.setval(dest, val)
 576         return val
 577
 578     def setval(self, dest, val):
 579         print ("sim setval", dest, hex(val))
 580         self.regs[dest] = val
 581
 582     def dump(self, dut):
 583         for i, val in enumerate(self.regs):
 584             reg = yield dut.intregs.regs[i].reg
 585             okstr = "OK" if reg == val else "!ok"
 586             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 587
 588     def check(self, dut):
 589         for i, val in enumerate(self.regs):
 590             reg = yield dut.intregs.regs[i].reg
 591             if reg != val:
 592                 print("reg %d expected %x received %x\n" % (i, val, reg))
 593                 yield from self.dump(dut)
 594                 assert False
 595
 596 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 597     for i in range(len(dut.int_insn_i)):
 598         yield dut.int_insn_i[i].eq(0)
 599     yield dut.int_dest_i.eq(dest)
 600     yield dut.int_src1_i.eq(src1)
 601     yield dut.int_src2_i.eq(src2)
 602     yield dut.int_insn_i[op].eq(1)
 603     yield dut.reg_enable_i.eq(1)
 604
 605     # these indicate that the instruction is to be made shadow-dependent on
 606     # (either) branch success or branch fail
 607     yield dut.branch_fail_i.eq(branch_fail)
 608     yield dut.branch_succ_i.eq(branch_success)
 609
 610
 611 def print_reg(dut, rnums):
 612     rs = []
 613     for rnum in rnums:
 614         reg = yield dut.intregs.regs[rnum].reg
 615         rs.append("%x" % reg)
 616     rnums = map(str, rnums)
 617     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 618
 619
 620 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 621     insts = []
 622     for i in range(n_ops):
 623         src1 = randint(1, dut.n_regs-1)
 624         src2 = randint(1, dut.n_regs-1)
 625         dest = randint(1, dut.n_regs-1)
 626         op = randint(0, max_opnums)
 627
 628         if shadowing:
 629             insts.append((src1, src2, dest, op, (0, 0)))
 630         else:
 631             insts.append((src1, src2, dest, op))
 632     return insts
 633
 634
 635 def wait_for_busy_clear(dut):
 636     while True:
 637         busy_o = yield dut.busy_o
 638         if not busy_o:
 639             break
 640         print ("busy",)
 641         yield
 642
 643
 644 def wait_for_issue(dut):
 645     while True:
 646         issue_o = yield dut.issue_o
 647         if issue_o:
 648             for i in range(len(dut.int_insn_i)):
 649                 yield dut.int_insn_i[i].eq(0)
 650                 yield dut.reg_enable_i.eq(0)
 651             break
 652         #print ("busy",)
 653         #yield from print_reg(dut, [1,2,3])
 654         yield
 655     #yield from print_reg(dut, [1,2,3])
 656
 657 def scoreboard_branch_sim(dut, alusim):
 658
 659     iseed = 3
 660
 661     for i in range(1):
 662
 663         print ("rseed", iseed)
 664         seed(iseed)
 665         iseed += 1
 666
 667         yield dut.branch_direction_o.eq(0)
 668
 669         # set random values in the registers
 670         for i in range(1, dut.n_regs):
 671             val = 31+i*3
 672             val = randint(0, (1<<alusim.rwidth)-1)
 673             yield dut.intregs.regs[i].reg.eq(val)
 674             alusim.setval(i, val)
 675
 676         if False:
 677             # create some instructions: branches create a tree
 678             insts = create_random_ops(dut, 1, True, 1)
 679             #insts.append((6, 6, 1, 2, (0, 0)))
 680             #insts.append((4, 3, 3, 0, (0, 0)))
 681
 682             src1 = randint(1, dut.n_regs-1)
 683             src2 = randint(1, dut.n_regs-1)
 684             #op = randint(4, 7)
 685             op = 4 # only BGT at the moment
 686
 687             branch_ok = create_random_ops(dut, 1, True, 1)
 688             branch_fail = create_random_ops(dut, 1, True, 1)
 689
 690             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 691
 692         if True:
 693             insts = []
 694             #insts.append( (3, 5, 2, 0, (0, 0)) )
 695             branch_ok = []
 696             branch_fail = []
 697             branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 698             #branch_ok.append( None )
 699             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 700             #branch_fail.append( None )
 701             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 702
 703         siminsts = deepcopy(insts)
 704
 705         # issue instruction(s)
 706         i = -1
 707         instrs = insts
 708         branch_direction = 0
 709         while instrs:
 710             yield
 711             yield
 712             i += 1
 713             branch_direction = yield dut.branch_direction_o # way branch went
 714             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 715             if branch_direction == 1 and shadow_on:
 716                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 717                 continue # branch was "success" and this is a "failed"... skip
 718             if branch_direction == 2 and shadow_off:
 719                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 720                 continue # branch was "fail" and this is a "success"... skip
 721             if branch_direction != 0:
 722                 shadow_on = 0
 723                 shadow_off = 0
 724             is_branch = op >= 4
 725             if is_branch:
 726                 branch_ok, branch_fail = dest
 727                 dest = src2
 728                 # ok zip up the branch success / fail instructions and
 729                 # drop them into the queue, one marked "to have branch success"
 730                 # the other to be marked shadow branch "fail".
 731                 # one out of each of these will be cancelled
 732                 for ok, fl in zip(branch_ok, branch_fail):
 733                     if ok:
 734                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
 735                     if fl:
 736                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
 737             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
 738                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 739             yield from int_instr(dut, op, src1, src2, dest,
 740                                  shadow_on, shadow_off)
 741             yield
 742             yield from wait_for_issue(dut)
 743
 744         # wait for all instructions to stop before checking
 745         yield
 746         yield from wait_for_busy_clear(dut)
 747
 748         i = -1
 749         while siminsts:
 750             instr = siminsts.pop(0)
 751             if instr is None:
 752                 continue
 753             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
 754             i += 1
 755             is_branch = op >= 4
 756             if is_branch:
 757                 branch_ok, branch_fail = dest
 758                 dest = src2
 759             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
 760                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 761             branch_res = alusim.op(op, src1, src2, dest)
 762             if is_branch:
 763                 if branch_res:
 764                     siminsts += branch_ok
 765                 else:
 766                     siminsts += branch_fail
 767
 768         # check status
 769         yield from alusim.check(dut)
 770         yield from alusim.dump(dut)
 771
 772
 773 def scoreboard_sim(dut, alusim):
 774
 775     seed(0)
 776
 777     for i in range(20):
 778
 779         # set random values in the registers
 780         for i in range(1, dut.n_regs):
 781             val = 31+i*3
 782             val = randint(0, (1<<alusim.rwidth)-1)
 783             yield dut.intregs.regs[i].reg.eq(val)
 784             alusim.setval(i, val)
 785
 786         # create some instructions (some random, some regression tests)
 787         instrs = []
 788         if True:
 789             instrs = create_random_ops(dut, 10, True, 4)
 790
 791         if False:
 792             instrs.append((2, 3, 3, 0))
 793             instrs.append((5, 3, 3, 1))
 794
 795         if False:
 796             instrs.append((5, 6, 2, 1))
 797             instrs.append((2, 2, 4, 0))
 798             #instrs.append((2, 2, 3, 1))
 799
 800         if False:
 801             instrs.append((2, 1, 2, 3))
 802
 803         if False:
 804             instrs.append((2, 6, 2, 1))
 805             instrs.append((2, 1, 2, 0))
 806
 807         if False:
 808             instrs.append((1, 2, 7, 2))
 809             instrs.append((7, 1, 5, 0))
 810             instrs.append((4, 4, 1, 1))
 811
 812         if False:
 813             instrs.append((5, 6, 2, 2))
 814             instrs.append((1, 1, 4, 1))
 815             instrs.append((6, 5, 3, 0))
 816
 817         if False:
 818             # Write-after-Write Hazard
 819             instrs.append( (3, 6, 7, 2) )
 820             instrs.append( (4, 4, 7, 1) )
 821
 822         if False:
 823             # self-read/write-after-write followed by Read-after-Write
 824             instrs.append((1, 1, 1, 1))
 825             instrs.append((1, 5, 3, 0))
 826
 827         if False:
 828             # Read-after-Write followed by self-read-after-write
 829             instrs.append((5, 6, 1, 2))
 830             instrs.append((1, 1, 1, 1))
 831
 832         if False:
 833             # self-read-write sandwich
 834             instrs.append((5, 6, 1, 2))
 835             instrs.append((1, 1, 1, 1))
 836             instrs.append((1, 5, 3, 0))
 837
 838         if False:
 839             # very weird failure
 840             instrs.append( (5, 2, 5, 2) )
 841             instrs.append( (2, 6, 3, 0) )
 842             instrs.append( (4, 2, 2, 1) )
 843
 844         if False:
 845             v1 = 4
 846             yield dut.intregs.regs[5].reg.eq(v1)
 847             alusim.setval(5, v1)
 848             yield dut.intregs.regs[3].reg.eq(5)
 849             alusim.setval(3, 5)
 850             instrs.append((5, 3, 3, 4, (0, 0)))
 851             instrs.append((4, 2, 1, 2, (0, 1)))
 852
 853         if False:
 854             v1 = 6
 855             yield dut.intregs.regs[5].reg.eq(v1)
 856             alusim.setval(5, v1)
 857             yield dut.intregs.regs[3].reg.eq(5)
 858             alusim.setval(3, 5)
 859             instrs.append((5, 3, 3, 4, (0, 0)))
 860             instrs.append((4, 2, 1, 2, (1, 0)))
 861
 862         if False:
 863             instrs.append( (4, 3, 5, 1, (0, 0)) )
 864             instrs.append( (5, 2, 3, 1, (0, 0)) )
 865             instrs.append( (7, 1, 5, 2, (0, 0)) )
 866             instrs.append( (5, 6, 6, 4, (0, 0)) )
 867             instrs.append( (7, 5, 2, 2, (1, 0)) )
 868             instrs.append( (1, 7, 5, 0, (0, 1)) )
 869             instrs.append( (1, 6, 1, 2, (1, 0)) )
 870             instrs.append( (1, 6, 7, 3, (0, 0)) )
 871             instrs.append( (6, 7, 7, 0, (0, 0)) )
 872
 873         # issue instruction(s), wait for issue to be free before proceeding
 874         for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):
 875
 876             print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
 877             alusim.op(op, src1, src2, dest)
 878             yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)
 879             yield
 880             yield from wait_for_issue(dut)
 881
 882         # wait for all instructions to stop before checking
 883         yield
 884         yield from wait_for_busy_clear(dut)
 885
 886         # check status
 887         yield from alusim.check(dut)
 888         yield from alusim.dump(dut)
 889
 890
 891 def test_scoreboard():
 892     dut = Scoreboard(16, 8)
 893     alusim = RegSim(16, 8)
 894     vl = rtlil.convert(dut, ports=dut.ports())
 895     with open("test_scoreboard6600.il", "w") as f:
 896         f.write(vl)
 897
 898     run_simulation(dut, scoreboard_sim(dut, alusim),
 899                         vcd_name='test_scoreboard6600.vcd')
 900
 901     #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
 902     #                    vcd_name='test_scoreboard6600.vcd')
 903
 904
 905 if __name__ == '__main__':
 906     test_scoreboard()