src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class CompUnitsBase(Elaboratable):
  26     """ Computation Unit Base class.
  27
  28         Amazingly, this class works recursively.  It's supposed to just
  29         look after some ALUs (that can handle the same operations),
  30         grouping them together, however it turns out that the same code
  31         can also group *groups* of Computation Units together as well.
  32
  33         Basically it was intended just to concatenate the ALU's issue,
  34         go_rd etc. signals together, which start out as bits and become
  35         sequences.  Turns out that the same trick works just as well
  36         on Computation Units!
  37
  38         So this class may be used recursively to present a top-level
  39         sequential concatenation of all the signals in and out of
  40         ALUs, whilst at the same time making it convenient to group
  41         ALUs together.
  42
  43         At the lower level, the intent is that groups of (identical)
  44         ALUs may be passed the same operation.  Even beyond that,
  45         the intent is that that group of (identical) ALUs actually
  46         share the *same pipeline* and as such become a "Concurrent
  47         Computation Unit" as defined by Mitch Alsup (see section
  48         11.4.9.3)
  49     """
  50     def __init__(self, rwid, units):
  51         """ Inputs:
  52
  53             * :rwid:   bit width of register file(s) - both FP and INT
  54             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  55         """
  56         self.units = units
  57         self.rwid = rwid
  58         self.rwid = rwid
  59         if units and isinstance(units[0], CompUnitsBase):
  60             self.n_units = 0
  61             for u in self.units:
  62                 self.n_units += u.n_units
  63         else:
  64             self.n_units = len(units)
  65
  66         n_units = self.n_units
  67
  68         # inputs
  69         self.issue_i = Signal(n_units, reset_less=True)
  70         self.go_rd_i = Signal(n_units, reset_less=True)
  71         self.go_wr_i = Signal(n_units, reset_less=True)
  72         self.shadown_i = Signal(n_units, reset_less=True)
  73         self.go_die_i = Signal(n_units, reset_less=True)
  74
  75         # outputs
  76         self.busy_o = Signal(n_units, reset_less=True)
  77         self.rd_rel_o = Signal(n_units, reset_less=True)
  78         self.req_rel_o = Signal(n_units, reset_less=True)
  79
  80         # in/out register data (note: not register#, actual data)
  81         self.data_o = Signal(rwid, reset_less=True)
  82         self.src1_i = Signal(rwid, reset_less=True)
  83         self.src2_i = Signal(rwid, reset_less=True)
  84         # input operand
  85
  86     def elaborate(self, platform):
  87         m = Module()
  88         comb = m.d.comb
  89
  90         for i, alu in enumerate(self.units):
  91             setattr(m.submodules, "comp%d" % i, alu)
  92
  93         go_rd_l = []
  94         go_wr_l = []
  95         issue_l = []
  96         busy_l = []
  97         req_rel_l = []
  98         rd_rel_l = []
  99         shadow_l = []
 100         godie_l = []
 101         for alu in self.units:
 102             req_rel_l.append(alu.req_rel_o)
 103             rd_rel_l.append(alu.rd_rel_o)
 104             shadow_l.append(alu.shadown_i)
 105             godie_l.append(alu.go_die_i)
 106             go_wr_l.append(alu.go_wr_i)
 107             go_rd_l.append(alu.go_rd_i)
 108             issue_l.append(alu.issue_i)
 109             busy_l.append(alu.busy_o)
 110         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 111         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 112         comb += self.busy_o.eq(Cat(*busy_l))
 113         comb += Cat(*godie_l).eq(self.go_die_i)
 114         comb += Cat(*shadow_l).eq(self.shadown_i)
 115         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 116         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 117         comb += Cat(*issue_l).eq(self.issue_i)
 118
 119         # connect data register input/output
 120
 121         # merge (OR) all integer FU / ALU outputs to a single value
 122         # bit of a hack: treereduce needs a list with an item named "data_o"
 123         if self.units:
 124             data_o = treereduce(self.units)
 125             comb += self.data_o.eq(data_o)
 126
 127         for i, alu in enumerate(self.units):
 128             comb += alu.src1_i.eq(self.src1_i)
 129             comb += alu.src2_i.eq(self.src2_i)
 130
 131         return m
 132
 133
 134 class CompUnitALUs(CompUnitsBase):
 135
 136     def __init__(self, rwid, opwid):
 137         """ Inputs:
 138
 139             * :rwid:   bit width of register file(s) - both FP and INT
 140             * :opwid:  operand bit width
 141         """
 142         self.opwid = opwid
 143
 144         # inputs
 145         self.oper_i = Signal(opwid, reset_less=True)
 146
 147         # Int ALUs
 148         add = ALU(rwid)
 149         sub = ALU(rwid)
 150         mul = ALU(rwid)
 151         shf = ALU(rwid)
 152
 153         units = []
 154         for alu in [add, sub, mul, shf]:
 155             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 156
 157         CompUnitsBase.__init__(self, rwid, units)
 158
 159     def elaborate(self, platform):
 160         m = CompUnitsBase.elaborate(self, platform)
 161         comb = m.d.comb
 162
 163         # hand the same operation to all units
 164         for alu in self.units:
 165             comb += alu.oper_i.eq(self.oper_i)
 166         #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
 167         #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
 168         #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
 169         #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
 170
 171         return m
 172
 173
 174 class CompUnitBR(CompUnitsBase):
 175
 176     def __init__(self, rwid, opwid):
 177         """ Inputs:
 178
 179             * :rwid:   bit width of register file(s) - both FP and INT
 180             * :opwid:  operand bit width
 181
 182             Note: bgt unit is returned so that a shadow unit can be created
 183             for it
 184         """
 185         self.opwid = opwid
 186
 187         # inputs
 188         self.oper_i = Signal(opwid, reset_less=True)
 189
 190         # Branch ALU and CU
 191         self.bgt = BranchALU(rwid)
 192         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 193         CompUnitsBase.__init__(self, rwid, [self.br1])
 194
 195     def elaborate(self, platform):
 196         m = CompUnitsBase.elaborate(self, platform)
 197         comb = m.d.comb
 198
 199         # hand the same operation to all units
 200         for alu in self.units:
 201             comb += alu.oper_i.eq(self.oper_i)
 202         #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
 203
 204         return m
 205
 206
 207 class FunctionUnits(Elaboratable):
 208
 209     def __init__(self, n_regs, n_int_alus):
 210         self.n_regs = n_regs
 211         self.n_int_alus = n_int_alus
 212
 213         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 214         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 215         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 216
 217         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 218         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 219
 220         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 221         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 222         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 223
 224         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 225         self.readable_o = Signal(n_int_alus, reset_less=True)
 226         self.writable_o = Signal(n_int_alus, reset_less=True)
 227
 228         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 229         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 230         self.go_die_i = Signal(n_int_alus, reset_less=True)
 231         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 232         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 233
 234         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 235
 236     def elaborate(self, platform):
 237         m = Module()
 238         comb = m.d.comb
 239         sync = m.d.sync
 240
 241         n_intfus = self.n_int_alus
 242
 243         # Integer FU-FU Dep Matrix
 244         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 245         m.submodules.intfudeps = intfudeps
 246         # Integer FU-Reg Dep Matrix
 247         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 248         m.submodules.intregdeps = intregdeps
 249
 250         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 251         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 252
 253         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 254         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 255
 256         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 257         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 258         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 259
 260         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 261         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 262         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 263         comb += intfudeps.go_die_i.eq(self.go_die_i)
 264         comb += self.readable_o.eq(intfudeps.readable_o)
 265         comb += self.writable_o.eq(intfudeps.writable_o)
 266
 267         # Connect function issue / arrays, and dest/src1/src2
 268         comb += intregdeps.dest_i.eq(self.dest_i)
 269         comb += intregdeps.src1_i.eq(self.src1_i)
 270         comb += intregdeps.src2_i.eq(self.src2_i)
 271
 272         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 273         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 274         comb += intregdeps.go_die_i.eq(self.go_die_i)
 275         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 276
 277         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 278         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 279         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 280
 281         return m
 282
 283
class Scoreboard(Elaboratable):
    """ Top-level scoreboard.

        Instantiates and wires together: the INT and FP register files,
        the ALU and branch Computation Units, the Function Unit
        dependency matrices (FunctionUnits), the group picker, the
        register decoder and issue units, two shadow matrices and the
        branch speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these: one issue group for the 4 ALUs,
        # one for the single branch unit
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these: the operation handed to the ALU / branch unit groups
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE(review): the FP ports are created here but are not
        # connected to anything further down in this method.
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the 4 units of CompUnitALUs plus the 1 unit of CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        # group-of-groups: ALU units first, then the branch unit
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): in the code below the branch shadow is a separate
        # 1-wide ShadowMatrix (bshadow) rather than an extra column.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_intfus)
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # single-bit busy: set if any Computation Unit is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the order-preservation shadows are reset by the branch shadow's
        # go_die output
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # work out the current-activated busy unit (by recording the old one)
        # NOTE(review): fn_issue_prev is registered here but not read
        # anywhere else in this method.
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from the cycle it is issued (or while the
        # recorder says so) until the branch unit receives go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the unit(s) whose issue bit is set get the shadow
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f is a hard-coded 5-bit mask; it matches
        # n_int_alus = 5 above and would need updating with it.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 gives the 1 / 2 direction encoding; 0 is reserved
            # for "branch not met yet" (see __init__)
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # signals exposed as ports: both register files, then the
        # register-number inputs, issue_o and the branch signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        # list form of __iter__
        return list(self)
 558
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard.

        The instruction at the head of the queue is presented to the
        scoreboard's register inputs and to the ALU or branch issue
        group; it is popped from the queue once that group reports
        a non-zero fn_issue_o.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as its queue length
            * :n_in:   number of data_i instruction inputs
            * :n_out:  passed to InstructionQ
            * :rwid:   register width, passed to both submodules
            * :opwid:  operation width, passed to InstructionQ
            * :n_regs: number of registers, passed to Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) tuple used for the queue-length style signals:
        # floor(log2(qlen)) + 2 bits, unsigned
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (note: this attribute only exists once elaborate has been called)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed (iq.n_sub_i set to 1) when the
        # issue group being waited on reports a non-zero fn_issue_o.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            # also tell the unit-group to stop accepting the instruction
            # and disable the regfile
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
                comb += wait_issue_br.eq(0)
                comb += sc.brissue.insn_i.eq(0)
                comb += sc.int_dest_i.eq(0)
                comb += sc.int_src1_i.eq(0)
                comb += sc.int_src2_i.eq(0)
                comb += sc.reg_enable_i.eq(0)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
                comb += wait_issue_alu.eq(0)
                comb += sc.aluissue.insn_i.eq(0)
                comb += sc.int_dest_i.eq(0)
                comb += sc.int_src1_i.eq(0)
                comb += sc.int_src2_i.eq(0)
                comb += sc.reg_enable_i.eq(0)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        # NOTE(review): this block assigns the same comb signals as the
        # block above and comes later in the module - confirm that the
        # resulting priority is the intended one.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: bits 2-3 of the operation
            # select branch (non-zero) or ALU (zero); bits 0-1 are
            # passed on as the unit-group's own operation
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(op & 0x3)
                comb += wait_issue_br.eq(1)
            with m.Else():                   # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(op & 0x3)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # signals exposed as ports: ready output, every field of every
        # data_i instruction input, then the add-count input
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        # list form of __iter__
        return list(self)
 675
 676 IADD = 0
 677 ISUB = 1
 678 IMUL = 2
 679 ISHF = 3
 680 IBGT = 4
 681 IBLT = 5
 682 IBEQ = 6
 683 IBNE = 7
 684
 685 class RegSim:
 686     def __init__(self, rwidth, nregs):
 687         self.rwidth = rwidth
 688         self.regs = [0] * nregs
 689
 690     def op(self, op, src1, src2, dest):
 691         maxbits = (1 << self.rwidth) - 1
 692         src1 = self.regs[src1] & maxbits
 693         src2 = self.regs[src2] & maxbits
 694         if op == IADD:
 695             val = src1 + src2
 696         elif op == ISUB:
 697             val = src1 - src2
 698         elif op == IMUL:
 699             val = src1 * src2
 700         elif op == ISHF:
 701             val = src1 >> (src2 & maxbits)
 702         elif op == IBGT:
 703             val = int(src1 > src2)
 704         elif op == IBLT:
 705             val = int(src1 < src2)
 706         elif op == IBEQ:
 707             val = int(src1 == src2)
 708         elif op == IBNE:
 709             val = int(src1 != src2)
 710         val &= maxbits
 711         self.setval(dest, val)
 712         return val
 713
 714     def setval(self, dest, val):
 715         print ("sim setval", dest, hex(val))
 716         self.regs[dest] = val
 717
 718     def dump(self, dut):
 719         for i, val in enumerate(self.regs):
 720             reg = yield dut.intregs.regs[i].reg
 721             okstr = "OK" if reg == val else "!ok"
 722             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 723
 724     def check(self, dut):
 725         for i, val in enumerate(self.regs):
 726             reg = yield dut.intregs.regs[i].reg
 727             if reg != val:
 728                 print("reg %d expected %x received %x\n" % (i, val, reg))
 729                 yield from self.dump(dut)
 730                 assert False
 731
 732 def instr_q(dut, op, src1, src2, dest, branch_success, branch_fail):
 733     instrs = [{'oper_i': op, 'dest_i': dest, 'src1_i': src1, 'src2_i': src2}]
 734
 735     sendlen = 1
 736     for idx in range(sendlen):
 737         yield from eq(dut.data_i[idx], instrs[idx])
 738         di = yield dut.data_i[idx]
 739         print ("senddata %d %x" % (idx, di))
 740     yield dut.p_add_i.eq(sendlen)
 741     yield
 742     o_p_ready = yield dut.p_ready_o
 743     while not o_p_ready:
 744         yield
 745         o_p_ready = yield dut.p_ready_o
 746
 747     yield dut.p_add_i.eq(0)
 748
 749
 750 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 751     yield from disable_issue(dut)
 752     yield dut.int_dest_i.eq(dest)
 753     yield dut.int_src1_i.eq(src1)
 754     yield dut.int_src2_i.eq(src2)
 755     if (op & (0x3<<2)) != 0: # branch
 756         yield dut.brissue.insn_i.eq(1)
 757         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 758         dut_issue = dut.brissue
 759     else:
 760         yield dut.aluissue.insn_i.eq(1)
 761         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 762         dut_issue = dut.aluissue
 763     yield dut.reg_enable_i.eq(1)
 764
 765     # these indicate that the instruction is to be made shadow-dependent on
 766     # (either) branch success or branch fail
 767     yield dut.branch_fail_i.eq(branch_fail)
 768     yield dut.branch_succ_i.eq(branch_success)
 769
 770     yield
 771     yield from wait_for_issue(dut, dut_issue)
 772
 773
 774 def print_reg(dut, rnums):
 775     rs = []
 776     for rnum in rnums:
 777         reg = yield dut.intregs.regs[rnum].reg
 778         rs.append("%x" % reg)
 779     rnums = map(str, rnums)
 780     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 781
 782
 783 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 784     insts = []
 785     for i in range(n_ops):
 786         src1 = randint(1, dut.n_regs-1)
 787         src2 = randint(1, dut.n_regs-1)
 788         dest = randint(1, dut.n_regs-1)
 789         op = randint(0, max_opnums)
 790
 791         if shadowing:
 792             insts.append((src1, src2, dest, op, (0, 0)))
 793         else:
 794             insts.append((src1, src2, dest, op))
 795     return insts
 796
 797
 798 def wait_for_busy_clear(dut):
 799     while True:
 800         busy_o = yield dut.busy_o
 801         if not busy_o:
 802             break
 803         print ("busy",)
 804         yield
 805
 806 def disable_issue(dut):
 807     yield dut.aluissue.insn_i.eq(0)
 808     yield dut.brissue.insn_i.eq(0)
 809
 810
 811 def wait_for_issue(dut, dut_issue):
 812     while True:
 813         issue_o = yield dut_issue.fn_issue_o
 814         if issue_o:
 815             yield from disable_issue(dut)
 816             yield dut.reg_enable_i.eq(0)
 817             break
 818         print ("busy",)
 819         #yield from print_reg(dut, [1,2,3])
 820         yield
 821     #yield from print_reg(dut, [1,2,3])
 822
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator exercising branch shadowing on the scoreboard.

        One pass is made (the outer loop runs exactly once):

        * the random module is seeded with iseed (3), dut.branch_direction_o
          is set to 0, and registers 1..n_regs-1 are given the same random
          value in both the dut and alusim
        * a hand-written instruction list is built: one ordinary operation
          followed by one branch (op 4) whose "dest" slot holds a pair of
          lists (branch_ok, branch_fail) of follow-on instructions
        * the instructions are issued one at a time through int_instr;
          when a branch is issued its follow-on instructions are appended
          to the same queue, marked (1, 0) for "ok" and (0, 1) for "fail"
        * after the dut is no longer busy, a deep copy of the original list
          is replayed on alusim, following branch_ok or branch_fail
          according to the value alusim.op returns for the branch
        * finally alusim.check and alusim.dump compare and print registers
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (note: this inner loop reuses the outer loop variable "i")
        for i in range(1, dut.n_regs):
            # the first assignment is immediately overwritten by the random
            # value on the next line
            val = 31+i*3
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated variant of the test below
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: fixed test - one op, then a branch with a single
        # instruction on the "fail" path and nothing (None) on the "ok" path
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the software model: the issue loop below
        # consumes (and extends) insts itself
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        # instrs is an alias of insts (same list object), so the pop from
        # insts and the appends to instrs below act on one queue
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): entries queued from branch_ok carry shadow_on=1
            # and entries from branch_fail carry shadow_off=1 (see the
            # appends further down), whereas the two skip comments below
            # describe the skipped entry as the opposite kind - confirm the
            # encoding of branch_direction_o against the dut.
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            # branch already resolved: issue without any shadow marking
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                # for a branch the "dest" slot is the (ok, fail) pair of
                # follow-on lists; src2 is passed on as the dest instead
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    # None entries (no instruction on that path) are skipped
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the original instruction list on the software model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            # placeholder for "no instruction on this branch path"
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                # queue only the path selected by the model's branch result
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
 935
 936
def scoreboard_sim(dut, alusim):
    """ Simulation generator: run a batch of instructions through the
        instruction queue and compare the result with the software model.

        One pass is made (the outer loop runs exactly once):

        * registers 1..n_regs-1 are given the same random value in both
          the dut and alusim
        * 15 random instructions (ops 0..3, each with a (0, 0) shadow pair)
          are created; the many "if False" sections are disabled
          regression cases kept for reference
        * each instruction is applied to alusim and then queued on the dut
          through instr_q
        * once dut.qlen_o reads zero, four further bare yields are issued
          and wait_for_busy_clear is run, after which alusim.check and
          alusim.dump compare and print the registers
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        # (note: this inner loop reuses the outer loop variable "i")
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        # NOTE(review): the disabled cases from here down to "very weird
        # failure" use 4-element tuples with no shadow pair.  The issue
        # loop below unpacks five fields per instruction, so enabling any
        # of them as-is would raise a ValueError; add a trailing (0, 0)
        # before re-enabling.
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        # disabled: op 4 on registers 5 and 3 (preset to 4 and 5) followed
        # by an instruction marked with shadow pair (0, 1)
        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        # disabled: as above but with register 5 preset to 6 and the
        # follow-on instruction marked with shadow pair (1, 0)
        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, (0, 0)) )
            instrs.append( (5, 2, 3, 1, (0, 0)) )
            instrs.append( (7, 1, 5, 2, (0, 0)) )
            instrs.append( (5, 6, 6, 4, (0, 0)) )
            instrs.append( (7, 5, 2, 2, (1, 0)) )
            instrs.append( (1, 7, 5, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, (1, 0)) )
            instrs.append( (1, 6, 7, 3, (0, 0)) )
            instrs.append( (6, 7, 7, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # the software model is updated as each instruction is queued,
            # i.e. in program order
            alusim.op(op, src1, src2, dest)
            yield from instr_q(dut, op, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        # first: poll until the instruction queue length reads zero
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # then four more bare yields (presumably one clock each - confirm
        # against the simulator) before polling busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1069
1070
def test_scoreboard():
    """ Build the IssueToScoreboard design, write it out as RTLIL to
        test_scoreboard6600.il, then run scoreboard_sim against it with a
        VCD trace written to test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)

    # emit the design as RTLIL
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative: the branch-shadowing test bench
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1083
1084
# run the scoreboard test bench when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()