src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class CompUnitsBase(Elaboratable):
  26     """ Computation Unit Base class.
  27
  28         Amazingly, this class works recursively.  It's supposed to just
  29         look after some ALUs (that can handle the same operations),
  30         grouping them together, however it turns out that the same code
  31         can also group *groups* of Computation Units together as well.
  32
  33         Basically it was intended just to concatenate the ALU's issue,
  34         go_rd etc. signals together, which start out as bits and become
  35         sequences.  Turns out that the same trick works just as well
  36         on Computation Units!
  37
  38         So this class may be used recursively to present a top-level
  39         sequential concatenation of all the signals in and out of
  40         ALUs, whilst at the same time making it convenient to group
  41         ALUs together.
  42
  43         At the lower level, the intent is that groups of (identical)
  44         ALUs may be passed the same operation.  Even beyond that,
  45         the intent is that that group of (identical) ALUs actually
  46         share the *same pipeline* and as such become a "Concurrent
  47         Computation Unit" as defined by Mitch Alsup (see section
  48         11.4.9.3)
  49     """
  50     def __init__(self, rwid, units):
  51         """ Inputs:
  52
  53             * :rwid:   bit width of register file(s) - both FP and INT
  54             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  55         """
  56         self.units = units
  57         self.rwid = rwid
  58         self.rwid = rwid
  59         if units and isinstance(units[0], CompUnitsBase):
  60             self.n_units = 0
  61             for u in self.units:
  62                 self.n_units += u.n_units
  63         else:
  64             self.n_units = len(units)
  65
  66         n_units = self.n_units
  67
  68         # inputs
  69         self.issue_i = Signal(n_units, reset_less=True)
  70         self.go_rd_i = Signal(n_units, reset_less=True)
  71         self.go_wr_i = Signal(n_units, reset_less=True)
  72         self.shadown_i = Signal(n_units, reset_less=True)
  73         self.go_die_i = Signal(n_units, reset_less=True)
  74
  75         # outputs
  76         self.busy_o = Signal(n_units, reset_less=True)
  77         self.rd_rel_o = Signal(n_units, reset_less=True)
  78         self.req_rel_o = Signal(n_units, reset_less=True)
  79
  80         # in/out register data (note: not register#, actual data)
  81         self.data_o = Signal(rwid, reset_less=True)
  82         self.src1_i = Signal(rwid, reset_less=True)
  83         self.src2_i = Signal(rwid, reset_less=True)
  84         # input operand
  85
  86     def elaborate(self, platform):
  87         m = Module()
  88         comb = m.d.comb
  89
  90         for i, alu in enumerate(self.units):
  91             setattr(m.submodules, "comp%d" % i, alu)
  92
  93         go_rd_l = []
  94         go_wr_l = []
  95         issue_l = []
  96         busy_l = []
  97         req_rel_l = []
  98         rd_rel_l = []
  99         shadow_l = []
 100         godie_l = []
 101         for alu in self.units:
 102             req_rel_l.append(alu.req_rel_o)
 103             rd_rel_l.append(alu.rd_rel_o)
 104             shadow_l.append(alu.shadown_i)
 105             godie_l.append(alu.go_die_i)
 106             go_wr_l.append(alu.go_wr_i)
 107             go_rd_l.append(alu.go_rd_i)
 108             issue_l.append(alu.issue_i)
 109             busy_l.append(alu.busy_o)
 110         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 111         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 112         comb += self.busy_o.eq(Cat(*busy_l))
 113         comb += Cat(*godie_l).eq(self.go_die_i)
 114         comb += Cat(*shadow_l).eq(self.shadown_i)
 115         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 116         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 117         comb += Cat(*issue_l).eq(self.issue_i)
 118
 119         # connect data register input/output
 120
 121         # merge (OR) all integer FU / ALU outputs to a single value
 122         # bit of a hack: treereduce needs a list with an item named "data_o"
 123         if self.units:
 124             data_o = treereduce(self.units)
 125             comb += self.data_o.eq(data_o)
 126
 127         for i, alu in enumerate(self.units):
 128             comb += alu.src1_i.eq(self.src1_i)
 129             comb += alu.src2_i.eq(self.src2_i)
 130
 131         return m
 132
 133
 134 class CompUnitALUs(CompUnitsBase):
 135
 136     def __init__(self, rwid, opwid):
 137         """ Inputs:
 138
 139             * :rwid:   bit width of register file(s) - both FP and INT
 140             * :opwid:  operand bit width
 141         """
 142         self.opwid = opwid
 143
 144         # inputs
 145         self.oper_i = Signal(opwid, reset_less=True)
 146
 147         # Int ALUs
 148         add = ALU(rwid)
 149         sub = ALU(rwid)
 150         mul = ALU(rwid)
 151         shf = ALU(rwid)
 152
 153         units = []
 154         for alu in [add, sub, mul, shf]:
 155             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 156
 157         CompUnitsBase.__init__(self, rwid, units)
 158
 159     def elaborate(self, platform):
 160         m = CompUnitsBase.elaborate(self, platform)
 161         comb = m.d.comb
 162
 163         # hand the same operation to all units
 164         for alu in self.units:
 165             comb += alu.oper_i.eq(self.oper_i)
 166         #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
 167         #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
 168         #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
 169         #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
 170
 171         return m
 172
 173
 174 class CompUnitBR(CompUnitsBase):
 175
 176     def __init__(self, rwid, opwid):
 177         """ Inputs:
 178
 179             * :rwid:   bit width of register file(s) - both FP and INT
 180             * :opwid:  operand bit width
 181
 182             Note: bgt unit is returned so that a shadow unit can be created
 183             for it
 184         """
 185         self.opwid = opwid
 186
 187         # inputs
 188         self.oper_i = Signal(opwid, reset_less=True)
 189
 190         # Branch ALU and CU
 191         self.bgt = BranchALU(rwid)
 192         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 193         CompUnitsBase.__init__(self, rwid, [self.br1])
 194
 195     def elaborate(self, platform):
 196         m = CompUnitsBase.elaborate(self, platform)
 197         comb = m.d.comb
 198
 199         # hand the same operation to all units
 200         for alu in self.units:
 201             comb += alu.oper_i.eq(self.oper_i)
 202         #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
 203
 204         return m
 205
 206
 207 class FunctionUnits(Elaboratable):
 208
 209     def __init__(self, n_regs, n_int_alus):
 210         self.n_regs = n_regs
 211         self.n_int_alus = n_int_alus
 212
 213         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 214         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 215         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 216
 217         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 218         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 219
 220         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 221         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 222         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 223
 224         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 225         self.readable_o = Signal(n_int_alus, reset_less=True)
 226         self.writable_o = Signal(n_int_alus, reset_less=True)
 227
 228         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 229         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 230         self.go_die_i = Signal(n_int_alus, reset_less=True)
 231         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 232         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 233
 234         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 235
 236     def elaborate(self, platform):
 237         m = Module()
 238         comb = m.d.comb
 239         sync = m.d.sync
 240
 241         n_intfus = self.n_int_alus
 242
 243         # Integer FU-FU Dep Matrix
 244         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 245         m.submodules.intfudeps = intfudeps
 246         # Integer FU-Reg Dep Matrix
 247         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 248         m.submodules.intregdeps = intregdeps
 249
 250         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 251         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 252
 253         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 254         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 255
 256         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 257         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 258         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 259
 260         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 261         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 262         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 263         comb += intfudeps.go_die_i.eq(self.go_die_i)
 264         comb += self.readable_o.eq(intfudeps.readable_o)
 265         comb += self.writable_o.eq(intfudeps.writable_o)
 266
 267         # Connect function issue / arrays, and dest/src1/src2
 268         comb += intregdeps.dest_i.eq(self.dest_i)
 269         comb += intregdeps.src1_i.eq(self.src1_i)
 270         comb += intregdeps.src2_i.eq(self.src2_i)
 271
 272         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 273         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 274         comb += intregdeps.go_die_i.eq(self.go_die_i)
 275         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 276
 277         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 278         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 279         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 280
 281         return m
 282
 283
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment.

        Wires together two register files, the ALU and branch Computation
        Unit groups, the FunctionUnits dependency matrices, a GroupPicker,
        the register decoder / issue units, two ShadowMatrix instances and
        a BranchSpeculationRecord.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        # NOTE(review): both are 4 bits wide, whereas elaborate() builds
        # the Computation Unit groups with opwid=2 - confirm intended width
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # NOTE(review): elaborate() drives this as br1.data_o+1, i.e. 1 when
        # data_o is 0 and 2 when data_o is 1 - confirm the success/fail
        # naming against the test infrastructure.
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Build and connect all sub-units (see the section comments). """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four units of CompUnitALUs plus the one of CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: one entry per integer FU
        # (add, sub, mul, shf and the branch unit)
        intpick1 = GroupPicker(n_intfus)
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" is n_intfus x n_intfus, to be used
        # for write-after-write hazards / instruction order.  the branch
        # gets its own, separate, n_intfus x 1 matrix ("bshadow").
        # NOTE(review): meaning of the third (True/False) argument is
        # defined by ShadowMatrix - not visible here.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_intfus)
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the order-preserving matrix is reset by the branch matrix's go_die
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # work out the current-activated busy unit (by recording the old one)
        # NOTE(review): fn_issue_prev is registered here but not read
        # anywhere else in this method.
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # single-column "bshadow" matrix (column 0 = the branch), and
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is in flight: either recorded as active or being issued
        # right now, and not being written this cycle
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f is a hard-coded 5-bit mask, matching n_int_alus = 5 above
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yield the register-file contents of both regfiles, then the
            dest/src register inputs, issue_o and the branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Everything produced by __iter__, as a list. """
        return list(self)
 558
class IssueToScoreboard(Elaboratable):
    """ Instruction queue feeding a Scoreboard.

        Instructions are added via p_add_i / data_i; the entry at the
        head of the queue is presented to the Scoreboard and popped once
        the selected issue group reports that it was issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ (queue length)
            * :n_in:   number of data_i instruction entries
            * :n_out:  passed to InstructionQ
            * :rwid:   passed to InstructionQ, Instruction and Scoreboard
            * :opwid:  passed to InstructionQ and Instruction
            * :n_regs: passed to Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # shape of the queue-length counters: floor(log2(qlen))+2 bits.
        # presumably a (width, signed) tuple with signed=False - confirm
        # against InstructionQ
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Create the queue and the scoreboard and link them up.

            Note: also sets self.intregs (the scoreboard's integer
            regfile), which therefore only exists after elaboration.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed (n_sub_i set to 1) when the
        # issue group that was asked to take the instruction reports a
        # non-zero fn_issue_o.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: opcode bits 2-3 non-zero
            # selects the branch group; either way only the low two
            # bits are passed on as the operation
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(op & 0x3)
                comb += wait_issue_br.eq(1)
            with m.Else():                   # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(op & 0x3)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ Yield p_ready_o, every field of each data_i entry, then
            p_add_i.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Everything produced by __iter__, as a list. """
        return list(self)
 661
# Opcode numbers used by the simulation helpers below.  The issue logic
# (IssueToScoreboard.elaborate and int_instr) treats an opcode with
# (op & (0x3<<2)) != 0 - i.e. 4..7 here - as a branch, and hands only the
# low two bits (op & 0x3) to the selected function unit group.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7
 670
 671 class RegSim:
 672     def __init__(self, rwidth, nregs):
 673         self.rwidth = rwidth
 674         self.regs = [0] * nregs
 675
 676     def op(self, op, src1, src2, dest):
 677         maxbits = (1 << self.rwidth) - 1
 678         src1 = self.regs[src1] & maxbits
 679         src2 = self.regs[src2] & maxbits
 680         if op == IADD:
 681             val = src1 + src2
 682         elif op == ISUB:
 683             val = src1 - src2
 684         elif op == IMUL:
 685             val = src1 * src2
 686         elif op == ISHF:
 687             val = src1 >> (src2 & maxbits)
 688         elif op == IBGT:
 689             val = int(src1 > src2)
 690         elif op == IBLT:
 691             val = int(src1 < src2)
 692         elif op == IBEQ:
 693             val = int(src1 == src2)
 694         elif op == IBNE:
 695             val = int(src1 != src2)
 696         val &= maxbits
 697         self.setval(dest, val)
 698         return val
 699
 700     def setval(self, dest, val):
 701         print ("sim setval", dest, hex(val))
 702         self.regs[dest] = val
 703
 704     def dump(self, dut):
 705         for i, val in enumerate(self.regs):
 706             reg = yield dut.intregs.regs[i].reg
 707             okstr = "OK" if reg == val else "!ok"
 708             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 709
 710     def check(self, dut):
 711         for i, val in enumerate(self.regs):
 712             reg = yield dut.intregs.regs[i].reg
 713             if reg != val:
 714                 print("reg %d expected %x received %x\n" % (i, val, reg))
 715                 yield from self.dump(dut)
 716                 assert False
 717
 718 def instr_q(dut, op, src1, src2, dest, branch_success, branch_fail):
 719     instrs = [{'oper_i': op, 'dest_i': dest, 'src1_i': src1, 'src2_i': src2}]
 720
 721     sendlen = 1
 722     for idx in range(sendlen):
 723         yield from eq(dut.data_i[idx], instrs[idx])
 724         di = yield dut.data_i[idx]
 725         print ("senddata %d %x" % (idx, di))
 726     yield dut.p_add_i.eq(sendlen)
 727     yield
 728     o_p_ready = yield dut.p_ready_o
 729     while not o_p_ready:
 730         yield
 731         o_p_ready = yield dut.p_ready_o
 732
 733     yield dut.p_add_i.eq(0)
 734
 735
 736 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 737     yield from disable_issue(dut)
 738     yield dut.int_dest_i.eq(dest)
 739     yield dut.int_src1_i.eq(src1)
 740     yield dut.int_src2_i.eq(src2)
 741     if (op & (0x3<<2)) != 0: # branch
 742         yield dut.brissue.insn_i.eq(1)
 743         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 744         dut_issue = dut.brissue
 745     else:
 746         yield dut.aluissue.insn_i.eq(1)
 747         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 748         dut_issue = dut.aluissue
 749     yield dut.reg_enable_i.eq(1)
 750
 751     # these indicate that the instruction is to be made shadow-dependent on
 752     # (either) branch success or branch fail
 753     yield dut.branch_fail_i.eq(branch_fail)
 754     yield dut.branch_succ_i.eq(branch_success)
 755
 756     yield
 757     yield from wait_for_issue(dut, dut_issue)
 758
 759
 760 def print_reg(dut, rnums):
 761     rs = []
 762     for rnum in rnums:
 763         reg = yield dut.intregs.regs[rnum].reg
 764         rs.append("%x" % reg)
 765     rnums = map(str, rnums)
 766     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 767
 768
 769 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 770     insts = []
 771     for i in range(n_ops):
 772         src1 = randint(1, dut.n_regs-1)
 773         src2 = randint(1, dut.n_regs-1)
 774         dest = randint(1, dut.n_regs-1)
 775         op = randint(0, max_opnums)
 776
 777         if shadowing:
 778             insts.append((src1, src2, dest, op, (0, 0)))
 779         else:
 780             insts.append((src1, src2, dest, op))
 781     return insts
 782
 783
 784 def wait_for_busy_clear(dut):
 785     while True:
 786         busy_o = yield dut.busy_o
 787         if not busy_o:
 788             break
 789         print ("busy",)
 790         yield
 791
 792 def disable_issue(dut):
 793     yield dut.aluissue.insn_i.eq(0)
 794     yield dut.brissue.insn_i.eq(0)
 795
 796
 797 def wait_for_issue(dut, dut_issue):
 798     while True:
 799         issue_o = yield dut_issue.fn_issue_o
 800         if issue_o:
 801             yield from disable_issue(dut)
 802             yield dut.reg_enable_i.eq(0)
 803             break
 804         print ("busy",)
 805         #yield from print_reg(dut, [1,2,3])
 806         yield
 807     #yield from print_reg(dut, [1,2,3])
 808
 809 def scoreboard_branch_sim(dut, alusim):
 810
 811     iseed = 3
 812
 813     for i in range(1):
 814
 815         print ("rseed", iseed)
 816         seed(iseed)
 817         iseed += 1
 818
 819         yield dut.branch_direction_o.eq(0)
 820
 821         # set random values in the registers
 822         for i in range(1, dut.n_regs):
 823             val = 31+i*3
 824             val = randint(0, (1<<alusim.rwidth)-1)
 825             yield dut.intregs.regs[i].reg.eq(val)
 826             alusim.setval(i, val)
 827
 828         if False:
 829             # create some instructions: branches create a tree
 830             insts = create_random_ops(dut, 1, True, 1)
 831             #insts.append((6, 6, 1, 2, (0, 0)))
 832             #insts.append((4, 3, 3, 0, (0, 0)))
 833
 834             src1 = randint(1, dut.n_regs-1)
 835             src2 = randint(1, dut.n_regs-1)
 836             #op = randint(4, 7)
 837             op = 4 # only BGT at the moment
 838
 839             branch_ok = create_random_ops(dut, 1, True, 1)
 840             branch_fail = create_random_ops(dut, 1, True, 1)
 841
 842             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 843
 844         if True:
 845             insts = []
 846             insts.append( (3, 5, 2, 0, (0, 0)) )
 847             branch_ok = []
 848             branch_fail = []
 849             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 850             branch_ok.append( None )
 851             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 852             #branch_fail.append( None )
 853             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 854
 855         siminsts = deepcopy(insts)
 856
 857         # issue instruction(s)
 858         i = -1
 859         instrs = insts
 860         branch_direction = 0
 861         while instrs:
 862             yield
 863             yield
 864             i += 1
 865             branch_direction = yield dut.branch_direction_o # way branch went
 866             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 867             if branch_direction == 1 and shadow_on:
 868                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 869                 continue # branch was "success" and this is a "failed"... skip
 870             if branch_direction == 2 and shadow_off:
 871                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 872                 continue # branch was "fail" and this is a "success"... skip
 873             if branch_direction != 0:
 874                 shadow_on = 0
 875                 shadow_off = 0
 876             is_branch = op >= 4
 877             if is_branch:
 878                 branch_ok, branch_fail = dest
 879                 dest = src2
 880                 # ok zip up the branch success / fail instructions and
 881                 # drop them into the queue, one marked "to have branch success"
 882                 # the other to be marked shadow branch "fail".
 883                 # one out of each of these will be cancelled
 884                 for ok, fl in zip(branch_ok, branch_fail):
 885                     if ok:
 886                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
 887                     if fl:
 888                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
 889             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
 890                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 891             yield from int_instr(dut, op, src1, src2, dest,
 892                                  shadow_on, shadow_off)
 893
 894         # wait for all instructions to stop before checking
 895         yield
 896         yield from wait_for_busy_clear(dut)
 897
 898         i = -1
 899         while siminsts:
 900             instr = siminsts.pop(0)
 901             if instr is None:
 902                 continue
 903             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
 904             i += 1
 905             is_branch = op >= 4
 906             if is_branch:
 907                 branch_ok, branch_fail = dest
 908                 dest = src2
 909             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
 910                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 911             branch_res = alusim.op(op, src1, src2, dest)
 912             if is_branch:
 913                 if branch_res:
 914                     siminsts += branch_ok
 915                 else:
 916                     siminsts += branch_fail
 917
 918         # check status
 919         yield from alusim.check(dut)
 920         yield from alusim.dump(dut)
 921
 922
 923 def scoreboard_sim(dut, alusim):
 924
 925     #seed(2)
 926
 927     for i in range(1):
 928
 929         # set random values in the registers
 930         for i in range(1, dut.n_regs):
 931             val = randint(0, (1<<alusim.rwidth)-1)
 932             #val = 31+i*3
 933             #val = i
 934             yield dut.intregs.regs[i].reg.eq(val)
 935             alusim.setval(i, val)
 936
 937         # create some instructions (some random, some regression tests)
 938         instrs = []
 939         if True:
 940             instrs = create_random_ops(dut, 15, True, 3)
 941
 942         if False:
 943             instrs.append( (7, 3, 2, 4, (0, 0)) )
 944             instrs.append( (7, 6, 6, 2, (0, 0)) )
 945             instrs.append( (1, 7, 2, 2, (0, 0)) )
 946
 947
 948         if False:
 949             instrs.append((2, 3, 3, 0, (0, 0)))
 950             instrs.append((5, 3, 3, 1, (0, 0)))
 951             instrs.append((3, 5, 5, 2, (0, 0)))
 952             instrs.append((5, 3, 3, 3, (0, 0)))
 953             instrs.append((3, 5, 5, 0, (0, 0)))
 954
 955         if False:
 956             instrs.append((5, 6, 2, 1))
 957             instrs.append((2, 2, 4, 0))
 958             #instrs.append((2, 2, 3, 1))
 959
 960         if False:
 961             instrs.append((2, 1, 2, 3))
 962
 963         if False:
 964             instrs.append((2, 6, 2, 1))
 965             instrs.append((2, 1, 2, 0))
 966
 967         if False:
 968             instrs.append((1, 2, 7, 2))
 969             instrs.append((7, 1, 5, 0))
 970             instrs.append((4, 4, 1, 1))
 971
 972         if False:
 973             instrs.append((5, 6, 2, 2))
 974             instrs.append((1, 1, 4, 1))
 975             instrs.append((6, 5, 3, 0))
 976
 977         if False:
 978             # Write-after-Write Hazard
 979             instrs.append( (3, 6, 7, 2) )
 980             instrs.append( (4, 4, 7, 1) )
 981
 982         if False:
 983             # self-read/write-after-write followed by Read-after-Write
 984             instrs.append((1, 1, 1, 1))
 985             instrs.append((1, 5, 3, 0))
 986
 987         if False:
 988             # Read-after-Write followed by self-read-after-write
 989             instrs.append((5, 6, 1, 2))
 990             instrs.append((1, 1, 1, 1))
 991
 992         if False:
 993             # self-read-write sandwich
 994             instrs.append((5, 6, 1, 2))
 995             instrs.append((1, 1, 1, 1))
 996             instrs.append((1, 5, 3, 0))
 997
 998         if False:
 999             # very weird failure
1000             instrs.append( (5, 2, 5, 2) )
1001             instrs.append( (2, 6, 3, 0) )
1002             instrs.append( (4, 2, 2, 1) )
1003
1004         if False:
1005             v1 = 4
1006             yield dut.intregs.regs[5].reg.eq(v1)
1007             alusim.setval(5, v1)
1008             yield dut.intregs.regs[3].reg.eq(5)
1009             alusim.setval(3, 5)
1010             instrs.append((5, 3, 3, 4, (0, 0)))
1011             instrs.append((4, 2, 1, 2, (0, 1)))
1012
1013         if False:
1014             v1 = 6
1015             yield dut.intregs.regs[5].reg.eq(v1)
1016             alusim.setval(5, v1)
1017             yield dut.intregs.regs[3].reg.eq(5)
1018             alusim.setval(3, 5)
1019             instrs.append((5, 3, 3, 4, (0, 0)))
1020             instrs.append((4, 2, 1, 2, (1, 0)))
1021
1022         if False:
1023             instrs.append( (4, 3, 5, 1, (0, 0)) )
1024             instrs.append( (5, 2, 3, 1, (0, 0)) )
1025             instrs.append( (7, 1, 5, 2, (0, 0)) )
1026             instrs.append( (5, 6, 6, 4, (0, 0)) )
1027             instrs.append( (7, 5, 2, 2, (1, 0)) )
1028             instrs.append( (1, 7, 5, 0, (0, 1)) )
1029             instrs.append( (1, 6, 1, 2, (1, 0)) )
1030             instrs.append( (1, 6, 7, 3, (0, 0)) )
1031             instrs.append( (6, 7, 7, 0, (0, 0)) )
1032
1033         # issue instruction(s), wait for issue to be free before proceeding
1034         for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):
1035
1036             print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
1037             alusim.op(op, src1, src2, dest)
1038             yield from instr_q(dut, op, src1, src2, dest, br_ok, br_fail)
1039
1040         # wait for all instructions to stop before checking
1041         while True:
1042             iqlen = yield dut.qlen_o
1043             if iqlen == 0:
1044                 break
1045             yield
1046         yield
1047         yield
1048         yield
1049         yield
1050         yield from wait_for_busy_clear(dut)
1051
1052         # check status
1053         yield from alusim.check(dut)
1054         yield from alusim.dump(dut)
1055
1056
def test_scoreboard():
    """ Build the scoreboard DUT, dump it as RTLIL and run scoreboard_sim.

        The RTLIL text is written to test_scoreboard6600.il and the
        simulation trace to test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)

    # RTLIL dump of the design
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (disabled): the branch/shadow test-bench
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1069
1070
# run the scoreboard test-bench when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()