src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class Memory(Elaboratable):
  26     def __init__(self, regwid, addrw):
  27         depth = (1<<addrw) / (regwid/8)
  28         self.adr   = Signal(addrw)
  29         self.dat_r = Signal(regwid)
  30         self.dat_w = Signal(regwid)
  31         self.we    = Signal()
  32         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  33
  34     def elaborate(self, platform):
  35         m = Module()
  36         m.submodules.rdport = rdport = self.mem.read_port()
  37         m.submodules.wrport = wrport = self.mem.write_port()
  38         m.d.comb += [
  39             rdport.addr.eq(self.adr[2:]),
  40             self.dat_r.eq(rdport.data),
  41             wrport.addr.eq(self.adr),
  42             wrport.data.eq(self.dat_w),
  43             wrport.en.eq(self.we),
  44         ]
  45         return m
  46
  47
  48 class CompUnitsBase(Elaboratable):
  49     """ Computation Unit Base class.
  50
  51         Amazingly, this class works recursively.  It's supposed to just
  52         look after some ALUs (that can handle the same operations),
  53         grouping them together, however it turns out that the same code
  54         can also group *groups* of Computation Units together as well.
  55
  56         Basically it was intended just to concatenate the ALU's issue,
  57         go_rd etc. signals together, which start out as bits and become
  58         sequences.  Turns out that the same trick works just as well
  59         on Computation Units!
  60
  61         So this class may be used recursively to present a top-level
  62         sequential concatenation of all the signals in and out of
  63         ALUs, whilst at the same time making it convenient to group
  64         ALUs together.
  65
  66         At the lower level, the intent is that groups of (identical)
  67         ALUs may be passed the same operation.  Even beyond that,
  68         the intent is that that group of (identical) ALUs actually
  69         share the *same pipeline* and as such become a "Concurrent
  70         Computation Unit" as defined by Mitch Alsup (see section
  71         11.4.9.3)
  72     """
  73     def __init__(self, rwid, units):
  74         """ Inputs:
  75
  76             * :rwid:   bit width of register file(s) - both FP and INT
  77             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  78         """
  79         self.units = units
  80         self.rwid = rwid
  81         self.rwid = rwid
  82         if units and isinstance(units[0], CompUnitsBase):
  83             self.n_units = 0
  84             for u in self.units:
  85                 self.n_units += u.n_units
  86         else:
  87             self.n_units = len(units)
  88
  89         n_units = self.n_units
  90
  91         # inputs
  92         self.issue_i = Signal(n_units, reset_less=True)
  93         self.go_rd_i = Signal(n_units, reset_less=True)
  94         self.go_wr_i = Signal(n_units, reset_less=True)
  95         self.shadown_i = Signal(n_units, reset_less=True)
  96         self.go_die_i = Signal(n_units, reset_less=True)
  97
  98         # outputs
  99         self.busy_o = Signal(n_units, reset_less=True)
 100         self.rd_rel_o = Signal(n_units, reset_less=True)
 101         self.req_rel_o = Signal(n_units, reset_less=True)
 102
 103         # in/out register data (note: not register#, actual data)
 104         self.data_o = Signal(rwid, reset_less=True)
 105         self.src1_i = Signal(rwid, reset_less=True)
 106         self.src2_i = Signal(rwid, reset_less=True)
 107         # input operand
 108
 109     def elaborate(self, platform):
 110         m = Module()
 111         comb = m.d.comb
 112
 113         for i, alu in enumerate(self.units):
 114             setattr(m.submodules, "comp%d" % i, alu)
 115
 116         go_rd_l = []
 117         go_wr_l = []
 118         issue_l = []
 119         busy_l = []
 120         req_rel_l = []
 121         rd_rel_l = []
 122         shadow_l = []
 123         godie_l = []
 124         for alu in self.units:
 125             req_rel_l.append(alu.req_rel_o)
 126             rd_rel_l.append(alu.rd_rel_o)
 127             shadow_l.append(alu.shadown_i)
 128             godie_l.append(alu.go_die_i)
 129             go_wr_l.append(alu.go_wr_i)
 130             go_rd_l.append(alu.go_rd_i)
 131             issue_l.append(alu.issue_i)
 132             busy_l.append(alu.busy_o)
 133         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 134         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 135         comb += self.busy_o.eq(Cat(*busy_l))
 136         comb += Cat(*godie_l).eq(self.go_die_i)
 137         comb += Cat(*shadow_l).eq(self.shadown_i)
 138         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 139         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 140         comb += Cat(*issue_l).eq(self.issue_i)
 141
 142         # connect data register input/output
 143
 144         # merge (OR) all integer FU / ALU outputs to a single value
 145         # bit of a hack: treereduce needs a list with an item named "data_o"
 146         if self.units:
 147             data_o = treereduce(self.units)
 148             comb += self.data_o.eq(data_o)
 149
 150         for i, alu in enumerate(self.units):
 151             comb += alu.src1_i.eq(self.src1_i)
 152             comb += alu.src2_i.eq(self.src2_i)
 153
 154         return m
 155
 156
 157 class CompUnitALUs(CompUnitsBase):
 158
 159     def __init__(self, rwid, opwid):
 160         """ Inputs:
 161
 162             * :rwid:   bit width of register file(s) - both FP and INT
 163             * :opwid:  operand bit width
 164         """
 165         self.opwid = opwid
 166
 167         # inputs
 168         self.oper_i = Signal(opwid, reset_less=True)
 169
 170         # Int ALUs
 171         add = ALU(rwid)
 172         sub = ALU(rwid)
 173         mul = ALU(rwid)
 174         shf = ALU(rwid)
 175
 176         units = []
 177         for alu in [add, sub, mul, shf]:
 178             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 179
 180         CompUnitsBase.__init__(self, rwid, units)
 181
 182     def elaborate(self, platform):
 183         m = CompUnitsBase.elaborate(self, platform)
 184         comb = m.d.comb
 185
 186         # hand the same operation to all units
 187         for alu in self.units:
 188             comb += alu.oper_i.eq(self.oper_i)
 189         #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
 190         #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
 191         #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
 192         #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
 193
 194         return m
 195
 196
 197 class CompUnitBR(CompUnitsBase):
 198
 199     def __init__(self, rwid, opwid):
 200         """ Inputs:
 201
 202             * :rwid:   bit width of register file(s) - both FP and INT
 203             * :opwid:  operand bit width
 204
 205             Note: bgt unit is returned so that a shadow unit can be created
 206             for it
 207         """
 208         self.opwid = opwid
 209
 210         # inputs
 211         self.oper_i = Signal(opwid, reset_less=True)
 212
 213         # Branch ALU and CU
 214         self.bgt = BranchALU(rwid)
 215         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 216         CompUnitsBase.__init__(self, rwid, [self.br1])
 217
 218     def elaborate(self, platform):
 219         m = CompUnitsBase.elaborate(self, platform)
 220         comb = m.d.comb
 221
 222         # hand the same operation to all units
 223         for alu in self.units:
 224             comb += alu.oper_i.eq(self.oper_i)
 225         #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
 226
 227         return m
 228
 229
 230 class FunctionUnits(Elaboratable):
 231
 232     def __init__(self, n_regs, n_int_alus):
 233         self.n_regs = n_regs
 234         self.n_int_alus = n_int_alus
 235
 236         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 237         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 238         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 239
 240         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 241         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 242
 243         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 244         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 245         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 246
 247         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 248         self.readable_o = Signal(n_int_alus, reset_less=True)
 249         self.writable_o = Signal(n_int_alus, reset_less=True)
 250
 251         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 252         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 253         self.go_die_i = Signal(n_int_alus, reset_less=True)
 254         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 255         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 256
 257         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 258
 259     def elaborate(self, platform):
 260         m = Module()
 261         comb = m.d.comb
 262         sync = m.d.sync
 263
 264         n_intfus = self.n_int_alus
 265
 266         # Integer FU-FU Dep Matrix
 267         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 268         m.submodules.intfudeps = intfudeps
 269         # Integer FU-Reg Dep Matrix
 270         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 271         m.submodules.intregdeps = intregdeps
 272
 273         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 274         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 275
 276         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 277         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 278
 279         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 280         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 281         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 282
 283         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 284         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 285         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 286         comb += intfudeps.go_die_i.eq(self.go_die_i)
 287         comb += self.readable_o.eq(intfudeps.readable_o)
 288         comb += self.writable_o.eq(intfudeps.writable_o)
 289
 290         # Connect function issue / arrays, and dest/src1/src2
 291         comb += intregdeps.dest_i.eq(self.dest_i)
 292         comb += intregdeps.src1_i.eq(self.src1_i)
 293         comb += intregdeps.src2_i.eq(self.src2_i)
 294
 295         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 296         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 297         comb += intregdeps.go_die_i.eq(self.go_die_i)
 298         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 299
 300         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 301         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 302         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 303
 304         return m
 305
 306
class Scoreboard(Elaboratable):
    """ Top-level scoreboard.

        Wires together the register files, the ALU and Branch Computation
        Units, the Function Unit dependency matrices, the group picker,
        the issue units, two shadow matrices (instruction order and
        branch) and the branch speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates all submodules and connects them together.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE: the FP ports are created but not connected to anything
        # in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units.  5 = the four ALU units in CompUnitALUs
        # plus the one branch unit in CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the code below does not widen "shadows"; the branch
        # shadow is a separate, 1-wide matrix ("bshadow").
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_intfus)
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # work out the current-activated busy unit (by recording the old one)
        # NOTE: fn_issue_prev is recorded here but not read anywhere else
        # in this method
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): as above, the branch shadow is actually the separate
        # 1-wide "bshadow" matrix, so only bit 0 of its shadow_i / s_good_i /
        # s_fail_i is driven below.

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # bit i of fn_issue_o set: unit i is the one being issued
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        # NOTE(review): the 0x1f masks below are 5 bits wide, matching the
        # hard-coded n_int_alus = 5 above; they need updating together.
        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 maps the branch result 1/0 onto direction 2/1
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields everything from both register files, followed by this
            module's register-number inputs, issue_o and the branch
            speculation signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything yielded by __iter__, as a list.
        """
        return list(self)
 581
 582 class IssueToScoreboard(Elaboratable):
 583
 584     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 585         self.qlen = qlen
 586         self.n_in = n_in
 587         self.n_out = n_out
 588         self.rwid = rwid
 589         self.opw = opwid
 590         self.n_regs = n_regs
 591
 592         mqbits = (int(log(qlen) / log(2))+2, False)
 593         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 594         self.p_ready_o = Signal() # instructions were added
 595         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 596
 597         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 598         self.qlen_o = Signal(mqbits, reset_less=True)
 599
 600     def elaborate(self, platform):
 601         m = Module()
 602         comb = m.d.comb
 603         sync = m.d.sync
 604
 605         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 606         sc = Scoreboard(self.rwid, self.n_regs)
 607         m.submodules.iq = iq
 608         m.submodules.sc = sc
 609
 610         # get at the regfile for testing
 611         self.intregs = sc.intregs
 612
 613         # and the "busy" signal and instruction queue length
 614         comb += self.busy_o.eq(sc.busy_o)
 615         comb += self.qlen_o.eq(iq.qlen_o)
 616
 617         # link up instruction queue
 618         comb += iq.p_add_i.eq(self.p_add_i)
 619         comb += self.p_ready_o.eq(iq.p_ready_o)
 620         for i in range(self.n_in):
 621             comb += eq(iq.data_i[i], self.data_i[i])
 622
 623         # take instruction and process it.  note that it's possible to
 624         # "inspect" the queue contents *without* actually removing the
 625         # items.  items are only removed when the
 626
 627         # in "waiting" state
 628         wait_issue_br = Signal()
 629         wait_issue_alu = Signal()
 630
 631         with m.If(wait_issue_br | wait_issue_alu):
 632             # set instruction pop length to 1 if the unit accepted
 633             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 634                 with m.If(iq.qlen_o != 0):
 635                     comb += iq.n_sub_i.eq(1)
 636             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 637                 with m.If(iq.qlen_o != 0):
 638                     comb += iq.n_sub_i.eq(1)
 639
 640         # see if some instruction(s) are here.  note that this is
 641         # "inspecting" the in-place queue.  note also that on the
 642         # cycle following "waiting" for fn_issue_o to be set, the
 643         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 644         with m.If(iq.qlen_o != 0):
 645             # get the operands and operation
 646             dest = iq.data_o[0].dest_i
 647             src1 = iq.data_o[0].src1_i
 648             src2 = iq.data_o[0].src2_i
 649             op = iq.data_o[0].oper_i
 650
 651             # set the src/dest regs
 652             comb += sc.int_dest_i.eq(dest)
 653             comb += sc.int_src1_i.eq(src1)
 654             comb += sc.int_src2_i.eq(src2)
 655             comb += sc.reg_enable_i.eq(1) # enable the regfile
 656
 657             # choose a Function-Unit-Group
 658             with m.If((op & (0x3<<2)) != 0): # branch
 659                 comb += sc.brissue.insn_i.eq(1)
 660                 comb += sc.br_oper_i.eq(op & 0x3)
 661                 comb += wait_issue_br.eq(1)
 662             with m.Else():                   # alu
 663                 comb += sc.aluissue.insn_i.eq(1)
 664                 comb += sc.alu_oper_i.eq(op & 0x3)
 665                 comb += wait_issue_alu.eq(1)
 666
 667             # XXX TODO
 668             # these indicate that the instruction is to be made
 669             # shadow-dependent on
 670             # (either) branch success or branch fail
 671             #yield sc.branch_fail_i.eq(branch_fail)
 672             #yield sc.branch_succ_i.eq(branch_success)
 673
 674         return m
 675
 676     def __iter__(self):
 677         yield self.p_ready_o
 678         for o in self.data_i:
 679             yield from list(o)
 680         yield self.p_add_i
 681
 682     def ports(self):
 683         return list(self)
 684
 685 IADD = 0
 686 ISUB = 1
 687 IMUL = 2
 688 ISHF = 3
 689 IBGT = 4
 690 IBLT = 5
 691 IBEQ = 6
 692 IBNE = 7
 693
 694 class RegSim:
 695     def __init__(self, rwidth, nregs):
 696         self.rwidth = rwidth
 697         self.regs = [0] * nregs
 698
 699     def op(self, op, src1, src2, dest):
 700         maxbits = (1 << self.rwidth) - 1
 701         src1 = self.regs[src1] & maxbits
 702         src2 = self.regs[src2] & maxbits
 703         if op == IADD:
 704             val = src1 + src2
 705         elif op == ISUB:
 706             val = src1 - src2
 707         elif op == IMUL:
 708             val = src1 * src2
 709         elif op == ISHF:
 710             val = src1 >> (src2 & maxbits)
 711         elif op == IBGT:
 712             val = int(src1 > src2)
 713         elif op == IBLT:
 714             val = int(src1 < src2)
 715         elif op == IBEQ:
 716             val = int(src1 == src2)
 717         elif op == IBNE:
 718             val = int(src1 != src2)
 719         val &= maxbits
 720         self.setval(dest, val)
 721         return val
 722
 723     def setval(self, dest, val):
 724         print ("sim setval", dest, hex(val))
 725         self.regs[dest] = val
 726
 727     def dump(self, dut):
 728         for i, val in enumerate(self.regs):
 729             reg = yield dut.intregs.regs[i].reg
 730             okstr = "OK" if reg == val else "!ok"
 731             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 732
 733     def check(self, dut):
 734         for i, val in enumerate(self.regs):
 735             reg = yield dut.intregs.regs[i].reg
 736             if reg != val:
 737                 print("reg %d expected %x received %x\n" % (i, val, reg))
 738                 yield from self.dump(dut)
 739                 assert False
 740
 741 def instr_q(dut, op, src1, src2, dest, branch_success, branch_fail):
 742     instrs = [{'oper_i': op, 'dest_i': dest, 'src1_i': src1, 'src2_i': src2}]
 743
 744     sendlen = 1
 745     for idx in range(sendlen):
 746         yield from eq(dut.data_i[idx], instrs[idx])
 747         di = yield dut.data_i[idx]
 748         print ("senddata %d %x" % (idx, di))
 749     yield dut.p_add_i.eq(sendlen)
 750     yield
 751     o_p_ready = yield dut.p_ready_o
 752     while not o_p_ready:
 753         yield
 754         o_p_ready = yield dut.p_ready_o
 755
 756     yield dut.p_add_i.eq(0)
 757
 758
 759 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 760     yield from disable_issue(dut)
 761     yield dut.int_dest_i.eq(dest)
 762     yield dut.int_src1_i.eq(src1)
 763     yield dut.int_src2_i.eq(src2)
 764     if (op & (0x3<<2)) != 0: # branch
 765         yield dut.brissue.insn_i.eq(1)
 766         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 767         dut_issue = dut.brissue
 768     else:
 769         yield dut.aluissue.insn_i.eq(1)
 770         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 771         dut_issue = dut.aluissue
 772     yield dut.reg_enable_i.eq(1)
 773
 774     # these indicate that the instruction is to be made shadow-dependent on
 775     # (either) branch success or branch fail
 776     yield dut.branch_fail_i.eq(branch_fail)
 777     yield dut.branch_succ_i.eq(branch_success)
 778
 779     yield
 780     yield from wait_for_issue(dut, dut_issue)
 781
 782
 783 def print_reg(dut, rnums):
 784     rs = []
 785     for rnum in rnums:
 786         reg = yield dut.intregs.regs[rnum].reg
 787         rs.append("%x" % reg)
 788     rnums = map(str, rnums)
 789     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 790
 791
 792 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 793     insts = []
 794     for i in range(n_ops):
 795         src1 = randint(1, dut.n_regs-1)
 796         src2 = randint(1, dut.n_regs-1)
 797         dest = randint(1, dut.n_regs-1)
 798         op = randint(0, max_opnums)
 799
 800         if shadowing:
 801             insts.append((src1, src2, dest, op, (0, 0)))
 802         else:
 803             insts.append((src1, src2, dest, op))
 804     return insts
 805
 806
 807 def wait_for_busy_clear(dut):
 808     while True:
 809         busy_o = yield dut.busy_o
 810         if not busy_o:
 811             break
 812         print ("busy",)
 813         yield
 814
 815 def disable_issue(dut):
 816     yield dut.aluissue.insn_i.eq(0)
 817     yield dut.brissue.insn_i.eq(0)
 818
 819
 820 def wait_for_issue(dut, dut_issue):
 821     while True:
 822         issue_o = yield dut_issue.fn_issue_o
 823         if issue_o:
 824             yield from disable_issue(dut)
 825             yield dut.reg_enable_i.eq(0)
 826             break
 827         print ("busy",)
 828         #yield from print_reg(dut, [1,2,3])
 829         yield
 830     #yield from print_reg(dut, [1,2,3])
 831
def scoreboard_branch_sim(dut, alusim):
    """Simulation process exercising branch shadowing on the scoreboard.

    Steps, all inside a single-iteration outer loop:

    1. seed the RNG (starting at 3) and load random values into DUT
       registers ``1 .. n_regs-1``, mirroring each into ``alusim``;
    2. build a small fixed program: one op-0 instruction followed by one
       op-4 instruction whose ``dest`` field carries a
       ``(branch_ok, branch_fail)`` pair of instruction lists;
    3. issue the program through ``int_instr``, appending the instructions
       from both branch lists to the work queue when an instruction with
       ``op >= 4`` is reached, and skipping queued entries according to
       the value read from ``dut.branch_direction_o``;
    4. wait for ``dut.busy_o`` to clear, replay a deep copy of the program
       on ``alusim`` (following only the list chosen by the simulated
       branch result), then check and dump the registers.

    Instructions are tuples ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            # NOTE(review): this deterministic value is overwritten by the
            # random one on the very next line
            val = 31+i*3
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated program (kept for reference)
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # active: fixed program - one op-0 instruction, then an op-4
        # instruction with an empty (None) success entry and a single
        # fail entry
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the software model: the issue loop below
        # consumes (and extends) insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        # instrs is the same list object as insts: appends made through
        # instrs are seen by insts.pop(0) and by the loop condition
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): shadow_on is what int_instr receives as
            # branch_success and shadow_off as branch_fail; the trailing
            # comments below word the skipped side the other way round -
            # confirm the encoding of branch_direction_o (1 vs 2)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            # once a branch direction has been reported, surviving
            # instructions are issued without shadow flags
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            # opcodes 4 and above are treated as branches
            is_branch = op >= 4
            if is_branch:
                # for a branch the dest field holds the two instruction
                # lists; src2 is passed on as the destination instead
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the software model, following only the
        # branch list selected by the simulated branch result
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            # None is a placeholder for "no instruction on this side"
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
 944
 945
 946 def scoreboard_sim(dut, alusim):
 947
 948     #seed(2)
 949
 950     for i in range(1):
 951
 952         # set random values in the registers
 953         for i in range(1, dut.n_regs):
 954             val = randint(0, (1<<alusim.rwidth)-1)
 955             #val = 31+i*3
 956             #val = i
 957             yield dut.intregs.regs[i].reg.eq(val)
 958             alusim.setval(i, val)
 959
 960         # create some instructions (some random, some regression tests)
 961         instrs = []
 962         if True:
 963             instrs = create_random_ops(dut, 15, True, 3)
 964
 965         if False:
 966             instrs.append( (7, 3, 2, 4, (0, 0)) )
 967             instrs.append( (7, 6, 6, 2, (0, 0)) )
 968             instrs.append( (1, 7, 2, 2, (0, 0)) )
 969
 970
 971         if False:
 972             instrs.append((2, 3, 3, 0, (0, 0)))
 973             instrs.append((5, 3, 3, 1, (0, 0)))
 974             instrs.append((3, 5, 5, 2, (0, 0)))
 975             instrs.append((5, 3, 3, 3, (0, 0)))
 976             instrs.append((3, 5, 5, 0, (0, 0)))
 977
 978         if False:
 979             instrs.append((5, 6, 2, 1))
 980             instrs.append((2, 2, 4, 0))
 981             #instrs.append((2, 2, 3, 1))
 982
 983         if False:
 984             instrs.append((2, 1, 2, 3))
 985
 986         if False:
 987             instrs.append((2, 6, 2, 1))
 988             instrs.append((2, 1, 2, 0))
 989
 990         if False:
 991             instrs.append((1, 2, 7, 2))
 992             instrs.append((7, 1, 5, 0))
 993             instrs.append((4, 4, 1, 1))
 994
 995         if False:
 996             instrs.append((5, 6, 2, 2))
 997             instrs.append((1, 1, 4, 1))
 998             instrs.append((6, 5, 3, 0))
 999
1000         if False:
1001             # Write-after-Write Hazard
1002             instrs.append( (3, 6, 7, 2) )
1003             instrs.append( (4, 4, 7, 1) )
1004
1005         if False:
1006             # self-read/write-after-write followed by Read-after-Write
1007             instrs.append((1, 1, 1, 1))
1008             instrs.append((1, 5, 3, 0))
1009
1010         if False:
1011             # Read-after-Write followed by self-read-after-write
1012             instrs.append((5, 6, 1, 2))
1013             instrs.append((1, 1, 1, 1))
1014
1015         if False:
1016             # self-read-write sandwich
1017             instrs.append((5, 6, 1, 2))
1018             instrs.append((1, 1, 1, 1))
1019             instrs.append((1, 5, 3, 0))
1020
1021         if False:
1022             # very weird failure
1023             instrs.append( (5, 2, 5, 2) )
1024             instrs.append( (2, 6, 3, 0) )
1025             instrs.append( (4, 2, 2, 1) )
1026
1027         if False:
1028             v1 = 4
1029             yield dut.intregs.regs[5].reg.eq(v1)
1030             alusim.setval(5, v1)
1031             yield dut.intregs.regs[3].reg.eq(5)
1032             alusim.setval(3, 5)
1033             instrs.append((5, 3, 3, 4, (0, 0)))
1034             instrs.append((4, 2, 1, 2, (0, 1)))
1035
1036         if False:
1037             v1 = 6
1038             yield dut.intregs.regs[5].reg.eq(v1)
1039             alusim.setval(5, v1)
1040             yield dut.intregs.regs[3].reg.eq(5)
1041             alusim.setval(3, 5)
1042             instrs.append((5, 3, 3, 4, (0, 0)))
1043             instrs.append((4, 2, 1, 2, (1, 0)))
1044
1045         if False:
1046             instrs.append( (4, 3, 5, 1, (0, 0)) )
1047             instrs.append( (5, 2, 3, 1, (0, 0)) )
1048             instrs.append( (7, 1, 5, 2, (0, 0)) )
1049             instrs.append( (5, 6, 6, 4, (0, 0)) )
1050             instrs.append( (7, 5, 2, 2, (1, 0)) )
1051             instrs.append( (1, 7, 5, 0, (0, 1)) )
1052             instrs.append( (1, 6, 1, 2, (1, 0)) )
1053             instrs.append( (1, 6, 7, 3, (0, 0)) )
1054             instrs.append( (6, 7, 7, 0, (0, 0)) )
1055
1056         # issue instruction(s), wait for issue to be free before proceeding
1057         for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):
1058
1059             print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
1060             alusim.op(op, src1, src2, dest)
1061             yield from instr_q(dut, op, src1, src2, dest, br_ok, br_fail)
1062
1063         # wait for all instructions to stop before checking
1064         while True:
1065             iqlen = yield dut.qlen_o
1066             if iqlen == 0:
1067                 break
1068             yield
1069         yield
1070         yield
1071         yield
1072         yield
1073         yield from wait_for_busy_clear(dut)
1074
1075         # check status
1076         yield from alusim.check(dut)
1077         yield from alusim.dump(dut)
1078
1079
def test_scoreboard():
    """Build the scoreboard DUT, dump its RTLIL and run the simulation.

    Writes the RTLIL to ``test_scoreboard6600.il`` and runs
    ``scoreboard_sim`` against the DUT, recording a VCD trace in
    ``test_scoreboard6600.vcd``.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)

    # dump the design as RTLIL
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # to exercise the branch-shadowing path instead, pass
    # scoreboard_branch_sim(dut, alusim) as the simulation process
1092
1093
# script entry point: run the scoreboard test when executed directly
if __name__ == "__main__":
    test_scoreboard()