src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class Memory(Elaboratable):
  26     def __init__(self, regwid, addrw):
  27         self.ddepth = regwid/8
  28         depth = (1<<addrw) / self.ddepth
  29         self.adr   = Signal(addrw)
  30         self.dat_r = Signal(regwid)
  31         self.dat_w = Signal(regwid)
  32         self.we    = Signal()
  33         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         m.submodules.rdport = rdport = self.mem.read_port()
  38         m.submodules.wrport = wrport = self.mem.write_port()
  39         m.d.comb += [
  40             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  41             self.dat_r.eq(rdport.data),
  42             wrport.addr.eq(self.adr),
  43             wrport.data.eq(self.dat_w),
  44             wrport.en.eq(self.we),
  45         ]
  46         return m
  47
  48
  49 class MemSim:
  50     def __init__(self, regwid, addrw):
  51         self.regwid = regwid
  52         self.ddepth = regwid//8
  53         depth = (1<<addrw) // self.ddepth
  54         self.mem = list(range(0, depth))
  55
  56     def ld(self, addr):
  57         return self.mem[addr>>self.ddepth]
  58
  59     def st(self, addr, data):
  60         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  61
  62
  63 class CompUnitsBase(Elaboratable):
  64     """ Computation Unit Base class.
  65
  66         Amazingly, this class works recursively.  It's supposed to just
  67         look after some ALUs (that can handle the same operations),
  68         grouping them together, however it turns out that the same code
  69         can also group *groups* of Computation Units together as well.
  70
  71         Basically it was intended just to concatenate the ALU's issue,
  72         go_rd etc. signals together, which start out as bits and become
  73         sequences.  Turns out that the same trick works just as well
  74         on Computation Units!
  75
  76         So this class may be used recursively to present a top-level
  77         sequential concatenation of all the signals in and out of
  78         ALUs, whilst at the same time making it convenient to group
  79         ALUs together.
  80
  81         At the lower level, the intent is that groups of (identical)
  82         ALUs may be passed the same operation.  Even beyond that,
  83         the intent is that that group of (identical) ALUs actually
  84         share the *same pipeline* and as such become a "Concurrent
  85         Computation Unit" as defined by Mitch Alsup (see section
  86         11.4.9.3)
  87     """
  88     def __init__(self, rwid, units):
  89         """ Inputs:
  90
  91             * :rwid:   bit width of register file(s) - both FP and INT
  92             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  93         """
  94         self.units = units
  95         self.rwid = rwid
  96         self.rwid = rwid
  97         if units and isinstance(units[0], CompUnitsBase):
  98             self.n_units = 0
  99             for u in self.units:
 100                 self.n_units += u.n_units
 101         else:
 102             self.n_units = len(units)
 103
 104         n_units = self.n_units
 105
 106         # inputs
 107         self.issue_i = Signal(n_units, reset_less=True)
 108         self.go_rd_i = Signal(n_units, reset_less=True)
 109         self.go_wr_i = Signal(n_units, reset_less=True)
 110         self.shadown_i = Signal(n_units, reset_less=True)
 111         self.go_die_i = Signal(n_units, reset_less=True)
 112
 113         # outputs
 114         self.busy_o = Signal(n_units, reset_less=True)
 115         self.rd_rel_o = Signal(n_units, reset_less=True)
 116         self.req_rel_o = Signal(n_units, reset_less=True)
 117
 118         # in/out register data (note: not register#, actual data)
 119         self.data_o = Signal(rwid, reset_less=True)
 120         self.src1_i = Signal(rwid, reset_less=True)
 121         self.src2_i = Signal(rwid, reset_less=True)
 122         # input operand
 123
 124     def elaborate(self, platform):
 125         m = Module()
 126         comb = m.d.comb
 127
 128         for i, alu in enumerate(self.units):
 129             setattr(m.submodules, "comp%d" % i, alu)
 130
 131         go_rd_l = []
 132         go_wr_l = []
 133         issue_l = []
 134         busy_l = []
 135         req_rel_l = []
 136         rd_rel_l = []
 137         shadow_l = []
 138         godie_l = []
 139         for alu in self.units:
 140             req_rel_l.append(alu.req_rel_o)
 141             rd_rel_l.append(alu.rd_rel_o)
 142             shadow_l.append(alu.shadown_i)
 143             godie_l.append(alu.go_die_i)
 144             go_wr_l.append(alu.go_wr_i)
 145             go_rd_l.append(alu.go_rd_i)
 146             issue_l.append(alu.issue_i)
 147             busy_l.append(alu.busy_o)
 148         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 149         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 150         comb += self.busy_o.eq(Cat(*busy_l))
 151         comb += Cat(*godie_l).eq(self.go_die_i)
 152         comb += Cat(*shadow_l).eq(self.shadown_i)
 153         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 154         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 155         comb += Cat(*issue_l).eq(self.issue_i)
 156
 157         # connect data register input/output
 158
 159         # merge (OR) all integer FU / ALU outputs to a single value
 160         # bit of a hack: treereduce needs a list with an item named "data_o"
 161         if self.units:
 162             data_o = treereduce(self.units)
 163             comb += self.data_o.eq(data_o)
 164
 165         for i, alu in enumerate(self.units):
 166             comb += alu.src1_i.eq(self.src1_i)
 167             comb += alu.src2_i.eq(self.src2_i)
 168
 169         return m
 170
 171
 172 class CompUnitALUs(CompUnitsBase):
 173
 174     def __init__(self, rwid, opwid):
 175         """ Inputs:
 176
 177             * :rwid:   bit width of register file(s) - both FP and INT
 178             * :opwid:  operand bit width
 179         """
 180         self.opwid = opwid
 181
 182         # inputs
 183         self.oper_i = Signal(opwid, reset_less=True)
 184
 185         # Int ALUs
 186         add = ALU(rwid)
 187         sub = ALU(rwid)
 188         mul = ALU(rwid)
 189         shf = ALU(rwid)
 190
 191         units = []
 192         for alu in [add, sub, mul, shf]:
 193             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 194
 195         CompUnitsBase.__init__(self, rwid, units)
 196
 197     def elaborate(self, platform):
 198         m = CompUnitsBase.elaborate(self, platform)
 199         comb = m.d.comb
 200
 201         # hand the same operation to all units
 202         for alu in self.units:
 203             comb += alu.oper_i.eq(self.oper_i)
 204         #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
 205         #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
 206         #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
 207         #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
 208
 209         return m
 210
 211
 212 class CompUnitBR(CompUnitsBase):
 213
 214     def __init__(self, rwid, opwid):
 215         """ Inputs:
 216
 217             * :rwid:   bit width of register file(s) - both FP and INT
 218             * :opwid:  operand bit width
 219
 220             Note: bgt unit is returned so that a shadow unit can be created
 221             for it
 222         """
 223         self.opwid = opwid
 224
 225         # inputs
 226         self.oper_i = Signal(opwid, reset_less=True)
 227
 228         # Branch ALU and CU
 229         self.bgt = BranchALU(rwid)
 230         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 231         CompUnitsBase.__init__(self, rwid, [self.br1])
 232
 233     def elaborate(self, platform):
 234         m = CompUnitsBase.elaborate(self, platform)
 235         comb = m.d.comb
 236
 237         # hand the same operation to all units
 238         for alu in self.units:
 239             comb += alu.oper_i.eq(self.oper_i)
 240         #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
 241
 242         return m
 243
 244
 245 class FunctionUnits(Elaboratable):
 246
 247     def __init__(self, n_regs, n_int_alus):
 248         self.n_regs = n_regs
 249         self.n_int_alus = n_int_alus
 250
 251         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 252         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 253         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 254
 255         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 256         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 257
 258         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 259         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 260         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 261
 262         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 263         self.readable_o = Signal(n_int_alus, reset_less=True)
 264         self.writable_o = Signal(n_int_alus, reset_less=True)
 265
 266         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 267         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 268         self.go_die_i = Signal(n_int_alus, reset_less=True)
 269         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 270         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 271
 272         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 273
 274     def elaborate(self, platform):
 275         m = Module()
 276         comb = m.d.comb
 277         sync = m.d.sync
 278
 279         n_intfus = self.n_int_alus
 280
 281         # Integer FU-FU Dep Matrix
 282         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 283         m.submodules.intfudeps = intfudeps
 284         # Integer FU-Reg Dep Matrix
 285         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 286         m.submodules.intregdeps = intregdeps
 287
 288         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 289         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 290
 291         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 292         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 293
 294         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 295         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 296         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 297
 298         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 299         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 300         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 301         comb += intfudeps.go_die_i.eq(self.go_die_i)
 302         comb += self.readable_o.eq(intfudeps.readable_o)
 303         comb += self.writable_o.eq(intfudeps.writable_o)
 304
 305         # Connect function issue / arrays, and dest/src1/src2
 306         comb += intregdeps.dest_i.eq(self.dest_i)
 307         comb += intregdeps.src1_i.eq(self.src1_i)
 308         comb += intregdeps.src2_i.eq(self.src2_i)
 309
 310         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 311         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 312         comb += intregdeps.go_die_i.eq(self.go_die_i)
 313         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 314
 315         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 316         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 317         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 318
 319         return m
 320
 321
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: wires together the register files, the
        Computation Units (4 ALUs plus 1 branch unit), the Function Unit
        dependency matrices, the group picker, the issue units, the shadow
        matrices and the branch speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        # (group sizes match the 4 ALU units and 1 branch unit created
        # in elaborate)
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        # NOTE(review): these are 4 bits wide, whereas the Computation Unit
        # groups in elaborate are created with an operand width of 2
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiate all submodules and connect them together. """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # (FP ports are created but not connected to anything below)
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four ALU units in CompUnitALUs plus the one in CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.
        # NOTE(review): an earlier version of this comment said the shadow
        # width was increased by 1 for branches; in the code below the
        # branch shadow is instead a separate ShadowMatrix of width 1.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # scoreboard is busy if any Computation Unit is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the ordering shadows are reset by the branch shadow's go_die
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # one-bit-wide branch ShadowMatrix (bshadow), which
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from the cycle it is issued (or while the
        # recorder says so) until the branch unit gets its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the unit whose issue bit is set gets the shadow
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f masks the low five bits, i.e. one per unit (n_int_alus = 5)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 gives the encoding used by branch_direction_o
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yield the contents of both register files followed by the
            register-number inputs, issue_o and the branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Return everything produced by __iter__ as a list. """
        return list(self)
 591
 592
 593 class IssueToScoreboard(Elaboratable):
 594
 595     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 596         self.qlen = qlen
 597         self.n_in = n_in
 598         self.n_out = n_out
 599         self.rwid = rwid
 600         self.opw = opwid
 601         self.n_regs = n_regs
 602
 603         mqbits = (int(log(qlen) / log(2))+2, False)
 604         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 605         self.p_ready_o = Signal() # instructions were added
 606         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 607
 608         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 609         self.qlen_o = Signal(mqbits, reset_less=True)
 610
 611     def elaborate(self, platform):
 612         m = Module()
 613         comb = m.d.comb
 614         sync = m.d.sync
 615
 616         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 617         sc = Scoreboard(self.rwid, self.n_regs)
 618         m.submodules.iq = iq
 619         m.submodules.sc = sc
 620
 621         # get at the regfile for testing
 622         self.intregs = sc.intregs
 623
 624         # and the "busy" signal and instruction queue length
 625         comb += self.busy_o.eq(sc.busy_o)
 626         comb += self.qlen_o.eq(iq.qlen_o)
 627
 628         # link up instruction queue
 629         comb += iq.p_add_i.eq(self.p_add_i)
 630         comb += self.p_ready_o.eq(iq.p_ready_o)
 631         for i in range(self.n_in):
 632             comb += eq(iq.data_i[i], self.data_i[i])
 633
 634         # take instruction and process it.  note that it's possible to
 635         # "inspect" the queue contents *without* actually removing the
 636         # items.  items are only removed when the
 637
 638         # in "waiting" state
 639         wait_issue_br = Signal()
 640         wait_issue_alu = Signal()
 641
 642         with m.If(wait_issue_br | wait_issue_alu):
 643             # set instruction pop length to 1 if the unit accepted
 644             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 645                 with m.If(iq.qlen_o != 0):
 646                     comb += iq.n_sub_i.eq(1)
 647             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 648                 with m.If(iq.qlen_o != 0):
 649                     comb += iq.n_sub_i.eq(1)
 650
 651         # see if some instruction(s) are here.  note that this is
 652         # "inspecting" the in-place queue.  note also that on the
 653         # cycle following "waiting" for fn_issue_o to be set, the
 654         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 655         with m.If(iq.qlen_o != 0):
 656             # get the operands and operation
 657             dest = iq.data_o[0].dest_i
 658             src1 = iq.data_o[0].src1_i
 659             src2 = iq.data_o[0].src2_i
 660             op = iq.data_o[0].oper_i
 661
 662             # set the src/dest regs
 663             comb += sc.int_dest_i.eq(dest)
 664             comb += sc.int_src1_i.eq(src1)
 665             comb += sc.int_src2_i.eq(src2)
 666             comb += sc.reg_enable_i.eq(1) # enable the regfile
 667
 668             # choose a Function-Unit-Group
 669             with m.If((op & (0x3<<2)) != 0): # branch
 670                 comb += sc.brissue.insn_i.eq(1)
 671                 comb += sc.br_oper_i.eq(op & 0x3)
 672                 comb += wait_issue_br.eq(1)
 673             with m.Else():                   # alu
 674                 comb += sc.aluissue.insn_i.eq(1)
 675                 comb += sc.alu_oper_i.eq(op & 0x3)
 676                 comb += wait_issue_alu.eq(1)
 677
 678             # XXX TODO
 679             # these indicate that the instruction is to be made
 680             # shadow-dependent on
 681             # (either) branch success or branch fail
 682             #yield sc.branch_fail_i.eq(branch_fail)
 683             #yield sc.branch_succ_i.eq(branch_success)
 684
 685         return m
 686
 687     def __iter__(self):
 688         yield self.p_ready_o
 689         for o in self.data_i:
 690             yield from list(o)
 691         yield self.p_add_i
 692
 693     def ports(self):
 694         return list(self)
 695
 696
 697 IADD = 0
 698 ISUB = 1
 699 IMUL = 2
 700 ISHF = 3
 701 IBGT = 4
 702 IBLT = 5
 703 IBEQ = 6
 704 IBNE = 7
 705
 706 class RegSim:
 707     def __init__(self, rwidth, nregs):
 708         self.rwidth = rwidth
 709         self.regs = [0] * nregs
 710
 711     def op(self, op, op_imm, src1, src2, dest):
 712         maxbits = (1 << self.rwidth) - 1
 713         src1 = self.regs[src1] & maxbits
 714         if not op_imm: # put op in src2
 715             src2 = self.regs[src2] & maxbits
 716         if op == IADD:
 717             val = src1 + src2
 718         elif op == ISUB:
 719             val = src1 - src2
 720         elif op == IMUL:
 721             val = src1 * src2
 722         elif op == ISHF:
 723             val = src1 >> (src2 & maxbits)
 724         elif op == IBGT:
 725             val = int(src1 > src2)
 726         elif op == IBLT:
 727             val = int(src1 < src2)
 728         elif op == IBEQ:
 729             val = int(src1 == src2)
 730         elif op == IBNE:
 731             val = int(src1 != src2)
 732         val &= maxbits
 733         self.setval(dest, val)
 734         return val
 735
 736     def setval(self, dest, val):
 737         print ("sim setval", dest, hex(val))
 738         self.regs[dest] = val
 739
 740     def dump(self, dut):
 741         for i, val in enumerate(self.regs):
 742             reg = yield dut.intregs.regs[i].reg
 743             okstr = "OK" if reg == val else "!ok"
 744             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 745
 746     def check(self, dut):
 747         for i, val in enumerate(self.regs):
 748             reg = yield dut.intregs.regs[i].reg
 749             if reg != val:
 750                 print("reg %d expected %x received %x\n" % (i, val, reg))
 751                 yield from self.dump(dut)
 752                 assert False
 753
 754 def instr_q(dut, op, op_imm, src1, src2, dest, branch_success, branch_fail):
 755     instrs = [{'oper_i': op, 'dest_i': dest, 'opim_i': op_imm,
 756                'src1_i': src1, 'src2_i': src2}]
 757
 758     sendlen = 1
 759     for idx in range(sendlen):
 760         yield from eq(dut.data_i[idx], instrs[idx])
 761         di = yield dut.data_i[idx]
 762         print ("senddata %d %x" % (idx, di))
 763     yield dut.p_add_i.eq(sendlen)
 764     yield
 765     o_p_ready = yield dut.p_ready_o
 766     while not o_p_ready:
 767         yield
 768         o_p_ready = yield dut.p_ready_o
 769
 770     yield dut.p_add_i.eq(0)
 771
 772
 773 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 774     yield from disable_issue(dut)
 775     yield dut.int_dest_i.eq(dest)
 776     yield dut.int_src1_i.eq(src1)
 777     yield dut.int_src2_i.eq(src2)
 778     if (op & (0x3<<2)) != 0: # branch
 779         yield dut.brissue.insn_i.eq(1)
 780         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 781         dut_issue = dut.brissue
 782     else:
 783         yield dut.aluissue.insn_i.eq(1)
 784         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 785         dut_issue = dut.aluissue
 786     yield dut.reg_enable_i.eq(1)
 787
 788     # these indicate that the instruction is to be made shadow-dependent on
 789     # (either) branch success or branch fail
 790     yield dut.branch_fail_i.eq(branch_fail)
 791     yield dut.branch_succ_i.eq(branch_success)
 792
 793     yield
 794     yield from wait_for_issue(dut, dut_issue)
 795
 796
 797 def print_reg(dut, rnums):
 798     rs = []
 799     for rnum in rnums:
 800         reg = yield dut.intregs.regs[rnum].reg
 801         rs.append("%x" % reg)
 802     rnums = map(str, rnums)
 803     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 804
 805
 806 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 807     insts = []
 808     for i in range(n_ops):
 809         src1 = randint(1, dut.n_regs-1)
 810         src2 = randint(1, dut.n_regs-1)
 811         dest = randint(1, dut.n_regs-1)
 812         op = randint(0, max_opnums)
 813         opi = 0 # if randint(0, 3) else 1 # set true if random is nonzero
 814
 815         if shadowing:
 816             insts.append((src1, src2, dest, op, opi, (0, 0)))
 817         else:
 818             insts.append((src1, src2, dest, op, opi))
 819     return insts
 820
 821
 822 def wait_for_busy_clear(dut):
 823     while True:
 824         busy_o = yield dut.busy_o
 825         if not busy_o:
 826             break
 827         print ("busy",)
 828         yield
 829
 830 def disable_issue(dut):
 831     yield dut.aluissue.insn_i.eq(0)
 832     yield dut.brissue.insn_i.eq(0)
 833
 834
 835 def wait_for_issue(dut, dut_issue):
 836     while True:
 837         issue_o = yield dut_issue.fn_issue_o
 838         if issue_o:
 839             yield from disable_issue(dut)
 840             yield dut.reg_enable_i.eq(0)
 841             break
 842         print ("busy",)
 843         #yield from print_reg(dut, [1,2,3])
 844         yield
 845     #yield from print_reg(dut, [1,2,3])
 846
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a branch with shadowed instructions, then check.

    Steps, as performed below:

    * seed the RNG and load a random value into registers 1..n_regs-1 of
      both the DUT and the ``alusim`` software model;
    * build a short, hand-written instruction list containing one branch;
    * issue the instructions one at a time through ``int_instr``, skipping
      or un-shadowing entries depending on ``dut.branch_direction_o``;
    * replay a deep copy of the same list through ``alusim.op``, following
      only the branch arm the model says was taken;
    * compare DUT registers with the model (``alusim.check`` / ``dump``).

    Instruction tuples used here have five fields:
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  For a branch
    (``op >= 4``) the ``dest`` slot instead holds the pair
    ``(branch_ok, branch_fail)``: two lists of instructions (entries may be
    ``None``) for the two possible outcomes.

    NOTE(review): this five-field layout has no ``opi`` field, whereas
    ``create_random_ops`` produces six-field tuples and ``scoreboard_sim``
    passes an extra immediate argument to ``alusim.op``.  This function
    looks like it has not been kept in sync with those (its call in
    ``test_scoreboard`` is commented out) -- confirm before re-enabling.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (note: this inner loop reuses the name "i" of the outer loop)
        for i in range(1, dut.n_regs):
            val = 31+i*3 # deterministic alternative; overwritten on next line
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated variant of the test program
        # NOTE(review): create_random_ops returns six-field tuples here,
        # which the issue loop below would not be able to unpack.
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: fixed program of one ALU op followed by one branch whose
        # "ok" arm is empty (None) and whose "fail" arm has one ALU op
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the issue loop below consumes "insts"; keep an untouched copy for
        # the software-model replay further down
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts # same list object: pops and appends affect both names
        branch_direction = 0
        while instrs:
            # two bare yields before each instruction
            # (presumably two simulator clock steps -- TODO confirm)
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): the pairing of branch_direction values with
            # shadow_on/shadow_off below should be checked against the
            # meaning of branch_direction_o, which is not visible here.
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch outcome already known: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                # for a branch, "dest" carried the two outcome lists; the
                # register number passed on as dest is src2
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the software model, following only the
        # branch arm that the model's own branch result selects
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue # placeholder for an empty branch arm
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): called with four arguments here but with five
            # (including an immediate) in scoreboard_sim -- confirm signature
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
 959
 960
def scoreboard_sim(dut, alusim):
    """Simulation process: queue instructions and verify the register file.

    Steps, as performed below:

    * load a random value into registers 1..n_regs-1 of both the DUT and
      the ``alusim`` software model;
    * build the instruction list (15 random operations by default; the
      disabled ``if False:`` blocks hold hand-written regression sequences);
    * for each instruction, apply it to the model with ``alusim.op`` and
      push it into the DUT's instruction queue with ``instr_q``;
    * wait until ``dut.qlen_o`` reads 0 and ``dut.busy_o`` is clear;
    * compare DUT registers with the model (``alusim.check`` / ``dump``).

    The issue loop expects six-field tuples:
    ``(src1, src2, dest, op, opi, (br_ok, br_fail))``.

    NOTE(review): several of the disabled blocks append four- or five-field
    tuples, which would fail to unpack in the issue loop; they need
    updating to the six-field form before being re-enabled.
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        # (note: this inner loop reuses the name "i" of the outer loop)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            # default: 15 random ops, six-field tuples, op in 0..3
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # fixed register values (r5=4, r3=5) ahead of an op-4 instruction
            # followed by an instruction flagged (0, 1)
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above but with r5=6 and the follow-up flagged (1, 0)
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            # six-field sequence mixing unshadowed and shadowed entries
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # update the software model first, then queue the same
            # instruction into the DUT
            alusim.op(op, opi, src1, src2, dest)
            yield from instr_q(dut, op, opi, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra steps after the queue has drained, before polling busy
        # (presumably so the last queued instruction reaches the scoreboard
        # -- TODO confirm the required count)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1094
1095
def test_scoreboard():
    """Build the scoreboard DUT, export it as RTLIL and run the simulation.

    The RTLIL text is written to ``test_scoreboard6600.il`` and the
    simulation waveform to ``test_scoreboard6600.vcd``.  Only
    ``scoreboard_sim`` is run; the branch variant is left disabled below.
    """
    scoreboard = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    reg_model = RegSim(16, 8)
    mem_model = MemSim(16, 16)  # constructed but not used by the run below

    rtlil_text = rtlil.convert(scoreboard, ports=scoreboard.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = scoreboard_sim(scoreboard, reg_model)
    run_simulation(scoreboard, stimulus,
                   vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1109
1110
# run the scoreboard simulation when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()