src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class Memory(Elaboratable):
  26     def __init__(self, regwid, addrw):
  27         self.ddepth = regwid/8
  28         depth = (1<<addrw) / self.ddepth
  29         self.adr   = Signal(addrw)
  30         self.dat_r = Signal(regwid)
  31         self.dat_w = Signal(regwid)
  32         self.we    = Signal()
  33         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         m.submodules.rdport = rdport = self.mem.read_port()
  38         m.submodules.wrport = wrport = self.mem.write_port()
  39         m.d.comb += [
  40             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  41             self.dat_r.eq(rdport.data),
  42             wrport.addr.eq(self.adr),
  43             wrport.data.eq(self.dat_w),
  44             wrport.en.eq(self.we),
  45         ]
  46         return m
  47
  48
  49 class MemSim:
  50     def __init__(self, regwid, addrw):
  51         self.regwid = regwid
  52         self.ddepth = regwid//8
  53         depth = (1<<addrw) // self.ddepth
  54         self.mem = list(range(0, depth))
  55
  56     def ld(self, addr):
  57         return self.mem[addr>>self.ddepth]
  58
  59     def st(self, addr, data):
  60         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  61
  62
  63 class CompUnitsBase(Elaboratable):
  64     """ Computation Unit Base class.
  65
  66         Amazingly, this class works recursively.  It's supposed to just
  67         look after some ALUs (that can handle the same operations),
  68         grouping them together, however it turns out that the same code
  69         can also group *groups* of Computation Units together as well.
  70
  71         Basically it was intended just to concatenate the ALU's issue,
  72         go_rd etc. signals together, which start out as bits and become
  73         sequences.  Turns out that the same trick works just as well
  74         on Computation Units!
  75
  76         So this class may be used recursively to present a top-level
  77         sequential concatenation of all the signals in and out of
  78         ALUs, whilst at the same time making it convenient to group
  79         ALUs together.
  80
  81         At the lower level, the intent is that groups of (identical)
  82         ALUs may be passed the same operation.  Even beyond that,
  83         the intent is that that group of (identical) ALUs actually
  84         share the *same pipeline* and as such become a "Concurrent
  85         Computation Unit" as defined by Mitch Alsup (see section
  86         11.4.9.3)
  87     """
  88     def __init__(self, rwid, units):
  89         """ Inputs:
  90
  91             * :rwid:   bit width of register file(s) - both FP and INT
  92             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  93         """
  94         self.units = units
  95         self.rwid = rwid
  96         self.rwid = rwid
  97         if units and isinstance(units[0], CompUnitsBase):
  98             self.n_units = 0
  99             for u in self.units:
 100                 self.n_units += u.n_units
 101         else:
 102             self.n_units = len(units)
 103
 104         n_units = self.n_units
 105
 106         # inputs
 107         self.issue_i = Signal(n_units, reset_less=True)
 108         self.go_rd_i = Signal(n_units, reset_less=True)
 109         self.go_wr_i = Signal(n_units, reset_less=True)
 110         self.shadown_i = Signal(n_units, reset_less=True)
 111         self.go_die_i = Signal(n_units, reset_less=True)
 112
 113         # outputs
 114         self.busy_o = Signal(n_units, reset_less=True)
 115         self.rd_rel_o = Signal(n_units, reset_less=True)
 116         self.req_rel_o = Signal(n_units, reset_less=True)
 117
 118         # in/out register data (note: not register#, actual data)
 119         self.data_o = Signal(rwid, reset_less=True)
 120         self.src1_i = Signal(rwid, reset_less=True)
 121         self.src2_i = Signal(rwid, reset_less=True)
 122         # input operand
 123
 124     def elaborate(self, platform):
 125         m = Module()
 126         comb = m.d.comb
 127
 128         for i, alu in enumerate(self.units):
 129             setattr(m.submodules, "comp%d" % i, alu)
 130
 131         go_rd_l = []
 132         go_wr_l = []
 133         issue_l = []
 134         busy_l = []
 135         req_rel_l = []
 136         rd_rel_l = []
 137         shadow_l = []
 138         godie_l = []
 139         for alu in self.units:
 140             req_rel_l.append(alu.req_rel_o)
 141             rd_rel_l.append(alu.rd_rel_o)
 142             shadow_l.append(alu.shadown_i)
 143             godie_l.append(alu.go_die_i)
 144             go_wr_l.append(alu.go_wr_i)
 145             go_rd_l.append(alu.go_rd_i)
 146             issue_l.append(alu.issue_i)
 147             busy_l.append(alu.busy_o)
 148         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 149         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 150         comb += self.busy_o.eq(Cat(*busy_l))
 151         comb += Cat(*godie_l).eq(self.go_die_i)
 152         comb += Cat(*shadow_l).eq(self.shadown_i)
 153         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 154         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 155         comb += Cat(*issue_l).eq(self.issue_i)
 156
 157         # connect data register input/output
 158
 159         # merge (OR) all integer FU / ALU outputs to a single value
 160         # bit of a hack: treereduce needs a list with an item named "data_o"
 161         if self.units:
 162             data_o = treereduce(self.units)
 163             comb += self.data_o.eq(data_o)
 164
 165         for i, alu in enumerate(self.units):
 166             comb += alu.src1_i.eq(self.src1_i)
 167             comb += alu.src2_i.eq(self.src2_i)
 168
 169         return m
 170
 171
 172 class CompUnitALUs(CompUnitsBase):
 173
 174     def __init__(self, rwid, opwid):
 175         """ Inputs:
 176
 177             * :rwid:   bit width of register file(s) - both FP and INT
 178             * :opwid:  operand bit width
 179         """
 180         self.opwid = opwid
 181
 182         # inputs
 183         self.oper_i = Signal(opwid, reset_less=True)
 184         self.imm_i = Signal(rwid, reset_less=True)
 185
 186         # Int ALUs
 187         add = ALU(rwid)
 188         sub = ALU(rwid)
 189         mul = ALU(rwid)
 190         shf = ALU(rwid)
 191
 192         units = []
 193         for alu in [add, sub, mul, shf]:
 194             aluopwid = 3 # extra bit for immediate mode
 195             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 196
 197         CompUnitsBase.__init__(self, rwid, units)
 198
 199     def elaborate(self, platform):
 200         m = CompUnitsBase.elaborate(self, platform)
 201         comb = m.d.comb
 202
 203         # hand the same operation to all units, only lower 2 bits though
 204         for alu in self.units:
 205             comb += alu.oper_i[0:3].eq(self.oper_i)
 206             comb += alu.imm_i.eq(self.imm_i)
 207
 208         return m
 209
 210
 211 class CompUnitBR(CompUnitsBase):
 212
 213     def __init__(self, rwid, opwid):
 214         """ Inputs:
 215
 216             * :rwid:   bit width of register file(s) - both FP and INT
 217             * :opwid:  operand bit width
 218
 219             Note: bgt unit is returned so that a shadow unit can be created
 220             for it
 221         """
 222         self.opwid = opwid
 223
 224         # inputs
 225         self.oper_i = Signal(opwid, reset_less=True)
 226         self.imm_i = Signal(rwid, reset_less=True)
 227
 228         # Branch ALU and CU
 229         self.bgt = BranchALU(rwid)
 230         aluopwid = 3 # extra bit for immediate mode
 231         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 232         CompUnitsBase.__init__(self, rwid, [self.br1])
 233
 234     def elaborate(self, platform):
 235         m = CompUnitsBase.elaborate(self, platform)
 236         comb = m.d.comb
 237
 238         # hand the same operation to all units
 239         for alu in self.units:
 240             comb += alu.oper_i.eq(self.oper_i)
 241             comb += alu.imm_i.eq(self.imm_i)
 242
 243         return m
 244
 245
 246 class FunctionUnits(Elaboratable):
 247
 248     def __init__(self, n_regs, n_int_alus):
 249         self.n_regs = n_regs
 250         self.n_int_alus = n_int_alus
 251
 252         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 253         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 254         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 255
 256         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 257         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 258
 259         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 260         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 261         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 262
 263         self.readable_o = Signal(n_int_alus, reset_less=True)
 264         self.writable_o = Signal(n_int_alus, reset_less=True)
 265
 266         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 267         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 268         self.go_die_i = Signal(n_int_alus, reset_less=True)
 269         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 270
 271         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 272
 273     def elaborate(self, platform):
 274         m = Module()
 275         comb = m.d.comb
 276         sync = m.d.sync
 277
 278         n_intfus = self.n_int_alus
 279
 280         # Integer FU-FU Dep Matrix
 281         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 282         m.submodules.intfudeps = intfudeps
 283         # Integer FU-Reg Dep Matrix
 284         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 285         m.submodules.intregdeps = intregdeps
 286
 287         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 288         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 289
 290         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 291         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 292
 293         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 294         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 295         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 296
 297         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 298         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 299         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 300         comb += intfudeps.go_die_i.eq(self.go_die_i)
 301         comb += self.readable_o.eq(intfudeps.readable_o)
 302         comb += self.writable_o.eq(intfudeps.writable_o)
 303
 304         # Connect function issue / arrays, and dest/src1/src2
 305         comb += intregdeps.dest_i.eq(self.dest_i)
 306         comb += intregdeps.src_i[0].eq(self.src1_i)
 307         comb += intregdeps.src_i[1].eq(self.src2_i)
 308
 309         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 310         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 311         comb += intregdeps.go_die_i.eq(self.go_die_i)
 312         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 313
 314         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 315         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 316         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 317
 318         return m
 319
 320
class Scoreboard(Elaboratable):
    """ Top-level scoreboard.

        Instantiates the INT and FP register files, a group of four ALU
        Computation Units plus one Branch Computation Unit, the Function
        Unit dependency matrices, a Group Picker, the register decoder
        and issue units, two Shadow Matrices and a Branch Speculation
        Recorder, and wires them all together in elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE: the FP ports are created but not connected in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units: 4 ALUs (cua) + 1 branch unit (cub) = 5
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): in the code below the branch shadow is a separate,
        # single-column matrix (bshadow) rather than an extra bit on
        # "shadows" - the comment above looks out of date, confirm.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # module-level busy: true if any Computation Unit is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        # shadown is ANDed across both matrices, go_die is ORed
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the order-preservation shadows are reset by the branch shadow's
        # go_die output
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): the code uses column 0 of the separate bshadow
        # matrix for this, not an extra bit on "shadows" - confirm.

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is "active" from the cycle the branch unit is issued
        # (or the recorder's active_i is set) until the branch unit's go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # test bit i of fn_issue_o: only the unit actually being
                # issued gets the branch shadow
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f masks the low 5 bits (n_int_alus is 5 above)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1: the recorded direction is nonzero once the
            # branch unit has produced its result
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields everything from both register files, then this module's
        # own register-number inputs, issue output and branch signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ returns everything yielded by __iter__, as a list
        """
        return list(self)
 594
 595
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard.

        Instructions are added to the queue through p_add_i / data_i.
        The instruction at the front of the queue is presented to the
        Scoreboard's ALU or Branch issue group, and is popped off the
        queue once that issue group reports a non-zero fn_issue_o.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length (passed to InstructionQ)
            * :n_in:   number of instruction slots in data_i
            * :n_out:  passed to InstructionQ as its n_out argument
            * :rwid:   bit width of register file(s)
            * :opwid:  operation bit width (passed to Instruction / queue)
            * :n_regs: number of registers (passed to Scoreboard)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) shape: floor(log2(qlen)) + 2 bits, unsigned
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed (n_sub_i set to 1) when the
        # issue group that was offered the instruction has a non-zero
        # fn_issue_o.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: bits 2 and 3 of the operation
            # select the branch group, otherwise it goes to the ALU group.
            # the unit receives the low 2 bits of the operation with the
            # immediate-mode flag as the bit above them
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += wait_issue_br.eq(1)
            with m.Else():                   # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # p_ready_o, then every field of every data_i entry, then p_add_i
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ returns everything yielded by __iter__, as a list
        """
        return list(self)
 702
 703
 704 IADD = 0
 705 ISUB = 1
 706 IMUL = 2
 707 ISHF = 3
 708 IBGT = 4
 709 IBLT = 5
 710 IBEQ = 6
 711 IBNE = 7
 712
 713 class RegSim:
 714     def __init__(self, rwidth, nregs):
 715         self.rwidth = rwidth
 716         self.regs = [0] * nregs
 717
 718     def op(self, op, op_imm, imm, src1, src2, dest):
 719         maxbits = (1 << self.rwidth) - 1
 720         src1 = self.regs[src1] & maxbits
 721         if op_imm:
 722             src2 = imm
 723         else:
 724             src2 = self.regs[src2] & maxbits
 725         if op == IADD:
 726             val = src1 + src2
 727         elif op == ISUB:
 728             val = src1 - src2
 729         elif op == IMUL:
 730             val = src1 * src2
 731         elif op == ISHF:
 732             val = src1 >> (src2 & maxbits)
 733         elif op == IBGT:
 734             val = int(src1 > src2)
 735         elif op == IBLT:
 736             val = int(src1 < src2)
 737         elif op == IBEQ:
 738             val = int(src1 == src2)
 739         elif op == IBNE:
 740             val = int(src1 != src2)
 741         val &= maxbits
 742         self.setval(dest, val)
 743         return val
 744
 745     def setval(self, dest, val):
 746         print ("sim setval", dest, hex(val))
 747         self.regs[dest] = val
 748
 749     def dump(self, dut):
 750         for i, val in enumerate(self.regs):
 751             reg = yield dut.intregs.regs[i].reg
 752             okstr = "OK" if reg == val else "!ok"
 753             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 754
 755     def check(self, dut):
 756         for i, val in enumerate(self.regs):
 757             reg = yield dut.intregs.regs[i].reg
 758             if reg != val:
 759                 print("reg %d expected %x received %x\n" % (i, val, reg))
 760                 yield from self.dump(dut)
 761                 assert False
 762
 763 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 764             branch_success, branch_fail):
 765     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 766                'src1_i': src1, 'src2_i': src2}]
 767
 768     sendlen = 1
 769     for idx in range(sendlen):
 770         yield from eq(dut.data_i[idx], instrs[idx])
 771         di = yield dut.data_i[idx]
 772         print ("senddata %d %x" % (idx, di))
 773     yield dut.p_add_i.eq(sendlen)
 774     yield
 775     o_p_ready = yield dut.p_ready_o
 776     while not o_p_ready:
 777         yield
 778         o_p_ready = yield dut.p_ready_o
 779
 780     yield dut.p_add_i.eq(0)
 781
 782
 783 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 784     yield from disable_issue(dut)
 785     yield dut.int_dest_i.eq(dest)
 786     yield dut.int_src1_i.eq(src1)
 787     yield dut.int_src2_i.eq(src2)
 788     if (op & (0x3<<2)) != 0: # branch
 789         yield dut.brissue.insn_i.eq(1)
 790         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 791         yield dut.br_imm_i.eq(imm)
 792         dut_issue = dut.brissue
 793     else:
 794         yield dut.aluissue.insn_i.eq(1)
 795         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 796         yield dut.alu_imm_i.eq(imm)
 797         dut_issue = dut.aluissue
 798     yield dut.reg_enable_i.eq(1)
 799
 800     # these indicate that the instruction is to be made shadow-dependent on
 801     # (either) branch success or branch fail
 802     yield dut.branch_fail_i.eq(branch_fail)
 803     yield dut.branch_succ_i.eq(branch_success)
 804
 805     yield
 806     yield from wait_for_issue(dut, dut_issue)
 807
 808
 809 def print_reg(dut, rnums):
 810     rs = []
 811     for rnum in rnums:
 812         reg = yield dut.intregs.regs[rnum].reg
 813         rs.append("%x" % reg)
 814     rnums = map(str, rnums)
 815     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 816
 817
 818 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 819     insts = []
 820     for i in range(n_ops):
 821         src1 = randint(1, dut.n_regs-1)
 822         src2 = randint(1, dut.n_regs-1)
 823         imm = randint(1, (1<<dut.rwid)-1)
 824         dest = randint(1, dut.n_regs-1)
 825         op = randint(0, max_opnums)
 826         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 827
 828         if shadowing:
 829             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 830         else:
 831             insts.append((src1, src2, dest, op, opi, imm))
 832     return insts
 833
 834
 835 def wait_for_busy_clear(dut):
 836     while True:
 837         busy_o = yield dut.busy_o
 838         if not busy_o:
 839             break
 840         print ("busy",)
 841         yield
 842
 843 def disable_issue(dut):
 844     yield dut.aluissue.insn_i.eq(0)
 845     yield dut.brissue.insn_i.eq(0)
 846
 847
 848 def wait_for_issue(dut, dut_issue):
 849     while True:
 850         issue_o = yield dut_issue.fn_issue_o
 851         if issue_o:
 852             yield from disable_issue(dut)
 853             yield dut.reg_enable_i.eq(0)
 854             break
 855         print ("busy",)
 856         #yield from print_reg(dut, [1,2,3])
 857         yield
 858     #yield from print_reg(dut, [1,2,3])
 859
 860 def scoreboard_branch_sim(dut, alusim):
 861
 862     iseed = 3
 863
 864     for i in range(1):
 865
 866         print ("rseed", iseed)
 867         seed(iseed)
 868         iseed += 1
 869
 870         yield dut.branch_direction_o.eq(0)
 871
 872         # set random values in the registers
 873         for i in range(1, dut.n_regs):
 874             val = 31+i*3
 875             val = randint(0, (1<<alusim.rwidth)-1)
 876             yield dut.intregs.regs[i].reg.eq(val)
 877             alusim.setval(i, val)
 878
 879         if False:
 880             # create some instructions: branches create a tree
 881             insts = create_random_ops(dut, 1, True, 1)
 882             #insts.append((6, 6, 1, 2, (0, 0)))
 883             #insts.append((4, 3, 3, 0, (0, 0)))
 884
 885             src1 = randint(1, dut.n_regs-1)
 886             src2 = randint(1, dut.n_regs-1)
 887             #op = randint(4, 7)
 888             op = 4 # only BGT at the moment
 889
 890             branch_ok = create_random_ops(dut, 1, True, 1)
 891             branch_fail = create_random_ops(dut, 1, True, 1)
 892
 893             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 894
 895         if True:
 896             insts = []
 897             insts.append( (3, 5, 2, 0, (0, 0)) )
 898             branch_ok = []
 899             branch_fail = []
 900             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 901             branch_ok.append( None )
 902             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 903             #branch_fail.append( None )
 904             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 905
 906         siminsts = deepcopy(insts)
 907
 908         # issue instruction(s)
 909         i = -1
 910         instrs = insts
 911         branch_direction = 0
 912         while instrs:
 913             yield
 914             yield
 915             i += 1
 916             branch_direction = yield dut.branch_direction_o # way branch went
 917             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 918             if branch_direction == 1 and shadow_on:
 919                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 920                 continue # branch was "success" and this is a "failed"... skip
 921             if branch_direction == 2 and shadow_off:
 922                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 923                 continue # branch was "fail" and this is a "success"... skip
 924             if branch_direction != 0:
 925                 shadow_on = 0
 926                 shadow_off = 0
 927             is_branch = op >= 4
 928             if is_branch:
 929                 branch_ok, branch_fail = dest
 930                 dest = src2
 931                 # ok zip up the branch success / fail instructions and
 932                 # drop them into the queue, one marked "to have branch success"
 933                 # the other to be marked shadow branch "fail".
 934                 # one out of each of these will be cancelled
 935                 for ok, fl in zip(branch_ok, branch_fail):
 936                     if ok:
 937                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
 938                     if fl:
 939                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
 940             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
 941                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 942             yield from int_instr(dut, op, src1, src2, dest,
 943                                  shadow_on, shadow_off)
 944
 945         # wait for all instructions to stop before checking
 946         yield
 947         yield from wait_for_busy_clear(dut)
 948
 949         i = -1
 950         while siminsts:
 951             instr = siminsts.pop(0)
 952             if instr is None:
 953                 continue
 954             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
 955             i += 1
 956             is_branch = op >= 4
 957             if is_branch:
 958                 branch_ok, branch_fail = dest
 959                 dest = src2
 960             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
 961                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 962             branch_res = alusim.op(op, src1, src2, dest)
 963             if is_branch:
 964                 if branch_res:
 965                     siminsts += branch_ok
 966                 else:
 967                     siminsts += branch_fail
 968
 969         # check status
 970         yield from alusim.check(dut)
 971         yield from alusim.dump(dut)
 972
 973
 974 def scoreboard_sim(dut, alusim):
 975
 976     seed(0)
 977
 978     for i in range(50):
 979
 980         # set random values in the registers
 981         for i in range(1, dut.n_regs):
 982             val = randint(0, (1<<alusim.rwidth)-1)
 983             #val = 31+i*3
 984             #val = i
 985             yield dut.intregs.regs[i].reg.eq(val)
 986             alusim.setval(i, val)
 987
 988         # create some instructions (some random, some regression tests)
 989         instrs = []
 990         if True:
 991             instrs = create_random_ops(dut, 15, True, 4)
 992
 993         if False:
 994             instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )
 995
 996         if False:
 997             instrs.append( (7, 3, 2, 4, (0, 0)) )
 998             instrs.append( (7, 6, 6, 2, (0, 0)) )
 999             instrs.append( (1, 7, 2, 2, (0, 0)) )
1000
1001         if False:
1002             instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
1003             instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
1004             instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
1005             instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
1006             instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))
1007
1008         if False:
1009             instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
1010             instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
1011             instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))
1012
1013         if False:
1014             instrs.append((5, 6, 2, 1))
1015             instrs.append((2, 2, 4, 0))
1016             #instrs.append((2, 2, 3, 1))
1017
1018         if False:
1019             instrs.append((2, 1, 2, 3))
1020
1021         if False:
1022             instrs.append((2, 6, 2, 1))
1023             instrs.append((2, 1, 2, 0))
1024
1025         if False:
1026             instrs.append((1, 2, 7, 2))
1027             instrs.append((7, 1, 5, 0))
1028             instrs.append((4, 4, 1, 1))
1029
1030         if False:
1031             instrs.append((5, 6, 2, 2))
1032             instrs.append((1, 1, 4, 1))
1033             instrs.append((6, 5, 3, 0))
1034
1035         if False:
1036             # Write-after-Write Hazard
1037             instrs.append( (3, 6, 7, 2) )
1038             instrs.append( (4, 4, 7, 1) )
1039
1040         if False:
1041             # self-read/write-after-write followed by Read-after-Write
1042             instrs.append((1, 1, 1, 1))
1043             instrs.append((1, 5, 3, 0))
1044
1045         if False:
1046             # Read-after-Write followed by self-read-after-write
1047             instrs.append((5, 6, 1, 2))
1048             instrs.append((1, 1, 1, 1))
1049
1050         if False:
1051             # self-read-write sandwich
1052             instrs.append((5, 6, 1, 2))
1053             instrs.append((1, 1, 1, 1))
1054             instrs.append((1, 5, 3, 0))
1055
1056         if False:
1057             # very weird failure
1058             instrs.append( (5, 2, 5, 2) )
1059             instrs.append( (2, 6, 3, 0) )
1060             instrs.append( (4, 2, 2, 1) )
1061
1062         if False:
1063             v1 = 4
1064             yield dut.intregs.regs[5].reg.eq(v1)
1065             alusim.setval(5, v1)
1066             yield dut.intregs.regs[3].reg.eq(5)
1067             alusim.setval(3, 5)
1068             instrs.append((5, 3, 3, 4, (0, 0)))
1069             instrs.append((4, 2, 1, 2, (0, 1)))
1070
1071         if False:
1072             v1 = 6
1073             yield dut.intregs.regs[5].reg.eq(v1)
1074             alusim.setval(5, v1)
1075             yield dut.intregs.regs[3].reg.eq(5)
1076             alusim.setval(3, 5)
1077             instrs.append((5, 3, 3, 4, (0, 0)))
1078             instrs.append((4, 2, 1, 2, (1, 0)))
1079
1080         if False:
1081             instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
1082             instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
1083             instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
1084             instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
1085             instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
1086             instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
1087             instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
1088             instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
1089             instrs.append( (6, 7, 7, 0, 0, (0, 0)) )
1090
1091         # issue instruction(s), wait for issue to be free before proceeding
1092         for i, instr in enumerate(instrs):
1093             src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr
1094
1095             print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
1096                     (i, src1, src2, dest, op, opi, imm))
1097             alusim.op(op, opi, imm, src1, src2, dest)
1098             yield from instr_q(dut, op, opi, imm, src1, src2, dest,
1099                                br_ok, br_fail)
1100
1101         # wait for all instructions to stop before checking
1102         while True:
1103             iqlen = yield dut.qlen_o
1104             if iqlen == 0:
1105                 break
1106             yield
1107         yield
1108         yield
1109         yield
1110         yield
1111         yield from wait_for_busy_clear(dut)
1112
1113         # check status
1114         yield from alusim.check(dut)
1115         yield from alusim.dump(dut)
1116
1117
def test_scoreboard():
    """Build the scoreboard DUT, emit its RTLIL and run the random test.

    Writes the RTLIL text to "test_scoreboard6600.il" and runs
    `scoreboard_sim` under the simulator with a VCD trace written to
    "test_scoreboard6600.vcd".
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used by this test

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (disabled) process: the branch-shadowing test
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1131
1132
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()