src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class Memory(Elaboratable):
  26     def __init__(self, regwid, addrw):
  27         self.ddepth = regwid/8
  28         depth = (1<<addrw) / self.ddepth
  29         self.adr   = Signal(addrw)
  30         self.dat_r = Signal(regwid)
  31         self.dat_w = Signal(regwid)
  32         self.we    = Signal()
  33         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         m.submodules.rdport = rdport = self.mem.read_port()
  38         m.submodules.wrport = wrport = self.mem.write_port()
  39         m.d.comb += [
  40             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  41             self.dat_r.eq(rdport.data),
  42             wrport.addr.eq(self.adr),
  43             wrport.data.eq(self.dat_w),
  44             wrport.en.eq(self.we),
  45         ]
  46         return m
  47
  48
  49 class MemSim:
  50     def __init__(self, regwid, addrw):
  51         self.regwid = regwid
  52         self.ddepth = regwid//8
  53         depth = (1<<addrw) // self.ddepth
  54         self.mem = list(range(0, depth))
  55
  56     def ld(self, addr):
  57         return self.mem[addr>>self.ddepth]
  58
  59     def st(self, addr, data):
  60         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  61
  62
  63 class CompUnitsBase(Elaboratable):
  64     """ Computation Unit Base class.
  65
  66         Amazingly, this class works recursively.  It's supposed to just
  67         look after some ALUs (that can handle the same operations),
  68         grouping them together, however it turns out that the same code
  69         can also group *groups* of Computation Units together as well.
  70
  71         Basically it was intended just to concatenate the ALU's issue,
  72         go_rd etc. signals together, which start out as bits and become
  73         sequences.  Turns out that the same trick works just as well
  74         on Computation Units!
  75
  76         So this class may be used recursively to present a top-level
  77         sequential concatenation of all the signals in and out of
  78         ALUs, whilst at the same time making it convenient to group
  79         ALUs together.
  80
  81         At the lower level, the intent is that groups of (identical)
  82         ALUs may be passed the same operation.  Even beyond that,
  83         the intent is that that group of (identical) ALUs actually
  84         share the *same pipeline* and as such become a "Concurrent
  85         Computation Unit" as defined by Mitch Alsup (see section
  86         11.4.9.3)
  87     """
  88     def __init__(self, rwid, units):
  89         """ Inputs:
  90
  91             * :rwid:   bit width of register file(s) - both FP and INT
  92             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  93         """
  94         self.units = units
  95         self.rwid = rwid
  96         self.rwid = rwid
  97         if units and isinstance(units[0], CompUnitsBase):
  98             self.n_units = 0
  99             for u in self.units:
 100                 self.n_units += u.n_units
 101         else:
 102             self.n_units = len(units)
 103
 104         n_units = self.n_units
 105
 106         # inputs
 107         self.issue_i = Signal(n_units, reset_less=True)
 108         self.go_rd_i = Signal(n_units, reset_less=True)
 109         self.go_wr_i = Signal(n_units, reset_less=True)
 110         self.shadown_i = Signal(n_units, reset_less=True)
 111         self.go_die_i = Signal(n_units, reset_less=True)
 112
 113         # outputs
 114         self.busy_o = Signal(n_units, reset_less=True)
 115         self.rd_rel_o = Signal(n_units, reset_less=True)
 116         self.req_rel_o = Signal(n_units, reset_less=True)
 117
 118         # in/out register data (note: not register#, actual data)
 119         self.data_o = Signal(rwid, reset_less=True)
 120         self.src1_i = Signal(rwid, reset_less=True)
 121         self.src2_i = Signal(rwid, reset_less=True)
 122         # input operand
 123
 124     def elaborate(self, platform):
 125         m = Module()
 126         comb = m.d.comb
 127
 128         for i, alu in enumerate(self.units):
 129             setattr(m.submodules, "comp%d" % i, alu)
 130
 131         go_rd_l = []
 132         go_wr_l = []
 133         issue_l = []
 134         busy_l = []
 135         req_rel_l = []
 136         rd_rel_l = []
 137         shadow_l = []
 138         godie_l = []
 139         for alu in self.units:
 140             req_rel_l.append(alu.req_rel_o)
 141             rd_rel_l.append(alu.rd_rel_o)
 142             shadow_l.append(alu.shadown_i)
 143             godie_l.append(alu.go_die_i)
 144             go_wr_l.append(alu.go_wr_i)
 145             go_rd_l.append(alu.go_rd_i)
 146             issue_l.append(alu.issue_i)
 147             busy_l.append(alu.busy_o)
 148         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 149         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 150         comb += self.busy_o.eq(Cat(*busy_l))
 151         comb += Cat(*godie_l).eq(self.go_die_i)
 152         comb += Cat(*shadow_l).eq(self.shadown_i)
 153         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 154         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 155         comb += Cat(*issue_l).eq(self.issue_i)
 156
 157         # connect data register input/output
 158
 159         # merge (OR) all integer FU / ALU outputs to a single value
 160         # bit of a hack: treereduce needs a list with an item named "data_o"
 161         if self.units:
 162             data_o = treereduce(self.units)
 163             comb += self.data_o.eq(data_o)
 164
 165         for i, alu in enumerate(self.units):
 166             comb += alu.src1_i.eq(self.src1_i)
 167             comb += alu.src2_i.eq(self.src2_i)
 168
 169         return m
 170
 171
 172 class CompUnitALUs(CompUnitsBase):
 173
 174     def __init__(self, rwid, opwid):
 175         """ Inputs:
 176
 177             * :rwid:   bit width of register file(s) - both FP and INT
 178             * :opwid:  operand bit width
 179         """
 180         self.opwid = opwid
 181
 182         # inputs
 183         self.oper_i = Signal(opwid, reset_less=True)
 184
 185         # Int ALUs
 186         add = ALU(rwid)
 187         sub = ALU(rwid)
 188         mul = ALU(rwid)
 189         shf = ALU(rwid)
 190
 191         units = []
 192         for alu in [add, sub, mul, shf]:
 193             aluopwid = 3 # extra bit for immediate mode
 194             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 195
 196         CompUnitsBase.__init__(self, rwid, units)
 197
 198     def elaborate(self, platform):
 199         m = CompUnitsBase.elaborate(self, platform)
 200         comb = m.d.comb
 201
 202         # hand the same operation to all units, only lower 2 bits though
 203         for alu in self.units:
 204             comb += alu.oper_i[0:2].eq(self.oper_i)
 205
 206         return m
 207
 208
 209 class CompUnitBR(CompUnitsBase):
 210
 211     def __init__(self, rwid, opwid):
 212         """ Inputs:
 213
 214             * :rwid:   bit width of register file(s) - both FP and INT
 215             * :opwid:  operand bit width
 216
 217             Note: bgt unit is returned so that a shadow unit can be created
 218             for it
 219         """
 220         self.opwid = opwid
 221
 222         # inputs
 223         self.oper_i = Signal(opwid, reset_less=True)
 224
 225         # Branch ALU and CU
 226         self.bgt = BranchALU(rwid)
 227         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 228         CompUnitsBase.__init__(self, rwid, [self.br1])
 229
 230     def elaborate(self, platform):
 231         m = CompUnitsBase.elaborate(self, platform)
 232         comb = m.d.comb
 233
 234         # hand the same operation to all units
 235         for alu in self.units:
 236             comb += alu.oper_i.eq(self.oper_i)
 237
 238         return m
 239
 240
 241 class FunctionUnits(Elaboratable):
 242
 243     def __init__(self, n_regs, n_int_alus):
 244         self.n_regs = n_regs
 245         self.n_int_alus = n_int_alus
 246
 247         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 248         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 249         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 250
 251         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 252         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 253
 254         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 255         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 256         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 257
 258         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 259         self.readable_o = Signal(n_int_alus, reset_less=True)
 260         self.writable_o = Signal(n_int_alus, reset_less=True)
 261
 262         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 263         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 264         self.go_die_i = Signal(n_int_alus, reset_less=True)
 265         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 266         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 267
 268         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 269
 270     def elaborate(self, platform):
 271         m = Module()
 272         comb = m.d.comb
 273         sync = m.d.sync
 274
 275         n_intfus = self.n_int_alus
 276
 277         # Integer FU-FU Dep Matrix
 278         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 279         m.submodules.intfudeps = intfudeps
 280         # Integer FU-Reg Dep Matrix
 281         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 282         m.submodules.intregdeps = intregdeps
 283
 284         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 285         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 286
 287         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 288         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 289
 290         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 291         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 292         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 293
 294         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 295         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 296         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 297         comb += intfudeps.go_die_i.eq(self.go_die_i)
 298         comb += self.readable_o.eq(intfudeps.readable_o)
 299         comb += self.writable_o.eq(intfudeps.writable_o)
 300
 301         # Connect function issue / arrays, and dest/src1/src2
 302         comb += intregdeps.dest_i.eq(self.dest_i)
 303         comb += intregdeps.src1_i.eq(self.src1_i)
 304         comb += intregdeps.src2_i.eq(self.src2_i)
 305
 306         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 307         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 308         comb += intregdeps.go_die_i.eq(self.go_die_i)
 309         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 310
 311         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 312         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 313         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 314
 315         return m
 316
 317
class Scoreboard(Elaboratable):
    """ Top-level scoreboard.

        Wires together the register files, the Computation Units (four
        ALUs plus one branch unit), the Function Unit dependency
        matrices, the group picker, the issue units, the shadow
        matrices and the branch speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        # (4 ALU issue units and 1 branch issue unit: together these match
        #  n_int_alus = 5 in elaborate)
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds all submodules and connects them together. """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # (the ALU group and the branch group are themselves grouped by
        #  a CompUnitsBase, so "cu" presents all 5 units as one vector)
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): in the code below the branch shadow is a separate,
        # one-wide ShadowMatrix (bshadow) rather than an extra bit on
        # "shadows" - the remark about the increased width looks stale.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): the signals driven below all belong to bshadow (the
        # separate one-wide matrix); "extra bit" / "n_intfus+1" looks stale.

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from the cycle its unit is issued (or while
        # the recorder's active_i is set) until the branch unit's go_wr_i
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f is a five-bit mask: it matches n_int_alus = 5 above
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1: gives the nonzero branch_direction_o encoding
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            # this sync assignment comes after the one under
            # m.If(br1.issue_i) above
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields whatever the two register files yield when iterated,
            followed by this module's own register-number, issue and
            branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns the signals from __iter__ as a list. """
        return list(self)
 587
 588
 589 class IssueToScoreboard(Elaboratable):
 590
 591     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 592         self.qlen = qlen
 593         self.n_in = n_in
 594         self.n_out = n_out
 595         self.rwid = rwid
 596         self.opw = opwid
 597         self.n_regs = n_regs
 598
 599         mqbits = (int(log(qlen) / log(2))+2, False)
 600         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 601         self.p_ready_o = Signal() # instructions were added
 602         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 603
 604         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 605         self.qlen_o = Signal(mqbits, reset_less=True)
 606
 607     def elaborate(self, platform):
 608         m = Module()
 609         comb = m.d.comb
 610         sync = m.d.sync
 611
 612         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 613         sc = Scoreboard(self.rwid, self.n_regs)
 614         m.submodules.iq = iq
 615         m.submodules.sc = sc
 616
 617         # get at the regfile for testing
 618         self.intregs = sc.intregs
 619
 620         # and the "busy" signal and instruction queue length
 621         comb += self.busy_o.eq(sc.busy_o)
 622         comb += self.qlen_o.eq(iq.qlen_o)
 623
 624         # link up instruction queue
 625         comb += iq.p_add_i.eq(self.p_add_i)
 626         comb += self.p_ready_o.eq(iq.p_ready_o)
 627         for i in range(self.n_in):
 628             comb += eq(iq.data_i[i], self.data_i[i])
 629
 630         # take instruction and process it.  note that it's possible to
 631         # "inspect" the queue contents *without* actually removing the
 632         # items.  items are only removed when the
 633
 634         # in "waiting" state
 635         wait_issue_br = Signal()
 636         wait_issue_alu = Signal()
 637
 638         with m.If(wait_issue_br | wait_issue_alu):
 639             # set instruction pop length to 1 if the unit accepted
 640             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 641                 with m.If(iq.qlen_o != 0):
 642                     comb += iq.n_sub_i.eq(1)
 643             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 644                 with m.If(iq.qlen_o != 0):
 645                     comb += iq.n_sub_i.eq(1)
 646
 647         # see if some instruction(s) are here.  note that this is
 648         # "inspecting" the in-place queue.  note also that on the
 649         # cycle following "waiting" for fn_issue_o to be set, the
 650         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 651         with m.If(iq.qlen_o != 0):
 652             # get the operands and operation
 653             dest = iq.data_o[0].dest_i
 654             src1 = iq.data_o[0].src1_i
 655             src2 = iq.data_o[0].src2_i
 656             op = iq.data_o[0].oper_i
 657             opi = iq.data_o[0].opim_i # immediate set
 658
 659             # set the src/dest regs
 660             comb += sc.int_dest_i.eq(dest)
 661             comb += sc.int_src1_i.eq(src1)
 662             comb += sc.int_src2_i.eq(src2)
 663             comb += sc.reg_enable_i.eq(1) # enable the regfile
 664
 665             # choose a Function-Unit-Group
 666             with m.If((op & (0x3<<2)) != 0): # branch
 667                 comb += sc.brissue.insn_i.eq(1)
 668                 comb += sc.br_oper_i.eq(op & 0x3)
 669                 comb += wait_issue_br.eq(1)
 670             with m.Else():                   # alu
 671                 comb += sc.aluissue.insn_i.eq(1)
 672                 comb += sc.alu_oper_i.eq(Cat(op & 0x3, opi))
 673                 comb += wait_issue_alu.eq(1)
 674
 675             # XXX TODO
 676             # these indicate that the instruction is to be made
 677             # shadow-dependent on
 678             # (either) branch success or branch fail
 679             #yield sc.branch_fail_i.eq(branch_fail)
 680             #yield sc.branch_succ_i.eq(branch_success)
 681
 682         return m
 683
 684     def __iter__(self):
 685         yield self.p_ready_o
 686         for o in self.data_i:
 687             yield from list(o)
 688         yield self.p_add_i
 689
 690     def ports(self):
 691         return list(self)
 692
 693
 694 IADD = 0
 695 ISUB = 1
 696 IMUL = 2
 697 ISHF = 3
 698 IBGT = 4
 699 IBLT = 5
 700 IBEQ = 6
 701 IBNE = 7
 702
 703 class RegSim:
 704     def __init__(self, rwidth, nregs):
 705         self.rwidth = rwidth
 706         self.regs = [0] * nregs
 707
 708     def op(self, op, op_imm, src1, src2, dest):
 709         maxbits = (1 << self.rwidth) - 1
 710         src1 = self.regs[src1] & maxbits
 711         if not op_imm: # put op in src2
 712             src2 = self.regs[src2] & maxbits
 713         if op == IADD:
 714             val = src1 + src2
 715         elif op == ISUB:
 716             val = src1 - src2
 717         elif op == IMUL:
 718             val = src1 * src2
 719         elif op == ISHF:
 720             val = src1 >> (src2 & maxbits)
 721         elif op == IBGT:
 722             val = int(src1 > src2)
 723         elif op == IBLT:
 724             val = int(src1 < src2)
 725         elif op == IBEQ:
 726             val = int(src1 == src2)
 727         elif op == IBNE:
 728             val = int(src1 != src2)
 729         val &= maxbits
 730         self.setval(dest, val)
 731         return val
 732
 733     def setval(self, dest, val):
 734         print ("sim setval", dest, hex(val))
 735         self.regs[dest] = val
 736
 737     def dump(self, dut):
 738         for i, val in enumerate(self.regs):
 739             reg = yield dut.intregs.regs[i].reg
 740             okstr = "OK" if reg == val else "!ok"
 741             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 742
 743     def check(self, dut):
 744         for i, val in enumerate(self.regs):
 745             reg = yield dut.intregs.regs[i].reg
 746             if reg != val:
 747                 print("reg %d expected %x received %x\n" % (i, val, reg))
 748                 yield from self.dump(dut)
 749                 assert False
 750
 751 def instr_q(dut, op, op_imm, src1, src2, dest, branch_success, branch_fail):
 752     instrs = [{'oper_i': op, 'dest_i': dest, 'opim_i': op_imm,
 753                'src1_i': src1, 'src2_i': src2}]
 754
 755     sendlen = 1
 756     for idx in range(sendlen):
 757         yield from eq(dut.data_i[idx], instrs[idx])
 758         di = yield dut.data_i[idx]
 759         print ("senddata %d %x" % (idx, di))
 760     yield dut.p_add_i.eq(sendlen)
 761     yield
 762     o_p_ready = yield dut.p_ready_o
 763     while not o_p_ready:
 764         yield
 765         o_p_ready = yield dut.p_ready_o
 766
 767     yield dut.p_add_i.eq(0)
 768
 769
 770 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 771     yield from disable_issue(dut)
 772     yield dut.int_dest_i.eq(dest)
 773     yield dut.int_src1_i.eq(src1)
 774     yield dut.int_src2_i.eq(src2)
 775     if (op & (0x3<<2)) != 0: # branch
 776         yield dut.brissue.insn_i.eq(1)
 777         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 778         dut_issue = dut.brissue
 779     else:
 780         yield dut.aluissue.insn_i.eq(1)
 781         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 782         dut_issue = dut.aluissue
 783     yield dut.reg_enable_i.eq(1)
 784
 785     # these indicate that the instruction is to be made shadow-dependent on
 786     # (either) branch success or branch fail
 787     yield dut.branch_fail_i.eq(branch_fail)
 788     yield dut.branch_succ_i.eq(branch_success)
 789
 790     yield
 791     yield from wait_for_issue(dut, dut_issue)
 792
 793
 794 def print_reg(dut, rnums):
 795     rs = []
 796     for rnum in rnums:
 797         reg = yield dut.intregs.regs[rnum].reg
 798         rs.append("%x" % reg)
 799     rnums = map(str, rnums)
 800     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 801
 802
 803 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 804     insts = []
 805     for i in range(n_ops):
 806         src1 = randint(1, dut.n_regs-1)
 807         src2 = randint(1, dut.n_regs-1)
 808         dest = randint(1, dut.n_regs-1)
 809         op = randint(0, max_opnums)
 810         opi = 0 if randint(0, 3) else 1 # set true if random is nonzero
 811
 812         if shadowing:
 813             insts.append((src1, src2, dest, op, opi, (0, 0)))
 814         else:
 815             insts.append((src1, src2, dest, op, opi))
 816     return insts
 817
 818
 819 def wait_for_busy_clear(dut):
 820     while True:
 821         busy_o = yield dut.busy_o
 822         if not busy_o:
 823             break
 824         print ("busy",)
 825         yield
 826
 827 def disable_issue(dut):
 828     yield dut.aluissue.insn_i.eq(0)
 829     yield dut.brissue.insn_i.eq(0)
 830
 831
 832 def wait_for_issue(dut, dut_issue):
 833     while True:
 834         issue_o = yield dut_issue.fn_issue_o
 835         if issue_o:
 836             yield from disable_issue(dut)
 837             yield dut.reg_enable_i.eq(0)
 838             break
 839         print ("busy",)
 840         #yield from print_reg(dut, [1,2,3])
 841         yield
 842     #yield from print_reg(dut, [1,2,3])
 843
def scoreboard_branch_sim(dut, alusim):
    """Simulation generator for a hand-built branch / shadow test.

    Steps, as performed below:
      1. seed the RNG and load the same random value into registers
         1..n_regs-1 of both `dut` and `alusim`;
      2. issue the instruction list through `int_instr`; when a branch
         (op >= 4) is reached, its success/fail instruction lists are
         appended to the queue, marked with the matching shadow flags;
      3. wait for `dut.busy_o` to clear;
      4. replay a copy of the program on `alusim`, following whichever
         branch path the value returned by `alusim.op` selects;
      5. check, then dump, the dut registers against `alusim`.

    Instruction tuples used here have five fields:
    (src1, src2, dest, op, (shadow_on, shadow_off)).  For a branch the
    `dest` slot instead carries a (branch_ok, branch_fail) pair of lists.

    NOTE(review): `alusim.op` is called with four arguments here while
    scoreboard_sim in this file passes five (op, opi, src1, src2, dest);
    one of the two call sites is probably stale -- confirm against the
    definition of `op`.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            # this first value is immediately overwritten by the random one
            val = 31+i*3
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated program with one branch.
        # NOTE(review): create_random_ops(..., True, ...) returns 6-field
        # tuples (it includes opi), which would not unpack in the 5-field
        # issue loop below -- needs updating before being re-enabled.
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: fixed program -- one plain instruction, then a branch
        # with nothing (None) on the success path and one instruction on
        # the fail path
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the alusim replay: the issue loop below
        # consumes `insts`
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        # instrs and insts are the same list object, so the pop(0) on
        # insts inside the loop is what eventually empties `instrs`
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): the wording of the two "skip" comments below does
            # not obviously line up with the flag being tested (shadow_on is
            # set on instructions taken from branch_ok) -- confirm the
            # encoding of branch_direction_o
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            # once a direction has been reported, issue without shadow flags
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                # for a branch the dest slot holds the two follow-on lists;
                # src2 is passed on as the destination register number
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the saved program on the software model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            # None entries are placeholders for an empty branch path
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                # queue only the path selected by the model's result
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
 956
 957
def scoreboard_sim(dut, alusim):
    """Simulation generator: run a program through the instruction queue.

    Loads the same random value into registers 1..n_regs-1 of both `dut`
    and `alusim`, builds an instruction list, and for each instruction
    first applies it to `alusim` and then queues it on the dut with
    `instr_q`.  It then waits for `dut.qlen_o` to read zero, clocks a few
    more cycles, waits for `dut.busy_o` to clear, and finally checks and
    dumps the dut registers against `alusim`.

    The issue loop expects six-field tuples:
    (src1, src2, dest, op, opi, (br_ok, br_fail)).

    NOTE(review): most of the disabled (`if False`) regression cases below
    still use older four- or five-field tuples and would fail to unpack in
    the issue loop if re-enabled as they stand.
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        # enabled: 15 random instructions in the six-field (shadowing) form
        if True:
            instrs = create_random_ops(dut, 15, True, 3)

        # disabled regression cases (five-field tuples, no opi)
        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        # disabled regression cases (four-field tuples)
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        # disabled: branch cases that preset registers 5 and 3 to fixed
        # values in both dut and alusim (five-field tuples)
        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        # disabled: fixed program already in the current six-field form
        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # the software model is updated first, in program order, and
            # only then is the instruction handed to the dut's queue
            alusim.op(op, opi, src1, src2, dest)
            yield from instr_q(dut, op, opi, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first until qlen_o reads zero -- presumably the number of queued
        # instructions; confirm against the dut -- then a few extra clocks,
        # then until busy_o clears)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1091
1092
def test_scoreboard():
    """Build the scoreboard dut, emit its RTLIL, and run the simulation.

    The RTLIL text is written to "test_scoreboard6600.il" and the
    simulation trace to "test_scoreboard6600.vcd", both in the current
    working directory.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed, but not used further here

    # convert before opening the output file, so that a failed conversion
    # leaves no (empty) file behind
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (disabled): run the branch test instead
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1106
1107
# script entry point: run the scoreboard test when executed directly
if __name__ == '__main__':
    test_scoreboard()