src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class Memory(Elaboratable):
  26     def __init__(self, regwid, addrw):
  27         self.ddepth = regwid/8
  28         depth = (1<<addrw) / self.ddepth
  29         self.adr   = Signal(addrw)
  30         self.dat_r = Signal(regwid)
  31         self.dat_w = Signal(regwid)
  32         self.we    = Signal()
  33         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         m.submodules.rdport = rdport = self.mem.read_port()
  38         m.submodules.wrport = wrport = self.mem.write_port()
  39         m.d.comb += [
  40             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  41             self.dat_r.eq(rdport.data),
  42             wrport.addr.eq(self.adr),
  43             wrport.data.eq(self.dat_w),
  44             wrport.en.eq(self.we),
  45         ]
  46         return m
  47
  48
  49 class MemSim:
  50     def __init__(self, regwid, addrw):
  51         self.regwid = regwid
  52         self.ddepth = regwid//8
  53         depth = (1<<addrw) // self.ddepth
  54         self.mem = list(range(0, depth))
  55
  56     def ld(self, addr):
  57         return self.mem[addr>>self.ddepth]
  58
  59     def st(self, addr, data):
  60         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  61
  62
  63 class CompUnitsBase(Elaboratable):
  64     """ Computation Unit Base class.
  65
  66         Amazingly, this class works recursively.  It's supposed to just
  67         look after some ALUs (that can handle the same operations),
  68         grouping them together, however it turns out that the same code
  69         can also group *groups* of Computation Units together as well.
  70
  71         Basically it was intended just to concatenate the ALU's issue,
  72         go_rd etc. signals together, which start out as bits and become
  73         sequences.  Turns out that the same trick works just as well
  74         on Computation Units!
  75
  76         So this class may be used recursively to present a top-level
  77         sequential concatenation of all the signals in and out of
  78         ALUs, whilst at the same time making it convenient to group
  79         ALUs together.
  80
  81         At the lower level, the intent is that groups of (identical)
  82         ALUs may be passed the same operation.  Even beyond that,
  83         the intent is that that group of (identical) ALUs actually
  84         share the *same pipeline* and as such become a "Concurrent
  85         Computation Unit" as defined by Mitch Alsup (see section
  86         11.4.9.3)
  87     """
  88     def __init__(self, rwid, units):
  89         """ Inputs:
  90
  91             * :rwid:   bit width of register file(s) - both FP and INT
  92             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  93         """
  94         self.units = units
  95         self.rwid = rwid
  96         self.rwid = rwid
  97         if units and isinstance(units[0], CompUnitsBase):
  98             self.n_units = 0
  99             for u in self.units:
 100                 self.n_units += u.n_units
 101         else:
 102             self.n_units = len(units)
 103
 104         n_units = self.n_units
 105
 106         # inputs
 107         self.issue_i = Signal(n_units, reset_less=True)
 108         self.go_rd_i = Signal(n_units, reset_less=True)
 109         self.go_wr_i = Signal(n_units, reset_less=True)
 110         self.shadown_i = Signal(n_units, reset_less=True)
 111         self.go_die_i = Signal(n_units, reset_less=True)
 112
 113         # outputs
 114         self.busy_o = Signal(n_units, reset_less=True)
 115         self.rd_rel_o = Signal(n_units, reset_less=True)
 116         self.req_rel_o = Signal(n_units, reset_less=True)
 117
 118         # in/out register data (note: not register#, actual data)
 119         self.data_o = Signal(rwid, reset_less=True)
 120         self.src1_i = Signal(rwid, reset_less=True)
 121         self.src2_i = Signal(rwid, reset_less=True)
 122         # input operand
 123
 124     def elaborate(self, platform):
 125         m = Module()
 126         comb = m.d.comb
 127
 128         for i, alu in enumerate(self.units):
 129             setattr(m.submodules, "comp%d" % i, alu)
 130
 131         go_rd_l = []
 132         go_wr_l = []
 133         issue_l = []
 134         busy_l = []
 135         req_rel_l = []
 136         rd_rel_l = []
 137         shadow_l = []
 138         godie_l = []
 139         for alu in self.units:
 140             req_rel_l.append(alu.req_rel_o)
 141             rd_rel_l.append(alu.rd_rel_o)
 142             shadow_l.append(alu.shadown_i)
 143             godie_l.append(alu.go_die_i)
 144             go_wr_l.append(alu.go_wr_i)
 145             go_rd_l.append(alu.go_rd_i)
 146             issue_l.append(alu.issue_i)
 147             busy_l.append(alu.busy_o)
 148         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 149         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 150         comb += self.busy_o.eq(Cat(*busy_l))
 151         comb += Cat(*godie_l).eq(self.go_die_i)
 152         comb += Cat(*shadow_l).eq(self.shadown_i)
 153         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 154         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 155         comb += Cat(*issue_l).eq(self.issue_i)
 156
 157         # connect data register input/output
 158
 159         # merge (OR) all integer FU / ALU outputs to a single value
 160         # bit of a hack: treereduce needs a list with an item named "data_o"
 161         if self.units:
 162             data_o = treereduce(self.units)
 163             comb += self.data_o.eq(data_o)
 164
 165         for i, alu in enumerate(self.units):
 166             comb += alu.src1_i.eq(self.src1_i)
 167             comb += alu.src2_i.eq(self.src2_i)
 168
 169         return m
 170
 171
 172 class CompUnitALUs(CompUnitsBase):
 173
 174     def __init__(self, rwid, opwid):
 175         """ Inputs:
 176
 177             * :rwid:   bit width of register file(s) - both FP and INT
 178             * :opwid:  operand bit width
 179         """
 180         self.opwid = opwid
 181
 182         # inputs
 183         self.oper_i = Signal(opwid, reset_less=True)
 184
 185         # Int ALUs
 186         add = ALU(rwid)
 187         sub = ALU(rwid)
 188         mul = ALU(rwid)
 189         shf = ALU(rwid)
 190
 191         units = []
 192         for alu in [add, sub, mul, shf]:
 193             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 194
 195         CompUnitsBase.__init__(self, rwid, units)
 196
 197     def elaborate(self, platform):
 198         m = CompUnitsBase.elaborate(self, platform)
 199         comb = m.d.comb
 200
 201         # hand the same operation to all units
 202         for alu in self.units:
 203             comb += alu.oper_i.eq(self.oper_i)
 204
 205         return m
 206
 207
 208 class CompUnitBR(CompUnitsBase):
 209
 210     def __init__(self, rwid, opwid):
 211         """ Inputs:
 212
 213             * :rwid:   bit width of register file(s) - both FP and INT
 214             * :opwid:  operand bit width
 215
 216             Note: bgt unit is returned so that a shadow unit can be created
 217             for it
 218         """
 219         self.opwid = opwid
 220
 221         # inputs
 222         self.oper_i = Signal(opwid, reset_less=True)
 223
 224         # Branch ALU and CU
 225         self.bgt = BranchALU(rwid)
 226         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 227         CompUnitsBase.__init__(self, rwid, [self.br1])
 228
 229     def elaborate(self, platform):
 230         m = CompUnitsBase.elaborate(self, platform)
 231         comb = m.d.comb
 232
 233         # hand the same operation to all units
 234         for alu in self.units:
 235             comb += alu.oper_i.eq(self.oper_i)
 236
 237         return m
 238
 239
 240 class FunctionUnits(Elaboratable):
 241
 242     def __init__(self, n_regs, n_int_alus):
 243         self.n_regs = n_regs
 244         self.n_int_alus = n_int_alus
 245
 246         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 247         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 248         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 249
 250         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 251         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 252
 253         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 254         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 255         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 256
 257         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 258         self.readable_o = Signal(n_int_alus, reset_less=True)
 259         self.writable_o = Signal(n_int_alus, reset_less=True)
 260
 261         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 262         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 263         self.go_die_i = Signal(n_int_alus, reset_less=True)
 264         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 265         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 266
 267         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 268
 269     def elaborate(self, platform):
 270         m = Module()
 271         comb = m.d.comb
 272         sync = m.d.sync
 273
 274         n_intfus = self.n_int_alus
 275
 276         # Integer FU-FU Dep Matrix
 277         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 278         m.submodules.intfudeps = intfudeps
 279         # Integer FU-Reg Dep Matrix
 280         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 281         m.submodules.intregdeps = intregdeps
 282
 283         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 284         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 285
 286         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 287         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 288
 289         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 290         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 291         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 292
 293         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 294         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 295         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 296         comb += intfudeps.go_die_i.eq(self.go_die_i)
 297         comb += self.readable_o.eq(intfudeps.readable_o)
 298         comb += self.writable_o.eq(intfudeps.writable_o)
 299
 300         # Connect function issue / arrays, and dest/src1/src2
 301         comb += intregdeps.dest_i.eq(self.dest_i)
 302         comb += intregdeps.src1_i.eq(self.src1_i)
 303         comb += intregdeps.src2_i.eq(self.src2_i)
 304
 305         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 306         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 307         comb += intregdeps.go_die_i.eq(self.go_die_i)
 308         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 309
 310         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 311         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 312         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 313
 314         return m
 315
 316
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: wires together the register files, the
        Computation Units (four ALUs plus one branch unit), the Function
        Unit dependency matrices, the group picker, the issue units, the
        shadow matrices and the branch speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Creates all submodules and connects them.  Returns the Module. """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four units in CompUnitALUs plus the one in CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1.  it is n_intfus wide, so it covers
        # add, sub, mul and shf and also the branch unit
        intpick1 = GroupPicker(n_intfus)
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" has n_intfus shadows, to be used for
        # write-after-write hazards.  "bshadow" is a separate matrix,
        # one shadow wide, used for branches.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy if any one of the Computation Units is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # one-shadow-wide "bshadow" ShadowMatrix created above.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from its issue (or while the recorder is
        # active) up until the branch unit's go_wr is raised
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the Function Unit actually being issued is shadowed
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE: 0x1f is a 5-bit mask, which matches n_int_alus = 5 above.
        # it will need changing if the number of Function Units changes.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # direction is the branch result plus one: 1 or 2 (0 = not met)
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the register files' contents followed by this module's
            register-number inputs, issue_o and the branch signals.
            (reg_enable_i, busy_o and the oper inputs are not included)
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything from __iter__ as a list. """
        return list(self)
 586
 587
 588 class IssueToScoreboard(Elaboratable):
 589
 590     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 591         self.qlen = qlen
 592         self.n_in = n_in
 593         self.n_out = n_out
 594         self.rwid = rwid
 595         self.opw = opwid
 596         self.n_regs = n_regs
 597
 598         mqbits = (int(log(qlen) / log(2))+2, False)
 599         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 600         self.p_ready_o = Signal() # instructions were added
 601         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 602
 603         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 604         self.qlen_o = Signal(mqbits, reset_less=True)
 605
 606     def elaborate(self, platform):
 607         m = Module()
 608         comb = m.d.comb
 609         sync = m.d.sync
 610
 611         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 612         sc = Scoreboard(self.rwid, self.n_regs)
 613         m.submodules.iq = iq
 614         m.submodules.sc = sc
 615
 616         # get at the regfile for testing
 617         self.intregs = sc.intregs
 618
 619         # and the "busy" signal and instruction queue length
 620         comb += self.busy_o.eq(sc.busy_o)
 621         comb += self.qlen_o.eq(iq.qlen_o)
 622
 623         # link up instruction queue
 624         comb += iq.p_add_i.eq(self.p_add_i)
 625         comb += self.p_ready_o.eq(iq.p_ready_o)
 626         for i in range(self.n_in):
 627             comb += eq(iq.data_i[i], self.data_i[i])
 628
 629         # take instruction and process it.  note that it's possible to
 630         # "inspect" the queue contents *without* actually removing the
 631         # items.  items are only removed when the
 632
 633         # in "waiting" state
 634         wait_issue_br = Signal()
 635         wait_issue_alu = Signal()
 636
 637         with m.If(wait_issue_br | wait_issue_alu):
 638             # set instruction pop length to 1 if the unit accepted
 639             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 640                 with m.If(iq.qlen_o != 0):
 641                     comb += iq.n_sub_i.eq(1)
 642             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 643                 with m.If(iq.qlen_o != 0):
 644                     comb += iq.n_sub_i.eq(1)
 645
 646         # see if some instruction(s) are here.  note that this is
 647         # "inspecting" the in-place queue.  note also that on the
 648         # cycle following "waiting" for fn_issue_o to be set, the
 649         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 650         with m.If(iq.qlen_o != 0):
 651             # get the operands and operation
 652             dest = iq.data_o[0].dest_i
 653             src1 = iq.data_o[0].src1_i
 654             src2 = iq.data_o[0].src2_i
 655             op = iq.data_o[0].oper_i
 656
 657             # set the src/dest regs
 658             comb += sc.int_dest_i.eq(dest)
 659             comb += sc.int_src1_i.eq(src1)
 660             comb += sc.int_src2_i.eq(src2)
 661             comb += sc.reg_enable_i.eq(1) # enable the regfile
 662
 663             # choose a Function-Unit-Group
 664             with m.If((op & (0x3<<2)) != 0): # branch
 665                 comb += sc.brissue.insn_i.eq(1)
 666                 comb += sc.br_oper_i.eq(op & 0x3)
 667                 comb += wait_issue_br.eq(1)
 668             with m.Else():                   # alu
 669                 comb += sc.aluissue.insn_i.eq(1)
 670                 comb += sc.alu_oper_i.eq(op & 0x3)
 671                 comb += wait_issue_alu.eq(1)
 672
 673             # XXX TODO
 674             # these indicate that the instruction is to be made
 675             # shadow-dependent on
 676             # (either) branch success or branch fail
 677             #yield sc.branch_fail_i.eq(branch_fail)
 678             #yield sc.branch_succ_i.eq(branch_success)
 679
 680         return m
 681
 682     def __iter__(self):
 683         yield self.p_ready_o
 684         for o in self.data_i:
 685             yield from list(o)
 686         yield self.p_add_i
 687
 688     def ports(self):
 689         return list(self)
 690
 691
 692 IADD = 0
 693 ISUB = 1
 694 IMUL = 2
 695 ISHF = 3
 696 IBGT = 4
 697 IBLT = 5
 698 IBEQ = 6
 699 IBNE = 7
 700
 701 class RegSim:
 702     def __init__(self, rwidth, nregs):
 703         self.rwidth = rwidth
 704         self.regs = [0] * nregs
 705
 706     def op(self, op, op_imm, src1, src2, dest):
 707         maxbits = (1 << self.rwidth) - 1
 708         src1 = self.regs[src1] & maxbits
 709         if not op_imm: # put op in src2
 710             src2 = self.regs[src2] & maxbits
 711         if op == IADD:
 712             val = src1 + src2
 713         elif op == ISUB:
 714             val = src1 - src2
 715         elif op == IMUL:
 716             val = src1 * src2
 717         elif op == ISHF:
 718             val = src1 >> (src2 & maxbits)
 719         elif op == IBGT:
 720             val = int(src1 > src2)
 721         elif op == IBLT:
 722             val = int(src1 < src2)
 723         elif op == IBEQ:
 724             val = int(src1 == src2)
 725         elif op == IBNE:
 726             val = int(src1 != src2)
 727         val &= maxbits
 728         self.setval(dest, val)
 729         return val
 730
 731     def setval(self, dest, val):
 732         print ("sim setval", dest, hex(val))
 733         self.regs[dest] = val
 734
 735     def dump(self, dut):
 736         for i, val in enumerate(self.regs):
 737             reg = yield dut.intregs.regs[i].reg
 738             okstr = "OK" if reg == val else "!ok"
 739             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 740
 741     def check(self, dut):
 742         for i, val in enumerate(self.regs):
 743             reg = yield dut.intregs.regs[i].reg
 744             if reg != val:
 745                 print("reg %d expected %x received %x\n" % (i, val, reg))
 746                 yield from self.dump(dut)
 747                 assert False
 748
 749 def instr_q(dut, op, op_imm, src1, src2, dest, branch_success, branch_fail):
 750     instrs = [{'oper_i': op, 'dest_i': dest, 'opim_i': op_imm,
 751                'src1_i': src1, 'src2_i': src2}]
 752
 753     sendlen = 1
 754     for idx in range(sendlen):
 755         yield from eq(dut.data_i[idx], instrs[idx])
 756         di = yield dut.data_i[idx]
 757         print ("senddata %d %x" % (idx, di))
 758     yield dut.p_add_i.eq(sendlen)
 759     yield
 760     o_p_ready = yield dut.p_ready_o
 761     while not o_p_ready:
 762         yield
 763         o_p_ready = yield dut.p_ready_o
 764
 765     yield dut.p_add_i.eq(0)
 766
 767
 768 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 769     yield from disable_issue(dut)
 770     yield dut.int_dest_i.eq(dest)
 771     yield dut.int_src1_i.eq(src1)
 772     yield dut.int_src2_i.eq(src2)
 773     if (op & (0x3<<2)) != 0: # branch
 774         yield dut.brissue.insn_i.eq(1)
 775         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 776         dut_issue = dut.brissue
 777     else:
 778         yield dut.aluissue.insn_i.eq(1)
 779         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 780         dut_issue = dut.aluissue
 781     yield dut.reg_enable_i.eq(1)
 782
 783     # these indicate that the instruction is to be made shadow-dependent on
 784     # (either) branch success or branch fail
 785     yield dut.branch_fail_i.eq(branch_fail)
 786     yield dut.branch_succ_i.eq(branch_success)
 787
 788     yield
 789     yield from wait_for_issue(dut, dut_issue)
 790
 791
 792 def print_reg(dut, rnums):
 793     rs = []
 794     for rnum in rnums:
 795         reg = yield dut.intregs.regs[rnum].reg
 796         rs.append("%x" % reg)
 797     rnums = map(str, rnums)
 798     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 799
 800
 801 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 802     insts = []
 803     for i in range(n_ops):
 804         src1 = randint(1, dut.n_regs-1)
 805         src2 = randint(1, dut.n_regs-1)
 806         dest = randint(1, dut.n_regs-1)
 807         op = randint(0, max_opnums)
 808         opi = 0 if randint(0, 3) else 1 # set true if random is nonzero
 809
 810         if shadowing:
 811             insts.append((src1, src2, dest, op, opi, (0, 0)))
 812         else:
 813             insts.append((src1, src2, dest, op, opi))
 814     return insts
 815
 816
 817 def wait_for_busy_clear(dut):
 818     while True:
 819         busy_o = yield dut.busy_o
 820         if not busy_o:
 821             break
 822         print ("busy",)
 823         yield
 824
 825 def disable_issue(dut):
 826     yield dut.aluissue.insn_i.eq(0)
 827     yield dut.brissue.insn_i.eq(0)
 828
 829
 830 def wait_for_issue(dut, dut_issue):
 831     while True:
 832         issue_o = yield dut_issue.fn_issue_o
 833         if issue_o:
 834             yield from disable_issue(dut)
 835             yield dut.reg_enable_i.eq(0)
 836             break
 837         print ("busy",)
 838         #yield from print_reg(dut, [1,2,3])
 839         yield
 840     #yield from print_reg(dut, [1,2,3])
 841
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a small branch test and check the registers.

    Generator that yields signals / assignments / bare steps for the
    simulator.  As written it performs, once:

    1. seed the RNG, clear ``dut.branch_direction_o`` and load registers
       ``1 .. dut.n_regs-1`` with random values, mirrored into ``alusim``
       via ``alusim.setval``;
    2. build the instruction list (currently the hard-coded ``if True``
       case: one plain op followed by one branch carrying "ok" and "fail"
       follow-on lists);
    3. issue the instructions through ``int_instr``; a branch's follow-on
       lists are appended to the queue with a shadow flag, and queued
       entries whose flag does not match the value read from
       ``dut.branch_direction_o`` are skipped;
    4. wait for ``dut.busy_o`` to clear;
    5. replay a deep copy of the instruction list through ``alusim.op``,
       following the branch side selected by its return value;
    6. compare via ``alusim.check`` and print via ``alusim.dump``.

    Args:
        dut: device under test; must expose the signals used below.
        alusim: software model exposing ``rwidth``, ``setval``, ``op``,
            ``check`` and ``dump``.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # NOTE(review): this inner loop reuses the outer loop variable `i`;
        # harmless only because the outer loop runs exactly once.
        for i in range(1, dut.n_regs):
            val = 31+i*3 # dead store: overwritten by the random value below
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated variant of the test
        # NOTE(review): create_random_ops(..., shadowing=True) returns
        # 6-field tuples, while the issue loop below unpacks 5 fields;
        # this branch would need updating before being re-enabled.
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: hand-written case.  Tuple layout is
        # (src1, src2, dest, op, (shadow_on, shadow_off)); for a branch
        # (op >= 4) the "dest" slot holds (branch_ok, branch_fail) lists.
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None ) # placeholder: no instruction on "ok" side
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # keep an untouched copy for the model replay: the issue loop
        # below consumes `insts` with pop(0)
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts # same list object: appends to instrs feed insts.pop
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # direction already known: issue without any shadow flag
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the original instruction list through the software model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue # placeholder entry from a branch_ok/branch_fail list
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): scoreboard_sim calls alusim.op with five
            # arguments (op, opi, src1, src2, dest); only four are passed
            # here -- confirm against the model's current signature.
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                # follow only the side of the branch the model says is taken
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
 954
 955
def scoreboard_sim(dut, alusim):
    """Simulation process: queue random instructions and check the registers.

    Generator that yields signals / assignments / bare steps for the
    simulator.  As written it performs, once:

    1. load registers ``1 .. dut.n_regs-1`` with random values, mirrored
       into ``alusim`` via ``alusim.setval``;
    2. build the instruction list (currently 15 random operations from
       ``create_random_ops``; the ``if False`` blocks are disabled
       regression cases kept for reference);
    3. for each instruction, apply it to the model with ``alusim.op`` and
       push it into the DUT's queue with ``instr_q``;
    4. wait until ``dut.qlen_o`` reads zero, step a few more times, then
       wait for ``dut.busy_o`` to clear;
    5. compare via ``alusim.check`` and print via ``alusim.dump``.

    Args:
        dut: device under test; must expose the signals used below.
        alusim: software model exposing ``rwidth``, ``setval``, ``op``,
            ``check`` and ``dump``.
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        # NOTE(review): this inner loop reuses the outer loop variable `i`;
        # harmless only because the outer loop runs exactly once.
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # The issue loop below unpacks six fields:
        # (src1, src2, dest, op, opi, (br_ok, br_fail)).
        # NOTE(review): most of the disabled cases below use older 4- or
        # 5-field tuples and would need updating before being re-enabled.
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # fixed register values (r5=4, r3=5), then a branch (op 4)
            # followed by an op flagged (0, 1)
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # fixed register values (r5=6, r3=5), then a branch (op 4)
            # followed by an op flagged (1, 0)
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            # fixed sequence in the current six-field format
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # apply to the software model first, then queue it on the DUT
            alusim.op(op, opi, src1, src2, dest)
            yield from instr_q(dut, op, opi, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # queue is empty: step four more times before polling busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1089
1090
def test_scoreboard():
    """Build the scoreboard DUT, export it as RTLIL and run the simulation.

    Writes ``test_scoreboard6600.il`` (RTLIL of the design) and runs
    ``scoreboard_sim`` against the DUT, recording a VCD trace in
    ``test_scoreboard6600.vcd``.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used by the simulation

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative process, currently disabled:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1104
1105
# Script entry point: run the scoreboard test when executed directly.
if __name__ == '__main__':
    test_scoreboard()