src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class Memory(Elaboratable):
  26     def __init__(self, regwid, addrw):
  27         self.ddepth = regwid/8
  28         depth = (1<<addrw) / self.ddepth
  29         self.adr   = Signal(addrw)
  30         self.dat_r = Signal(regwid)
  31         self.dat_w = Signal(regwid)
  32         self.we    = Signal()
  33         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         m.submodules.rdport = rdport = self.mem.read_port()
  38         m.submodules.wrport = wrport = self.mem.write_port()
  39         m.d.comb += [
  40             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  41             self.dat_r.eq(rdport.data),
  42             wrport.addr.eq(self.adr),
  43             wrport.data.eq(self.dat_w),
  44             wrport.en.eq(self.we),
  45         ]
  46         return m
  47
  48
  49 class MemSim:
  50     def __init__(self, regwid, addrw):
  51         self.regwid = regwid
  52         self.ddepth = regwid//8
  53         depth = (1<<addrw) // self.ddepth
  54         self.mem = list(range(0, depth))
  55
  56     def ld(self, addr):
  57         return self.mem[addr>>self.ddepth]
  58
  59     def st(self, addr, data):
  60         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  61
  62
  63 class CompUnitsBase(Elaboratable):
  64     """ Computation Unit Base class.
  65
  66         Amazingly, this class works recursively.  It's supposed to just
  67         look after some ALUs (that can handle the same operations),
  68         grouping them together, however it turns out that the same code
  69         can also group *groups* of Computation Units together as well.
  70
  71         Basically it was intended just to concatenate the ALU's issue,
  72         go_rd etc. signals together, which start out as bits and become
  73         sequences.  Turns out that the same trick works just as well
  74         on Computation Units!
  75
  76         So this class may be used recursively to present a top-level
  77         sequential concatenation of all the signals in and out of
  78         ALUs, whilst at the same time making it convenient to group
  79         ALUs together.
  80
  81         At the lower level, the intent is that groups of (identical)
  82         ALUs may be passed the same operation.  Even beyond that,
  83         the intent is that that group of (identical) ALUs actually
  84         share the *same pipeline* and as such become a "Concurrent
  85         Computation Unit" as defined by Mitch Alsup (see section
  86         11.4.9.3)
  87     """
  88     def __init__(self, rwid, units):
  89         """ Inputs:
  90
  91             * :rwid:   bit width of register file(s) - both FP and INT
  92             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  93         """
  94         self.units = units
  95         self.rwid = rwid
  96         self.rwid = rwid
  97         if units and isinstance(units[0], CompUnitsBase):
  98             self.n_units = 0
  99             for u in self.units:
 100                 self.n_units += u.n_units
 101         else:
 102             self.n_units = len(units)
 103
 104         n_units = self.n_units
 105
 106         # inputs
 107         self.issue_i = Signal(n_units, reset_less=True)
 108         self.go_rd_i = Signal(n_units, reset_less=True)
 109         self.go_wr_i = Signal(n_units, reset_less=True)
 110         self.shadown_i = Signal(n_units, reset_less=True)
 111         self.go_die_i = Signal(n_units, reset_less=True)
 112
 113         # outputs
 114         self.busy_o = Signal(n_units, reset_less=True)
 115         self.rd_rel_o = Signal(n_units, reset_less=True)
 116         self.req_rel_o = Signal(n_units, reset_less=True)
 117
 118         # in/out register data (note: not register#, actual data)
 119         self.data_o = Signal(rwid, reset_less=True)
 120         self.src1_i = Signal(rwid, reset_less=True)
 121         self.src2_i = Signal(rwid, reset_less=True)
 122         # input operand
 123
 124     def elaborate(self, platform):
 125         m = Module()
 126         comb = m.d.comb
 127
 128         for i, alu in enumerate(self.units):
 129             setattr(m.submodules, "comp%d" % i, alu)
 130
 131         go_rd_l = []
 132         go_wr_l = []
 133         issue_l = []
 134         busy_l = []
 135         req_rel_l = []
 136         rd_rel_l = []
 137         shadow_l = []
 138         godie_l = []
 139         for alu in self.units:
 140             req_rel_l.append(alu.req_rel_o)
 141             rd_rel_l.append(alu.rd_rel_o)
 142             shadow_l.append(alu.shadown_i)
 143             godie_l.append(alu.go_die_i)
 144             go_wr_l.append(alu.go_wr_i)
 145             go_rd_l.append(alu.go_rd_i)
 146             issue_l.append(alu.issue_i)
 147             busy_l.append(alu.busy_o)
 148         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 149         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 150         comb += self.busy_o.eq(Cat(*busy_l))
 151         comb += Cat(*godie_l).eq(self.go_die_i)
 152         comb += Cat(*shadow_l).eq(self.shadown_i)
 153         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 154         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 155         comb += Cat(*issue_l).eq(self.issue_i)
 156
 157         # connect data register input/output
 158
 159         # merge (OR) all integer FU / ALU outputs to a single value
 160         # bit of a hack: treereduce needs a list with an item named "data_o"
 161         if self.units:
 162             data_o = treereduce(self.units)
 163             comb += self.data_o.eq(data_o)
 164
 165         for i, alu in enumerate(self.units):
 166             comb += alu.src1_i.eq(self.src1_i)
 167             comb += alu.src2_i.eq(self.src2_i)
 168
 169         return m
 170
 171
 172 class CompUnitALUs(CompUnitsBase):
 173
 174     def __init__(self, rwid, opwid):
 175         """ Inputs:
 176
 177             * :rwid:   bit width of register file(s) - both FP and INT
 178             * :opwid:  operand bit width
 179         """
 180         self.opwid = opwid
 181
 182         # inputs
 183         self.oper_i = Signal(opwid, reset_less=True)
 184         self.imm_i = Signal(rwid, reset_less=True)
 185
 186         # Int ALUs
 187         add = ALU(rwid)
 188         sub = ALU(rwid)
 189         mul = ALU(rwid)
 190         shf = ALU(rwid)
 191
 192         units = []
 193         for alu in [add, sub, mul, shf]:
 194             aluopwid = 3 # extra bit for immediate mode
 195             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 196
 197         CompUnitsBase.__init__(self, rwid, units)
 198
 199     def elaborate(self, platform):
 200         m = CompUnitsBase.elaborate(self, platform)
 201         comb = m.d.comb
 202
 203         # hand the same operation to all units, only lower 2 bits though
 204         for alu in self.units:
 205             comb += alu.oper_i[0:3].eq(self.oper_i)
 206             comb += alu.imm_i.eq(self.imm_i)
 207
 208         return m
 209
 210
 211 class CompUnitBR(CompUnitsBase):
 212
 213     def __init__(self, rwid, opwid):
 214         """ Inputs:
 215
 216             * :rwid:   bit width of register file(s) - both FP and INT
 217             * :opwid:  operand bit width
 218
 219             Note: bgt unit is returned so that a shadow unit can be created
 220             for it
 221         """
 222         self.opwid = opwid
 223
 224         # inputs
 225         self.oper_i = Signal(opwid, reset_less=True)
 226
 227         # Branch ALU and CU
 228         self.bgt = BranchALU(rwid)
 229         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 230         CompUnitsBase.__init__(self, rwid, [self.br1])
 231
 232     def elaborate(self, platform):
 233         m = CompUnitsBase.elaborate(self, platform)
 234         comb = m.d.comb
 235
 236         # hand the same operation to all units
 237         for alu in self.units:
 238             comb += alu.oper_i.eq(self.oper_i)
 239
 240         return m
 241
 242
 243 class FunctionUnits(Elaboratable):
 244
 245     def __init__(self, n_regs, n_int_alus):
 246         self.n_regs = n_regs
 247         self.n_int_alus = n_int_alus
 248
 249         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 250         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 251         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 252
 253         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 254         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 255
 256         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 257         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 258         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 259
 260         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 261         self.readable_o = Signal(n_int_alus, reset_less=True)
 262         self.writable_o = Signal(n_int_alus, reset_less=True)
 263
 264         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 265         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 266         self.go_die_i = Signal(n_int_alus, reset_less=True)
 267         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 268         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 269
 270         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 271
 272     def elaborate(self, platform):
 273         m = Module()
 274         comb = m.d.comb
 275         sync = m.d.sync
 276
 277         n_intfus = self.n_int_alus
 278
 279         # Integer FU-FU Dep Matrix
 280         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 281         m.submodules.intfudeps = intfudeps
 282         # Integer FU-Reg Dep Matrix
 283         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 284         m.submodules.intregdeps = intregdeps
 285
 286         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 287         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 288
 289         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 290         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 291
 292         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 293         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 294         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 295
 296         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 297         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 298         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 299         comb += intfudeps.go_die_i.eq(self.go_die_i)
 300         comb += self.readable_o.eq(intfudeps.readable_o)
 301         comb += self.writable_o.eq(intfudeps.writable_o)
 302
 303         # Connect function issue / arrays, and dest/src1/src2
 304         comb += intregdeps.dest_i.eq(self.dest_i)
 305         comb += intregdeps.src1_i.eq(self.src1_i)
 306         comb += intregdeps.src2_i.eq(self.src2_i)
 307
 308         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 309         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 310         comb += intregdeps.go_die_i.eq(self.go_die_i)
 311         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 312
 313         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 314         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 315         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 316
 317         return m
 318
 319
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: wires together the register files, the
        Computation Units (four ALUs plus one branch unit), the function
        unit dependency matrices, the group picker, the issue units, two
        shadow matrices and the branch speculation recorder.

        The issue groups (aluissue, brissue) and the operation/immediate
        inputs are created in the constructor so that whatever drives
        instruction issue can reach them from outside.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        # (group sizes match the 4 ALU CUs and 1 branch CU built in elaborate)
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates all submodules and connects them together.

            Returns the nmigen Module containing the wiring.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created here but not connected anywhere below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four ALU CUs in CompUnitALUs plus the one branch CU
        # in CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: one picker, n_intfus wide, covering
        # every unit in "cu"
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: the branch shadow is held in
        # a second, separate matrix (bshadow) constructed with a second
        # argument of 1 (presumably the shadow width - TODO confirm
        # against ShadowMatrix)
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy_o is a single bit: set when any unit's busy bit is set
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        # a unit is unshadowed only if *both* matrices say so; it dies if
        # *either* matrix says so
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the instruction-order shadows are reset by the branch shadow's
        # go_die output
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # 1-wide bshadow matrix, which
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from the cycle it is issued (or while the
        # recorder's active_i is set) until the branch unit's go_wr_i fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the unit whose issue bit is set gets the shadow
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f is a hard-coded 5-bit mask, matching
        # n_intfus == 5 above; it needs updating if the unit count changes
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # direction is the branch result plus one, so that zero can
            # mean "no branch met yet" (see branch_direction_o in __init__)
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the register files' contents followed by this module's
            register-number, issue and branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything yielded by __iter__, as a list. """
        return list(self)
 591
 592
 593 class IssueToScoreboard(Elaboratable):
 594
 595     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 596         self.qlen = qlen
 597         self.n_in = n_in
 598         self.n_out = n_out
 599         self.rwid = rwid
 600         self.opw = opwid
 601         self.n_regs = n_regs
 602
 603         mqbits = (int(log(qlen) / log(2))+2, False)
 604         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 605         self.p_ready_o = Signal() # instructions were added
 606         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 607
 608         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 609         self.qlen_o = Signal(mqbits, reset_less=True)
 610
 611     def elaborate(self, platform):
 612         m = Module()
 613         comb = m.d.comb
 614         sync = m.d.sync
 615
 616         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 617         sc = Scoreboard(self.rwid, self.n_regs)
 618         m.submodules.iq = iq
 619         m.submodules.sc = sc
 620
 621         # get at the regfile for testing
 622         self.intregs = sc.intregs
 623
 624         # and the "busy" signal and instruction queue length
 625         comb += self.busy_o.eq(sc.busy_o)
 626         comb += self.qlen_o.eq(iq.qlen_o)
 627
 628         # link up instruction queue
 629         comb += iq.p_add_i.eq(self.p_add_i)
 630         comb += self.p_ready_o.eq(iq.p_ready_o)
 631         for i in range(self.n_in):
 632             comb += eq(iq.data_i[i], self.data_i[i])
 633
 634         # take instruction and process it.  note that it's possible to
 635         # "inspect" the queue contents *without* actually removing the
 636         # items.  items are only removed when the
 637
 638         # in "waiting" state
 639         wait_issue_br = Signal()
 640         wait_issue_alu = Signal()
 641
 642         with m.If(wait_issue_br | wait_issue_alu):
 643             # set instruction pop length to 1 if the unit accepted
 644             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 645                 with m.If(iq.qlen_o != 0):
 646                     comb += iq.n_sub_i.eq(1)
 647             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 648                 with m.If(iq.qlen_o != 0):
 649                     comb += iq.n_sub_i.eq(1)
 650
 651         # see if some instruction(s) are here.  note that this is
 652         # "inspecting" the in-place queue.  note also that on the
 653         # cycle following "waiting" for fn_issue_o to be set, the
 654         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 655         with m.If(iq.qlen_o != 0):
 656             # get the operands and operation
 657             imm = iq.data_o[0].imm_i
 658             dest = iq.data_o[0].dest_i
 659             src1 = iq.data_o[0].src1_i
 660             src2 = iq.data_o[0].src2_i
 661             op = iq.data_o[0].oper_i
 662             opi = iq.data_o[0].opim_i # immediate set
 663
 664             # set the src/dest regs
 665             comb += sc.int_dest_i.eq(dest)
 666             comb += sc.int_src1_i.eq(src1)
 667             comb += sc.int_src2_i.eq(src2)
 668             comb += sc.reg_enable_i.eq(1) # enable the regfile
 669
 670             # choose a Function-Unit-Group
 671             with m.If((op & (0x3<<2)) != 0): # branch
 672                 comb += sc.brissue.insn_i.eq(1)
 673                 comb += sc.br_oper_i.eq(op & 0x3)
 674                 comb += wait_issue_br.eq(1)
 675             with m.Else():                   # alu
 676                 comb += sc.aluissue.insn_i.eq(1)
 677                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 678                 comb += sc.alu_imm_i.eq(imm)
 679                 comb += wait_issue_alu.eq(1)
 680
 681             # XXX TODO
 682             # these indicate that the instruction is to be made
 683             # shadow-dependent on
 684             # (either) branch success or branch fail
 685             #yield sc.branch_fail_i.eq(branch_fail)
 686             #yield sc.branch_succ_i.eq(branch_success)
 687
 688         return m
 689
 690     def __iter__(self):
 691         yield self.p_ready_o
 692         for o in self.data_i:
 693             yield from list(o)
 694         yield self.p_add_i
 695
 696     def ports(self):
 697         return list(self)
 698
 699
 700 IADD = 0
 701 ISUB = 1
 702 IMUL = 2
 703 ISHF = 3
 704 IBGT = 4
 705 IBLT = 5
 706 IBEQ = 6
 707 IBNE = 7
 708
 709 class RegSim:
 710     def __init__(self, rwidth, nregs):
 711         self.rwidth = rwidth
 712         self.regs = [0] * nregs
 713
 714     def op(self, op, op_imm, imm, src1, src2, dest):
 715         maxbits = (1 << self.rwidth) - 1
 716         src1 = self.regs[src1] & maxbits
 717         if op_imm:
 718             src2 = imm
 719         else:
 720             src2 = self.regs[src2] & maxbits
 721         if op == IADD:
 722             val = src1 + src2
 723         elif op == ISUB:
 724             val = src1 - src2
 725         elif op == IMUL:
 726             val = src1 * src2
 727         elif op == ISHF:
 728             val = src1 >> (src2 & maxbits)
 729         elif op == IBGT:
 730             val = int(src1 > src2)
 731         elif op == IBLT:
 732             val = int(src1 < src2)
 733         elif op == IBEQ:
 734             val = int(src1 == src2)
 735         elif op == IBNE:
 736             val = int(src1 != src2)
 737         val &= maxbits
 738         self.setval(dest, val)
 739         return val
 740
 741     def setval(self, dest, val):
 742         print ("sim setval", dest, hex(val))
 743         self.regs[dest] = val
 744
 745     def dump(self, dut):
 746         for i, val in enumerate(self.regs):
 747             reg = yield dut.intregs.regs[i].reg
 748             okstr = "OK" if reg == val else "!ok"
 749             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 750
 751     def check(self, dut):
 752         for i, val in enumerate(self.regs):
 753             reg = yield dut.intregs.regs[i].reg
 754             if reg != val:
 755                 print("reg %d expected %x received %x\n" % (i, val, reg))
 756                 yield from self.dump(dut)
 757                 assert False
 758
 759 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 760             branch_success, branch_fail):
 761     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 762                'src1_i': src1, 'src2_i': src2}]
 763
 764     sendlen = 1
 765     for idx in range(sendlen):
 766         yield from eq(dut.data_i[idx], instrs[idx])
 767         di = yield dut.data_i[idx]
 768         print ("senddata %d %x" % (idx, di))
 769     yield dut.p_add_i.eq(sendlen)
 770     yield
 771     o_p_ready = yield dut.p_ready_o
 772     while not o_p_ready:
 773         yield
 774         o_p_ready = yield dut.p_ready_o
 775
 776     yield dut.p_add_i.eq(0)
 777
 778
 779 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 780     yield from disable_issue(dut)
 781     yield dut.int_dest_i.eq(dest)
 782     yield dut.int_src1_i.eq(src1)
 783     yield dut.int_src2_i.eq(src2)
 784     if (op & (0x3<<2)) != 0: # branch
 785         yield dut.brissue.insn_i.eq(1)
 786         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 787         dut_issue = dut.brissue
 788     else:
 789         yield dut.aluissue.insn_i.eq(1)
 790         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 791         yield dut.alu_imm_i.eq(imm)
 792         dut_issue = dut.aluissue
 793     yield dut.reg_enable_i.eq(1)
 794
 795     # these indicate that the instruction is to be made shadow-dependent on
 796     # (either) branch success or branch fail
 797     yield dut.branch_fail_i.eq(branch_fail)
 798     yield dut.branch_succ_i.eq(branch_success)
 799
 800     yield
 801     yield from wait_for_issue(dut, dut_issue)
 802
 803
 804 def print_reg(dut, rnums):
 805     rs = []
 806     for rnum in rnums:
 807         reg = yield dut.intregs.regs[rnum].reg
 808         rs.append("%x" % reg)
 809     rnums = map(str, rnums)
 810     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 811
 812
 813 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 814     insts = []
 815     for i in range(n_ops):
 816         src1 = randint(1, dut.n_regs-1)
 817         src2 = randint(1, dut.n_regs-1)
 818         imm = randint(1, (1<<dut.rwid)-1)
 819         dest = randint(1, dut.n_regs-1)
 820         op = randint(0, max_opnums)
 821         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 822
 823         if shadowing:
 824             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 825         else:
 826             insts.append((src1, src2, dest, op, opi, imm))
 827     return insts
 828
 829
 830 def wait_for_busy_clear(dut):
 831     while True:
 832         busy_o = yield dut.busy_o
 833         if not busy_o:
 834             break
 835         print ("busy",)
 836         yield
 837
 838 def disable_issue(dut):
 839     yield dut.aluissue.insn_i.eq(0)
 840     yield dut.brissue.insn_i.eq(0)
 841
 842
 843 def wait_for_issue(dut, dut_issue):
 844     while True:
 845         issue_o = yield dut_issue.fn_issue_o
 846         if issue_o:
 847             yield from disable_issue(dut)
 848             yield dut.reg_enable_i.eq(0)
 849             break
 850         print ("busy",)
 851         #yield from print_reg(dut, [1,2,3])
 852         yield
 853     #yield from print_reg(dut, [1,2,3])
 854
 855 def scoreboard_branch_sim(dut, alusim):
 856
 857     iseed = 3
 858
 859     for i in range(1):
 860
 861         print ("rseed", iseed)
 862         seed(iseed)
 863         iseed += 1
 864
 865         yield dut.branch_direction_o.eq(0)
 866
 867         # set random values in the registers
 868         for i in range(1, dut.n_regs):
 869             val = 31+i*3
 870             val = randint(0, (1<<alusim.rwidth)-1)
 871             yield dut.intregs.regs[i].reg.eq(val)
 872             alusim.setval(i, val)
 873
 874         if False:
 875             # create some instructions: branches create a tree
 876             insts = create_random_ops(dut, 1, True, 1)
 877             #insts.append((6, 6, 1, 2, (0, 0)))
 878             #insts.append((4, 3, 3, 0, (0, 0)))
 879
 880             src1 = randint(1, dut.n_regs-1)
 881             src2 = randint(1, dut.n_regs-1)
 882             #op = randint(4, 7)
 883             op = 4 # only BGT at the moment
 884
 885             branch_ok = create_random_ops(dut, 1, True, 1)
 886             branch_fail = create_random_ops(dut, 1, True, 1)
 887
 888             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 889
 890         if True:
 891             insts = []
 892             insts.append( (3, 5, 2, 0, (0, 0)) )
 893             branch_ok = []
 894             branch_fail = []
 895             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 896             branch_ok.append( None )
 897             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 898             #branch_fail.append( None )
 899             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 900
 901         siminsts = deepcopy(insts)
 902
 903         # issue instruction(s)
 904         i = -1
 905         instrs = insts
 906         branch_direction = 0
 907         while instrs:
 908             yield
 909             yield
 910             i += 1
 911             branch_direction = yield dut.branch_direction_o # way branch went
 912             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 913             if branch_direction == 1 and shadow_on:
 914                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 915                 continue # branch was "success" and this is a "failed"... skip
 916             if branch_direction == 2 and shadow_off:
 917                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 918                 continue # branch was "fail" and this is a "success"... skip
 919             if branch_direction != 0:
 920                 shadow_on = 0
 921                 shadow_off = 0
 922             is_branch = op >= 4
 923             if is_branch:
 924                 branch_ok, branch_fail = dest
 925                 dest = src2
 926                 # ok zip up the branch success / fail instructions and
 927                 # drop them into the queue, one marked "to have branch success"
 928                 # the other to be marked shadow branch "fail".
 929                 # one out of each of these will be cancelled
 930                 for ok, fl in zip(branch_ok, branch_fail):
 931                     if ok:
 932                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
 933                     if fl:
 934                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
 935             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
 936                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 937             yield from int_instr(dut, op, src1, src2, dest,
 938                                  shadow_on, shadow_off)
 939
 940         # wait for all instructions to stop before checking
 941         yield
 942         yield from wait_for_busy_clear(dut)
 943
 944         i = -1
 945         while siminsts:
 946             instr = siminsts.pop(0)
 947             if instr is None:
 948                 continue
 949             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
 950             i += 1
 951             is_branch = op >= 4
 952             if is_branch:
 953                 branch_ok, branch_fail = dest
 954                 dest = src2
 955             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
 956                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 957             branch_res = alusim.op(op, src1, src2, dest)
 958             if is_branch:
 959                 if branch_res:
 960                     siminsts += branch_ok
 961                 else:
 962                     siminsts += branch_fail
 963
 964         # check status
 965         yield from alusim.check(dut)
 966         yield from alusim.dump(dut)
 967
 968
def scoreboard_sim(dut, alusim):
    """Simulation process: queue a list of instructions and verify registers.

    Seeds the random generator with 0, loads random values into the DUT
    registers (mirrored into *alusim*), then for each enabled instruction
    applies it to *alusim* and pushes it into the DUT through ``instr_q``.
    Afterwards it waits for ``dut.qlen_o`` to reach zero and ``dut.busy_o``
    to clear, and compares the DUT register file against *alusim*.

    Instruction tuples consumed by the issue loop have the form
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.

    NOTE(review): several of the disabled ("if False") regression cases
    below still use 4-, 5- or 6-element tuples; those would fail to unpack
    in the issue loop if re-enabled as they are.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        # the only case currently enabled
        if True:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the software model first, then queue the same
            # instruction on the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra clocks after the queue empties, before polling busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1111
1112
def test_scoreboard():
    """Build the scoreboard DUT, write its RTLIL and run ``scoreboard_sim``.

    Writes ``test_scoreboard6600.il`` (RTLIL of the design) and
    ``test_scoreboard6600.vcd`` (simulation trace) to the current directory.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)

    # dump the design as RTLIL
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative process exercising branches (currently disabled):
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1126
1127
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()