src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class Memory(Elaboratable):
  26     def __init__(self, regwid, addrw):
  27         self.ddepth = regwid/8
  28         depth = (1<<addrw) / self.ddepth
  29         self.adr   = Signal(addrw)
  30         self.dat_r = Signal(regwid)
  31         self.dat_w = Signal(regwid)
  32         self.we    = Signal()
  33         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         m.submodules.rdport = rdport = self.mem.read_port()
  38         m.submodules.wrport = wrport = self.mem.write_port()
  39         m.d.comb += [
  40             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  41             self.dat_r.eq(rdport.data),
  42             wrport.addr.eq(self.adr),
  43             wrport.data.eq(self.dat_w),
  44             wrport.en.eq(self.we),
  45         ]
  46         return m
  47
  48
  49 class MemSim:
  50     def __init__(self, regwid, addrw):
  51         self.regwid = regwid
  52         self.ddepth = regwid//8
  53         depth = (1<<addrw) // self.ddepth
  54         self.mem = list(range(0, depth))
  55
  56     def ld(self, addr):
  57         return self.mem[addr>>self.ddepth]
  58
  59     def st(self, addr, data):
  60         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  61
  62
  63 class CompUnitsBase(Elaboratable):
  64     """ Computation Unit Base class.
  65
  66         Amazingly, this class works recursively.  It's supposed to just
  67         look after some ALUs (that can handle the same operations),
  68         grouping them together, however it turns out that the same code
  69         can also group *groups* of Computation Units together as well.
  70
  71         Basically it was intended just to concatenate the ALU's issue,
  72         go_rd etc. signals together, which start out as bits and become
  73         sequences.  Turns out that the same trick works just as well
  74         on Computation Units!
  75
  76         So this class may be used recursively to present a top-level
  77         sequential concatenation of all the signals in and out of
  78         ALUs, whilst at the same time making it convenient to group
  79         ALUs together.
  80
  81         At the lower level, the intent is that groups of (identical)
  82         ALUs may be passed the same operation.  Even beyond that,
  83         the intent is that that group of (identical) ALUs actually
  84         share the *same pipeline* and as such become a "Concurrent
  85         Computation Unit" as defined by Mitch Alsup (see section
  86         11.4.9.3)
  87     """
  88     def __init__(self, rwid, units):
  89         """ Inputs:
  90
  91             * :rwid:   bit width of register file(s) - both FP and INT
  92             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  93         """
  94         self.units = units
  95         self.rwid = rwid
  96         self.rwid = rwid
  97         if units and isinstance(units[0], CompUnitsBase):
  98             self.n_units = 0
  99             for u in self.units:
 100                 self.n_units += u.n_units
 101         else:
 102             self.n_units = len(units)
 103
 104         n_units = self.n_units
 105
 106         # inputs
 107         self.issue_i = Signal(n_units, reset_less=True)
 108         self.go_rd_i = Signal(n_units, reset_less=True)
 109         self.go_wr_i = Signal(n_units, reset_less=True)
 110         self.shadown_i = Signal(n_units, reset_less=True)
 111         self.go_die_i = Signal(n_units, reset_less=True)
 112
 113         # outputs
 114         self.busy_o = Signal(n_units, reset_less=True)
 115         self.rd_rel_o = Signal(n_units, reset_less=True)
 116         self.req_rel_o = Signal(n_units, reset_less=True)
 117
 118         # in/out register data (note: not register#, actual data)
 119         self.data_o = Signal(rwid, reset_less=True)
 120         self.src1_i = Signal(rwid, reset_less=True)
 121         self.src2_i = Signal(rwid, reset_less=True)
 122         # input operand
 123
 124     def elaborate(self, platform):
 125         m = Module()
 126         comb = m.d.comb
 127
 128         for i, alu in enumerate(self.units):
 129             setattr(m.submodules, "comp%d" % i, alu)
 130
 131         go_rd_l = []
 132         go_wr_l = []
 133         issue_l = []
 134         busy_l = []
 135         req_rel_l = []
 136         rd_rel_l = []
 137         shadow_l = []
 138         godie_l = []
 139         for alu in self.units:
 140             req_rel_l.append(alu.req_rel_o)
 141             rd_rel_l.append(alu.rd_rel_o)
 142             shadow_l.append(alu.shadown_i)
 143             godie_l.append(alu.go_die_i)
 144             go_wr_l.append(alu.go_wr_i)
 145             go_rd_l.append(alu.go_rd_i)
 146             issue_l.append(alu.issue_i)
 147             busy_l.append(alu.busy_o)
 148         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 149         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 150         comb += self.busy_o.eq(Cat(*busy_l))
 151         comb += Cat(*godie_l).eq(self.go_die_i)
 152         comb += Cat(*shadow_l).eq(self.shadown_i)
 153         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 154         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 155         comb += Cat(*issue_l).eq(self.issue_i)
 156
 157         # connect data register input/output
 158
 159         # merge (OR) all integer FU / ALU outputs to a single value
 160         # bit of a hack: treereduce needs a list with an item named "data_o"
 161         if self.units:
 162             data_o = treereduce(self.units)
 163             comb += self.data_o.eq(data_o)
 164
 165         for i, alu in enumerate(self.units):
 166             comb += alu.src1_i.eq(self.src1_i)
 167             comb += alu.src2_i.eq(self.src2_i)
 168
 169         return m
 170
 171
 172 class CompUnitALUs(CompUnitsBase):
 173
 174     def __init__(self, rwid, opwid):
 175         """ Inputs:
 176
 177             * :rwid:   bit width of register file(s) - both FP and INT
 178             * :opwid:  operand bit width
 179         """
 180         self.opwid = opwid
 181
 182         # inputs
 183         self.oper_i = Signal(opwid, reset_less=True)
 184         self.imm_i = Signal(rwid, reset_less=True)
 185
 186         # Int ALUs
 187         add = ALU(rwid)
 188         sub = ALU(rwid)
 189         mul = ALU(rwid)
 190         shf = ALU(rwid)
 191
 192         units = []
 193         for alu in [add, sub, mul, shf]:
 194             aluopwid = 3 # extra bit for immediate mode
 195             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 196
 197         CompUnitsBase.__init__(self, rwid, units)
 198
 199     def elaborate(self, platform):
 200         m = CompUnitsBase.elaborate(self, platform)
 201         comb = m.d.comb
 202
 203         # hand the same operation to all units, only lower 2 bits though
 204         for alu in self.units:
 205             comb += alu.oper_i[0:3].eq(self.oper_i)
 206             comb += alu.imm_i.eq(self.imm_i)
 207
 208         return m
 209
 210
 211 class CompUnitBR(CompUnitsBase):
 212
 213     def __init__(self, rwid, opwid):
 214         """ Inputs:
 215
 216             * :rwid:   bit width of register file(s) - both FP and INT
 217             * :opwid:  operand bit width
 218
 219             Note: bgt unit is returned so that a shadow unit can be created
 220             for it
 221         """
 222         self.opwid = opwid
 223
 224         # inputs
 225         self.oper_i = Signal(opwid, reset_less=True)
 226         self.imm_i = Signal(rwid, reset_less=True)
 227
 228         # Branch ALU and CU
 229         self.bgt = BranchALU(rwid)
 230         aluopwid = 3 # extra bit for immediate mode
 231         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 232         CompUnitsBase.__init__(self, rwid, [self.br1])
 233
 234     def elaborate(self, platform):
 235         m = CompUnitsBase.elaborate(self, platform)
 236         comb = m.d.comb
 237
 238         # hand the same operation to all units
 239         for alu in self.units:
 240             comb += alu.oper_i.eq(self.oper_i)
 241             comb += alu.imm_i.eq(self.imm_i)
 242
 243         return m
 244
 245
 246 class FunctionUnits(Elaboratable):
 247
 248     def __init__(self, n_regs, n_int_alus):
 249         self.n_regs = n_regs
 250         self.n_int_alus = n_int_alus
 251
 252         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 253         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 254         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 255
 256         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 257         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 258
 259         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 260         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 261         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 262
 263         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 264         self.readable_o = Signal(n_int_alus, reset_less=True)
 265         self.writable_o = Signal(n_int_alus, reset_less=True)
 266
 267         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 268         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 269         self.go_die_i = Signal(n_int_alus, reset_less=True)
 270         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 271         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 272
 273         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 274
 275     def elaborate(self, platform):
 276         m = Module()
 277         comb = m.d.comb
 278         sync = m.d.sync
 279
 280         n_intfus = self.n_int_alus
 281
 282         # Integer FU-FU Dep Matrix
 283         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 284         m.submodules.intfudeps = intfudeps
 285         # Integer FU-Reg Dep Matrix
 286         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 287         m.submodules.intregdeps = intregdeps
 288
 289         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 290         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 291
 292         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 293         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 294
 295         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 296         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 297         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 298
 299         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 300         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 301         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 302         comb += intfudeps.go_die_i.eq(self.go_die_i)
 303         comb += self.readable_o.eq(intfudeps.readable_o)
 304         comb += self.writable_o.eq(intfudeps.writable_o)
 305
 306         # Connect function issue / arrays, and dest/src1/src2
 307         comb += intregdeps.dest_i.eq(self.dest_i)
 308         comb += intregdeps.src1_i.eq(self.src1_i)
 309         comb += intregdeps.src2_i.eq(self.src2_i)
 310
 311         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 312         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 313         comb += intregdeps.go_die_i.eq(self.go_die_i)
 314         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 315
 316         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 317         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 318         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 319
 320         return m
 321
 322
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: wires together the register files, the
        Computation Units (four ALUs plus one branch unit), the Function
        Unit dependency matrices, the issue unit, the group picker and
        the shadow / branch-speculation logic.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds all the submodules and connects them together.

            Returns the populated Module.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four ALU units in cua plus the one branch unit in cub
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: sized for all n_intfus units
        # (the four ALUs plus the branch unit)
        intpick1 = GroupPicker(n_intfus)
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.
        # NOTE(review): the branch shadow is a separate one-wide matrix
        # (bshadow) rather than an extra column on "shadows" - earlier
        # comments spoke of widening the shadow by 1; confirm.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # single-bit busy: set if any Computation Unit is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the order-preserving shadows are reset by the branch shadow's go_die
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # one-wide "bshadow" ShadowMatrix created above, and
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # bit i of fn_issue_o set: unit i is the one being issued
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f: five bits, one per Function Unit (n_intfus is 5 above)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # branch result + 1, so that 0 remains "branch not met yet"
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ yields the register files' contents followed by the
            register-number inputs, issue_o and the branch signals
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ returns everything from __iter__ as a list
        """
        return list(self)
 596
 597
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is presented to the scoreboard, and is popped
        from the queue once the matching issue group has accepted it.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ (queue length)
            * :n_in:   number of instruction inputs (size of data_i)
            * :n_out:  passed to InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  bit width of the instruction operation field
            * :n_regs: depth of register file(s)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) tuple: log2(qlen) rounded down, plus 2 bits.
        # NOTE(review): computed with floating-point log - confirm the
        # rounding is safe for the qlen values in use.
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Creates the queue and the scoreboard, and the issue logic
            between them.  Also sets self.intregs (for testing).
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the relevant issue group
        # reports (via fn_issue_o) that it accepted the instruction.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: either of bits 2 or 3 of the
            # operation set selects the branch group.  the operation
            # handed on is the low 2 bits of op plus the immediate flag
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += wait_issue_br.eq(1)
            with m.Else():                   # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ yields p_ready_o, each item of every data_i entry, then p_add_i
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ returns everything from __iter__ as a list
        """
        return list(self)
 704
 705
 706 IADD = 0
 707 ISUB = 1
 708 IMUL = 2
 709 ISHF = 3
 710 IBGT = 4
 711 IBLT = 5
 712 IBEQ = 6
 713 IBNE = 7
 714
 715 class RegSim:
 716     def __init__(self, rwidth, nregs):
 717         self.rwidth = rwidth
 718         self.regs = [0] * nregs
 719
 720     def op(self, op, op_imm, imm, src1, src2, dest):
 721         maxbits = (1 << self.rwidth) - 1
 722         src1 = self.regs[src1] & maxbits
 723         if op_imm:
 724             src2 = imm
 725         else:
 726             src2 = self.regs[src2] & maxbits
 727         if op == IADD:
 728             val = src1 + src2
 729         elif op == ISUB:
 730             val = src1 - src2
 731         elif op == IMUL:
 732             val = src1 * src2
 733         elif op == ISHF:
 734             val = src1 >> (src2 & maxbits)
 735         elif op == IBGT:
 736             val = int(src1 > src2)
 737         elif op == IBLT:
 738             val = int(src1 < src2)
 739         elif op == IBEQ:
 740             val = int(src1 == src2)
 741         elif op == IBNE:
 742             val = int(src1 != src2)
 743         val &= maxbits
 744         self.setval(dest, val)
 745         return val
 746
 747     def setval(self, dest, val):
 748         print ("sim setval", dest, hex(val))
 749         self.regs[dest] = val
 750
 751     def dump(self, dut):
 752         for i, val in enumerate(self.regs):
 753             reg = yield dut.intregs.regs[i].reg
 754             okstr = "OK" if reg == val else "!ok"
 755             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 756
 757     def check(self, dut):
 758         for i, val in enumerate(self.regs):
 759             reg = yield dut.intregs.regs[i].reg
 760             if reg != val:
 761                 print("reg %d expected %x received %x\n" % (i, val, reg))
 762                 yield from self.dump(dut)
 763                 assert False
 764
 765 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 766             branch_success, branch_fail):
 767     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 768                'src1_i': src1, 'src2_i': src2}]
 769
 770     sendlen = 1
 771     for idx in range(sendlen):
 772         yield from eq(dut.data_i[idx], instrs[idx])
 773         di = yield dut.data_i[idx]
 774         print ("senddata %d %x" % (idx, di))
 775     yield dut.p_add_i.eq(sendlen)
 776     yield
 777     o_p_ready = yield dut.p_ready_o
 778     while not o_p_ready:
 779         yield
 780         o_p_ready = yield dut.p_ready_o
 781
 782     yield dut.p_add_i.eq(0)
 783
 784
 785 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 786     yield from disable_issue(dut)
 787     yield dut.int_dest_i.eq(dest)
 788     yield dut.int_src1_i.eq(src1)
 789     yield dut.int_src2_i.eq(src2)
 790     if (op & (0x3<<2)) != 0: # branch
 791         yield dut.brissue.insn_i.eq(1)
 792         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 793         yield dut.br_imm_i.eq(imm)
 794         dut_issue = dut.brissue
 795     else:
 796         yield dut.aluissue.insn_i.eq(1)
 797         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 798         yield dut.alu_imm_i.eq(imm)
 799         dut_issue = dut.aluissue
 800     yield dut.reg_enable_i.eq(1)
 801
 802     # these indicate that the instruction is to be made shadow-dependent on
 803     # (either) branch success or branch fail
 804     yield dut.branch_fail_i.eq(branch_fail)
 805     yield dut.branch_succ_i.eq(branch_success)
 806
 807     yield
 808     yield from wait_for_issue(dut, dut_issue)
 809
 810
 811 def print_reg(dut, rnums):
 812     rs = []
 813     for rnum in rnums:
 814         reg = yield dut.intregs.regs[rnum].reg
 815         rs.append("%x" % reg)
 816     rnums = map(str, rnums)
 817     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 818
 819
 820 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 821     insts = []
 822     for i in range(n_ops):
 823         src1 = randint(1, dut.n_regs-1)
 824         src2 = randint(1, dut.n_regs-1)
 825         imm = randint(1, (1<<dut.rwid)-1)
 826         dest = randint(1, dut.n_regs-1)
 827         op = randint(0, max_opnums)
 828         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 829
 830         if shadowing:
 831             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 832         else:
 833             insts.append((src1, src2, dest, op, opi, imm))
 834     return insts
 835
 836
 837 def wait_for_busy_clear(dut):
 838     while True:
 839         busy_o = yield dut.busy_o
 840         if not busy_o:
 841             break
 842         print ("busy",)
 843         yield
 844
 845 def disable_issue(dut):
 846     yield dut.aluissue.insn_i.eq(0)
 847     yield dut.brissue.insn_i.eq(0)
 848
 849
 850 def wait_for_issue(dut, dut_issue):
 851     while True:
 852         issue_o = yield dut_issue.fn_issue_o
 853         if issue_o:
 854             yield from disable_issue(dut)
 855             yield dut.reg_enable_i.eq(0)
 856             break
 857         print ("busy",)
 858         #yield from print_reg(dut, [1,2,3])
 859         yield
 860     #yield from print_reg(dut, [1,2,3])
 861
 862 def scoreboard_branch_sim(dut, alusim):
 863
 864     iseed = 3
 865
 866     for i in range(1):
 867
 868         print ("rseed", iseed)
 869         seed(iseed)
 870         iseed += 1
 871
 872         yield dut.branch_direction_o.eq(0)
 873
 874         # set random values in the registers
 875         for i in range(1, dut.n_regs):
 876             val = 31+i*3
 877             val = randint(0, (1<<alusim.rwidth)-1)
 878             yield dut.intregs.regs[i].reg.eq(val)
 879             alusim.setval(i, val)
 880
 881         if False:
 882             # create some instructions: branches create a tree
 883             insts = create_random_ops(dut, 1, True, 1)
 884             #insts.append((6, 6, 1, 2, (0, 0)))
 885             #insts.append((4, 3, 3, 0, (0, 0)))
 886
 887             src1 = randint(1, dut.n_regs-1)
 888             src2 = randint(1, dut.n_regs-1)
 889             #op = randint(4, 7)
 890             op = 4 # only BGT at the moment
 891
 892             branch_ok = create_random_ops(dut, 1, True, 1)
 893             branch_fail = create_random_ops(dut, 1, True, 1)
 894
 895             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 896
 897         if True:
 898             insts = []
 899             insts.append( (3, 5, 2, 0, (0, 0)) )
 900             branch_ok = []
 901             branch_fail = []
 902             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 903             branch_ok.append( None )
 904             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 905             #branch_fail.append( None )
 906             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 907
 908         siminsts = deepcopy(insts)
 909
 910         # issue instruction(s)
 911         i = -1
 912         instrs = insts
 913         branch_direction = 0
 914         while instrs:
 915             yield
 916             yield
 917             i += 1
 918             branch_direction = yield dut.branch_direction_o # way branch went
 919             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 920             if branch_direction == 1 and shadow_on:
 921                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 922                 continue # branch was "success" and this is a "failed"... skip
 923             if branch_direction == 2 and shadow_off:
 924                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 925                 continue # branch was "fail" and this is a "success"... skip
 926             if branch_direction != 0:
 927                 shadow_on = 0
 928                 shadow_off = 0
 929             is_branch = op >= 4
 930             if is_branch:
 931                 branch_ok, branch_fail = dest
 932                 dest = src2
 933                 # ok zip up the branch success / fail instructions and
 934                 # drop them into the queue, one marked "to have branch success"
 935                 # the other to be marked shadow branch "fail".
 936                 # one out of each of these will be cancelled
 937                 for ok, fl in zip(branch_ok, branch_fail):
 938                     if ok:
 939                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
 940                     if fl:
 941                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
 942             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
 943                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 944             yield from int_instr(dut, op, src1, src2, dest,
 945                                  shadow_on, shadow_off)
 946
 947         # wait for all instructions to stop before checking
 948         yield
 949         yield from wait_for_busy_clear(dut)
 950
 951         i = -1
 952         while siminsts:
 953             instr = siminsts.pop(0)
 954             if instr is None:
 955                 continue
 956             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
 957             i += 1
 958             is_branch = op >= 4
 959             if is_branch:
 960                 branch_ok, branch_fail = dest
 961                 dest = src2
 962             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
 963                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 964             branch_res = alusim.op(op, src1, src2, dest)
 965             if is_branch:
 966                 if branch_res:
 967                     siminsts += branch_ok
 968                 else:
 969                     siminsts += branch_fail
 970
 971         # check status
 972         yield from alusim.check(dut)
 973         yield from alusim.dump(dut)
 974
 975
 976 def scoreboard_sim(dut, alusim):
 977
 978     seed(0)
 979
 980     for i in range(50):
 981
 982         # set random values in the registers
 983         for i in range(1, dut.n_regs):
 984             val = randint(0, (1<<alusim.rwidth)-1)
 985             #val = 31+i*3
 986             #val = i
 987             yield dut.intregs.regs[i].reg.eq(val)
 988             alusim.setval(i, val)
 989
 990         # create some instructions (some random, some regression tests)
 991         instrs = []
 992         if True:
 993             instrs = create_random_ops(dut, 15, True, 4)
 994
 995         if False:
 996             instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )
 997
 998         if False:
 999             instrs.append( (7, 3, 2, 4, (0, 0)) )
1000             instrs.append( (7, 6, 6, 2, (0, 0)) )
1001             instrs.append( (1, 7, 2, 2, (0, 0)) )
1002
1003         if False:
1004             instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
1005             instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
1006             instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
1007             instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
1008             instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))
1009
1010         if False:
1011             instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
1012             instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
1013             instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))
1014
1015         if False:
1016             instrs.append((5, 6, 2, 1))
1017             instrs.append((2, 2, 4, 0))
1018             #instrs.append((2, 2, 3, 1))
1019
1020         if False:
1021             instrs.append((2, 1, 2, 3))
1022
1023         if False:
1024             instrs.append((2, 6, 2, 1))
1025             instrs.append((2, 1, 2, 0))
1026
1027         if False:
1028             instrs.append((1, 2, 7, 2))
1029             instrs.append((7, 1, 5, 0))
1030             instrs.append((4, 4, 1, 1))
1031
1032         if False:
1033             instrs.append((5, 6, 2, 2))
1034             instrs.append((1, 1, 4, 1))
1035             instrs.append((6, 5, 3, 0))
1036
1037         if False:
1038             # Write-after-Write Hazard
1039             instrs.append( (3, 6, 7, 2) )
1040             instrs.append( (4, 4, 7, 1) )
1041
1042         if False:
1043             # self-read/write-after-write followed by Read-after-Write
1044             instrs.append((1, 1, 1, 1))
1045             instrs.append((1, 5, 3, 0))
1046
1047         if False:
1048             # Read-after-Write followed by self-read-after-write
1049             instrs.append((5, 6, 1, 2))
1050             instrs.append((1, 1, 1, 1))
1051
1052         if False:
1053             # self-read-write sandwich
1054             instrs.append((5, 6, 1, 2))
1055             instrs.append((1, 1, 1, 1))
1056             instrs.append((1, 5, 3, 0))
1057
1058         if False:
1059             # very weird failure
1060             instrs.append( (5, 2, 5, 2) )
1061             instrs.append( (2, 6, 3, 0) )
1062             instrs.append( (4, 2, 2, 1) )
1063
1064         if False:
1065             v1 = 4
1066             yield dut.intregs.regs[5].reg.eq(v1)
1067             alusim.setval(5, v1)
1068             yield dut.intregs.regs[3].reg.eq(5)
1069             alusim.setval(3, 5)
1070             instrs.append((5, 3, 3, 4, (0, 0)))
1071             instrs.append((4, 2, 1, 2, (0, 1)))
1072
1073         if False:
1074             v1 = 6
1075             yield dut.intregs.regs[5].reg.eq(v1)
1076             alusim.setval(5, v1)
1077             yield dut.intregs.regs[3].reg.eq(5)
1078             alusim.setval(3, 5)
1079             instrs.append((5, 3, 3, 4, (0, 0)))
1080             instrs.append((4, 2, 1, 2, (1, 0)))
1081
1082         if False:
1083             instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
1084             instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
1085             instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
1086             instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
1087             instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
1088             instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
1089             instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
1090             instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
1091             instrs.append( (6, 7, 7, 0, 0, (0, 0)) )
1092
1093         # issue instruction(s), wait for issue to be free before proceeding
1094         for i, instr in enumerate(instrs):
1095             src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr
1096
1097             print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
1098                     (i, src1, src2, dest, op, opi, imm))
1099             alusim.op(op, opi, imm, src1, src2, dest)
1100             yield from instr_q(dut, op, opi, imm, src1, src2, dest,
1101                                br_ok, br_fail)
1102
1103         # wait for all instructions to stop before checking
1104         while True:
1105             iqlen = yield dut.qlen_o
1106             if iqlen == 0:
1107                 break
1108             yield
1109         yield
1110         yield
1111         yield
1112         yield
1113         yield from wait_for_busy_clear(dut)
1114
1115         # check status
1116         yield from alusim.check(dut)
1117         yield from alusim.dump(dut)
1118
1119
def test_scoreboard():
    """Build the scoreboard DUT, export its RTLIL and run the simulation.

    The RTLIL text is written to "test_scoreboard6600.il" and the
    simulation trace to "test_scoreboard6600.vcd"; `scoreboard_sim` is the
    process that drives and checks the design.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used by this test

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative process, currently disabled:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1133
1134
# run the scoreboard test when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()