src/soc/experiment/compalu_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Signal, Mux, Elaboratable, Repl, Array, Record, Const
   4 from nmigen.hdl.rec import (DIR_FANIN, DIR_FANOUT)
   5
   6 from nmutil.latch import SRLatch, latchregister
   7 from nmutil.iocontrol import RecordObject
   8
   9 from soc.decoder.power_decoder2 import Data
  10 from soc.decoder.power_enums import InternalOp
  11 from soc.fu.regspec import RegSpec, RegSpecALUAPI
  12
  13
  14 """ Computation Unit (aka "ALU Manager").
  15
  16     This module runs a "revolving door" set of three latches, based on
  17     * Issue
  18     * Go_Read
  19     * Go_Write
  20     where one of them cannot be set on any given cycle.
  21
  22     * When issue is first raised, a busy signal is sent out.
  23       The src1 and src2 registers and the operand can be latched in
  24       at this point
  25
  26     * Read request is set, which is acknowledged through the Scoreboard
  27       to the priority picker, which generates (one and only one) Go_Read
  28       at a time.  One of those will (eventually) be this Computation Unit.
  29
  30     * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
  31       src1/src2/operand in place), and the ALU is told to proceed.
  32
  33     * when the ALU pipeline is ready, this activates "write request release",
  34       and the ALU's output is captured into a temporary register.
  35
  36     * Write request release is *HELD UP* (prevented from proceeding) if shadowN
  37       is asserted LOW.  This is how all speculation, precise exceptions,
  38       predication - everything - is achieved.
  39
  40     * Write request release will go through a similar process as Read request,
  41       resulting (eventually) in Go_Write being asserted.
  42
  43     * When Go_Write is asserted, two things happen: (1) the data in the temp
  44       register is placed combinatorially onto the output, and (2) the
  45       req_l latch is cleared, busy is dropped, and the Comp Unit is back
  46       through its revolving door to do another task.
  47
  48     Note that the read and write latches are held synchronously for one cycle,
  49     i.e. that when Go_Read comes in, one cycle is given in which the incoming
  50     register (broadcast over a Regfile Read Port) may have time to be latched.
  51
  52     It is REQUIRED that Go_Read be held valid only for one cycle, and it is
  53     REQUIRED that the corresponding Read_Req be dropped exactly one cycle after
  54     Go_Read is asserted HI.
  55
  56     Likewise for Go_Write: this is asserted for one cycle, and Req_Writes must
  57     likewise be dropped exactly one cycle after assertion of Go_Write.
  58
  59     When Go_Die is asserted then strictly speaking the entire FSM should be
  60     fully reset and that includes sending a cancellation request to the ALU.
  61     (XXX TODO: alu "go die" is not presently wired up)
  62 """
  63
  64 def go_record(n, name):
  65     r = Record([('go', n, DIR_FANIN),
  66                 ('rel', n, DIR_FANOUT)], name=name)
  67     r.go.reset_less = True
  68     r.rel.reset_less = True
  69     return r
  70
  71 # see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
  72
  73 class CompUnitRecord(RegSpec, RecordObject):
  74     """CompUnitRecord
  75
  76     base class for Computation Units, to provide a uniform API
  77     and allow "record.connect" etc. to be used, particularly when
  78     it comes to connecting multiple Computation Units up as a block
  79     (very laborious)
  80
  81     LDSTCompUnitRecord should derive from this class and add the
  82     additional signals it requires
  83
  84     :subkls:      the class (not an instance) needed to construct the opcode
  85     :rwid:        either an integer (specifies width of all regs) or a "regspec"
  86
  87     see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
  88     """
  89     def __init__(self, subkls, rwid, n_src=None, n_dst=None, name=None):
  90         RegSpec.__init__(self, rwid, n_src, n_dst)
  91         RecordObject.__init__(self, name)
  92         self._subkls = subkls
  93
  94         # create source operands
  95         src = []
  96         for i in range(n_src):
  97             j = i + 1 # name numbering to match src1/src2
  98             name = "src%d_i" % j
  99             rw = self._get_srcwid(i)
 100             sreg = Signal(rw, name=name, reset_less=True)
 101             setattr(self, name, sreg)
 102             src.append(sreg)
 103         self._src_i = src
 104
 105         # create dest operands
 106         dst = []
 107         for i in range(n_dst):
 108             j = i + 1 # name numbering to match dest1/2...
 109             name = "dest%d_i" % j
 110             rw = self._get_dstwid(i)
 111             dreg = Signal(rw, name=name, reset_less=True)
 112             setattr(self, name, dreg)
 113             dst.append(dreg)
 114         self._dest = dst
 115
 116         # operation / data input
 117         self.oper_i = subkls() # operand
 118
 119         # create read/write and other scoreboard signalling
 120         self.rd = go_record(n_src, name="rd") # read in, req out
 121         self.wr = go_record(n_dst, name="wr") # write in, req out
 122         self.issue_i = Signal(reset_less=True) # fn issue in
 123         self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
 124         self.go_die_i = Signal() # go die (reset)
 125
 126         # output (busy/done)
 127         self.busy_o = Signal(reset_less=True) # fn busy out
 128         self.done_o = Signal(reset_less=True)
 129
 130
 131 class MultiCompUnit(RegSpecALUAPI, Elaboratable):
 132     def __init__(self, rwid, alu, opsubsetkls, n_src=2, n_dst=1):
 133         """MultiCompUnit
 134
 135         * :rwid:        width of register latches (TODO: allocate per regspec)
 136         * :alu:         the ALU (pipeline, FSM) - must conform to nmutil Pipe API
 137         * :opsubsetkls: the subset of Decode2ExecuteType
 138         * :n_src:       number of src operands
 139         * :n_dst:       number of destination operands
 140         """
 141         RegSpecALUAPI.__init__(self, rwid, alu)
 142         self.n_src, self.n_dst = n_src, n_dst
 143         self.opsubsetkls = opsubsetkls
 144         self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)
 145
 146         # convenience names for src operands
 147         for i in range(n_src):
 148             j = i + 1 # name numbering to match src1/src2
 149             name = "src%d_i" % j
 150             setattr(self, name, getattr(cu, name))
 151
 152         # convenience names for dest operands
 153         for i in range(n_dst):
 154             j = i + 1 # name numbering to match dest1/2...
 155             name = "dest%d_i" % j
 156             setattr(self, name, getattr(cu, name))
 157
 158         # more convenience names
 159         self.rd = cu.rd
 160         self.wr = cu.wr
 161         self.go_rd_i = self.rd.go # temporary naming
 162         self.go_wr_i = self.wr.go # temporary naming
 163         self.rd_rel_o = self.rd.rel # temporary naming
 164         self.req_rel_o = self.wr.rel # temporary naming
 165         self.issue_i = cu.issue_i
 166         self.shadown_i = cu.shadown_i
 167         self.go_die_i = cu.go_die_i
 168
 169         # operation / data input
 170         self.oper_i = cu.oper_i
 171         self.src_i = cu._src_i
 172
 173         self.busy_o = cu.busy_o
 174         self.dest = cu._dest
 175         self.data_o = self.dest[0] # Dest out
 176         self.done_o = cu.done_o
 177
 178
 179     def _mux_op(self, m, sl, op_is_imm, imm, i):
 180         # select zero immediate if opcode says so.  however also change the latch
 181         # to trigger *from* the opcode latch instead.
 182         src_or_imm = Signal(self.cu._get_srcwid(i), reset_less=True)
 183         src_sel = Signal(reset_less=True)
 184         m.d.comb += src_sel.eq(Mux(op_is_imm, self.opc_l.q, self.src_l.q[i]))
 185         m.d.comb += src_or_imm.eq(Mux(op_is_imm, imm, self.src_i[i]))
 186         # overwrite 1st src-latch with immediate-muxed stuff
 187         sl[i][0] = src_or_imm
 188         sl[i][2] = src_sel
 189
 190     def elaborate(self, platform):
 191         m = Module()
 192         m.submodules.alu = self.alu
 193         m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
 194         m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
 195         m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
 196         m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
 197         m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")
 198         self.opc_l, self.src_l = opc_l, src_l
 199
 200         # ALU only proceeds when all src are ready.  rd_rel_o is delayed
 201         # so combine it with go_rd_i.  if all bits are set we're good
 202         all_rd = Signal(reset_less=True)
 203         m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
 204                     (((~self.rd.rel) | self.rd.go).all()))
 205
 206         # write_requests all done
 207         # req_done works because any one of the last of the writes
 208         # is enough, when combined with when read-phase is done (rst_l.q)
 209         wr_any = Signal(reset_less=True)
 210         req_done = Signal(reset_less=True)
 211         m.d.comb += self.done_o.eq(self.busy_o & ~(self.wr.rel.bool()))
 212         m.d.comb += wr_any.eq(self.wr.go.bool())
 213         m.d.comb += req_done.eq(rst_l.q & wr_any)
 214
 215         # shadow/go_die
 216         reset = Signal(reset_less=True)
 217         rst_r = Signal(reset_less=True) # reset latch off
 218         reset_w = Signal(self.n_dst, reset_less=True)
 219         reset_r = Signal(self.n_src, reset_less=True)
 220         m.d.comb += reset.eq(req_done | self.go_die_i)
 221         m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
 222         m.d.comb += reset_w.eq(self.wr.go | Repl(self.go_die_i, self.n_dst))
 223         m.d.comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))
 224
 225         # read-done,wr-proceed latch
 226         m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
 227         m.d.comb += rok_l.r.eq(self.alu.p.ready_o) # off when ALU acknowledges
 228
 229         # wr-done, back-to-start latch
 230         m.d.comb += rst_l.s.eq(all_rd)     # set when read-phase is fully done
 231         m.d.comb += rst_l.r.eq(rst_r)        # *off* on issue
 232
 233         # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
 234         m.d.sync += opc_l.s.eq(self.issue_i)       # set on issue
 235         m.d.sync += opc_l.r.eq(self.alu.n.valid_o & req_done) # reset on ALU
 236
 237         # src operand latch (not using go_wr_i)
 238         m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
 239         m.d.sync += src_l.r.eq(reset_r)
 240
 241         # dest operand latch (not using issue_i)
 242         m.d.sync += req_l.s.eq(Repl(all_rd, self.n_dst))
 243         m.d.sync += req_l.r.eq(reset_w)
 244
 245         # create a latch/register for the operand
 246         oper_r = self.opsubsetkls()
 247         latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_r")
 248
 249         # and for each output from the ALU
 250         drl = []
 251         for i in range(self.n_dst):
 252             name = "data_r%d" % i
 253             data_r = Signal(self.cu._get_srcwid(i), name=name, reset_less=True)
 254             latchregister(m, self.get_out(i), data_r, req_l.q[i], name)
 255             drl.append(data_r)
 256
 257         # pass the operation to the ALU
 258         m.d.comb += self.get_op().eq(oper_r)
 259
 260         # create list of src/alu-src/src-latch.  override 1st and 2nd one below.
 261         # in the case, for ALU and Logical pipelines, we assume RB is the 2nd operand
 262         # in the input "regspec".  see for example soc.fu.alu.pipe_data.ALUInputData
 263         sl = []
 264         for i in range(self.n_src):
 265             sl.append([self.src_i[i], self.get_in(i), src_l.q[i]])
 266
 267         # if the operand subset has "zero_a" we implicitly assume that means
 268         # src_i[0] is an INT register type where zero can be multiplexed in, instead.
 269         # see https://bugs.libre-soc.org/show_bug.cgi?id=336
 270         if hasattr(oper_r, "zero_a"):
 271             # select zero immediate if opcode says so.  however also change the latch
 272             # to trigger *from* the opcode latch instead.
 273             self._mux_op(m, sl, oper_r.zero_a, 0, 0)
 274
 275         # if the operand subset has "imm_data" we implicitly assume that means
 276         # "this is an INT ALU/Logical FU jobbie, RB is multiplexed with the immediate"
 277         if hasattr(oper_r, "imm_data"):
 278             # select immediate if opcode says so.  however also change the latch
 279             # to trigger *from* the opcode latch instead.
 280             op_is_imm = oper_r.imm_data.imm_ok
 281             imm = oper_r.imm_data.imm
 282             self._mux_op(m, sl, op_is_imm, imm, 1)
 283
 284         # create a latch/register for src1/src2 (even if it is a copy of an immediate)
 285         for i in range(self.n_src):
 286             src, alusrc, latch = sl[i]
 287             latchregister(m, src, alusrc, latch, name="src_r%d" % i)
 288
 289         # -----
 290         # outputs
 291         # -----
 292
 293         # all request signals gated by busy_o.  prevents picker problems
 294         m.d.comb += self.busy_o.eq(opc_l.q) # busy out
 295         bro = Repl(self.busy_o, self.n_src)
 296         m.d.comb += self.rd.rel.eq(src_l.q & bro) # src1/src2 req rel
 297
 298         # on a go_read, tell the ALU we're accepting data.
 299         # NOTE: this spells TROUBLE if the ALU isn't ready!
 300         # go_read is only valid for one clock!
 301         with m.If(all_rd):                           # src operands ready, GO!
 302             with m.If(~self.alu.p.ready_o):          # no ACK yet
 303                 m.d.comb += self.alu.p.valid_i.eq(1) # so indicate valid
 304
 305         brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
 306         # only proceed if ALU says its output is valid
 307         with m.If(self.alu.n.valid_o):
 308             # when ALU ready, write req release out. waits for shadow
 309             m.d.comb += self.wr.rel.eq(req_l.q & brd)
 310             # when output latch is ready, and ALU says ready, accept ALU output
 311             with m.If(reset):
 312                 m.d.comb += self.alu.n.ready_i.eq(1) # tells ALU "thanks got it"
 313
 314         # output the data from the latch on go_write
 315         for i in range(self.n_dst):
 316             with m.If(self.wr.go[i]):
 317                 m.d.comb += self.dest[i].eq(drl[i])
 318
 319         return m
 320
 321     def __iter__(self):
 322         yield self.rd.go
 323         yield self.wr.go
 324         yield self.issue_i
 325         yield self.shadown_i
 326         yield self.go_die_i
 327         yield from self.oper_i.ports()
 328         yield self.src1_i
 329         yield self.src2_i
 330         yield self.busy_o
 331         yield self.rd.rel
 332         yield self.wr.rel
 333         yield self.data_o
 334
 335     def ports(self):
 336         return list(self)
 337
 338
 339 def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0):
 340     yield dut.issue_i.eq(0)
 341     yield
 342     yield dut.src_i[0].eq(a)
 343     yield dut.src_i[1].eq(b)
 344     yield dut.oper_i.insn_type.eq(op)
 345     yield dut.oper_i.invert_a.eq(inv_a)
 346     yield dut.oper_i.imm_data.imm.eq(imm)
 347     yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
 348     yield dut.issue_i.eq(1)
 349     yield
 350     yield dut.issue_i.eq(0)
 351     yield
 352     yield dut.rd.go.eq(0b11)
 353     while True:
 354         yield
 355         rd_rel_o = yield dut.rd.rel
 356         print ("rd_rel", rd_rel_o)
 357         if rd_rel_o:
 358             break
 359     yield
 360     yield dut.rd.go.eq(0)
 361     req_rel_o = yield dut.wr.rel
 362     result = yield dut.data_o
 363     print ("req_rel", req_rel_o, result)
 364     while True:
 365         req_rel_o = yield dut.wr.rel
 366         result = yield dut.data_o
 367         print ("req_rel", req_rel_o, result)
 368         if req_rel_o:
 369             break
 370         yield
 371     yield dut.wr.go[0].eq(1)
 372     yield
 373     result = yield dut.data_o
 374     print ("result", result)
 375     yield dut.wr.go[0].eq(0)
 376     yield
 377     return result
 378
 379
 380 def scoreboard_sim(dut):
 381     result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=0,
 382                                     imm=8, imm_ok=1)
 383     assert result == 13
 384
 385     result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD)
 386     assert result == 7
 387
 388     result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=1)
 389     assert result == 65532
 390
 391
 392 def test_compunit():
 393     from alu_hier import ALU
 394     from soc.fu.alu.alu_input_record import CompALUOpSubset
 395
 396     m = Module()
 397     alu = ALU(16)
 398     dut = MultiCompUnit(16, alu, CompALUOpSubset)
 399     m.submodules.cu = dut
 400
 401     vl = rtlil.convert(dut, ports=dut.ports())
 402     with open("test_compunit1.il", "w") as f:
 403         f.write(vl)
 404
 405     run_simulation(m, scoreboard_sim(dut), vcd_name='test_compunit1.vcd')
 406
 407
 408 def test_compunit_regspec1():
 409     from alu_hier import ALU
 410     from soc.fu.alu.alu_input_record import CompALUOpSubset
 411
 412     inspec = [('INT', 'a', '0:15'),
 413               ('INT', 'b', '0:15')]
 414     outspec = [('INT', 'o', '0:15'),
 415               ]
 416
 417     regspec = (inspec, outspec)
 418
 419     m = Module()
 420     alu = ALU(16)
 421     dut = MultiCompUnit(regspec, alu, CompALUOpSubset)
 422     m.submodules.cu = dut
 423
 424     vl = rtlil.convert(dut, ports=dut.ports())
 425     with open("test_compunit_regspec1.il", "w") as f:
 426         f.write(vl)
 427
 428     run_simulation(m, scoreboard_sim(dut), vcd_name='test_compunit1.vcd')
 429
 430
 431 if __name__ == '__main__':
 432     test_compunit()
 433     test_compunit_regspec1()