common function for op zero and op immed
[soc.git] / src / soc / experiment / compalu_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Signal, Mux, Elaboratable, Repl, Array, Record, Const
4 from nmigen.hdl.rec import (DIR_FANIN, DIR_FANOUT)
5
6 from nmutil.latch import SRLatch, latchregister
7 from nmutil.iocontrol import RecordObject
8
9 from soc.decoder.power_decoder2 import Data
10 from soc.decoder.power_enums import InternalOp
11 from soc.fu.regspec import RegSpec, RegSpecALUAPI
12
13
14 """ Computation Unit (aka "ALU Manager").
15
16 This module runs a "revolving door" set of three latches, based on
17 * Issue
18 * Go_Read
19 * Go_Write
20 where one of them cannot be set on any given cycle.
21
22 * When issue is first raised, a busy signal is sent out.
23 The src1 and src2 registers and the operand can be latched in
24 at this point
25
26 * Read request is set, which is acknowledged through the Scoreboard
27 to the priority picker, which generates (one and only one) Go_Read
28 at a time. One of those will (eventually) be this Computation Unit.
29
30 * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
31 src1/src2/operand in place), and the ALU is told to proceed.
32
33 * when the ALU pipeline is ready, this activates "write request release",
34 and the ALU's output is captured into a temporary register.
35
36 * Write request release is *HELD UP* (prevented from proceeding) if shadowN
37 is asserted LOW. This is how all speculation, precise exceptions,
38 predication - everything - is achieved.
39
40 * Write request release will go through a similar process as Read request,
41 resulting (eventually) in Go_Write being asserted.
42
43 * When Go_Write is asserted, two things happen: (1) the data in the temp
44 register is placed combinatorially onto the output, and (2) the
45 req_l latch is cleared, busy is dropped, and the Comp Unit is back
46 through its revolving door to do another task.
47
48 Note that the read and write latches are held synchronously for one cycle,
49 i.e. that when Go_Read comes in, one cycle is given in which the incoming
50 register (broadcast over a Regfile Read Port) may have time to be latched.
51
52 It is REQUIRED that Go_Read be held valid only for one cycle, and it is
53 REQUIRED that the corresponding Read_Req be dropped exactly one cycle after
54 Go_Read is asserted HI.
55
56 Likewise for Go_Write: this is asserted for one cycle, and Req_Writes must
57 likewise be dropped exactly one cycle after assertion of Go_Write.
58
59 When Go_Die is asserted then strictly speaking the entire FSM should be
60 fully reset and that includes sending a cancellation request to the ALU.
61 (XXX TODO: alu "go die" is not presently wired up)
62 """
63
def go_record(n, name):
    """Build the two-field "go"/"rel" handshake Record used by the scoreboard.

    * :n:    bit-width of both fields (one bit per operand)
    * :name: name given to the Record

    The "go" field is declared DIR_FANIN and the "rel" field DIR_FANOUT.
    Both fields are marked reset_less before the Record is returned.
    """
    layout = [
        ('go', n, DIR_FANIN),
        ('rel', n, DIR_FANOUT),
    ]
    rec = Record(layout, name=name)
    for field in (rec.go, rec.rel):
        field.reset_less = True
    return rec
70
71 # see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
72
class CompUnitRecord(RegSpec, RecordObject):
    """CompUnitRecord

    Base record shared by Computation Units.  It gives them a uniform
    set of signals so that "record.connect" and friends can be used when
    wiring many Computation Units together as a block (which is very
    laborious to do by hand).

    LDSTCompUnitRecord should derive from this class and add the
    additional signals it requires.

    :subkls: the class (not an instance) that is called, with no
             arguments, to create the operation record (oper_i)
    :rwid:   either an integer (specifies width of all regs) or a "regspec"
    :n_src:  number of source operands (src1_i, src2_i, ...)
    :n_dst:  number of destination operands (dest1_i, dest2_i, ...)
    :name:   name passed on to RecordObject

    NOTE(review): n_src / n_dst default to None but are handed straight
    to range() below, so callers must supply integers - confirm whether
    RegSpec is expected to provide a fallback.

    see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
    """
    def __init__(self, subkls, rwid, n_src=None, n_dst=None, name=None):
        RegSpec.__init__(self, rwid, n_src, n_dst)
        RecordObject.__init__(self, name)
        self._subkls = subkls

        def make_operands(count, fmt, get_width):
            # one reset-less Signal per operand: published on the record
            # under its 1-based name (to match src1/src2, dest1/dest2...)
            # and also collected, in index order, into the returned list
            operands = []
            for idx in range(count):
                signame = fmt % (idx + 1)
                sig = Signal(get_width(idx), name=signame, reset_less=True)
                setattr(self, signame, sig)
                operands.append(sig)
            return operands

        # source operands, then destination operands
        self._src_i = make_operands(n_src, "src%d_i", self._get_srcwid)
        self._dest = make_operands(n_dst, "dest%d_i", self._get_dstwid)

        # operation / data input: an instance of the operand subset class
        self.oper_i = subkls()

        # read/write and other scoreboard signalling
        self.rd = go_record(n_src, name="rd")   # read in, req out
        self.wr = go_record(n_dst, name="wr")   # write in, req out
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.shadown_i = Signal(reset=1)        # shadow fn, defaults to ON
        self.go_die_i = Signal()                # go die (reset)

        # status outputs (busy/done)
        self.busy_o = Signal(reset_less=True)   # fn busy out
        self.done_o = Signal(reset_less=True)
128 self.done_o = Signal(reset_less=True)
129
130
class MultiCompUnit(RegSpecALUAPI, Elaboratable):
    """Computation Unit ("ALU Manager") for one ALU with multiple operands.

    Wraps a CompUnitRecord (self.cu) and re-exports its signals under
    convenience names.  elaborate() builds the issue / read / write
    latch logic around the ALU passed to the constructor.
    """
    def __init__(self, rwid, alu, opsubsetkls, n_src=2, n_dst=1):
        """MultiCompUnit

        * :rwid:        width of register latches (TODO: allocate per regspec)
        * :alu:         the ALU (pipeline, FSM) - must conform to nmutil Pipe API
        * :opsubsetkls: the subset of Decode2ExecuteType
        * :n_src:       number of src operands
        * :n_dst:       number of destination operands
        """
        RegSpecALUAPI.__init__(self, rwid, alu)
        self.n_src, self.n_dst = n_src, n_dst
        self.opsubsetkls = opsubsetkls
        self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)

        # convenience names for src operands
        for i in range(n_src):
            j = i + 1 # name numbering to match src1/src2
            name = "src%d_i" % j
            setattr(self, name, getattr(cu, name))

        # convenience names for dest operands
        for i in range(n_dst):
            j = i + 1 # name numbering to match dest1/2...
            name = "dest%d_i" % j
            setattr(self, name, getattr(cu, name))

        # more convenience names
        self.rd = cu.rd
        self.wr = cu.wr
        self.go_rd_i = self.rd.go # temporary naming
        self.go_wr_i = self.wr.go # temporary naming
        self.rd_rel_o = self.rd.rel # temporary naming
        self.req_rel_o = self.wr.rel # temporary naming
        self.issue_i = cu.issue_i
        self.shadown_i = cu.shadown_i
        self.go_die_i = cu.go_die_i

        # operation / data input
        self.oper_i = cu.oper_i
        self.src_i = cu._src_i

        self.busy_o = cu.busy_o
        self.dest = cu._dest
        self.data_o = self.dest[0] # Dest out
        self.done_o = cu.done_o

    def _mux_op(self, m, sl, op_is_imm, imm, i):
        """Multiplex an immediate (or zero) into source operand i.

        * :m:         the Module being elaborated
        * :sl:        list of [src, alu-src, src-latch] entries, one per
                      source operand; entry i is modified in place
        * :op_is_imm: when set, selects the immediate over src_i[i]
        * :imm:       the immediate value (0 for the "zero_a" case)
        * :i:         index of the source operand to override

        Relies on self.opc_l and self.src_l, which elaborate() sets up
        before calling this function.
        """
        # select zero immediate if opcode says so. however also change the latch
        # to trigger *from* the opcode latch instead.
        src_or_imm = Signal(self.cu._get_srcwid(i), reset_less=True)
        src_sel = Signal(reset_less=True)
        m.d.comb += src_sel.eq(Mux(op_is_imm, self.opc_l.q, self.src_l.q[i]))
        m.d.comb += src_or_imm.eq(Mux(op_is_imm, imm, self.src_i[i]))
        # overwrite 1st src-latch with immediate-muxed stuff
        sl[i][0] = src_or_imm
        sl[i][2] = src_sel

    def elaborate(self, platform):
        """Build the latches and glue logic around the ALU; returns the Module."""
        m = Module()
        m.submodules.alu = self.alu
        m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
        m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
        m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")
        # kept on self so that _mux_op can reach them
        self.opc_l, self.src_l = opc_l, src_l

        # ALU only proceeds when all src are ready.  rd_rel_o is delayed
        # so combine it with go_rd_i.  if all bits are set we're good
        all_rd = Signal(reset_less=True)
        m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
                              (((~self.rd.rel) | self.rd.go).all()))

        # write_requests all done
        # req_done works because any one of the last of the writes
        # is enough, when combined with when read-phase is done (rst_l.q)
        wr_any = Signal(reset_less=True)
        req_done = Signal(reset_less=True)
        m.d.comb += self.done_o.eq(self.busy_o & ~(self.wr.rel.bool()))
        m.d.comb += wr_any.eq(self.wr.go.bool())
        m.d.comb += req_done.eq(rst_l.q & wr_any)

        # shadow/go_die
        reset = Signal(reset_less=True)
        rst_r = Signal(reset_less=True) # reset latch off
        reset_w = Signal(self.n_dst, reset_less=True)
        reset_r = Signal(self.n_src, reset_less=True)
        m.d.comb += reset.eq(req_done | self.go_die_i)
        m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
        m.d.comb += reset_w.eq(self.wr.go | Repl(self.go_die_i, self.n_dst))
        m.d.comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))

        # read-done,wr-proceed latch
        m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
        m.d.comb += rok_l.r.eq(self.alu.p.ready_o) # off when ALU acknowledges

        # wr-done, back-to-start latch
        m.d.comb += rst_l.s.eq(all_rd)     # set when read-phase is fully done
        m.d.comb += rst_l.r.eq(rst_r)        # *off* on issue

        # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
        m.d.sync += opc_l.s.eq(self.issue_i)       # set on issue
        m.d.sync += opc_l.r.eq(self.alu.n.valid_o & req_done) # reset on ALU

        # src operand latch (not using go_wr_i)
        m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
        m.d.sync += src_l.r.eq(reset_r)

        # dest operand latch (not using issue_i)
        m.d.sync += req_l.s.eq(Repl(all_rd, self.n_dst))
        m.d.sync += req_l.r.eq(reset_w)

        # create a latch/register for the operand
        oper_r = self.opsubsetkls()
        latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_r")

        # and for each output from the ALU.  each latch captures a
        # *destination* value, so it takes the destination width for
        # output i (the same width CompUnitRecord gives to dest%d_i)
        drl = []
        for i in range(self.n_dst):
            name = "data_r%d" % i
            data_r = Signal(self.cu._get_dstwid(i), name=name, reset_less=True)
            latchregister(m, self.get_out(i), data_r, req_l.q[i], name)
            drl.append(data_r)

        # pass the operation to the ALU
        m.d.comb += self.get_op().eq(oper_r)

        # create list of src/alu-src/src-latch.  override 1st and 2nd one below.
        # in the case, for ALU and Logical pipelines, we assume RB is the
        # 2nd operand in the input "regspec".  see for example
        # soc.fu.alu.pipe_data.ALUInputData
        sl = []
        for i in range(self.n_src):
            sl.append([self.src_i[i], self.get_in(i), src_l.q[i]])

        # if the operand subset has "zero_a" we implicitly assume that means
        # src_i[0] is an INT register type where zero can be multiplexed in,
        # instead.  see https://bugs.libre-soc.org/show_bug.cgi?id=336
        if hasattr(oper_r, "zero_a"):
            # select zero immediate if opcode says so. however also change
            # the latch to trigger *from* the opcode latch instead.
            self._mux_op(m, sl, oper_r.zero_a, 0, 0)

        # if the operand subset has "imm_data" we implicitly assume that means
        # "this is an INT ALU/Logical FU jobbie, RB is multiplexed with
        # the immediate"
        if hasattr(oper_r, "imm_data"):
            # select immediate if opcode says so. however also change
            # the latch to trigger *from* the opcode latch instead.
            op_is_imm = oper_r.imm_data.imm_ok
            imm = oper_r.imm_data.imm
            self._mux_op(m, sl, op_is_imm, imm, 1)

        # create a latch/register for src1/src2 (even if it is a copy of
        # an immediate)
        for i in range(self.n_src):
            src, alusrc, latch = sl[i]
            latchregister(m, src, alusrc, latch, name="src_r%d" % i)

        # -----
        # outputs
        # -----

        # all request signals gated by busy_o.  prevents picker problems
        m.d.comb += self.busy_o.eq(opc_l.q) # busy out
        bro = Repl(self.busy_o, self.n_src)
        m.d.comb += self.rd.rel.eq(src_l.q & bro) # src1/src2 req rel

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(all_rd):                           # src operands ready, GO!
            with m.If(~self.alu.p.ready_o):          # no ACK yet
                m.d.comb += self.alu.p.valid_i.eq(1) # so indicate valid

        brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
        # only proceed if ALU says its output is valid
        with m.If(self.alu.n.valid_o):
            # when ALU ready, write req release out. waits for shadow
            m.d.comb += self.wr.rel.eq(req_l.q & brd)
            # when output latch is ready, and ALU says ready, accept ALU output
            with m.If(reset):
                m.d.comb += self.alu.n.ready_i.eq(1) # tells ALU "thanks got it"

        # output the data from the latch on go_write
        for i in range(self.n_dst):
            with m.If(self.wr.go[i]):
                m.d.comb += self.dest[i].eq(drl[i])

        return m

    def __iter__(self):
        """Yield the unit's port signals, inputs first and then outputs."""
        yield self.rd.go
        yield self.wr.go
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        # every source operand (src1_i, src2_i for the default n_src=2)
        yield from self.src_i
        yield self.busy_o
        yield self.rd.rel
        yield self.wr.rel
        # every destination operand (only data_o for the default n_dst=1)
        yield from self.dest

    def ports(self):
        """Return the port signals as a list (see __iter__)."""
        return list(self)
337
338
def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0):
    """Simulation process: push one operation through the Computation Unit.

    * :dut:    the MultiCompUnit under test
    * :a:      value driven onto src_i[0]
    * :b:      value driven onto src_i[1]
    * :op:     value driven onto oper_i.insn_type
    * :inv_a:  value driven onto oper_i.invert_a
    * :imm:    value driven onto oper_i.imm_data.imm
    * :imm_ok: value driven onto oper_i.imm_data.imm_ok

    Returns the value read from dut.data_o one cycle after wr.go[0]
    was raised.  Intended to be used with "yield from" inside a process
    handed to run_simulation.
    """
    # start with issue deasserted, for one cycle
    yield dut.issue_i.eq(0)
    yield
    # set up the operands and the operation
    yield dut.src_i[0].eq(a)
    yield dut.src_i[1].eq(b)
    yield dut.oper_i.insn_type.eq(op)
    yield dut.oper_i.invert_a.eq(inv_a)
    yield dut.oper_i.imm_data.imm.eq(imm)
    yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
    # pulse issue for exactly one cycle
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # raise go_read on both source operands, then step a cycle at a
    # time until the read-request release (rd.rel) reads back non-zero
    yield dut.rd.go.eq(0b11)
    while True:
        yield
        rd_rel_o = yield dut.rd.rel
        print ("rd_rel", rd_rel_o)
        if rd_rel_o:
            break
    # one further cycle, then drop go_read
    yield
    yield dut.rd.go.eq(0)
    req_rel_o = yield dut.wr.rel
    result = yield dut.data_o
    print ("req_rel", req_rel_o, result)
    # poll the write-request release (wr.rel), one cycle per iteration,
    # until it reads back non-zero
    while True:
        req_rel_o = yield dut.wr.rel
        result = yield dut.data_o
        print ("req_rel", req_rel_o, result)
        if req_rel_o:
            break
        yield
    # pulse go_write on dest 0 for one cycle, sampling the output
    # while it is asserted
    yield dut.wr.go[0].eq(1)
    yield
    result = yield dut.data_o
    print ("result", result)
    yield dut.wr.go[0].eq(0)
    yield
    return result
378
379
def scoreboard_sim(dut):
    """Simulation process: run three OP_ADD operations and check the results.

    Every case issues src1=5, src2=2 through op_sim; only the extra
    operation fields differ from case to case.
    """
    cases = (
        # (extra op_sim keyword arguments, expected result)
        ({"inv_a": 0, "imm": 8, "imm_ok": 1}, 13),  # immediate flagged ok
        ({}, 7),                                    # plain register add
        ({"inv_a": 1}, 65532),                      # invert_a set
    )
    for extra, expected in cases:
        result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, **extra)
        assert result == expected
390
391
def test_compunit():
    """Build a MultiCompUnit with an integer register width and simulate it.

    Writes the RTLIL for the unit to test_compunit1.il, then runs
    scoreboard_sim against it, dumping waveforms to test_compunit1.vcd.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    top = Module()
    alu = ALU(16)
    dut = MultiCompUnit(16, alu, CompALUOpSubset)
    top.submodules.cu = dut

    # dump the RTLIL of the unit on its own
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit1.il", "w") as il_file:
        il_file.write(il_text)

    # simulate the top-level module containing the unit
    sim_process = scoreboard_sim(dut)
    run_simulation(top, sim_process, vcd_name='test_compunit1.vcd')
406
407
def test_compunit_regspec1():
    """Build a MultiCompUnit from a regspec (not a plain width) and simulate it.

    The regspec is an (inspec, outspec) pair: two INT inputs "a" and "b"
    and one INT output "o", all declared as '0:15'.

    Writes the RTLIL to test_compunit_regspec1.il, then runs
    scoreboard_sim, dumping waveforms to test_compunit_regspec1.vcd.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    inspec = [('INT', 'a', '0:15'),
              ('INT', 'b', '0:15')]
    outspec = [('INT', 'o', '0:15'),
               ]

    regspec = (inspec, outspec)

    m = Module()
    alu = ALU(16)
    dut = MultiCompUnit(regspec, alu, CompALUOpSubset)
    m.submodules.cu = dut

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit_regspec1.il", "w") as f:
        f.write(vl)

    # own VCD file, named like the .il above, so that this run does not
    # overwrite the waveform written by test_compunit
    run_simulation(m, scoreboard_sim(dut),
                   vcd_name='test_compunit_regspec1.vcd')
429
430
if __name__ == '__main__':
    # when run as a script, execute both self-tests in order
    for selftest in (test_compunit, test_compunit_regspec1):
        selftest()