split out RegSpec API into separate class (TODO: move to separate file)
[soc.git] / src / soc / experiment / compalu_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Signal, Mux, Elaboratable, Repl, Array, Record
4 from nmigen.hdl.rec import (DIR_FANIN, DIR_FANOUT)
5
6 from nmutil.latch import SRLatch, latchregister
7 from nmutil.iocontrol import RecordObject
8
9 from soc.decoder.power_decoder2 import Data
10 from soc.decoder.power_enums import InternalOp
11
12
13 """ Computation Unit (aka "ALU Manager").
14
15 This module runs a "revolving door" set of three latches, based on
16 * Issue
17 * Go_Read
18 * Go_Write
19 where one of them cannot be set on any given cycle.
20
21 * When issue is first raised, a busy signal is sent out.
22 The src1 and src2 registers and the operand can be latched in
23 at this point
24
25 * Read request is set, which is acknowledged through the Scoreboard
26 to the priority picker, which generates (one and only one) Go_Read
27 at a time. One of those will (eventually) be this Computation Unit.
28
29 * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
30 src1/src2/operand in place), and the ALU is told to proceed.
31
32 * when the ALU pipeline is ready, this activates "write request release",
33 and the ALU's output is captured into a temporary register.
34
35 * Write request release is *HELD UP* (prevented from proceeding) if shadowN
36 is asserted LOW. This is how all speculation, precise exceptions,
37 predication - everything - is achieved.
38
39 * Write request release will go through a similar process as Read request,
40 resulting (eventually) in Go_Write being asserted.
41
42 * When Go_Write is asserted, two things happen: (1) the data in the temp
43 register is placed combinatorially onto the output, and (2) the
44 req_l latch is cleared, busy is dropped, and the Comp Unit is back
45 through its revolving door to do another task.
46
47 Note that the read and write latches are held synchronously for one cycle,
48 i.e. that when Go_Read comes in, one cycle is given in which the incoming
49 register (broadcast over a Regfile Read Port) may have time to be latched.
50
51 It is REQUIRED that Go_Read be held valid only for one cycle, and it is
52 REQUIRED that the corresponding Read_Req be dropped exactly one cycle after
53 Go_Read is asserted HI.
54
55 Likewise for Go_Write: this is asserted for one cycle, and Req_Writes must
56 likewise be dropped exactly one cycle after assertion of Go_Write.
57
58 When Go_Die is asserted then strictly speaking the entire FSM should be
59 fully reset and that includes sending a cancellation request to the ALU.
60 (XXX TODO: alu "go die" is not presently wired up)
61 """
62
def go_record(n, name):
    """Build the "go"/"rel" handshake Record used for the read/write ports.

    * :n: bit-width given to both the ``go`` and the ``rel`` field
    * :name: name passed to the Record

    ``go`` is declared DIR_FANIN and ``rel`` is declared DIR_FANOUT.
    Both fields are marked reset_less before the Record is returned.
    """
    layout = [('go', n, DIR_FANIN),
              ('rel', n, DIR_FANOUT)]
    rec = Record(layout, name=name)
    for field in (rec.go, rec.rel):
        field.reset_less = True
    return rec
69
70 # see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
def get_regspec_bitwidth(regspec, srcdest, idx):
    """Return the total number of bits described by one regspec row.

    * :regspec: indexed as ``regspec[srcdest][idx]``; the selected row is a
      sequence whose element at index 2 is a bit-range string
    * :srcdest: which list of the regspec to look in (callers in this file
      pass 0 for sources and 1 for destinations)
    * :idx: row number within the selected list

    The bit-range string is a comma-separated list of items.  An item
    without a colon is a single bit and counts as 1.  An item of the form
    "start:end" is an inclusive range and counts as (end-start)+1.
    """
    bitspec = regspec[srcdest][idx]
    wid = 0
    for item in bitspec[2].split(","):
        bounds = item.split(":")
        if len(bounds) == 1:  # only one bit
            wid += 1
        else:
            start, end = map(int, bounds)
            wid += (end - start) + 1
    return wid
84
85
class CompUnitRecord(RecordObject):
    """CompUnitRecord

    base class for Computation Units, to provide a uniform API
    and allow "record.connect" etc. to be used, particularly when
    it comes to connecting multiple Computation Units up as a block
    (very laborious)

    LDSTCompUnitRecord should derive from this class and add the
    additional signals it requires

    :subkls: the class (not an instance) needed to construct the opcode
    :rwid: either an integer (specifies width of all regs) or a "regspec"
    :n_src: number of source operands.  only used when rwid is an integer:
            with a regspec the count is len(rwid[0])
    :n_dst: number of destination operands.  only used when rwid is an
            integer: with a regspec the count is len(rwid[1])
    :name: name passed to RecordObject

    see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
    """
    def __init__(self, subkls, rwid, n_src=None, n_dst=None, name=None):
        RecordObject.__init__(self, name)
        self._rwid = rwid
        if isinstance(rwid, int):
            # rwid: integer (covers all registers)
            self._n_src, self._n_dst = n_src, n_dst
        else:
            # rwid: a regspec.
            self._n_src, self._n_dst = len(rwid[0]), len(rwid[1])
        self._subkls = subkls

        # from here on use the *resolved* operand counts, so that a regspec
        # works even when n_src/n_dst are left at their default of None
        n_src, n_dst = self._n_src, self._n_dst

        # one source operand signal per src, exposed as src1_i, src2_i, ...
        src = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            sname = "src%d_i" % j
            rw = self._get_srcwid(i)
            sreg = Signal(rw, name=sname, reset_less=True)
            setattr(self, sname, sreg)
            src.append(sreg)
        self._src_i = src

        # one destination signal per dst, exposed as dest1_i, dest2_i, ...
        dst = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            dname = "dest%d_i" % j
            rw = self._get_dstwid(i)
            dreg = Signal(rw, name=dname, reset_less=True)
            setattr(self, dname, dreg)
            dst.append(dreg)
        self._dest = dst

        self.rd = go_record(n_src, name="rd")  # read in, req out
        self.wr = go_record(n_dst, name="wr")  # write in, req out
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
        self.go_die_i = Signal()  # go die (reset)

        # operation / data input
        self.oper_i = subkls()  # operand

        # output (busy/done)
        self.busy_o = Signal(reset_less=True)  # fn busy out
        self.done_o = Signal(reset_less=True)

    def _get_dstwid(self, i):
        """Width of destination i: rwid itself if an int, else from regspec"""
        if isinstance(self._rwid, int):
            return self._rwid
        return get_regspec_bitwidth(self._rwid, 1, i)

    def _get_srcwid(self, i):
        """Width of source i: rwid itself if an int, else from regspec"""
        if isinstance(self._rwid, int):
            return self._rwid
        return get_regspec_bitwidth(self._rwid, 0, i)
155
156
class RegSpecALUAPI:
    """Accessors for an ALU's inputs, outputs and opcode.

    Two ways of reaching into the ALU are supported, selected by the type
    of rwid: an integer rwid uses the old (testing) attribute names on the
    ALU directly, anything else is treated as a regspec whose rows name
    the fields to look up.
    """
    def __init__(self, rwid, alu):
        """RegSpecAPI

        * :rwid: regspec
        * :alu: ALU covered by this regspec
        """
        self.alu = alu  # actual ALU - set as a "submodule" of the CU
        self.rwid = rwid

    def get_out(self, i):
        """Return output number i of the ALU"""
        if not isinstance(self.rwid, int):
            # regspec-based API: the field name is element 1 of row i
            # of the output (second) half of the regspec
            outspec = self.rwid[1]
            return getattr(self.alu.n.data_o, outspec[i][1])
        # old - testing - API (rwid is int)
        return self.alu.out[i]

    def get_in(self, i):
        """Return input number i of the ALU"""
        if not isinstance(self.rwid, int):
            # regspec-based API: the field name is element 1 of row i
            # of the input (first) half of the regspec
            inspec = self.rwid[0]
            return getattr(self.alu.p.data_i, inspec[i][1])
        # old - testing - API (rwid is int)
        return self.alu.i[i]

    def get_op(self):
        """Return the ALU's operation (opcode) input"""
        if not isinstance(self.rwid, int):
            return self.alu.p.data_i.ctx.op
        # old - testing - API (rwid is int)
        return self.alu.op
183
184
class MultiCompUnit(RegSpecALUAPI, Elaboratable):
    """Computation Unit wrapping an ALU with issue/read/write latches.

    The signals themselves live in a CompUnitRecord (self.cu); they are
    re-exported here as attributes (src1_i, dest1_i, rd, wr, issue_i, ...)
    for convenience.  elaborate() adds the ALU as a submodule together
    with five SR latches (src, opc, req, rst, rdok) and the operand and
    result latch-registers.
    """
    def __init__(self, rwid, alu, opsubsetkls, n_src=2, n_dst=1):
        """MultiCompUnit

        * :rwid:        width of register latches (TODO: allocate per regspec)
        * :alu:         the ALU (pipeline, FSM) - must conform to nmutil Pipe API
        * :opsubsetkls: the subset of Decode2ExecuteType
        * :n_src:       number of src operands
        * :n_dst:       number of destination operands
        """
        RegSpecALUAPI.__init__(self, rwid, alu)
        self.n_src, self.n_dst = n_src, n_dst
        self.opsubsetkls = opsubsetkls
        self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)

        # re-export the record's per-operand signals under the same names
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            name = "src%d_i" % j
            setattr(self, name, getattr(cu, name))

        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            name = "dest%d_i" % j
            setattr(self, name, getattr(cu, name))

        # convenience names
        self.rd = cu.rd
        self.wr = cu.wr
        self.go_rd_i = self.rd.go  # temporary naming
        self.go_wr_i = self.wr.go  # temporary naming
        self.rd_rel_o = self.rd.rel  # temporary naming
        self.req_rel_o = self.wr.rel  # temporary naming
        self.issue_i = cu.issue_i
        self.shadown_i = cu.shadown_i
        self.go_die_i = cu.go_die_i

        # operation / data input
        self.oper_i = cu.oper_i
        self.src_i = cu._src_i

        self.busy_o = cu.busy_o
        self.dest = cu._dest
        self.data_o = self.dest[0]  # Dest out
        self.done_o = cu.done_o

    def elaborate(self, platform):
        """Build and return the nmigen Module for this Computation Unit"""
        m = Module()
        m.submodules.alu = self.alu
        m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
        m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
        m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")

        # ALU only proceeds when all src are ready.  rd_rel_o is delayed
        # so combine it with go_rd_i.  if all bits are set we're good
        all_rd = Signal(reset_less=True)
        m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
                              (((~self.rd.rel) | self.rd.go).all()))

        # write_requests all done
        # req_done works because any one of the last of the writes
        # is enough, when combined with when read-phase is done (rst_l.q)
        wr_any = Signal(reset_less=True)
        req_done = Signal(reset_less=True)
        m.d.comb += self.done_o.eq(self.busy_o & ~(self.wr.rel.bool()))
        m.d.comb += wr_any.eq(self.wr.go.bool())
        m.d.comb += req_done.eq(rst_l.q & wr_any)

        # shadow/go_die
        reset = Signal(reset_less=True)
        rst_r = Signal(reset_less=True)  # reset latch off
        reset_w = Signal(self.n_dst, reset_less=True)
        reset_r = Signal(self.n_src, reset_less=True)
        m.d.comb += reset.eq(req_done | self.go_die_i)
        m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
        m.d.comb += reset_w.eq(self.wr.go | Repl(self.go_die_i, self.n_dst))
        m.d.comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))

        # read-done,wr-proceed latch
        m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
        m.d.comb += rok_l.r.eq(self.alu.p.ready_o)  # off when ALU acknowledges

        # wr-done, back-to-start latch
        m.d.comb += rst_l.s.eq(all_rd)  # set when read-phase is fully done
        m.d.comb += rst_l.r.eq(rst_r)  # *off* on issue

        # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
        m.d.sync += opc_l.s.eq(self.issue_i)  # set on issue
        m.d.sync += opc_l.r.eq(self.alu.n.valid_o & req_done)  # reset on ALU

        # src operand latch (not using go_wr_i)
        m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
        m.d.sync += src_l.r.eq(reset_r)

        # dest operand latch (not using issue_i)
        m.d.sync += req_l.s.eq(Repl(all_rd, self.n_dst))
        m.d.sync += req_l.r.eq(reset_w)

        # create a latch/register for the operand
        oper_r = self.opsubsetkls()
        latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_r")

        # and for each output from the ALU.  each latch captures ALU output
        # number i, so it is sized from the *destination* width of row i
        # (identical to the source width only when rwid is an integer)
        drl = []
        for i in range(self.n_dst):
            name = "data_r%d" % i
            data_r = Signal(self.cu._get_dstwid(i), name=name, reset_less=True)
            latchregister(m, self.get_out(i), data_r, req_l.q[i], name)
            drl.append(data_r)

        # pass the operation to the ALU
        m.d.comb += self.get_op().eq(oper_r)

        # create list of src/alu-src/src-latch.  override 1st and 2nd one below.
        # in the case, for ALU and Logical pipelines, we assume RB is the
        # 2nd operand in the input "regspec".  see for example
        # soc.fu.alu.pipe_data.ALUInputData
        # TODO: assume RA is the 1st operand, zero_a detection is needed.
        sl = []
        for i in range(self.n_src):
            sl.append([self.src_i[i], self.get_in(i), src_l.q[i]])

        # if the operand subset has "zero_a" we implicitly assume that means
        # src_i[0] is an INT register type where zero can be multiplexed in,
        # instead.  see https://bugs.libre-soc.org/show_bug.cgi?id=336
        #if hasattr(oper_r, "zero_a"):
            # select zero immediate if opcode says so.  however also change
            # the latch to trigger *from* the opcode latch instead.
            # ...
            # ...

        # if the operand subset has "imm_data" we implicitly assume that means
        # "this is an INT ALU/Logical FU jobbie, RB is multiplexed with the
        # immediate".  NOTE: this branch indexes sl[1] and reads self.src2_i,
        # so it relies on there being at least two source operands.
        if hasattr(oper_r, "imm_data"):
            # select immediate if opcode says so.  however also change the latch
            # to trigger *from* the opcode latch instead.
            op_is_imm = oper_r.imm_data.imm_ok
            src2_or_imm = Signal(self.cu._get_srcwid(1), reset_less=True)
            src_sel = Signal(reset_less=True)
            m.d.comb += src_sel.eq(Mux(op_is_imm, opc_l.q, src_l.q[1]))
            m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm,
                                           self.src2_i))
            # overwrite 2nd src-latch with immediate-muxed stuff
            sl[1][0] = src2_or_imm
            sl[1][2] = src_sel

        # create a latch/register for src1/src2
        # (even if it is a copy of an immediate)
        for i in range(self.n_src):
            src, alusrc, latch = sl[i]
            latchregister(m, src, alusrc, latch, name="src_r%d" % i)

        # -----
        # outputs
        # -----

        # all request signals gated by busy_o.  prevents picker problems
        m.d.comb += self.busy_o.eq(opc_l.q)  # busy out
        bro = Repl(self.busy_o, self.n_src)
        m.d.comb += self.rd.rel.eq(src_l.q & bro)  # src1/src2 req rel

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(all_rd):  # src operands ready, GO!
            with m.If(~self.alu.p.ready_o):  # no ACK yet
                m.d.comb += self.alu.p.valid_i.eq(1)  # so indicate valid

        brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
        # only proceed if ALU says its output is valid
        with m.If(self.alu.n.valid_o):
            # when ALU ready, write req release out. waits for shadow
            m.d.comb += self.wr.rel.eq(req_l.q & brd)
            # when output latch is ready, and ALU says ready, accept ALU output
            with m.If(reset):
                # tells ALU "thanks got it"
                m.d.comb += self.alu.n.ready_i.eq(1)

        # output the data from the latch on go_write
        for i in range(self.n_dst):
            with m.If(self.wr.go[i]):
                m.d.comb += self.dest[i].eq(drl[i])

        return m

    def __iter__(self):
        """Yield the port signals of the unit.

        NOTE: src1_i/src2_i and data_o (the first destination) are
        hard-coded here, so exactly those operands are listed regardless
        of n_src/n_dst.
        """
        yield self.rd.go
        yield self.wr.go
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield self.src1_i
        yield self.src2_i
        yield self.busy_o
        yield self.rd.rel
        yield self.wr.rel
        yield self.data_o

    def ports(self):
        """Return the port signals as a list (see __iter__)"""
        return list(self)
384
385
def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0):
    """Simulation process: push one operation through the Computation Unit.

    * :dut: the unit under test (uses issue_i, src_i, oper_i, rd, wr, data_o)
    * :a: value driven onto src_i[0]
    * :b: value driven onto src_i[1]
    * :op: value driven onto oper_i.insn_type
    * :inv_a: value driven onto oper_i.invert_a
    * :imm: value driven onto oper_i.imm_data.imm
    * :imm_ok: value driven onto oper_i.imm_data.imm_ok

    Returns the value read from data_o while wr.go[0] is raised.

    A bare ``yield`` hands control back to the simulator (one clock step
    under run_simulation); ``yield <signal>`` reads a signal's value.
    """
    oper = dut.oper_i

    # start with issue low
    yield dut.issue_i.eq(0)
    yield

    # set up operands and operation, then pulse issue
    yield dut.src_i[0].eq(a)
    yield dut.src_i[1].eq(b)
    yield oper.insn_type.eq(op)
    yield oper.invert_a.eq(inv_a)
    yield oper.imm_data.imm.eq(imm)
    yield oper.imm_data.imm_ok.eq(imm_ok)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # raise go-read on both sources, then step until read-release is seen
    yield dut.rd.go.eq(0b11)
    rd_rel = 0
    while not rd_rel:
        yield
        rd_rel = yield dut.rd.rel
        print ("rd_rel", rd_rel)
    yield
    yield dut.rd.go.eq(0)

    # report the write-release state once, then poll until it is raised
    wr_rel = yield dut.wr.rel
    result = yield dut.data_o
    print ("req_rel", wr_rel, result)
    while True:
        wr_rel = yield dut.wr.rel
        result = yield dut.data_o
        print ("req_rel", wr_rel, result)
        if wr_rel:
            break
        yield

    # pulse go-write on destination 0 and pick up the result
    yield dut.wr.go[0].eq(1)
    yield
    result = yield dut.data_o
    print ("result", result)
    yield dut.wr.go[0].eq(0)
    yield
    return result
425
426
def scoreboard_sim(dut):
    """Simulation process: run three OP_ADD operations and check results.

    Every case issues op_sim(dut, 5, 2, InternalOp.OP_ADD, ...) with the
    extra keyword arguments listed, and asserts on the returned value.
    """
    cases = (
        # immediate selected: result is 13
        (dict(inv_a=0, imm=8, imm_ok=1), 13),
        # plain register add: result is 7
        (dict(), 7),
        # invert_a set: result is 65532
        (dict(inv_a=1), 65532),
    )
    for kwargs, expected in cases:
        result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, **kwargs)
        assert result == expected
437
438
def test_compunit():
    """Build a 16-bit MultiCompUnit (integer rwid), dump RTLIL, simulate.

    Writes test_compunit1.il and runs scoreboard_sim with the waveform
    going to test_compunit1.vcd.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    top = Module()
    dut = MultiCompUnit(16, ALU(16), CompALUOpSubset)
    top.submodules.cu = dut

    # convert first, so nothing is written if conversion fails
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit1.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(top, scoreboard_sim(dut), vcd_name='test_compunit1.vcd')
453
454
def test_compunit_regspec1():
    """Build a MultiCompUnit from a regspec, dump RTLIL, simulate.

    The regspec has two 16-bit inputs ('a', 'b') and one 16-bit output
    ('o').  Writes test_compunit_regspec1.il and runs scoreboard_sim with
    the waveform going to test_compunit_regspec1.vcd, so that neither
    output file collides with those written by test_compunit.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    inspec = [('INT', 'a', '0:15'),
              ('INT', 'b', '0:15')]
    outspec = [('INT', 'o', '0:15'),
               ]

    regspec = (inspec, outspec)

    m = Module()
    alu = ALU(16)
    dut = MultiCompUnit(regspec, alu, CompALUOpSubset)
    m.submodules.cu = dut

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit_regspec1.il", "w") as f:
        f.write(vl)

    # use this test's own VCD name rather than overwriting test_compunit's
    run_simulation(m, scoreboard_sim(dut),
                   vcd_name='test_compunit_regspec1.vcd')
476
477
if __name__ == '__main__':
    # run the integer-width test first, then the regspec-based one
    for run_test in (test_compunit, test_compunit_regspec1):
        run_test()