"""Computation Unit (aka "ALU Manager").

Manages a Pipeline or FSM, ensuring that the start and end time are 100%
monitored.  At no time may the ALU proceed without this module notifying
the Dependency Matrices.  At no time is a result production "abandoned".
This module blocks (indicates busy) starting from when it first receives
an opcode until it receives notification that
its result(s) have been successfully stored in the regfile(s).

Documented at http://libre-soc.org/3d_gpu/architecture/compunit
"""
13 from nmigen
import Module
, Signal
, Mux
, Elaboratable
, Repl
, Cat
, Const
14 from nmigen
.hdl
.rec
import (Record
, DIR_FANIN
, DIR_FANOUT
)
16 from nmutil
.latch
import SRLatch
, latchregister
17 from nmutil
.iocontrol
import RecordObject
19 from soc
.fu
.regspec
import RegSpec
, RegSpecALUAPI
def find_ok(fields):
    """find_ok helper function - finds field ending in "_ok"

    :fields: iterable of field names (iterating a dict yields its keys)

    Returns the first name that ends in "_ok", or None when no name matches.
    """
    for field_name in fields:
        if field_name.endswith("_ok"):
            return field_name
    # no "_ok" field present: callers must cope with None
    return None
def go_record(n, name):
    """Create the "go"/"rel" Record pair used for scoreboard signalling.

    :n:    width (in bits) of both the "go" and the "rel" fields
    :name: name given to the Record

    The Record has a "go" field (DIR_FANIN) and a "rel" field (DIR_FANOUT).
    Both fields are marked reset_less.  The Record is returned so that
    callers can keep it and access its .go / .rel fields.
    """
    r = Record([('go', n, DIR_FANIN),
                ('rel', n, DIR_FANOUT)], name=name)
    r.go.reset_less = True
    r.rel.reset_less = True
    return r
39 # see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
class CompUnitRecord(RegSpec, RecordObject):
    """CompUnitRecord

    base class for Computation Units, to provide a uniform API
    and allow "record.connect" etc. to be used, particularly when
    it comes to connecting multiple Computation Units up as a block

    LDSTCompUnitRecord should derive from this class and add the
    additional signals it requires

    :subkls: the class (not an instance) needed to construct the opcode
    :rwid:   either an integer (specifies width of all regs) or a "regspec"
    :n_src:  number of source operands, passed straight to RegSpec
    :n_dst:  number of destination operands, passed straight to RegSpec
    :name:   name passed to RecordObject

    see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
    """

    def __init__(self, subkls, rwid, n_src=None, n_dst=None, name=None):
        RegSpec.__init__(self, rwid, n_src, n_dst)
        RecordObject.__init__(self, name)
        # from here on use the operand counts that RegSpec worked out
        n_src, n_dst = self._n_src, self._n_dst

        # create source operands: src1_i, src2_i, ...
        src = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            sname = "src%d_i" % j
            rw = self._get_srcwid(i)
            sreg = Signal(rw, name=sname, reset_less=True)
            setattr(self, sname, sreg)
            src.append(sreg)
        # indexable list of the source operands (MultiCompUnit reads this)
        self._src_i = src

        # create dest operands: dest1_o, dest2_o, ...
        dst = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            dname = "dest%d_o" % j
            rw = self._get_dstwid(i)
            # dreg = Data(rw, name=name) XXX ??? output needs to be a Data type?
            dreg = Signal(rw, name=dname, reset_less=True)
            setattr(self, dname, dreg)
            dst.append(dreg)
        # NOTE(review): attribute name "_dest" mirrors "_src_i"; confirm that
        # users of this record expect the destination list under this name
        self._dest = dst

        # operation / data input
        self.oper_i = subkls(name="oper_i")  # operand

        # create read/write and other scoreboard signalling
        self.rd = go_record(n_src, name="rd")  # read in, req out
        self.wr = go_record(n_dst, name="wr")  # write in, req out
        self.rdmaskn = Signal(n_src, reset_less=True)  # read mask
        self.wrmask = Signal(n_dst, reset_less=True)  # write mask
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
        self.go_die_i = Signal()  # go die (reset)

        # output (busy/done)
        self.busy_o = Signal(reset_less=True)  # fn busy out
        self.done_o = Signal(reset_less=True)
103 class MultiCompUnit(RegSpecALUAPI
, Elaboratable
):
def __init__(self, rwid, alu, opsubsetkls, n_src=2, n_dst=1):
    """MultiCompUnit

    * :rwid:        width of register latches (TODO: allocate per regspec)
    * :alu:         ALU (pipeline, FSM) - must conform to nmutil Pipe API
    * :opsubsetkls: subset of Decode2ExecuteType
    * :n_src:       number of src operands
    * :n_dst:       number of destination operands

    Builds a CompUnitRecord and re-exports its signals on this object
    under convenience names, so that they can be reached directly.
    """
    RegSpecALUAPI.__init__(self, rwid, alu)
    self.opsubsetkls = opsubsetkls
    self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)
    # the record decides the final operand counts: adopt them here
    n_src, n_dst = self.n_src, self.n_dst = cu._n_src, cu._n_dst
    print("n_src %d n_dst %d" % (self.n_src, self.n_dst))

    # convenience names for src operands
    for i in range(n_src):
        j = i + 1  # name numbering to match src1/src2
        name = "src%d_i" % j
        setattr(self, name, getattr(cu, name))

    # convenience names for dest operands
    for i in range(n_dst):
        j = i + 1  # name numbering to match dest1/2...
        name = "dest%d_o" % j
        setattr(self, name, getattr(cu, name))

    # more convenience names.  rd/wr must be bound before the
    # "temporary naming" aliases below, which read self.rd / self.wr
    self.rd = cu.rd
    self.wr = cu.wr
    self.rdmaskn = cu.rdmaskn
    self.wrmask = cu.wrmask
    self.go_rd_i = self.rd.go  # temporary naming
    self.go_wr_i = self.wr.go  # temporary naming
    self.rd_rel_o = self.rd.rel  # temporary naming
    self.req_rel_o = self.wr.rel  # temporary naming
    self.issue_i = cu.issue_i
    self.shadown_i = cu.shadown_i
    self.go_die_i = cu.go_die_i

    # operation / data input
    self.oper_i = cu.oper_i
    self.src_i = cu._src_i

    self.busy_o = cu.busy_o
    # NOTE(review): assumes CompUnitRecord keeps its destination operand
    # list in "_dest" (by analogy with "_src_i") - confirm
    self.dest = cu._dest
    self.data_o = self.dest[0]  # Dest out
    self.done_o = cu.done_o
def _mux_op(self, m, sl, op_is_imm, imm, i):
    """Multiplex an immediate into source operand *i*.

    :m:         the Module to which the combinatorial statements are added
    :sl:        list of per-operand entries; elaborate() builds each one as
                [source, alu-source, latch, rd.rel gate condition]
    :op_is_imm: condition selecting the immediate instead of the register
    :imm:       the immediate value
    :i:         index of the source operand to override

    Entry sl[i] is modified in place; nothing is returned.
    """
    # select imm if opcode says so.  however also change the latch
    # to trigger *from* the opcode latch instead.
    src_or_imm = Signal(self.cu._get_srcwid(i), reset_less=True)
    src_sel = Signal(reset_less=True)
    m.d.comb += src_sel.eq(Mux(op_is_imm, self.opc_l.q, self.src_l.q[i]))
    m.d.comb += src_or_imm.eq(Mux(op_is_imm, imm, self.src_i[i]))
    # overwrite 1st src-latch with immediate-muxed stuff
    sl[i][0] = src_or_imm
    # the latch slot must use src_sel, otherwise the operand register would
    # still be triggered from the src latch and src_sel would be unused
    sl[i][2] = src_sel
    sl[i][3] = ~op_is_imm  # change rd.rel[i] gate condition
def elaborate(self, platform):
    """Build the hardware for the Computation Unit and return the Module.

    :platform: passed in by the framework; not used here

    Creates the SR latches (src, opc, req, rst, rdok, alui, alu), the
    operation/operand/result latch-registers, the write-mask and the
    busy / rd.rel / wr.rel / done outputs, and wires up the ALU.
    """
    m = Module()
    m.submodules.alu = self.alu
    m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
    m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
    m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
    m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
    m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")
    # _mux_op needs access to these two latches
    self.opc_l, self.src_l = opc_l, src_l

    # ALU only proceeds when all src are ready.  rd_rel_o is delayed
    # so combine it with go_rd_i.  if all bits are set we're good
    all_rd = Signal(reset_less=True)
    m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
                          (((~self.rd.rel) | self.rd.go).all()))

    # generate read-done pulse
    all_rd_dly = Signal(reset_less=True)
    all_rd_pulse = Signal(reset_less=True)
    m.d.sync += all_rd_dly.eq(all_rd)
    m.d.comb += all_rd_pulse.eq(all_rd & ~all_rd_dly)

    # create rising pulse from alu valid condition.
    alu_done = Signal(reset_less=True)
    alu_done_dly = Signal(reset_less=True)
    alu_pulse = Signal(reset_less=True)
    alu_pulsem = Signal(self.n_dst, reset_less=True)
    m.d.comb += alu_done.eq(self.alu.n.valid_o)
    m.d.sync += alu_done_dly.eq(alu_done)
    m.d.comb += alu_pulse.eq(alu_done & ~alu_done_dly)
    m.d.comb += alu_pulsem.eq(Repl(alu_pulse, self.n_dst))

    # sigh bug where req_l gets both set and reset raised at same time
    prev_wr_go = Signal(self.n_dst)
    brd = Repl(self.busy_o, self.n_dst)
    m.d.sync += prev_wr_go.eq(self.wr.go & brd)

    # write_requests all done
    # req_done works because any one of the last of the writes
    # is enough, when combined with when read-phase is done (rst_l.q)
    wr_any = Signal(reset_less=True)
    req_done = Signal(reset_less=True)
    m.d.comb += self.done_o.eq(self.busy_o &
                               ~((self.wr.rel & ~self.wrmask).bool()))
    m.d.comb += wr_any.eq(self.wr.go.bool() | prev_wr_go.bool())
    m.d.comb += req_done.eq(wr_any & ~self.alu.n.ready_i &
                            ((req_l.q & self.wrmask) == 0))
    # argh, complicated hack: if there are no regs to write,
    # instead of waiting for regs that are never going to happen,
    # we indicate "done" when the ALU is "done"
    with m.If((self.wrmask == 0) &
              self.alu.n.ready_i & self.alu.n.valid_o & self.busy_o):
        m.d.comb += req_done.eq(1)

    # reset conditions: go_die_i is OR-ed into every one of them
    reset = Signal(reset_less=True)
    rst_r = Signal(reset_less=True)  # reset latch off
    reset_w = Signal(self.n_dst, reset_less=True)
    reset_r = Signal(self.n_src, reset_less=True)
    m.d.comb += reset.eq(req_done | self.go_die_i)
    m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
    m.d.comb += reset_w.eq(self.wr.go |
                           Repl(self.go_die_i, self.n_dst))
    m.d.comb += reset_r.eq(self.rd.go |
                           Repl(self.go_die_i, self.n_src))

    # read-done,wr-proceed latch
    m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
    m.d.sync += rok_l.r.eq(self.alu.n.valid_o & self.busy_o)  # ALU done

    # wr-done, back-to-start latch
    m.d.comb += rst_l.s.eq(all_rd)  # set when read-phase is fully done
    m.d.comb += rst_l.r.eq(rst_r)  # *off* on issue

    # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
    m.d.sync += opc_l.s.eq(self.issue_i)  # set on issue
    m.d.sync += opc_l.r.eq(req_done)  # reset on ALU

    # src operand latch (not using go_wr_i)
    m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
    m.d.sync += src_l.r.eq(reset_r)

    # dest operand latch (not using issue_i)
    m.d.comb += req_l.s.eq(alu_pulsem & self.wrmask)
    m.d.comb += req_l.r.eq(reset_w | prev_wr_go)

    # create a latch/register for the operand
    oper_r = self.opsubsetkls(name="oper_r")
    latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_l")

    # and for each output from the ALU: capture when ALU output is valid
    drl = []   # latched result, one per destination
    wrok = []  # "this output is valid" bit, one per destination
    for i in range(self.n_dst):
        name = "data_r%d" % i
        lro = self.get_out(i)
        ok = Const(1, 1)  # outputs with no "_ok" field are always written
        if isinstance(lro, Record):
            data_r = Record.like(lro, name=name)
            print("wr fields", i, lro, data_r.fields)
            # bye-bye abstract interface design..
            fname = find_ok(data_r.fields)
            # NOTE(review): assumes the "_ok" field of the latched record
            # is the write-enable for this output - confirm
            if fname:
                ok = data_r[fname]
        else:
            data_r = Signal.like(lro, name=name, reset_less=True)
        wrok.append(ok)
        latchregister(m, lro, data_r, alu_pulsem, name + "_l")
        drl.append(data_r)

    # ok, above we collated anything with an "ok" on the output side
    # now actually use those to create a write-mask.  this basically
    # is how the Function Unit API tells the Comp Unit "do not request
    # a regfile port because this particular output is not valid"
    m.d.comb += self.wrmask.eq(Cat(*wrok))

    # pass the operation to the ALU
    m.d.comb += self.get_op().eq(oper_r)

    # create list of src/alu-src/src-latch.  override 1st and 2nd one below.
    # in the case, for ALU and Logical pipelines, we assume RB is the
    # 2nd operand in the input "regspec".  see for example
    # soc.fu.alu.pipe_data.ALUInputData
    sl = []
    print("src_i", self.src_i)
    for i in range(self.n_src):
        sl.append([self.src_i[i], self.get_in(i), src_l.q[i], Const(1, 1)])

    # if the operand subset has "zero_a" we implicitly assume that means
    # src_i[0] is an INT reg type where zero can be multiplexed in, instead.
    # see https://bugs.libre-soc.org/show_bug.cgi?id=336
    if hasattr(oper_r, "zero_a"):
        # select zero imm if opcode says so.  however also change the latch
        # to trigger *from* the opcode latch instead.
        self._mux_op(m, sl, oper_r.zero_a, 0, 0)

    # if the operand subset has "imm_data" we implicitly assume that means
    # "this is an INT ALU/Logical FU jobbie, RB is muxed with the immediate"
    if hasattr(oper_r, "imm_data"):
        # select immediate if opcode says so.  however also change the latch
        # to trigger *from* the opcode latch instead.
        op_is_imm = oper_r.imm_data.imm_ok
        imm = oper_r.imm_data.imm
        self._mux_op(m, sl, op_is_imm, imm, 1)

    # create a latch/register for src1/src2 (even if it is a copy of imm)
    for i in range(self.n_src):
        src, alusrc, latch, _ = sl[i]
        latchregister(m, src, alusrc, latch, name="src_r%d" % i)

    # -----
    # ALU connection / interaction
    # -----

    # on a go_read, tell the ALU we're accepting data.
    m.submodules.alui_l = alui_l = SRLatch(False, name="alui")
    m.d.comb += self.alu.p.valid_i.eq(alui_l.q)
    m.d.sync += alui_l.r.eq(self.alu.p.ready_o & alui_l.q)
    m.d.comb += alui_l.s.eq(all_rd_pulse)

    # ALU output "ready" side.  alu "ready" indication stays hi until
    # the ALU raises valid_o, which resets the latch.
    m.submodules.alu_l = alu_l = SRLatch(False, name="alu")
    m.d.comb += self.alu.n.ready_i.eq(alu_l.q)
    m.d.sync += alu_l.r.eq(self.alu.n.valid_o & alu_l.q)
    m.d.comb += alu_l.s.eq(all_rd_pulse)

    # -----
    # outputs
    # -----

    slg = Cat(*map(lambda x: x[3], sl))  # get req gate conditions
    # all request signals gated by busy_o.  prevents picker problems
    m.d.comb += self.busy_o.eq(opc_l.q)  # busy out

    # read-release gated by busy (and read-mask)
    bro = Repl(self.busy_o, self.n_src)
    m.d.comb += self.rd.rel.eq(src_l.q & bro & slg & ~self.rdmaskn)

    # write-release gated by busy and by shadow (and write-mask)
    brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
    m.d.comb += self.wr.rel.eq(req_l.q & brd & self.wrmask)

    # output the data from the latch on go_write
    for i in range(self.n_dst):
        with m.If(self.wr.go[i]):
            m.d.comb += self.dest[i].eq(drl[i])

    return m
359 yield from self
.oper_i
.ports()