Allow the formal engine to perform a same-cycle result in the ALU
[soc.git] / src / soc / experiment / compldst.py
1 """ LOAD / STORE Computation Unit. Also capable of doing ADD and ADD immediate
2
3 This module runs a "revolving door" set of four latches, based on
4 * Issue
5 * Go_Read
6 * Go_Addr
7 * Go_Write *OR* Go_Store
8
9 (Note that opc_l has been inverted (and qn used), due to SRLatch
10 default reset state being "0" rather than "1")
11
12 Also note: the LD/ST Comp Unit can act as a *standard ALU* doing
13 add and subtract.
14
15 Stores are activated when Go_Store is enabled, and use the ALU
16 to add the immediate (imm_i) to the address (src1_i), and then
17 when ready (go_st_i and the ALU ready) the operand (src2_i) is stored
18 in the computed address.
19 """
20
21 from nmigen.compat.sim import run_simulation
22 from nmigen.cli import verilog, rtlil
23 from nmigen import Module, Signal, Mux, Cat, Elaboratable
24
25 from nmutil.latch import SRLatch, latchregister
26
27 from testmem import TestMemory
28
# internal opcodes.  hypothetically this could do more combinations.
# meanings (bit positions match the BITn_* constants below):
# * bit 0: 0 = ADD , 1 = SUB
# * bit 1: 0 = src2, 1 = IMM  (selects the ALU's second operand)
# * bit 2: 1 = ST
# * bit 3: 1 = LD
BIT0_ADD = 0
BIT1_SRC = 1
BIT2_ST = 2
BIT3_LD = 3
# convenience thingies.
LDST_OP_ADD = 0b0000   # plain ADD (src1 + src2) - use this ALU as an ADD
LDST_OP_SUB = 0b0001   # plain SUB (src1 - src2) - use this ALU as a SUB
LDST_OP_ADDI = 0b0010  # immed ADD (imm + src1)
LDST_OP_SUBI = 0b0011  # immed SUB (imm - src1)
LDST_OP_ST = 0b0110    # immed ADD plus ST op (bit 2).  ADD result is address
LDST_OP_LD = 0b1010    # immed ADD plus LD op (bit 3).  ADD result is address
46
47
48
class LDSTCompUnit(Elaboratable):
    """ LOAD / STORE / ADD / SUB Computation Unit

    Inputs
    ------

    * :rwid:   register width
    * :opwid:  opcode width (width of oper_i)
    * :alu:    an ALU module
    * :mem:    a Memory Module (read-write capable)

    Control Signals (In)
    --------------------

    * :oper_i:     operation being carried out (LDST_OP_ADD, LDST_OP_LD)
    * :issue_i:    LD/ST is being "issued".
    * :isalu_i:    ADD/SUB is being "issued" (aka issue_alu_i)
    * :shadown_i:  Inverted-shadow is being held (stops STORE *and* WRITE)
    * :go_rd_i:    read is being actioned (latches in src regs)
    * :go_wr_i:    write mode (exactly like ALU CompUnit)
    * :go_ad_i:    address is being actioned (triggers actual mem LD)
    * :go_st_i:    store is being actioned (triggers actual mem STORE)
    * :go_die_i:   resets the unit back to "wait for issue"

    Control Signals (Out)
    ---------------------

    * :busy_o:      function unit is busy
    * :rd_rel_o:    request src1/src2
    * :adr_rel_o:   request address (from mem)
    * :sto_rel_o:   request store (to mem)
    * :req_rel_o:   request write (result)
    * :done_o:      final release signal (req_rel_o for plain ALU ops,
                    adr_rel_o for LD/ST)
    * :load_mem_o:  activate memory LOAD
    * :stwd_mem_o:  activate memory STORE
    * :ld_o:        (latched) operation is a LD
    * :st_o:        (latched) operation is a ST

    Note: load_mem_o, stwd_mem_o and req_rel_o MUST all be acknowledged
    in a single cycle and the CompUnit set back to doing another op.
    This means deasserting go_st_i, go_ad_i or go_wr_i as appropriate
    depending on whether the operation is a STORE, LD, or a straight
    ALU operation respectively.

    Control Data (out)
    ------------------
    * :data_o:     Dest out (LD or ALU)
    * :addr_o:     Address out (LD or ST)
    """
    def __init__(self, rwid, opwid, alu, mem):
        """Record the configuration and create all interface Signals.

        No logic is built here; that happens in elaborate().
        """
        self.opwid = opwid
        self.rwid = rwid
        self.alu = alu
        self.mem = mem

        self.counter = Signal(4)
        self.go_rd_i = Signal(reset_less=True)  # go read in
        self.go_ad_i = Signal(reset_less=True)  # go address in
        self.go_wr_i = Signal(reset_less=True)  # go write in
        self.go_st_i = Signal(reset_less=True)  # go store in
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.isalu_i = Signal(reset_less=True)  # fn issue as ALU in
        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
        self.go_die_i = Signal()  # go die (reset)

        self.oper_i = Signal(opwid, reset_less=True)  # opcode in
        self.imm_i = Signal(rwid, reset_less=True)  # immediate in
        self.src1_i = Signal(rwid, reset_less=True)  # oper1 in
        self.src2_i = Signal(rwid, reset_less=True)  # oper2 in

        self.busy_o = Signal(reset_less=True)  # fn busy out
        self.rd_rel_o = Signal(reset_less=True)  # request src1/src2
        self.adr_rel_o = Signal(reset_less=True)  # request address (from mem)
        self.sto_rel_o = Signal(reset_less=True)  # request store (to mem)
        self.req_rel_o = Signal(reset_less=True)  # request write (result)
        self.done_o = Signal(reset_less=True)  # final release signal
        self.data_o = Signal(rwid, reset_less=True)  # Dest out (LD or ALU)
        self.addr_o = Signal(rwid, reset_less=True)  # Address out (LD or ST)

        # hmm... TODO... move these to outside of LDSTCompUnit?
        self.load_mem_o = Signal(reset_less=True)  # activate memory LOAD
        self.stwd_mem_o = Signal(reset_less=True)  # activate memory STORE
        self.ld_o = Signal(reset_less=True)  # operation is a LD
        self.st_o = Signal(reset_less=True)  # operation is a ST

    def elaborate(self, platform):
        """Build and return the Module implementing the unit.

        The ALU is registered as a submodule here.  The memory is *not*
        (the line is commented out): only its wrport / rdport are driven,
        so whoever instantiates this unit must add the memory to the
        design themselves.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.alu = self.alu
        #m.submodules.mem = self.mem
        m.submodules.src_l = src_l = SRLatch(sync=False, name="src")
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.adr_l = adr_l = SRLatch(sync=False, name="adr")
        m.submodules.req_l = req_l = SRLatch(sync=False, name="req")
        m.submodules.sto_l = sto_l = SRLatch(sync=False, name="sto")

        # shadow/go_die
        reset_b = Signal(reset_less=True)
        reset_w = Signal(reset_less=True)
        reset_a = Signal(reset_less=True)
        reset_s = Signal(reset_less=True)
        reset_r = Signal(reset_less=True)
        comb += reset_b.eq(self.go_st_i|self.go_wr_i|self.go_ad_i|self.go_die_i)
        comb += reset_w.eq(self.go_wr_i | self.go_die_i)
        comb += reset_s.eq(self.go_st_i | self.go_die_i)
        comb += reset_r.eq(self.go_rd_i | self.go_die_i)
        # this one is slightly different, issue_alu_i selects go_wr_i)
        a_sel = Mux(self.isalu_i, self.go_wr_i, self.go_ad_i)
        comb += reset_a.eq(a_sel| self.go_die_i)

        # opcode decode
        op_alu = Signal(reset_less=True)
        op_is_ld = Signal(reset_less=True)
        op_is_st = Signal(reset_less=True)
        op_ldst = Signal(reset_less=True)
        op_is_imm = Signal(reset_less=True)

        # src2 register
        src2_r = Signal(self.rwid, reset_less=True)

        # select immediate or src2 reg to add
        src2_or_imm = Signal(self.rwid, reset_less=True)
        src_sel = Signal(reset_less=True)

        # issue can be either issue_i or issue_alu_i (isalu_i)
        issue_i = Signal(reset_less=True)
        comb += issue_i.eq(self.issue_i | self.isalu_i)

        # Ripple-down the latches, each one set cancels the previous.
        # NOTE: use sync to stop combinatorial loops.

        # opcode latch - inverted so that busy resets to 0
        sync += opc_l.s.eq(issue_i)  # XXX NOTE: INVERTED FROM book!
        sync += opc_l.r.eq(reset_b)  # XXX NOTE: INVERTED FROM book!

        # src operand latch
        sync += src_l.s.eq(issue_i)
        sync += src_l.r.eq(reset_r)

        # addr latch
        sync += adr_l.s.eq(self.go_rd_i)
        sync += adr_l.r.eq(reset_a)

        # dest operand latch
        sync += req_l.s.eq(self.go_ad_i|self.go_st_i|self.go_wr_i)
        sync += req_l.r.eq(reset_w)

        # store latch
        sync += sto_l.s.eq(self.go_rd_i)  # XXX not sure which
        sync += sto_l.r.eq(reset_s)

        # outputs: busy and release signals
        busy_o = self.busy_o
        comb += self.busy_o.eq(opc_l.q)  # busy out
        comb += self.rd_rel_o.eq(src_l.q & busy_o)  # src1/src2 req rel
        comb += self.sto_rel_o.eq(sto_l.q & busy_o & self.shadown_i & op_is_st)

        # request release enabled based on if op is a LD/ST or a plain ALU
        # if op is an ADD/SUB or a LD, req_rel activates.
        wr_q = Signal(reset_less=True)
        comb += wr_q.eq(req_l.q & (~op_ldst | op_is_ld))

        # ALU output is captured on adr_rel_o for LD/ST, on req_rel_o otherwise
        alulatch = Signal(reset_less=True)
        comb += alulatch.eq((op_ldst & self.adr_rel_o) |
                            (~op_ldst & self.req_rel_o))

        # select immediate if opcode says so.  however also change the latch
        # to trigger *from* the opcode latch instead.
        comb += src_sel.eq(Mux(op_is_imm, opc_l.qn, src_l.q))
        comb += src2_or_imm.eq(Mux(op_is_imm, self.imm_i, self.src2_i))

        # create a latch/register for src1/src2 (include immediate select)
        latchregister(m, self.src1_i, self.alu.a, src_l.q, name="src1_r")
        latchregister(m, self.src2_i, src2_r, src_l.q, name="src2_r")
        latchregister(m, src2_or_imm, self.alu.b, src_sel, name="imm_r")

        # create a latch/register for the operand
        # NOTE(review): this latches on self.issue_i only, not on the
        # combined issue_i (issue_i | isalu_i) that sets opc_l / src_l
        # above - confirm that is intended when issuing via isalu_i.
        oper_r = Signal(self.opwid, reset_less=True)  # Dest register
        latchregister(m, self.oper_i, oper_r, self.issue_i, name="operi_r")
        alu_op = Cat(op_alu, 0, op_is_imm)  # using alu_hier, here.
        comb += self.alu.op.eq(alu_op)

        # and one for the output from the ALU
        data_r = Signal(self.rwid, reset_less=True)  # Dest register
        latchregister(m, self.alu.o, data_r, alulatch, "aluo_r")

        # decode bits of operand (latched)
        comb += op_alu.eq(oper_r[BIT0_ADD])  # ADD/SUB
        comb += op_is_imm.eq(oper_r[BIT1_SRC])  # IMMED/reg
        comb += op_is_st.eq(oper_r[BIT2_ST])  # OP is ST
        comb += op_is_ld.eq(oper_r[BIT3_LD])  # OP is LD
        comb += op_ldst.eq(op_is_ld | op_is_st)
        comb += self.load_mem_o.eq(op_is_ld & self.go_ad_i)
        comb += self.stwd_mem_o.eq(op_is_st & self.go_st_i)
        comb += self.ld_o.eq(op_is_ld)
        comb += self.st_o.eq(op_is_st)

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(self.go_rd_i):  # src operands ready, GO!
            with m.If(~self.alu.p_ready_o):  # no ACK yet
                m.d.comb += self.alu.p_valid_i.eq(1)  # so indicate valid

        # only proceed if ALU says its output is valid
        with m.If(self.alu.n_valid_o):
            # write req release out.  waits until shadow is dropped.
            comb += self.req_rel_o.eq(wr_q & busy_o & self.shadown_i)
            # address release only happens on LD/ST, and is shadowed.
            comb += self.adr_rel_o.eq(adr_l.q & op_ldst & busy_o &
                                      self.shadown_i)
            # when output latch is ready, and ALU says ready, accept ALU output
            with m.If(self.req_rel_o):
                m.d.comb += self.alu.n_ready_i.eq(1)  # tells ALU "thanks got it"

        # provide "done" signal: select req_rel for non-LD/ST, adr_rel for LD/ST
        comb += self.done_o.eq((self.req_rel_o & ~op_ldst) |
                               (self.adr_rel_o & op_ldst))

        # put the register directly onto the output bus on a go_write
        # this is "ALU mode".  go_wr_i *must* be deasserted on next clock
        with m.If(self.go_wr_i):
            comb += self.data_o.eq(data_r)

        # "LD/ST" mode: put the register directly onto the *address* bus
        with m.If(self.go_ad_i | self.go_st_i):
            comb += self.addr_o.eq(data_r)

        # TODO: think about moving these to another module

        # connect ST to memory.  NOTE: unit *must* be set back
        # to start again by dropping go_st_i on next clock
        with m.If(self.stwd_mem_o):
            wrport = self.mem.wrport
            comb += wrport.addr.eq(self.addr_o)
            comb += wrport.data.eq(src2_r)
            comb += wrport.en.eq(1)

        # connect LD to memory.  NOTE: unit *must* be set back
        # to start again by dropping go_ad_i on next clock
        # (this data_o assignment comes after the go_wr_i one above)
        with m.If(self.load_mem_o):
            rdport = self.mem.rdport
            comb += rdport.addr.eq(self.addr_o)
            comb += self.data_o.eq(rdport.data)
            # comb += rdport.en.eq(1) # only when transparent=False

        return m

    def __iter__(self):
        """Yield the unit's interface Signals (used by ports()).

        The first twenty entries keep their historical order; the
        remaining declared outputs (done_o, addr_o, ld_o, st_o) are
        appended so that they are exposed as ports as well.
        """
        yield self.go_rd_i
        yield self.go_ad_i
        yield self.go_wr_i
        yield self.go_st_i
        yield self.issue_i
        yield self.isalu_i
        yield self.shadown_i
        yield self.go_die_i
        yield self.oper_i
        yield self.imm_i
        yield self.src1_i
        yield self.src2_i
        yield self.busy_o
        yield self.rd_rel_o
        yield self.adr_rel_o
        yield self.sto_rel_o
        yield self.req_rel_o
        yield self.data_o
        yield self.load_mem_o
        yield self.stwd_mem_o
        # outputs that were previously declared but never exported
        yield self.done_o
        yield self.addr_o
        yield self.ld_o
        yield self.st_o

    def ports(self):
        """Return the interface Signals as a list (see __iter__)."""
        return list(self)
319
def wait_for(sig):
    """Simulation helper: step until *sig* reads as true.

    The signal is sampled (and printed) once up-front, but that first
    sample is never tested: at least one bare ``yield`` is always
    performed before the value is checked.  Every later sample is
    printed as well.
    """
    value = yield sig
    print("wait for", sig, value)
    asserted = False
    while not asserted:
        yield               # step the simulation
        value = yield sig   # re-sample
        print(value)
        asserted = bool(value)
329
def store(dut, src1, src2, imm):
    """Simulation process: drive one STORE through the unit.

    Presents LDST_OP_ST with the given operands, pulses issue_i, then
    walks the go_rd_i -> adr_rel_o -> go_st_i -> sto_rel_o handshake.

    :dut:  the LDSTCompUnit under test
    :src1: value driven onto src1_i
    :src2: value driven onto src2_i
    :imm:  value driven onto imm_i
    """
    # present opcode + operands and pulse issue
    yield dut.oper_i.eq(LDST_OP_ST)
    yield dut.src1_i.eq(src1)
    yield dut.src2_i.eq(src2)
    yield dut.imm_i.eq(imm)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # read phase
    yield dut.go_rd_i.eq(1)
    yield from wait_for(dut.rd_rel_o)
    yield dut.go_rd_i.eq(0)

    # address ready -> action the store
    yield from wait_for(dut.adr_rel_o)
    yield dut.go_st_i.eq(1)
    yield from wait_for(dut.sto_rel_o)
    # NOTE: there is deliberately no wait on stwd_mem_o here.  A bare
    # wait_for(dut.stwd_mem_o) call used to sit at this point, but
    # without "yield from" it only created an unused generator and
    # never waited, so it has been dropped (timing is unchanged).
    #wait_for(dut.stwd_mem_o)
    yield dut.go_st_i.eq(0)
    yield
348
349
def load(dut, src1, src2, imm):
    """Simulation process: drive one LOAD through the unit.

    Presents LDST_OP_LD with the given operands, pulses issue_i, walks
    the go_rd_i -> adr_rel_o -> go_ad_i handshake, and returns the value
    sampled from data_o.
    """
    # present opcode + operands, then raise issue
    stimulus = (
        (dut.oper_i, LDST_OP_LD),
        (dut.src1_i, src1),
        (dut.src2_i, src2),
        (dut.imm_i, imm),
        (dut.issue_i, 1),
    )
    for signal, value in stimulus:
        yield signal.eq(value)
    yield
    yield dut.issue_i.eq(0)
    yield

    # read phase
    yield dut.go_rd_i.eq(1)
    yield from wait_for(dut.rd_rel_o)
    yield dut.go_rd_i.eq(0)

    # address ready -> action the load
    yield from wait_for(dut.adr_rel_o)
    yield dut.go_ad_i.eq(1)
    yield from wait_for(dut.busy_o)
    yield

    # sample the result before dropping go_ad_i
    loaded = yield dut.data_o
    yield dut.go_ad_i.eq(0)
    #wait_for(dut.stwd_mem_o)
    return loaded
370
371
def add(dut, src1, src2, imm, imm_mode = False):
    """Simulation process: use the unit as a plain adder.

    Issues LDST_OP_ADDI when *imm_mode* is true, otherwise LDST_OP_ADD,
    walks the go_rd_i -> req_rel_o -> go_wr_i handshake, and returns the
    value sampled from data_o.
    """
    if imm_mode:
        opcode = LDST_OP_ADDI
    else:
        opcode = LDST_OP_ADD

    # present opcode + operands, then raise issue
    stimulus = (
        (dut.oper_i, opcode),
        (dut.src1_i, src1),
        (dut.src2_i, src2),
        (dut.imm_i, imm),
        (dut.issue_i, 1),
    )
    for signal, value in stimulus:
        yield signal.eq(value)
    yield
    yield dut.issue_i.eq(0)
    yield

    # read phase
    yield dut.go_rd_i.eq(1)
    yield from wait_for(dut.rd_rel_o)
    yield dut.go_rd_i.eq(0)

    # result ready -> action the write
    yield from wait_for(dut.req_rel_o)
    yield dut.go_wr_i.eq(1)
    yield from wait_for(dut.busy_o)
    yield

    # sample the result before dropping go_wr_i
    total = yield dut.data_o
    yield dut.go_wr_i.eq(0)
    yield
    #wait_for(dut.stwd_mem_o)
    return total
393
def scoreboard_sim(dut):
    """Top-level simulation process: two stores, two loads, two adds.

    Checks that each load returns what was stored with the same
    src1/imm pair, then checks a register add and an immediate add.
    """
    # two STs (different addresses): (src1, src2, imm)
    for base, payload, offset in ((4, 3, 2), (2, 9, 2)):
        yield from store(dut, base, payload, offset)
    yield

    # two LDs (deliberately LD from the 1st address then 2nd)
    for base, offset, expected in ((4, 2, 0x0003), (2, 2, 0x0009)):
        data = yield from load(dut, base, 0, offset)
        assert data == expected
    yield

    # now do an add
    total = yield from add(dut, 4, 3, 0xfeed)
    assert total == 0x7

    # and an add-immediate
    total = yield from add(dut, 4, 0xdeef, 2, imm_mode=True)
    assert total == 0x6
413
414
class TestLDSTCompUnit(LDSTCompUnit):
    """LDSTCompUnit pre-wired with an alu_hier ALU and a TestMemory.

    Unlike the base class, elaborate() also registers the memory as a
    submodule, so the unit can be simulated standalone.
    """

    def __init__(self, rwid, opwid):
        from alu_hier import ALU
        test_alu = ALU(rwid)
        test_mem = TestMemory(rwid, 8)
        # the base constructor stores both as self.alu / self.mem
        super().__init__(rwid, opwid, test_alu, test_mem)

    def elaborate(self, platform):
        module = super().elaborate(platform)
        module.submodules.mem = self.mem
        return module
427
428
def test_scoreboard():
    """Dump the test unit as RTLIL, then run the scoreboard simulation.

    Writes test_ldst_comp.il, and asks the simulator for a
    test_ldst_comp.vcd trace.
    """
    dut = TestLDSTCompUnit(16, 4)

    # RTLIL dump of the design
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_ldst_comp.il", "w") as il_file:
        il_file.write(il_text)

    # simulate
    run_simulation(dut, scoreboard_sim(dut), vcd_name='test_ldst_comp.vcd')


if __name__ == '__main__':
    test_scoreboard()
438 if __name__ == '__main__':
439 test_scoreboard()