1 """LOAD / STORE Computation Unit.
3 This module covers POWER9-compliant Load and Store operations,
4 with selection on each between immediate and indexed mode as
5 options for the calculation of the Effective Address (EA),
6 and also "update" mode which optionally stores that EA into
7 an additional register.
10 Note: it took 15 attempts over several weeks to redraw the diagram
11 needed to capture this FSM properly. To understand it fully, please
12 take the time to review the links, video, and diagram.
Stores are activated when Go_Store is enabled, and use a sync'd "ADD" to
compute the "Effective Address"; when ready, the operand (src3_i)
is stored at the computed address (passed through to the PortInterface).
19 Loads are activated when Go_Write[0] is enabled. The EA is computed,
20 and (as long as there was no exception) the data comes out (at any
21 time from the PortInterface), and is captured by the LDCompSTUnit.
23 TODO: dcbz, yes, that's going to be complicated, has to be done
24 with great care, to detect the case when dcbz is set
25 and *not* expect to read any data, just the address.
26 so, wait for RA but not RB.
28 Both LD and ST may request that the address be computed from summing
29 operand1 (src[0]) with operand2 (src[1]) *or* by summing operand1 with
30 the immediate (from the opcode).
32 Both LD and ST may also request "update" mode (op_is_update) which
33 activates the use of Go_Write[1] to control storage of the EA into
34 a *second* operand in the register file.
36 Thus this module has *TWO* write-requests to the register file and
37 *THREE* read-requests to the register file (not all at the same time!)
38 The regfile port usage is:
50 It's a multi-level Finite State Machine that (unfortunately) nmigen.FSM
51 is not suited to (nmigen.FSM is clock-driven, and some aspects of
52 the nested FSMs below are *combinatorial*).
54 * One FSM covers Operand collection and communication address-side
55 with the LD/ST PortInterface. its role ends when "RD_DONE" is asserted
57 * A second FSM activates to cover LD. it activates if op_is_ld is true
59 * A third FSM activates to cover ST. it activates if op_is_st is true
61 * The "overall" (fourth) FSM coordinates the progression and completion
62 of the three other FSMs, firing "WR_RESET" which switches off "busy"
66 https://libre-soc.org/3d_gpu/ld_st_comp_unit.jpg
68 Links including to walk-through videos:
70 * https://libre-soc.org/3d_gpu/architecture/6600scoreboard/
71 * http://libre-soc.org/openpower/isa/fixedload
72 * http://libre-soc.org/openpower/isa/fixedstore
76 * https://bugs.libre-soc.org/show_bug.cgi?id=302
77 * https://bugs.libre-soc.org/show_bug.cgi?id=216
81 * EA - Effective Address
86 from nmigen
.compat
.sim
import run_simulation
87 from nmigen
.cli
import verilog
, rtlil
88 from nmigen
import Module
, Signal
, Mux
, Cat
, Elaboratable
, Array
, Repl
89 from nmigen
.hdl
.rec
import Record
, Layout
91 from nmutil
.latch
import SRLatch
, latchregister
92 from nmutil
.byterev
import byte_reverse
93 from nmutil
.extend
import exts
95 from soc
.experiment
.compalu_multi
import go_record
, CompUnitRecord
96 from soc
.experiment
.l0_cache
import PortInterface
97 from soc
.experiment
.pimem
import LDSTException
98 from soc
.fu
.regspec
import RegSpecAPI
100 from openpower
.decoder
.power_enums
import MicrOp
, Function
, LDSTMode
101 from soc
.fu
.ldst
.ldst_input_record
import CompLDSTOpSubset
102 from openpower
.decoder
.power_decoder2
import Data
103 from openpower
.consts
import MSR
104 from soc
.config
.test
.test_loadstore
import TestMemPspec
107 from nmutil
.util
import Display
110 # TODO: LDSTInputData and LDSTOutputData really should be used
111 # here, to make things more like the other CompUnits. currently,
112 # also, RegSpecAPI is used explicitly here
class LDSTCompUnitRecord(CompUnitRecord):
    """Record of control/status signals for the LD/ST Computation Unit.

    Builds on CompUnitRecord (set up here with three source and two
    destination operands) and adds the LD/ST-specific go/req records,
    an exception record, and single-bit LD/ST indicator signals.

    * :rwid:     passed straight through to CompUnitRecord
    * :opsubset: operation subset (defaults to CompLDSTOpSubset)
    * :name:     optional name, passed straight through to CompUnitRecord
    """

    def __init__(self, rwid, opsubset=CompLDSTOpSubset, name=None):
        super().__init__(opsubset, rwid, n_src=3, n_dst=2, name=name)

        # go/req pairs, one bit wide each
        # address: go in, req out
        self.ad = go_record(1, name="cu_ad")
        # store: go in, req out
        self.st = go_record(1, name="cu_st")

        # exception information record
        self.exc_o = LDSTException("exc_o")

        # operation-type indicators
        # operation is a LD
        self.ld_o = Signal(reset_less=True)
        # operation is a ST
        self.st_o = Signal(reset_less=True)

        # hmm... are these necessary?
        # activate memory LOAD
        self.load_mem_o = Signal(reset_less=True)
        # activate memory STORE
        self.stwd_mem_o = Signal(reset_less=True)
133 class LDSTCompUnit(RegSpecAPI
, Elaboratable
):
134 """LOAD / STORE Computation Unit
139 * :pi: a PortInterface to the memory subsystem (read-write capable)
140 * :rwid: register width
141 * :awid: address width
    * :src_i:      Source Operands (RA/RB/RC) - managed by rd[0-2] go/req
149 * :o_data: Dest out (LD) - managed by wr[0] go/req
150 * :addr_o: Address out (LD or ST) - managed by wr[1] go/req
151 * :exc_o: Address/Data Exception occurred. LD/ST must terminate
153 TODO: make exc_o a data-type rather than a single-bit signal
159 * :oper_i: operation being carried out (POWER9 decode LD/ST subset)
160 * :issue_i: LD/ST is being "issued".
161 * :shadown_i: Inverted-shadow is being held (stops STORE *and* WRITE)
162 * :go_rd_i: read is being actioned (latches in src regs)
163 * :go_wr_i: write mode (exactly like ALU CompUnit)
164 * :go_ad_i: address is being actioned (triggers actual mem LD)
165 * :go_st_i: store is being actioned (triggers actual mem STORE)
166 * :go_die_i: resets the unit back to "wait for issue"
168 Control Signals (Out)
169 ---------------------
171 * :busy_o: function unit is busy
172 * :rd_rel_o: request src1/src2
173 * :adr_rel_o: request address (from mem)
174 * :sto_rel_o: request store (to mem)
175 * :req_rel_o: request write (result)
176 * :load_mem_o: activate memory LOAD
177 * :stwd_mem_o: activate memory STORE
179 Note: load_mem_o, stwd_mem_o and req_rel_o MUST all be acknowledged
180 in a single cycle and the CompUnit set back to doing another op.
181 This means deasserting go_st_i, go_ad_i or go_wr_i as appropriate
182 depending on whether the operation is a ST or LD.
184 Note: LDSTCompUnit takes care of LE/BE normalisation:
185 * LD data is normalised after receipt from the PortInterface
186 * ST data is normalised *prior* to sending onto the PortInterface
187 TODO: use one module for the byte-reverse as it's quite expensive in gates
190 def __init__(self
, pi
=None, rwid
=64, awid
=48, opsubset
=CompLDSTOpSubset
,
191 debugtest
=False, name
=None):
192 super().__init
__(rwid
)
195 self
.cu
= cu
= LDSTCompUnitRecord(rwid
, opsubset
, name
=name
)
196 self
.debugtest
= debugtest
198 # POWER-compliant LD/ST has index and update: *fixed* number of ports
199 self
.n_src
= n_src
= 3 # RA, RB, RT/RS
200 self
.n_dst
= n_dst
= 2 # RA, RT/RS
202 # set up array of src and dest signals
203 for i
in range(n_src
):
204 j
= i
+ 1 # name numbering to match src1/src2
206 setattr(self
, name
, getattr(cu
, name
))
209 for i
in range(n_dst
):
210 j
= i
+ 1 # name numbering to match dest1/2...
211 name
= "dest%d_o" % j
212 setattr(self
, name
, getattr(cu
, name
))
217 self
.rdmaskn
= cu
.rdmaskn
218 self
.wrmask
= cu
.wrmask
223 # HACK: get data width from dest[0]. this is used across the board
224 # (it really shouldn't be)
225 self
.data_wid
= self
.dest
[0].shape()
227 self
.go_rd_i
= self
.rd
.go_i
# temporary naming
228 self
.go_wr_i
= self
.wr
.go_i
# temporary naming
229 self
.go_ad_i
= self
.ad
.go_i
# temp naming: go address in
230 self
.go_st_i
= self
.st
.go_i
# temp naming: go store in
232 self
.rd_rel_o
= self
.rd
.rel_o
# temporary naming
233 self
.req_rel_o
= self
.wr
.rel_o
# temporary naming
234 self
.adr_rel_o
= self
.ad
.rel_o
# request address (from mem)
235 self
.sto_rel_o
= self
.st
.rel_o
# request store (to mem)
237 self
.issue_i
= cu
.issue_i
238 self
.shadown_i
= cu
.shadown_i
239 self
.go_die_i
= cu
.go_die_i
241 self
.oper_i
= cu
.oper_i
242 self
.src_i
= cu
._src
_i
244 self
.o_data
= Data(self
.data_wid
, name
="o") # Dest1 out: RT
245 self
.addr_o
= Data(self
.data_wid
, name
="ea") # Addr out: Update => RA
246 self
.exc_o
= cu
.exc_o
247 self
.done_o
= cu
.done_o
248 self
.busy_o
= cu
.busy_o
253 self
.load_mem_o
= cu
.load_mem_o
254 self
.stwd_mem_o
= cu
.stwd_mem_o
256 def elaborate(self
, platform
):
262 issue_i
= self
.issue_i
264 #####################
265 # latches for the FSM.
266 m
.submodules
.opc_l
= opc_l
= SRLatch(sync
=False, name
="opc")
267 m
.submodules
.src_l
= src_l
= SRLatch(False, self
.n_src
, name
="src")
268 m
.submodules
.alu_l
= alu_l
= SRLatch(sync
=False, name
="alu")
269 m
.submodules
.adr_l
= adr_l
= SRLatch(sync
=False, name
="adr")
270 m
.submodules
.lod_l
= lod_l
= SRLatch(sync
=False, name
="lod")
271 m
.submodules
.sto_l
= sto_l
= SRLatch(sync
=False, name
="sto")
272 m
.submodules
.wri_l
= wri_l
= SRLatch(sync
=False, name
="wri")
273 m
.submodules
.upd_l
= upd_l
= SRLatch(sync
=False, name
="upd")
274 m
.submodules
.rst_l
= rst_l
= SRLatch(sync
=False, name
="rst")
275 m
.submodules
.lsd_l
= lsd_l
= SRLatch(sync
=False, name
="lsd") # done
281 op_is_ld
= Signal(reset_less
=True)
282 op_is_st
= Signal(reset_less
=True)
283 op_is_dcbz
= Signal(reset_less
=True)
285 # ALU/LD data output control
286 alu_valid
= Signal(reset_less
=True) # ALU operands are valid
287 alu_ok
= Signal(reset_less
=True) # ALU out ok (1 clock delay valid)
288 addr_ok
= Signal(reset_less
=True) # addr ok (from PortInterface)
289 ld_ok
= Signal(reset_less
=True) # LD out ok from PortInterface
290 wr_any
= Signal(reset_less
=True) # any write (incl. store)
291 rda_any
= Signal(reset_less
=True) # any read for address ops
292 rd_done
= Signal(reset_less
=True) # all *necessary* operands read
293 wr_reset
= Signal(reset_less
=True) # final reset condition
296 alu_o
= Signal(self
.data_wid
, reset_less
=True)
297 ldd_o
= Signal(self
.data_wid
, reset_less
=True)
299 ##############################
300 # reset conditions for latches
302 # temporaries (also convenient when debugging)
303 reset_o
= Signal(reset_less
=True) # reset opcode
304 reset_w
= Signal(reset_less
=True) # reset write
305 reset_u
= Signal(reset_less
=True) # reset update
306 reset_a
= Signal(reset_less
=True) # reset adr latch
307 reset_i
= Signal(reset_less
=True) # issue|die (use a lot)
308 reset_r
= Signal(self
.n_src
, reset_less
=True) # reset src
309 reset_s
= Signal(reset_less
=True) # reset store
311 # end execution when a terminating condition is detected:
312 # - go_die_i: a speculative operation was cancelled
313 # - exc_o.happened: an exception has occurred
315 comb
+= terminate
.eq(self
.go_die_i | self
.exc_o
.happened
)
317 comb
+= reset_i
.eq(issue_i | terminate
) # various
318 comb
+= reset_o
.eq(self
.done_o | terminate
) # opcode reset
319 comb
+= reset_w
.eq(self
.wr
.go_i
[0] | terminate
) # write reg 1
320 comb
+= reset_u
.eq(self
.wr
.go_i
[1] | terminate
) # update (reg 2)
321 comb
+= reset_s
.eq(self
.go_st_i | terminate
) # store reset
322 comb
+= reset_r
.eq(self
.rd
.go_i |
Repl(terminate
, self
.n_src
))
323 comb
+= reset_a
.eq(self
.go_ad_i | terminate
)
325 p_st_go
= Signal(reset_less
=True)
326 sync
+= p_st_go
.eq(self
.st
.go_i
)
328 # decode bits of operand (latched)
329 oper_r
= CompLDSTOpSubset(name
="oper_r") # Dest register
330 comb
+= op_is_st
.eq(oper_r
.insn_type
== MicrOp
.OP_STORE
) # ST
331 comb
+= op_is_ld
.eq(oper_r
.insn_type
== MicrOp
.OP_LOAD
) # LD
332 comb
+= op_is_dcbz
.eq(oper_r
.insn_type
== MicrOp
.OP_DCBZ
) # DCBZ
334 #comb += Display("compldst_multi: op_is_dcbz = %i",
335 # (oper_r.insn_type == MicrOp.OP_DCBZ))
336 op_is_update
= oper_r
.ldst_mode
== LDSTMode
.update
# UPDATE
337 op_is_cix
= oper_r
.ldst_mode
== LDSTMode
.cix
# cache-inhibit
338 comb
+= self
.load_mem_o
.eq(op_is_ld
& self
.go_ad_i
)
339 comb
+= self
.stwd_mem_o
.eq(op_is_st
& self
.go_st_i
)
340 comb
+= self
.ld_o
.eq(op_is_ld
)
341 comb
+= self
.st_o
.eq(op_is_st
)
343 ##########################
344 # FSM implemented through sequence of latches. approximately this:
346 # - src_l[0] : operands
348 # - alu_l : looks after add of src1/2/imm (EA)
349 # - adr_l : waits for add (EA)
350 # - upd_l : waits for adr and Regfile (port 2)
352 # - lod_l : waits for adr (EA) and for LD Data
353 # - wri_l : waits for LD Data and Regfile (port 1)
        # - sto_l     : waits for alu and operand2
355 # - rst_l : waits for all FSM paths to converge.
356 # NOTE: use sync to stop combinatorial loops.
358 # opcode latch - inverted so that busy resets to 0
359 # note this MUST be sync so as to avoid a combinatorial loop
360 # between busy_o and issue_i on the reset latch (rst_l)
361 sync
+= opc_l
.s
.eq(issue_i
) # XXX NOTE: INVERTED FROM book!
362 sync
+= opc_l
.r
.eq(reset_o
) # XXX NOTE: INVERTED FROM book!
365 sync
+= src_l
.s
.eq(Repl(issue_i
, self
.n_src
))
366 sync
+= src_l
.r
.eq(reset_r
)
368 # alu latch. use sync-delay between alu_ok and valid to generate pulse
369 comb
+= alu_l
.s
.eq(reset_i
)
370 comb
+= alu_l
.r
.eq(alu_ok
& ~alu_valid
& ~rda_any
)
373 comb
+= adr_l
.s
.eq(reset_i
)
374 sync
+= adr_l
.r
.eq(reset_a
)
377 comb
+= lod_l
.s
.eq(reset_i
)
378 comb
+= lod_l
.r
.eq(ld_ok
)
381 comb
+= wri_l
.s
.eq(issue_i
)
382 sync
+= wri_l
.r
.eq(reset_w |
Repl(wr_reset |
383 (~self
.pi
.busy_o
& op_is_update
),
384 #(self.pi.busy_o & op_is_update),
385 #self.done_o | (self.pi.busy_o & op_is_update),
388 # update-mode operand latch (EA written to reg 2)
389 sync
+= upd_l
.s
.eq(reset_i
)
390 sync
+= upd_l
.r
.eq(reset_u
)
393 comb
+= sto_l
.s
.eq(addr_ok
& op_is_st
)
394 sync
+= sto_l
.r
.eq(reset_s | p_st_go
)
396 # ld/st done. needed to stop LD/ST from activating repeatedly
397 comb
+= lsd_l
.s
.eq(issue_i
)
398 sync
+= lsd_l
.r
.eq(reset_s | p_st_go | ld_ok
)
401 comb
+= rst_l
.s
.eq(addr_ok
) # start when address is ready
402 comb
+= rst_l
.r
.eq(issue_i
)
404 # create a latch/register for the operand
405 with m
.If(self
.issue_i
):
406 sync
+= oper_r
.eq(self
.oper_i
)
407 with m
.If(self
.done_o | terminate
):
411 ldd_r
= Signal(self
.data_wid
, reset_less
=True) # Dest register
412 latchregister(m
, ldd_o
, ldd_r
, ld_ok
, name
="ldo_r")
414 # and for each input from the incoming src operands
416 for i
in range(self
.n_src
):
418 src_r
= Signal(self
.data_wid
, name
=name
, reset_less
=True)
419 with m
.If(self
.rd
.go_i
[i
]):
420 sync
+= src_r
.eq(self
.src_i
[i
])
421 with m
.If(self
.issue_i
):
425 # and one for the output from the ADD (for the EA)
426 addr_r
= Signal(self
.data_wid
, reset_less
=True) # Effective Address
427 latchregister(m
, alu_o
, addr_r
, alu_l
.q
, "ea_r")
429 # select either zero or src1 if opcode says so
430 op_is_z
= oper_r
.zero_a
431 src1_or_z
= Signal(self
.data_wid
, reset_less
=True)
432 m
.d
.comb
+= src1_or_z
.eq(Mux(op_is_z
, 0, srl
[0]))
434 # select either immediate or src2 if opcode says so
435 op_is_imm
= oper_r
.imm_data
.ok
436 src2_or_imm
= Signal(self
.data_wid
, reset_less
=True)
437 m
.d
.comb
+= src2_or_imm
.eq(Mux(op_is_imm
, oper_r
.imm_data
.data
, srl
[1]))
439 # now do the ALU addr add: one cycle, and say "ready" (next cycle, too)
440 comb
+= alu_o
.eq(src1_or_z
+ src2_or_imm
) # actual EA
441 m
.d
.sync
+= alu_ok
.eq(alu_valid
) # keep ack in sync with EA
443 ############################
444 # Control Signal calculation
448 comb
+= self
.busy_o
.eq(opc_l
.q
) # | self.pi.busy_o) # busy out
450 # 1st operand read-request only when zero not active
451 # 2nd operand only needed when immediate is not active
452 slg
= Cat(op_is_z
, op_is_imm
)
453 bro
= Repl(self
.busy_o
, self
.n_src
)
454 comb
+= self
.rd
.rel_o
.eq(src_l
.q
& bro
& ~slg
& ~self
.rdmaskn
)
456 # note when the address-related read "go" signals are active
457 comb
+= rda_any
.eq(self
.rd
.go_i
[0] | self
.rd
.go_i
[1])
459 # alu input valid when 1st and 2nd ops done (or imm not active)
460 comb
+= alu_valid
.eq(busy_o
& ~
(self
.rd
.rel_o
[0] | self
.rd
.rel_o
[1]))
462 # 3rd operand only needed when operation is a store
463 comb
+= self
.rd
.rel_o
[2].eq(src_l
.q
[2] & busy_o
& op_is_st
)
        # all reads done when alu is valid and the 3rd operand is not
        # (or is no longer) being requested
466 comb
+= rd_done
.eq(alu_valid
& ~self
.rd
.rel_o
[2])
468 # address release only if addr ready, but Port must be idle
469 comb
+= self
.adr_rel_o
.eq(alu_valid
& adr_l
.q
& busy_o
)
471 # the write/store (etc) all must be cancelled if an exception occurs
472 # note: cancel is active low, like shadown_i,
        # while exc_o.happened is active high
474 cancel
= Signal(reset_less
=True)
475 comb
+= cancel
.eq(~self
.exc_o
.happened
& self
.shadown_i
)
477 # store release when st ready *and* all operands read (and no shadow)
478 comb
+= self
.st
.rel_o
.eq(sto_l
.q
& busy_o
& rd_done
& op_is_st
&
481 # request write of LD result. waits until shadow is dropped.
482 comb
+= self
.wr
.rel_o
[0].eq(rd_done
& wri_l
.q
& busy_o
& lod_l
.qn
&
485 # request write of EA result only in update mode
486 comb
+= self
.wr
.rel_o
[1].eq(upd_l
.q
& busy_o
& op_is_update
&
489 # provide "done" signal: select req_rel for non-LD/ST, adr_rel for LD/ST
490 comb
+= wr_any
.eq(self
.st
.go_i | p_st_go |
491 self
.wr
.go_i
[0] | self
.wr
.go_i
[1])
492 comb
+= wr_reset
.eq(rst_l
.q
& busy_o
& cancel
&
493 ~
(self
.st
.rel_o | self
.wr
.rel_o
[0] |
495 (lod_l
.qn | op_is_st
)
497 comb
+= self
.done_o
.eq(wr_reset
& (~self
.pi
.busy_o | op_is_ld
))
499 ######################
500 # Data/Address outputs
502 # put the LD-output register directly onto the output bus on a go_write
503 comb
+= self
.o_data
.data
.eq(self
.dest
[0])
504 with m
.If(self
.wr
.go_i
[0]):
505 comb
+= self
.dest
[0].eq(ldd_r
)
507 # "update" mode, put address out on 2nd go-write
508 comb
+= self
.addr_o
.data
.eq(self
.dest
[1])
509 with m
.If(op_is_update
& self
.wr
.go_i
[1]):
510 comb
+= self
.dest
[1].eq(addr_r
)
512 # need to look like MultiCompUnit: put wrmask out.
513 # XXX may need to make this enable only when write active
514 comb
+= self
.wrmask
.eq(bro
& Cat(op_is_ld
, op_is_update
))
516 ###########################
517 # PortInterface connections
520 # connect to LD/ST PortInterface.
521 comb
+= pi
.is_ld_i
.eq(op_is_ld
& busy_o
) # decoded-LD
522 comb
+= pi
.is_st_i
.eq(op_is_st
& busy_o
) # decoded-ST
523 comb
+= pi
.data_len
.eq(oper_r
.data_len
) # data_len
524 # address: use sync to avoid long latency
525 sync
+= pi
.addr
.data
.eq(addr_r
) # EA from adder
526 sync
+= pi
.addr
.ok
.eq(alu_ok
& lsd_l
.q
) # "do address stuff" (once)
527 comb
+= self
.exc_o
.eq(pi
.exc_o
) # exception occurred
528 comb
+= addr_ok
.eq(self
.pi
.addr_ok_o
) # no exc, address fine
529 # connect MSR.PR for priv/virt operation
530 comb
+= pi
.msr_pr
.eq(oper_r
.msr
[MSR
.PR
])
533 revnorev
= Signal(64, reset_less
=True)
534 with m
.If(oper_r
.byte_reverse
):
535 # byte-reverse the data based on ld/st width (turn it to LE)
536 data_len
= oper_r
.data_len
537 lddata_r
= byte_reverse(m
, 'lddata_r', pi
.ld
.data
, data_len
)
538 comb
+= revnorev
.eq(lddata_r
) # put reversed- data out
540 comb
+= revnorev
.eq(pi
.ld
.data
) # put data out, straight (as BE)
542 # then check sign-extend
543 with m
.If(oper_r
.sign_extend
):
544 # okok really should "if data_len == 4" and so on here
545 with m
.If(oper_r
.data_len
== 2):
546 comb
+= ldd_o
.eq(exts(revnorev
, 16, 64)) # sign-extend hword
548 comb
+= ldd_o
.eq(exts(revnorev
, 32, 64)) # sign-extend dword
550 comb
+= ldd_o
.eq(revnorev
)
552 # ld - ld gets latched in via lod_l
553 comb
+= ld_ok
.eq(pi
.ld
.ok
) # ld.ok *closes* (freezes) ld data
556 op3
= srl
[2] # 3rd operand latch
557 with m
.If(oper_r
.byte_reverse
):
558 # byte-reverse the data based on width
559 data_len
= oper_r
.data_len
560 stdata_r
= byte_reverse(m
, 'stdata_r', op3
, data_len
)
561 comb
+= pi
.st
.data
.eq(stdata_r
)
563 comb
+= pi
.st
.data
.eq(op3
)
564 # store - data goes in based on go_st
565 comb
+= pi
.st
.ok
.eq(self
.st
.go_i
) # go store signals st data valid
569 def get_out(self
, i
):
570 """make LDSTCompUnit look like RegSpecALUAPI. these correspond
571 to LDSTOutputData o and o1 respectively.
574 return self
.o_data
# LDSTOutputData.regspec o
576 return self
.addr_o
# LDSTOutputData.regspec o1
577 # return self.dest[i]
def get_fu_out(self, i):
    """Return output number i of this unit; simply defers to get_out(i)."""
    fu_out = self.get_out(i)
    return fu_out
590 yield from self
.oper_i
.ports()
591 yield from self
.src_i
597 yield from self
.o_data
.ports()
598 yield from self
.addr_o
.ports()
599 yield self
.load_mem_o
600 yield self
.stwd_mem_o
606 def wait_for(sig
, wait
=True, test1st
=False):
608 print("wait for", sig
, v
, wait
, test1st
)
609 if test1st
and bool(v
) == wait
:
614 #print("...wait for", sig, v)
619 def store(dut
, src1
, src2
, src3
, imm
, imm_ok
=True, update
=False,
621 print("ST", src1
, src2
, src3
, imm
, imm_ok
, update
)
622 yield dut
.oper_i
.insn_type
.eq(MicrOp
.OP_STORE
)
623 yield dut
.oper_i
.data_len
.eq(2) # half-word
624 yield dut
.oper_i
.byte_reverse
.eq(byterev
)
625 yield dut
.src1_i
.eq(src1
)
626 yield dut
.src2_i
.eq(src2
)
627 yield dut
.src3_i
.eq(src3
)
628 yield dut
.oper_i
.imm_data
.data
.eq(imm
)
629 yield dut
.oper_i
.imm_data
.ok
.eq(imm_ok
)
630 #guess: this one was removed -- yield dut.oper_i.update.eq(update)
631 yield dut
.issue_i
.eq(1)
633 yield dut
.issue_i
.eq(0)
639 # wait for all active rel signals to come up
641 rel
= yield dut
.rd
.rel_o
642 if rel
== active_rel
:
645 yield dut
.rd
.go_i
.eq(active_rel
)
647 yield dut
.rd
.go_i
.eq(0)
649 yield from wait_for(dut
.adr_rel_o
, False, test1st
=True)
650 # yield from wait_for(dut.adr_rel_o)
651 # yield dut.ad.go.eq(1)
653 # yield dut.ad.go.eq(0)
656 yield from wait_for(dut
.wr
.rel_o
[1])
657 yield dut
.wr
.go
.eq(0b10)
659 addr
= yield dut
.addr_o
661 yield dut
.wr
.go
.eq(0)
665 yield from wait_for(dut
.sto_rel_o
)
666 yield dut
.go_st_i
.eq(1)
668 yield dut
.go_st_i
.eq(0)
669 yield from wait_for(dut
.busy_o
, False)
670 # wait_for(dut.stwd_mem_o)
675 def load(dut
, src1
, src2
, imm
, imm_ok
=True, update
=False, zero_a
=False,
677 print("LD", src1
, src2
, imm
, imm_ok
, update
)
678 yield dut
.oper_i
.insn_type
.eq(MicrOp
.OP_LOAD
)
679 yield dut
.oper_i
.data_len
.eq(2) # half-word
680 yield dut
.oper_i
.byte_reverse
.eq(byterev
)
681 yield dut
.src1_i
.eq(src1
)
682 yield dut
.src2_i
.eq(src2
)
683 yield dut
.oper_i
.zero_a
.eq(zero_a
)
684 yield dut
.oper_i
.imm_data
.imm
.eq(imm
)
685 yield dut
.oper_i
.imm_data
.ok
.eq(imm_ok
)
686 yield dut
.issue_i
.eq(1)
688 yield dut
.issue_i
.eq(0)
691 # set up read-operand flags
693 if not imm_ok
: # no immediate means RB register needs to be read
695 if not zero_a
: # no zero-a means RA needs to be read
698 # wait for the operands (RA, RB, or both)
700 yield dut
.rd
.go
.eq(rd
)
701 yield from wait_for(dut
.rd
.rel_o
)
702 yield dut
.rd
.go
.eq(0)
704 yield from wait_for(dut
.adr_rel_o
, False, test1st
=True)
705 # yield dut.ad.go.eq(1)
707 # yield dut.ad.go.eq(0)
710 yield from wait_for(dut
.wr
.rel_o
[1])
711 yield dut
.wr
.go
.eq(0b10)
713 addr
= yield dut
.addr_o
715 yield dut
.wr
.go
.eq(0)
719 yield from wait_for(dut
.wr
.rel_o
[0], test1st
=True)
720 yield dut
.wr
.go
.eq(1)
722 data
= yield dut
.o_data
724 yield dut
.wr
.go
.eq(0)
725 yield from wait_for(dut
.busy_o
)
727 # wait_for(dut.stwd_mem_o)
736 # two STs (different addresses)
737 yield from store(dut
, 4, 0, 3, 2) # ST reg4 into addr rfile[reg3]+2
738 yield from store(dut
, 2, 0, 9, 2) # ST reg4 into addr rfile[reg9]+2
740 # two LDs (deliberately LD from the 1st address then 2nd)
741 data
, addr
= yield from load(dut
, 4, 0, 2)
742 assert data
== 0x0003, "returned %x" % data
743 data
, addr
= yield from load(dut
, 2, 0, 2)
744 assert data
== 0x0009, "returned %x" % data
748 yield from store(dut
, 9, 5, 3, 0, imm_ok
=False)
749 data
, addr
= yield from load(dut
, 9, 5, 0, imm_ok
=False)
750 assert data
== 0x0003, "returned %x" % data
752 # update-immediate version
753 addr
= yield from store(dut
, 9, 6, 3, 2, update
=True)
754 assert addr
== 0x000b, "returned %x" % addr
756 # update-indexed version
757 data
, addr
= yield from load(dut
, 9, 5, 0, imm_ok
=False, update
=True)
758 assert data
== 0x0003, "returned %x" % data
759 assert addr
== 0x000e, "returned %x" % addr
761 # immediate *and* zero version
762 data
, addr
= yield from load(dut
, 1, 4, 8, imm_ok
=True, zero_a
=True)
763 assert data
== 0x0008, "returned %x" % data
766 class TestLDSTCompUnit(LDSTCompUnit
):
768 def __init__(self
, rwid
, pspec
):
769 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
770 self
.l0
= l0
= TstL0CacheBuffer(pspec
)
772 LDSTCompUnit
.__init
__(self
, pi
, rwid
, 4)
774 def elaborate(self
, platform
):
775 m
= LDSTCompUnit
.elaborate(self
, platform
)
776 m
.submodules
.l0
= self
.l0
777 # link addr-go direct to rel
778 m
.d
.comb
+= self
.ad
.go_i
.eq(self
.ad
.rel_o
)
782 def test_scoreboard():
785 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
786 imem_ifacetype
='bare_wb',
792 dut
= TestLDSTCompUnit(16,pspec
)
793 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
794 with
open("test_ldst_comp.il", "w") as f
:
797 run_simulation(dut
, ldst_sim(dut
), vcd_name
='test_ldst_comp.vcd')
800 class TestLDSTCompUnitRegSpec(LDSTCompUnit
):
802 def __init__(self
, pspec
):
803 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
804 from soc
.fu
.ldst
.pipe_data
import LDSTPipeSpec
805 regspec
= LDSTPipeSpec
.regspec
806 self
.l0
= l0
= TstL0CacheBuffer(pspec
)
808 LDSTCompUnit
.__init
__(self
, pi
, regspec
, 4)
810 def elaborate(self
, platform
):
811 m
= LDSTCompUnit
.elaborate(self
, platform
)
812 m
.submodules
.l0
= self
.l0
813 # link addr-go direct to rel
814 m
.d
.comb
+= self
.ad
.go_i
.eq(self
.ad
.rel_o
)
818 def test_scoreboard_regspec():
821 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
822 imem_ifacetype
='bare_wb',
828 dut
= TestLDSTCompUnitRegSpec(pspec
)
829 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
830 with
open("test_ldst_comp.il", "w") as f
:
833 run_simulation(dut
, ldst_sim(dut
), vcd_name
='test_ldst_regspec.vcd')
# Script entry point: when this module is executed directly (rather than
# imported), run the regspec-based LD/ST CompUnit scoreboard test.
if __name__ == '__main__':
    test_scoreboard_regspec()