1 """Computation Unit (aka "ALU Manager").
3 Manages a Pipeline or FSM, ensuring that the start and end time are 100%
4 monitored. At no time may the ALU proceed without this module notifying
5 the Dependency Matrices. At no time is a result production "abandoned".
6 This module blocks (indicates busy) starting from when it first receives
7 an opcode until it receives notification that
8 its result(s) have been successfully stored in the regfile(s).
10 Documented at http://libre-soc.org/3d_gpu/architecture/compunit
13 from nmigen
.compat
.sim
import run_simulation
, Settle
14 from nmigen
.cli
import rtlil
15 from nmigen
import Module
17 from soc
.decoder
.power_enums
import MicrOp
19 from soc
.experiment
.compalu_multi
import MultiCompUnit
20 from soc
.experiment
.alu_hier
import ALU
, DummyALU
21 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
22 from soc
.experiment
.alu_fsm
import Shifter
, CompFSMOpSubset
# op_sim_fsm: simulation generator that drives one operation through the
# Computation Unit `dut` using the FSM operation record.
#   dut       -- unit under test; its issue_i, src_i, oper_i.sdir, rd, wr
#                and data_o signals are accessed below
#   a, b      -- values assigned to dut.src_i[0] and dut.src_i[1]
#   direction -- value assigned to dut.oper_i.sdir
# NOTE(review): callers bind the value of `yield from op_sim_fsm(...)`, so
# this presumably hands back the last data_o sample -- TODO confirm.
25 def op_sim_fsm(dut
, a
, b
, direction
):
# start with issue_i de-asserted
26 yield dut
.issue_i
.eq(0)
# present both operands and the direction selector
28 yield dut
.src_i
[0].eq(a
)
29 yield dut
.src_i
[1].eq(b
)
30 yield dut
.oper_i
.sdir
.eq(direction
)
# issue_i is set to 1 and then back to 0
31 yield dut
.issue_i
.eq(1)
33 yield dut
.issue_i
.eq(0)
# assert "go" on both read ports at once (bits 0 and 1)
36 yield dut
.rd
.go
.eq(0b11)
# sample and log the read-release signal
39 rd_rel_o
= yield dut
.rd
.rel
40 print ("rd_rel", rd_rel_o
)
# sample and log the write-release signal together with the data output
45 req_rel_o
= yield dut
.wr
.rel
46 result
= yield dut
.data_o
47 print ("req_rel", req_rel_o
, result
)
49 req_rel_o
= yield dut
.wr
.rel
50 result
= yield dut
.data_o
51 print ("req_rel", req_rel_o
, result
)
# assert "go" on write port 0, then read the data output as the result
55 yield dut
.wr
.go
[0].eq(1)
57 result
= yield dut
.data_o
59 print ("result", result
)
# de-assert write "go" again
60 yield dut
.wr
.go
[0].eq(0)
# op_sim: simulation generator that drives one ALU operation through the
# Computation Unit `dut`.
#   dut     -- unit under test
#   a, b    -- values assigned to dut.src_i[0] and dut.src_i[1]
#   op      -- value assigned to dut.oper_i.insn_type
#   inv_a   -- value assigned to dut.oper_i.invert_a (default 0)
#   imm     -- value assigned to dut.oper_i.imm_data.imm (default 0)
#   imm_ok  -- value assigned to dut.oper_i.imm_data.imm_ok (default 0)
#   zero_a  -- value assigned to dut.oper_i.zero_a (default 0)
# NOTE(review): callers bind the value of `yield from op_sim(...)`, so this
# presumably hands back the last data_o sample -- TODO confirm.
65 def op_sim(dut
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0):
# start with issue_i de-asserted
66 yield dut
.issue_i
.eq(0)
# present the operands and every field of the operation record
68 yield dut
.src_i
[0].eq(a
)
69 yield dut
.src_i
[1].eq(b
)
70 yield dut
.oper_i
.insn_type
.eq(op
)
71 yield dut
.oper_i
.invert_a
.eq(inv_a
)
72 yield dut
.oper_i
.imm_data
.imm
.eq(imm
)
73 yield dut
.oper_i
.imm_data
.imm_ok
.eq(imm_ok
)
74 yield dut
.oper_i
.zero_a
.eq(zero_a
)
# issue_i is set to 1 and then back to 0
75 yield dut
.issue_i
.eq(1)
77 yield dut
.issue_i
.eq(0)
# read "go" on ports 0 and 1 is asserted unless BOTH imm_ok and zero_a are set
79 if not imm_ok
or not zero_a
:
80 yield dut
.rd
.go
.eq(0b11)
83 rd_rel_o
= yield dut
.rd
.rel
84 print ("rd_rel", rd_rel_o
)
# units with three source operands also get "go" on read port 2 (bit 2)
88 if len(dut
.src_i
) == 3:
89 yield dut
.rd
.go
.eq(0b100)
92 rd_rel_o
= yield dut
.rd
.rel
93 print ("rd_rel", rd_rel_o
)
# sample and log the write-release signal together with the data output
98 req_rel_o
= yield dut
.wr
.rel
99 result
= yield dut
.data_o
100 print ("req_rel", req_rel_o
, result
)
102 req_rel_o
= yield dut
.wr
.rel
103 result
= yield dut
.data_o
104 print ("req_rel", req_rel_o
, result
)
# assert "go" on write port 0, then read the data output as the result
108 yield dut
.wr
.go
[0].eq(1)
110 result
= yield dut
.data_o
112 print ("result", result
)
# de-assert write "go" again
113 yield dut
.wr
.go
[0].eq(0)
# scoreboard_sim_fsm: runs three operations through `dut` via op_sim_fsm and
# asserts each result; the result is also passed as the assertion message.
#   (a=13, b=2, direction=1) must give 3
#   (a=3,  b=4, direction=0) must give 48
#   (a=21, b=0, direction=0) must give 21
# NOTE(review): the expected values are consistent with direction=1 being a
# right shift (13 >> 2) and direction=0 a left shift (3 << 4) -- confirm
# against the Shifter implementation.
118 def scoreboard_sim_fsm(dut
):
119 result
= yield from op_sim_fsm(dut
, 13, 2, 1)
120 assert result
== 3, result
122 result
= yield from op_sim_fsm(dut
, 3, 4, 0)
123 assert result
== 48, result
125 result
= yield from op_sim_fsm(dut
, 21, 0, 0)
126 assert result
== 21, result
# scoreboard_sim_dummy: runs two OP_NOP operations through `dut` via op_sim
# and asserts that each result equals the first operand (5, then 9).
# NOTE(review): the remaining keyword arguments of both op_sim calls
# continue past `inv_a=0,` and are not reproduced here.
129 def scoreboard_sim_dummy(dut
):
130 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
, inv_a
=0,
132 assert result
== 5, result
134 result
= yield from op_sim(dut
, 9, 2, MicrOp
.OP_NOP
, inv_a
=0,
136 assert result
== 9, result
# scoreboard_sim: runs a sequence of operations through `dut` via op_sim,
# all with operands a=5, b=2: OP_ADD with various inv_a / zero_a settings,
# followed by one OP_NOP.
# NOTE(review): two of the op_sim calls have argument lists that continue
# past the trailing comma and are not reproduced here.
140 def scoreboard_sim(dut
):
141 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=0,
145 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
)
148 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=1)
# 65532 == 0xFFFC, consistent with (~5 + 2) truncated to 16 bits
149 assert result
== 65532
151 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1,
155 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1)
158 # test combinatorial zero-delay operation
159 # In the test ALU, any operation other than ADD, MUL or SHR
160 # is zero-delay, and does a subtraction.
161 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
)
# test_compunit_fsm: builds a MultiCompUnit (first argument 8) around `alu`
# with the CompFSMOpSubset operation record, registers it as submodule "cu"
# of `m`, converts it to RTLIL, opens "test_compunit_fsm1.il" for writing,
# and runs scoreboard_sim_fsm against it, recording
# 'test_compunit_fsm1.vcd'.
# NOTE(review): the creation of `m` and `alu` is not shown here -- TODO
# confirm which ALU class is instantiated.
165 def test_compunit_fsm():
169 dut
= MultiCompUnit(8, alu
, CompFSMOpSubset
)
170 m
.submodules
.cu
= dut
# elaborate the unit to RTLIL text, exposing the ports returned by dut.ports()
172 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
173 with
open("test_compunit_fsm1.il", "w") as f
:
176 run_simulation(m
, scoreboard_sim_fsm(dut
),
177 vcd_name
='test_compunit_fsm1.vcd')
# NOTE(review): the `def` header of this function is not shown here; judging
# by the output file names it is presumably `test_compunit` -- TODO confirm.
# The statements below build a MultiCompUnit (first argument 16) around
# `alu` with the CompALUOpSubset operation record, register it as submodule
# "cu" of `m`, convert it to RTLIL, open "test_compunit1.il" for writing,
# and run scoreboard_sim against it, recording 'test_compunit1.vcd'.
184 dut
= MultiCompUnit(16, alu
, CompALUOpSubset
)
185 m
.submodules
.cu
= dut
# elaborate the unit to RTLIL text, exposing the ports returned by dut.ports()
187 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
188 with
open("test_compunit1.il", "w") as f
:
191 run_simulation(m
, scoreboard_sim(dut
), vcd_name
='test_compunit1.vcd')
# CompUnitParallelTest: test harness that exercises a Computation Unit with
# several simulation processes running side by side. run_simulation()
# starts a driver process, one rd() process per read port (0 and 1) and one
# wr() process for write port 0. The processes share the operation data
# (operands, imm_control, rdmaskn, ...) and the rd_complete flags through
# attributes on this object.
# NOTE(review): the headers of some methods (apparently the driver and
# issue processes) are not shown here -- TODO confirm method boundaries.
194 class CompUnitParallelTest
:
195 def __init__(self
, dut
):
198 # Operation cycle should not take longer than this:
199 self
.MAX_BUSY_WAIT
= 50
201 # Minimum duration in which issue_i will be kept inactive,
202 # during which busy_o must remain low.
203 self
.MIN_BUSY_LOW
= 5
205 # Number of cycles to stall until the assertion of go.
206 # One value, for each port. Can be zero, for no delay.
207 self
.RD_GO_DELAY
= [0, 3]
209 # store common data for the input operation of the processes
212 self
.inv_a
= self
.zero_a
= 0
213 self
.imm
= self
.imm_ok
= 0
# imm_control is stored as (zero_a, imm_ok) by operation(); rd() indexes
# it with the read port number
214 self
.imm_control
= (0, 0)
215 self
.rdmaskn
= (0, 0)
217 self
.operands
= (0, 0)
219 # Indicates completion of the sub-processes
220 self
.rd_complete
= [False, False]
# NOTE(review): the next two statements appear to belong to the driver()
# process that run_simulation() starts -- confirm.
223 print("Begin parallel test.")
224 yield from self
.operation(5, 2, MicrOp
.OP_ADD
)
# operation: records the data for one operation on this object, resets the
# completion flags, then runs the issue sequence via self.issue().
# NOTE(review): `rdmaskn` is used in the body, so it is presumably a further
# parameter declared on the continuation of this signature -- confirm.
226 def operation(self
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0,
228 # store data for the operation
229 self
.operands
= (a
, b
)
235 self
.imm_control
= (zero_a
, imm_ok
)
236 self
.rdmaskn
= rdmaskn
238 # Initialize completion flags
239 self
.rd_complete
= [False, False]
241 # trigger operation cycle
242 yield from self
.issue()
244 # check that the sub-processes completed, before the busy_o cycle ended
245 for completion
in self
.rd_complete
:
249 # issue_i starts inactive
250 yield self
.dut
.issue_i
.eq(0)
252 for n
in range(self
.MIN_BUSY_LOW
):
254 # busy_o must remain inactive. It cannot rise on its own.
255 busy_o
= yield self
.dut
.busy_o
258 # activate issue_i to begin the operation cycle
259 yield self
.dut
.issue_i
.eq(1)
261 # at the same time, present the operation
262 yield self
.dut
.oper_i
.insn_type
.eq(self
.op
)
263 yield self
.dut
.oper_i
.invert_a
.eq(self
.inv_a
)
264 yield self
.dut
.oper_i
.imm_data
.imm
.eq(self
.imm
)
265 yield self
.dut
.oper_i
.imm_data
.imm_ok
.eq(self
.imm_ok
)
266 yield self
.dut
.oper_i
.zero_a
.eq(self
.zero_a
)
# pack the two per-port mask flags into one value: port 0 in bit 0,
# port 1 in bit 1
267 rdmaskn
= self
.rdmaskn
[0] |
(self
.rdmaskn
[1] << 1)
268 yield self
.dut
.rdmaskn
.eq(rdmaskn
)
270 # give one cycle for the CompUnit to latch the data
273 # busy_o must keep being low in this cycle, because issue_i was
274 # low on the previous cycle.
275 # It cannot rise on its own.
276 # Also, busy_o and issue_i must never be active at the same time, ever.
277 busy_o
= yield self
.dut
.busy_o
281 yield self
.dut
.issue_i
.eq(0)
283 # deactivate inputs along with issue_i, so we can be sure the data
284 # was latched at the correct cycle
285 # note: rdmaskn must be held, while busy_o is active
286 # TODO: deactivate rdmaskn when the busy_o cycle ends
287 yield self
.dut
.oper_i
.insn_type
.eq(0)
288 yield self
.dut
.oper_i
.invert_a
.eq(0)
289 yield self
.dut
.oper_i
.imm_data
.imm
.eq(0)
290 yield self
.dut
.oper_i
.imm_data
.imm_ok
.eq(0)
291 yield self
.dut
.oper_i
.zero_a
.eq(0)
294 # wait for busy_o to lower
295 # timeout after self.MAX_BUSY_WAIT cycles
296 for n
in range(self
.MAX_BUSY_WAIT
):
297 # sample busy_o in the current cycle
298 busy_o
= yield self
.dut
.busy_o
300 # operation cycle ends when busy_o becomes inactive
304 # if busy_o is still active, a timeout has occurred
305 # TODO: Uncomment this, once the test is complete:
309 print("If you are reading this, "
310 "it's because the above test failed, as expected,\n"
311 "with a timeout. It must pass, once the test is complete.")
314 print("If you are reading this, "
315 "it's because the above test unexpectedly passed.")
# rd: simulation process for read port `rd_idx`. It samples
# dut.rd.rel[rd_idx] at several points, pulses dut.rd.go[rd_idx] while
# driving dut.src_i[rd_idx] with the stored operand, checks the value the
# ALU received via dut.get_in(rd_idx), and finally sets
# rd_complete[rd_idx].
317 def rd(self
, rd_idx
):
318 # wait for issue_i to rise
320 issue_i
= yield self
.dut
.issue_i
323 # issue_i has not risen yet, so rd must keep low
324 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
328 # we do not want rd to rise on an immediate operand
329 # if it is immediate, exit the process
330 # likewise, if the read mask is active
331 # TODO: don't exit the process, monitor rd instead to ensure it
332 # doesn't rise on its own
# imm_control is (zero_a, imm_ok), so port 0 is skipped on zero_a and
# port 1 on imm_ok
333 if self
.rdmaskn
[rd_idx
] or self
.imm_control
[rd_idx
]:
334 self
.rd_complete
[rd_idx
] = True
337 # issue_i has risen. rel must rise on the next cycle
338 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
341 # stall for additional cycles. Check that rel doesn't fall on its own
342 for n
in range(self
.RD_GO_DELAY
[rd_idx
]):
344 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
347 # Before asserting "go", make sure "rel" has risen.
348 # The use of Settle allows "go" to be set combinatorially,
349 # rising on the same cycle as "rel".
351 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
354 # assert go for one cycle, passing along the operand value
355 yield self
.dut
.rd
.go
[rd_idx
].eq(1)
356 yield self
.dut
.src_i
[rd_idx
].eq(self
.operands
[rd_idx
])
357 # check that the operand was sent to the alu
358 # TODO: Properly check the alu protocol
360 alu_input
= yield self
.dut
.get_in(rd_idx
)
361 assert alu_input
== self
.operands
[rd_idx
]
364 # rel must keep high, since go was inactive in the last cycle
365 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
368 # finish the go one-clock pulse
369 yield self
.dut
.rd
.go
[rd_idx
].eq(0)
370 yield self
.dut
.src_i
[rd_idx
].eq(0)
373 # rel must have gone low in response to go being high
374 # on the previous cycle
375 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
378 self
.rd_complete
[rd_idx
] = True
380 # TODO: check that rel doesn't rise again until the end of the
# wr: simulation process for write port `wr_idx`; only comments describing
# the intended behaviour are present here.
383 def wr(self
, wr_idx
):
384 # monitor self.dut.wr.req[rd_idx] and sets dut.wr.go[idx] for one cycle
386 # TODO: also when dut.wr.go is set, check the output against the
387 # self.expected_o and assert. use dut.get_out(wr_idx) to do so.
# run_simulation: starts the driver, both read-port processes and the
# write-port process together. The bare name `run_simulation` in the body
# resolves to the module-level function imported from nmigen.compat.sim,
# not to this method, so there is no recursion.
#   vcd_name -- name of the waveform file (passed in by the caller)
389 def run_simulation(self
, vcd_name
):
390 run_simulation(self
.dut
, [self
.driver(),
391 self
.rd(0), # one read port (a)
392 self
.rd(1), # one read port (b)
393 self
.wr(0), # one write port (o)
# test_compunit_regspec2_fsm: builds a MultiCompUnit from a register
# specification instead of a plain width. `inspec` lists two 'INT' inputs
# ('a', 'b') and `outspec` one 'INT' output ('o'), each tagged '0:15'. The
# unit uses the CompFSMOpSubset operation record, is registered as
# submodule "cu" of `m`, and is simulated with scoreboard_sim_fsm,
# recording 'test_compunit_regspec2_fsm.vcd'.
# NOTE(review): the creation of `m` and `alu` is not shown here -- TODO
# confirm which ALU class is instantiated.
398 def test_compunit_regspec2_fsm():
400 inspec
= [('INT', 'a', '0:15'),
401 ('INT', 'b', '0:15'),
403 outspec
= [('INT', 'o', '0:15'),
# the register specification is the pair (inputs, outputs)
406 regspec
= (inspec
, outspec
)
410 dut
= MultiCompUnit(regspec
, alu
, CompFSMOpSubset
)
411 m
.submodules
.cu
= dut
413 run_simulation(m
, scoreboard_sim_fsm(dut
),
414 vcd_name
='test_compunit_regspec2_fsm.vcd')
# test_compunit_regspec3: builds a MultiCompUnit from a register
# specification with three 'INT' inputs ('a', 'b', 'c') and one 'INT'
# output ('o'), each tagged '0:15'. The unit uses the CompALUOpSubset
# operation record, is registered as submodule "cu" of `m`, and is
# simulated with scoreboard_sim_dummy, recording
# 'test_compunit_regspec3.vcd'.
# NOTE(review): the creation of `m` and `alu` is not shown here -- TODO
# confirm which ALU class is instantiated.
417 def test_compunit_regspec3():
419 inspec
= [('INT', 'a', '0:15'),
420 ('INT', 'b', '0:15'),
421 ('INT', 'c', '0:15')]
422 outspec
= [('INT', 'o', '0:15'),
# the register specification is the pair (inputs, outputs)
425 regspec
= (inspec
, outspec
)
429 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
430 m
.submodules
.cu
= dut
432 run_simulation(m
, scoreboard_sim_dummy(dut
),
433 vcd_name
='test_compunit_regspec3.vcd')
# test_compunit_regspec1: builds a MultiCompUnit from a register
# specification with two 'INT' inputs ('a', 'b') and one 'INT' output
# ('o'), each tagged '0:15'. The unit uses the CompALUOpSubset operation
# record and is registered as submodule "cu" of `m`. The test then:
#   1. converts the unit to RTLIL and opens "test_compunit_regspec1.il"
#      for writing,
#   2. simulates it with scoreboard_sim, recording
#      'test_compunit_regspec1.vcd',
#   3. runs CompUnitParallelTest on the same `dut`, recording
#      "test_compunit_parallel.vcd".
# NOTE(review): the creation of `m` and `alu` is not shown here -- TODO
# confirm which ALU class is instantiated.
436 def test_compunit_regspec1():
438 inspec
= [('INT', 'a', '0:15'),
439 ('INT', 'b', '0:15')]
440 outspec
= [('INT', 'o', '0:15'),
# the register specification is the pair (inputs, outputs)
443 regspec
= (inspec
, outspec
)
447 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
448 m
.submodules
.cu
= dut
450 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
451 with
open("test_compunit_regspec1.il", "w") as f
:
454 run_simulation(m
, scoreboard_sim(dut
),
455 vcd_name
='test_compunit_regspec1.vcd')
# second pass: the parallel (multi-process) test harness on the same unit
457 test
= CompUnitParallelTest(dut
)
458 test
.run_simulation("test_compunit_parallel.vcd")
# Script entry point: when the file is executed directly, the regspec-based
# tests are run in the order listed.
461 if __name__
== '__main__':
464 test_compunit_regspec1()
465 test_compunit_regspec3()