Convert a few more tests to be able to use cxxsim
[soc.git] / src / soc / fu / compunits / test / test_compunit.py
1 from nmigen import Module, Signal, ResetSignal
2
3 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
4 # Also, check out the cxxsim nmigen branch, and latest yosys from git
5 from nmutil.sim_tmp_alternative import Simulator, Settle
6
7 from nmutil.formaltest import FHDLTestCase
8 from nmigen.cli import rtlil
9 import unittest
10 from soc.decoder.power_decoder import create_pdecode
11 from soc.decoder.power_decoder2 import PowerDecode2, get_rdflags
12 from soc.decoder.power_enums import Function
13 from soc.decoder.isa.all import ISA
14
15 from soc.experiment.compalu_multi import find_ok # hack
16 from soc.config.test.test_loadstore import TestMemPspec
17
18
def set_cu_input(cu, idx, data):
    """Simulation generator: drive one source operand into a CompUnit.

    Sequence performed (each bare ``yield`` waits one cycle):

    1. place ``data`` on ``cu.src_i[idx]``
    2. poll ``cu.rd.rel_o[idx]`` until it reads back non-zero
    3. raise ``cu.rd.go_i[idx]``
    4. poll ``cu.rd.rel_o[idx]`` again until it reads back non-zero
    5. clear ``cu.rd.go_i[idx]`` and ``cu.src_i[idx]``

    :param cu: unit under test; must expose ``get_in_name``, ``src_i``,
               ``rd.rel_o`` and ``rd.go_i``
    :param idx: index of the source operand to drive
    :param data: integer value to present (it is formatted with ``hex``)
    """
    rdop = cu.get_in_name(idx)
    yield cu.src_i[idx].eq(data)

    # phase 1: wait for the read-release of this operand
    while True:
        rd_rel_o = yield cu.rd.rel_o[idx]
        print("rd_rel %d wait HI" % idx, rd_rel_o, rdop, hex(data))
        if rd_rel_o:
            break
        yield

    # phase 2: acknowledge with "go", then re-sample release one cycle later.
    # note that when release is not seen, two cycles pass per iteration.
    yield cu.rd.go_i[idx].eq(1)
    while True:
        yield
        rd_rel_o = yield cu.rd.rel_o[idx]
        if rd_rel_o:
            break
        print("rd_rel %d wait HI" % idx, rd_rel_o)
        yield

    # tidy up: drop "go" and zero the operand
    yield cu.rd.go_i[idx].eq(0)
    yield cu.src_i[idx].eq(0)
38
39
def get_cu_output(cu, idx, code):
    """Simulation generator: collect one destination operand from a CompUnit.

    Checks that bit ``idx`` of ``cu.wrmask`` is set, waits for
    ``cu.wr.rel_o[idx]``, raises ``cu.wr.go_i[idx]``, samples
    ``cu.dest[idx]`` after a ``Settle()``, then drops "go" one cycle later.

    :param cu: unit under test; must expose ``wrmask``, ``get_out_name``,
               ``get_out``, ``wr.rel_o``, ``wr.go_i`` and ``dest``
    :param idx: index of the destination operand to collect
    :param code: label of the instruction under test (diagnostics only)
    :returns: (as the generator's return value) the value sampled from
              ``cu.dest[idx]``
    :raises AssertionError: if bit ``idx`` of the write mask is not set
    """
    wrmask = yield cu.wrmask
    wrop = cu.get_out_name(idx)
    out_rec = cu.get_out(idx)
    # find_ok picks the name of the "ok" field out of the record's fields
    fname = find_ok(out_rec.fields)
    wrok = yield getattr(out_rec, fname)
    print("wr_rel mask", repr(code), idx, wrop, bin(wrmask), fname, wrok)
    assert wrmask & (1 << idx), \
        "get_cu_output '%s': mask bit %d not set\n" \
        "write-operand '%s' Data.ok likely not set (%s)" \
        % (code, idx, wrop, hex(wrok))

    # wait for the write-release of this particular operand
    while True:
        wr_relall_o = yield cu.wr.rel_o
        wr_rel_o = yield cu.wr.rel_o[idx]
        print("wr_rel %d wait" % idx, hex(wr_relall_o), wr_rel_o)
        if wr_rel_o:
            break
        yield

    # acknowledge, and sample the result in the same cycle (after settling)
    yield cu.wr.go_i[idx].eq(1)
    yield Settle()
    result = yield cu.dest[idx]
    yield
    yield cu.wr.go_i[idx].eq(0)
    print("result", repr(code), idx, wrop, wrok, hex(result))

    return result
66
67
def set_cu_inputs(cu, inp):
    """Simulation generator: drive every operand in ``inp`` into the CompUnit.

    :param cu: unit under test, passed through to ``set_cu_input``
    :param inp: dict mapping source-operand index to the value to drive
    """
    print("set_cu_inputs", inp)
    for idx, data in inp.items():
        yield from set_cu_input(cu, idx, data)
    # gets out of sync when checking busy if there is no wait, here.
    if not inp:
        yield  # wait one cycle
75
76
def set_operand(cu, dec2, sim):
    """Simulation generator: load the decoded operation and pulse issue.

    Copies ``dec2.e`` into ``cu.oper_i`` via ``eq_from_execute1``, holds
    ``cu.issue_i`` high for one cycle, then waits one further cycle.

    :param cu: unit under test; must expose ``oper_i`` and ``issue_i``
    :param dec2: decoder whose ``e`` attribute is handed to
                 ``eq_from_execute1``
    :param sim: not used by this function (kept for call compatibility)
    """
    yield from cu.oper_i.eq_from_execute1(dec2.e)
    yield cu.issue_i.eq(1)
    yield
    yield cu.issue_i.eq(0)
    yield
83
84
def get_cu_outputs(cu, code):
    """Simulation generator: collect all released outputs of a CompUnit.

    :param cu: unit under test; must expose ``wrmask``, ``wr.rel_o``,
               ``n_dst`` and ``get_out_name`` (plus ``alu.n.valid_o`` when
               it has an ``alu`` attribute)
    :param code: label of the instruction under test (diagnostics only)
    :returns: (as the generator's return value) dict mapping output-operand
              name to the collected value; empty when the write mask is zero
    """
    res = {}
    # wait for pipeline to indicate valid.  this because for long
    # pipelines (or FSMs) the write mask is only valid at that time.
    if hasattr(cu, "alu"):  # ALU CompUnits
        while True:
            valid_o = yield cu.alu.n.valid_o
            if valid_o:
                break
            yield
    else:  # LDST CompUnit
        # not a lot can be done about this - simply wait a few cycles
        for _ in range(5):
            yield

    wrmask = yield cu.wrmask
    wr_rel_o = yield cu.wr.rel_o
    print("get_cu_outputs", cu.n_dst, wrmask, wr_rel_o)
    # no point waiting (however really should doublecheck wr.rel)
    if not wrmask:
        return {}

    # wait for at least one result
    while True:
        wr_rel_o = yield cu.wr.rel_o
        if wr_rel_o:
            break
        yield

    # collect each destination whose release bit is currently set
    for i in range(cu.n_dst):
        wr_rel_o = yield cu.wr.rel_o[i]
        if wr_rel_o:
            result = yield from get_cu_output(cu, i, code)
            wrop = cu.get_out_name(i)
            print("output", i, wrop, hex(result))
            res[wrop] = result
    return res
120
121
def get_inp_indexed(cu, inp):
    """Re-key a name-indexed input dict by source-operand index.

    :param cu: object exposing ``n_src`` and ``get_in_name(i)``
    :param inp: dict mapping input-operand name to value
    :returns: dict mapping operand index to value, containing only those
              indices whose name (per ``cu.get_in_name``) is present in ``inp``
    """
    in_names = (cu.get_in_name(i) for i in range(cu.n_src))
    return {i: inp[name] for i, name in enumerate(in_names) if name in inp}
129
130
def get_l0_mem(l0):  # BLECH!
    """Return the memory object behind ``l0.pimem``.

    Two layouts are handled: when ``l0.pimem`` has an ``lsui`` attribute the
    memory is ``l0.pimem.lsui.mem``, otherwise it is ``l0.pimem.mem.mem``.
    """
    if hasattr(l0.pimem, 'lsui'):
        return l0.pimem.lsui.mem
    return l0.pimem.mem.mem
135
136
def setup_test_memory(l0, sim):
    """Simulation generator: copy the ISA simulator's memory into the HDL one.

    For every row ``i`` of the HDL memory, the value of
    ``sim.mem.ld(i*8, 8, False)`` is written to ``mem._array[i]``; both
    memories are then printed for debugging.

    :param l0: object from which ``get_l0_mem`` obtains the HDL memory
    :param sim: ISA simulator; ``sim.mem`` must expose ``ld`` and ``mem``
    """
    mem = get_l0_mem(l0)
    print("before, init mem", mem.depth, mem.width, mem)
    # NOTE(review): presumably an 8-byte load at byte address i*8, i.e. one
    # load per 64-bit row - confirm against sim.mem.ld
    for i in range(mem.depth):
        data = sim.mem.ld(i*8, 8, False)
        print("init ", i, hex(data))
        yield mem._array[i].eq(data)
    yield Settle()

    # debug dumps: simulator side first, then what the HDL memory reads back
    for k, v in sim.mem.mem.items():
        print(" %6x %016x" % (k, v))
    print("before, nmigen mem dump")
    for i in range(mem.depth):
        actual_mem = yield mem._array[i]
        print(" %6i %016x" % (i, actual_mem))
151
152
def dump_sim_memory(dut, l0, sim, code):
    """Simulation generator: print simulator memory and HDL memory contents.

    :param dut: not used by this function
    :param l0: object from which ``get_l0_mem`` obtains the HDL memory
    :param sim: ISA simulator; ``sim.mem.mem`` is iterated with ``items()``
    :param code: not used by this function
    """
    mem = get_l0_mem(l0)
    print("sim mem dump")
    for k, v in sim.mem.mem.items():
        print(" %6x %016x" % (k, v))
    print("nmigen mem dump")
    for i in range(mem.depth):
        actual_mem = yield mem._array[i]
        print(" %6i %016x" % (i, actual_mem))
162
163
def check_sim_memory(dut, l0, sim, code):
    """Simulation generator: assert HDL memory matches the ISA simulator.

    Every row ``i`` of the HDL memory is compared against
    ``sim.mem.ld(i*8, 8, False)`` using ``dut.assertEqual``.

    :param dut: test case providing ``assertEqual``
    :param l0: object from which ``get_l0_mem`` obtains the HDL memory
    :param sim: ISA simulator; ``sim.mem`` must expose ``ld``
    :param code: label of the instruction under test, included in the
                 failure message
    """
    mem = get_l0_mem(l0)

    for i in range(mem.depth):
        expected_mem = sim.mem.ld(i*8, 8, False)
        actual_mem = yield mem._array[i]
        dut.assertEqual(expected_mem, actual_mem,
                        "%s %d %x %x" % (code, i,
                                         expected_mem, actual_mem))
173
174
class TestRunner(FHDLTestCase):
    """Runs a list of test programs against one CompUnit in simulation.

    Each instruction is executed both by the ``ISA`` software simulator and
    by the hardware CompUnit (``fukls``); the results are then compared via
    the ``iodef`` helper's ``check_cu_outputs``.
    """

    def __init__(self, test_data, fukls, iodef, funit, bigendian):
        """
        :param test_data: iterable of test objects (each provides ``name``,
                          ``program``, ``regs``, ``sprs``, ``cr``, ``mem``
                          and ``msr``)
        :param fukls: callable that constructs the CompUnit under test
        :param iodef: helper providing ``get_cu_inputs`` and
                      ``check_cu_outputs``
        :param funit: ``Function`` enum member identifying the unit type
        :param bigendian: endianness flag handed to the decoder and to ISA
        """
        # "run_all" is the test method that unittest will invoke
        super().__init__("run_all")
        self.test_data = test_data
        self.fukls = fukls
        self.iodef = iodef
        self.funit = funit
        self.bigendian = bigendian

    def execute(self, cu, l0, instruction, pdecode2, simdec2, test):
        """Simulation generator: run one test program through ``cu``.

        :param cu: the CompUnit under test
        :param l0: L0 cache/buffer (only used when ``funit`` is LDST)
        :param instruction: signal receiving the raw instruction word
        :param pdecode2: decoder feeding the CompUnit
        :param simdec2: separate decoder instance handed to the ISA simulator
        :param test: test object describing program and initial state
        """
        program = test.program
        print("test", test.name, test.mem)
        gen = list(program.generate_instructions())
        insncode = program.assembly.splitlines()
        instructions = list(zip(gen, insncode))
        sim = ISA(simdec2, test.regs, test.sprs, test.cr, test.mem,
                  test.msr,
                  initial_insns=gen, respect_pc=True,
                  disassembly=insncode,
                  bigendian=self.bigendian)

        # initialise memory
        if self.funit == Function.LDST:
            yield from setup_test_memory(l0, sim)

        pc = sim.pc.CIA.value
        index = pc//4
        msr = sim.msr.value
        while True:
            print("instr pc", pc)
            try:
                yield from sim.setup_one()
            except KeyError:  # indicates instruction not in imem: stop
                break
            yield Settle()
            ins, code = instructions[index]
            print("instruction @", index, code)

            # ask the decoder to decode this binary data (endian'd)
            yield pdecode2.dec.bigendian.eq(self.bigendian)  # le / be?
            yield pdecode2.state.msr.eq(msr)  # set MSR "state"
            yield pdecode2.state.pc.eq(pc)  # set PC "state"
            yield instruction.eq(ins)  # raw binary instr.
            yield Settle()
            # debugging issue with branch
            if self.funit == Function.BRANCH:
                lk = yield pdecode2.e.do.lk
                fast_out2 = yield pdecode2.e.write_fast2.data
                fast_out2_ok = yield pdecode2.e.write_fast2.ok
                print("lk:", lk, fast_out2, fast_out2_ok)
                op_lk = yield cu.alu.pipe1.p.data_i.ctx.op.lk
                print("op_lk:", op_lk)
                print(dir(cu.alu.pipe1.n.data_o))
            # the decoded instruction must belong to the unit being tested
            fn_unit = yield pdecode2.e.do.fn_unit
            fuval = self.funit.value
            self.assertEqual(fn_unit & fuval, fuval)

            # set operand and get inputs
            yield from set_operand(cu, pdecode2, sim)
            # reset read-operand mask
            rdmask = get_rdflags(pdecode2.e, cu)
            #print ("hardcoded rdmask", cu.rdflags(pdecode2.e))
            #print ("decoder rdmask", rdmask)
            yield cu.rdmaskn.eq(~rdmask)

            yield Settle()
            iname = yield from self.iodef.get_cu_inputs(pdecode2, sim)
            inp = get_inp_indexed(cu, iname)

            # reset write-operand mask
            for idx in range(cu.n_dst):
                wrok = cu.get_out(idx)
                fname = find_ok(wrok.fields)
                yield getattr(wrok, fname).eq(0)

            yield Settle()

            # set inputs into CU
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            print("before inputs, rd_rel, wr_rel: ",
                  bin(rd_rel_o), bin(wr_rel_o))
            assert wr_rel_o == 0, "wr.rel %s must be zero. "\
                "previous instr not written all regs\n"\
                "respec %s" % \
                (bin(wr_rel_o), cu.rwid[1])
            yield from set_cu_inputs(cu, inp)
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            wrmask = yield cu.wrmask
            print("after inputs, rd_rel, wr_rel, wrmask: ",
                  bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))

            # call simulated operation
            yield from sim.execute_one()
            yield Settle()
            # state for the *next* iteration; "code" above still refers to
            # the instruction that was just executed
            pc = sim.pc.CIA.value
            index = pc//4
            msr = sim.msr.value

            # get all outputs (one by one, just "because")
            res = yield from get_cu_outputs(cu, code)
            wrmask = yield cu.wrmask
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            print("after got outputs, rd_rel, wr_rel, wrmask: ",
                  bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))

            # wait for busy to go low
            while True:
                busy_o = yield cu.busy_o
                print("busy", busy_o)
                if not busy_o:
                    break
                yield

            # reset read-mask.  IMPORTANT when there are no operands
            yield cu.rdmaskn.eq(0)
            yield

            # debugging issue with branch
            if self.funit == Function.BRANCH:
                lr = yield cu.alu.pipe1.n.data_o.lr.data
                lr_ok = yield cu.alu.pipe1.n.data_o.lr.ok
                print("lr:", hex(lr), lr_ok)

            # sigh.  hard-coded.  test memory
            if self.funit == Function.LDST:
                yield from dump_sim_memory(self, l0, sim, code)
                yield from check_sim_memory(self, l0, sim, code)
                yield from self.iodef.check_cu_outputs(res, pdecode2,
                                                       sim, cu,
                                                       code)
            else:
                yield from self.iodef.check_cu_outputs(res, pdecode2,
                                                       sim, cu.alu,
                                                       code)

    def run_all(self):
        """Build the test harness, then simulate every test in ``test_data``.

        A VCD trace named ``<funit name, lower-cased>_simulator.vcd`` is
        written while the simulation runs.
        """
        m = Module()
        comb = m.d.comb
        instruction = Signal(32)

        pdecode = create_pdecode()
        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)

        # copy of the decoder for simulator
        simdec = create_pdecode()
        simdec2 = PowerDecode2(simdec)
        m.submodules.simdec2 = simdec2  # pain in the neck

        if self.funit == Function.LDST:
            from soc.experiment.l0_cache import TstL0CacheBuffer
            pspec = TestMemPspec(ldst_ifacetype='test_bare_wb',
                                 addr_wid=48,
                                 mask_wid=8,
                                 reg_wid=64)
            m.submodules.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
            pi = l0.l0.dports[0]
            m.submodules.cu = cu = self.fukls(pi, idx=0, awid=3)
            m.d.comb += cu.ad.go_i.eq(cu.ad.rel_o)  # link addr direct to rel
            m.d.comb += cu.st.go_i.eq(cu.st.rel_o)  # link store direct to rel
        else:
            m.submodules.cu = cu = self.fukls(0)
            l0 = None

        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
        sim = Simulator(m)

        sim.add_clock(1e-6)

        def process():
            # start with issue de-asserted for one cycle
            yield cu.issue_i.eq(0)
            yield

            for test in self.test_data:
                print(test.name)
                with self.subTest(test.name):
                    yield from self.execute(cu, l0, instruction,
                                            pdecode2, simdec2,
                                            test)

        sim.add_sync_process(process)

        name = self.funit.name.lower()
        with sim.write_vcd("%s_simulator.vcd" % name,
                           traces=[]):
            sim.run()