Add sign extend to the Test ALU
[soc.git] / src / soc / experiment / alu_hier.py
1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
2
3 This ALU is *deliberately* designed to add in (unnecessary) delays into
4 different operations so as to be able to test the 6600-style matrices
5 and the CompUnits. Countdown timers wait for (defined) periods before
6 indicating that the output is valid
7
8 A "real" integer ALU would place the answers onto the output bus after
9 only one cycle (sync)
10 """
11
12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
13 from nmigen.hdl.rec import Record, Layout
14 from nmigen.cli import main
15 from nmigen.cli import verilog, rtlil
16 from nmigen.compat.sim import run_simulation
17 from nmutil.extend import exts
18 from nmutil.gtkw import write_gtkw
19
20 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
21 # Also, check out the cxxsim nmigen branch, and latest yosys from git
22 from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
23 is_engine_pysim)
24
25 from soc.decoder.power_enums import MicrOp, Function, CryIn
26
27 from soc.fu.alu.alu_input_record import CompALUOpSubset
28 from soc.fu.cr.cr_input_record import CompCROpSubset
29
30 import operator
31
32
class Adder(Elaboratable):
    """Combinatorial adder with optional inversion of operand ``a``.

    ``o`` is driven with ``a + b``; while ``invert_in`` is asserted it is
    driven with ``(~a) + b`` instead.  All data ports are ``width`` bits.
    """

    def __init__(self, width):
        self.invert_in = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        with m.If(self.invert_in):
            # bitwise-inverted first operand
            comb += self.o.eq((~self.a) + self.b)
        with m.Else():
            # plain addition
            comb += self.o.eq(self.a + self.b)
        return m
47
48
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o`` is driven with ``a - b``."""

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        comb += self.o.eq(self.a - self.b)
        return m
59
60
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o`` is driven with ``a * b``.

    ``o`` is only ``width`` bits wide, the same as each operand.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        comb += self.o.eq(self.a * self.b)
        return m
71
72
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o`` is driven with ``a >> b``."""

    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        # the mask covers all `width` bits of b, so btrunc carries the
        # whole shift amount
        all_ones = Const((1 << self.width) - 1)
        btrunc = Signal(self.width)
        comb += btrunc.eq(self.b & all_ones)
        comb += self.o.eq(self.a >> btrunc)
        return m
86
87
class SignExtend(Elaboratable):
    """Combinatorial sign-extender built on ``nmutil.extend.exts``.

    Parameters:
        width: width in bits of both the ``a`` input and the ``o`` output.
        from_width: number of low-order bits of ``a`` that hold the value
            to be sign-extended.  Defaults to 8, which was previously
            hard-coded.
            NOTE(review): presumably must not exceed ``width`` -- confirm
            against ``exts``.
    """

    def __init__(self, width, from_width=8):
        self.width = width
        self.from_width = from_width
        self.a = Signal(width)
        self.o = Signal(width, name="exts_o")

    def elaborate(self, platform):
        m = Module()
        # extend the low `from_width` bits of a out to the full width
        m.d.comb += self.o.eq(exts(self.a, self.from_width, self.width))
        return m
98
99
class Dummy:
    """Empty attribute holder.

    The ALU classes in this file hang their ``p``/``n`` handshake and data
    attributes off instances of this class.
    """
102
103
class DummyALU(Elaboratable):
    """Placeholder ALU: registers operand ``a`` onto the output.

    Carries a CompCROpSubset operation record and three ``width``-bit
    operands (``a``, ``b``, ``c``).  The ``p`` (input side) and ``n``
    (output side) containers are shaped to look like the nmutil pipeline
    API: valid_i/ready_o on ``p``, valid_o/ready_i on ``n``.
    """

    def __init__(self, width):
        # containers that mimic the nmutil pipeline API
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()
        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        # three operands, named i1..i3
        operands = [Signal(width, name="i%d" % idx) for idx in (1, 2, 3)]
        self.i = Array(operands)
        self.a, self.b, self.c = operands
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # alias the same objects under pipeline-API attribute names
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # valid input: act only if "ready" has not been raised yet
            with m.If(~self.p.ready_o):
                # raise ready, capture operand a, and start the count at 1
                sync += self.p.ready_o.eq(1)

                sync += self.o.eq(self.a)
                comb += go_now.eq(1)
                sync += self.counter.eq(1)

        with m.Else():
            # input is no longer valid: drop ready too.
            # (a "proper" ALU would have had to latch opcode and operands)
            sync += self.p.ready_o.eq(0)

        # counter at 1 (or an immediate go): flag the output as valid
        with m.If((self.counter == 1) | go_now):
            sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            # recipient acknowledged: return to the known-good idle state
            sync += self.n.valid_o.eq(0)
            sync += self.counter.eq(0)  # reset the counter
            sync += self.o.eq(0)  # clear the output, for tidiness

        # count down to 1 (1 -> 0 happens only on acknowledgement, above)
        with m.If(self.counter > 1):
            sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        yield from self.op.ports()
        for port in (self.a, self.b, self.c, self.o):
            yield port

    def ports(self):
        """Return every port yielded by iterating this ALU, as a list."""
        return [*self]
178
179
class ALU(Elaboratable):
    """Test ALU with deliberately different latencies per operation.

    The operation is selected by ``op.insn_type``:

    * OP_MUL_L64 loads the counter with 5 and returns ``a * b``
    * OP_ADD loads the counter with 3 and returns the Adder output
      (honouring ``op.invert_in``)
    * OP_SHR and OP_EXTS load the counter with 1 and return the Shifter /
      SignExtend output
    * any other operation is zero-delay and presents ``a - b``
      combinatorially

    ``p`` and ``n`` mimic the nmutil pipeline API (valid_i/ready_o on the
    input side, valid_o/ready_i on the output side).
    """

    def __init__(self, width):
        # containers that mimic the nmutil pipeline API
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()
        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        # two operands, named i1 and i2
        operands = [Signal(width, name="i%d" % idx) for idx in (1, 2)]
        self.i = Array(operands)
        self.a, self.b = operands
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # alias the same objects under pipeline-API attribute names
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()
        comb, sync = m.d.comb, m.d.sync
        insn_type = self.op.insn_type

        # one functional unit per supported operation
        m.submodules.add = add = Adder(self.width)
        m.submodules.mul = mul = Multiplier(self.width)
        m.submodules.shf = shf = Shifter(self.width)
        m.submodules.sub = sub = Subtractor(self.width)
        m.submodules.ext_sign = ext_sign = SignExtend(self.width)

        # every unit sees the operands all the time (not ideal, but this
        # is only a test ALU)
        for unit in (add, mul, shf, sub):
            comb += [
                unit.a.eq(self.a),
                unit.b.eq(self.b),
            ]
        comb += ext_sign.a.eq(self.a)

        # pass invert (and carry later)
        comb += add.invert_in.eq(self.op.invert_in)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # the sequencer is idle while the count is zero...
        alu_idle = Signal(reset_less=True)
        comb += alu_idle.eq(self.counter == 0)

        # ...and done when the count is one
        alu_done = Signal(reset_less=True)
        comb += alu_done.eq(self.counter == 1)

        # handshake style depends on whether this operation is zero-delay
        with m.If(go_now):
            # combinatorial operation: hand the handshake signals straight
            # through to the other side
            comb += self.p.ready_o.eq(self.n.ready_i)
            comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential operation: ready only while idle, valid only
            # when the count says done
            comb += self.p.ready_o.eq(alu_idle)
            comb += self.n.valid_o.eq(alu_done)

        # register that holds the result while the counter runs down
        alu_r = Signal(self.width)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # "fake" pipeline: capture the answer immediately
                with m.If(insn_type == MicrOp.OP_ADD):
                    sync += alu_r.eq(add.o)
                with m.Elif(insn_type == MicrOp.OP_MUL_L64):
                    sync += alu_r.eq(mul.o)
                with m.Elif(insn_type == MicrOp.OP_SHR):
                    sync += alu_r.eq(shf.o)
                with m.Elif(insn_type == MicrOp.OP_EXTS):
                    sync += alu_r.eq(ext_sign.o)
                # SUB is zero-delay, so it is never registered

                # NOTE: the delays below are arbitrary, purely for testing

                # MUL: count of 5
                with m.If(insn_type == MicrOp.OP_MUL_L64):
                    sync += self.counter.eq(5)
                # SHIFT: count of 1, i.e. done straight away
                with m.Elif(insn_type == MicrOp.OP_SHR):
                    sync += self.counter.eq(1)
                # ADD: count of 3
                with m.Elif(insn_type == MicrOp.OP_ADD):
                    sync += self.counter.eq(3)
                # EXTS: count of 1
                with m.Elif(insn_type == MicrOp.OP_EXTS):
                    sync += self.counter.eq(1)
                # everything else: no delay at all
                with m.Else():
                    comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # not idle: count down, but once done only step (1 -> 0) when
            # the recipient is ready
            sync += self.counter.eq(self.counter - 1)

        # output is either the zero-delay result or the registered one
        with m.If(go_now):
            comb += self.o.eq(sub.o)
        # the registered result is presented only in the "done" cycle
        with m.Elif(alu_done):
            comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        yield from self.op.ports()
        for port in (self.a, self.b, self.o,
                     self.p.valid_i, self.p.ready_o,
                     self.n.valid_o, self.n.ready_i):
            yield port

    def ports(self):
        """Return every port yielded by iterating this ALU, as a list."""
        return [*self]
315
316
class BranchOp(Elaboratable):
    """Combinatorial comparison unit.

    ``op`` is a two-argument callable (e.g. ``operator.gt``) applied to
    ``a`` and ``b``; ``o`` is driven with 1 when the comparison holds and
    0 otherwise.
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        self.op = op

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        condition = self.op(self.a, self.b)
        comb += self.o.eq(Mux(condition, 1, 0))
        return m
328
329
class BranchALU(Elaboratable):
    """Branch-compare ALU with a fixed (fake) count of 5.

    The 2-bit ``op`` selects the comparison applied to ``a`` and ``b``:
    0 = greater-than, 1 = less-than, 2 = equal, 3 = not-equal.  ``p`` and
    ``n`` mimic the nmutil pipeline API handshake.
    """

    def __init__(self, width):
        # containers that mimic the nmutil pipeline API
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()
        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        # two operands, named i1 and i2
        operands = [Signal(width, name="i%d" % idx) for idx in (1, 2)]
        self.i = Array(operands)
        self.a, self.b = operands
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        # one comparison unit per opcode; list position == opcode value
        m.submodules.bgt = bgt = BranchOp(self.width, operator.gt)
        m.submodules.blt = blt = BranchOp(self.width, operator.lt)
        m.submodules.beq = beq = BranchOp(self.width, operator.eq)
        m.submodules.bne = bne = BranchOp(self.width, operator.ne)
        comparators = [bgt, blt, beq, bne]

        for unit in comparators:
            comb += [
                unit.a.eq(self.a),
                unit.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # valid input: act only if "ready" has not been raised yet
            with m.If(~self.p.ready_o):
                # raise ready and initialise
                sync += self.p.ready_o.eq(1)

                # "fake" pipeline: capture the selected comparison now
                with m.Switch(self.op):
                    for opcode, unit in enumerate(comparators):
                        with m.Case(opcode):
                            sync += self.o.eq(unit.o)
                # a branch is given a (fake) count of 5
                sync += self.counter.eq(5)
                # (driving go_now here would make it zero-delay; disabled)
        with m.Else():
            # input is no longer valid: drop ready too.
            # (a "proper" ALU would have had to latch opcode and operands)
            sync += self.p.ready_o.eq(0)

        # counter at 1 (or an immediate go): flag the output as valid
        with m.If((self.counter == 1) | go_now):
            sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            # recipient acknowledged: return to the known-good idle state
            sync += self.n.valid_o.eq(0)
            sync += self.counter.eq(0)  # reset the counter
            sync += self.o.eq(0)  # clear the output, for tidiness

        # count down to 1 (1 -> 0 happens only on acknowledgement, above)
        with m.If(self.counter > 1):
            sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        for port in (self.op, self.a, self.b, self.o):
            yield port

    def ports(self):
        """Return every port yielded by iterating this ALU, as a list."""
        return [*self]
413
414
def run_op(dut, a, b, op, inv_a=0):
    """Simulation generator: push one operation through ``dut``.

    Presents operands ``a``/``b``, instruction type ``op`` and the
    invert flag ``inv_a``, waits for ``p.ready_o``, clears the inputs,
    waits for ``n.valid_o`` and returns the value read from ``dut.o``.
    """
    # present the inputs and raise both valid_i and ready_i
    setup = (
        dut.a.eq(a),
        dut.b.eq(b),
        dut.op.insn_type.eq(op),
        dut.op.invert_in.eq(inv_a),
        dut.n.ready_i.eq(0),
        dut.p.valid_i.eq(1),
        dut.n.ready_i.eq(1),
    )
    for stmt in setup:
        yield stmt
    yield

    # wait for the ALU to accept our input data
    while True:
        accepted = yield dut.p.ready_o
        if accepted:
            break
        yield

    # withdraw the inputs
    teardown = (
        dut.p.valid_i.eq(0),
        dut.a.eq(0),
        dut.b.eq(0),
        dut.op.insn_type.eq(0),
        dut.op.invert_in.eq(0),
    )
    for stmt in teardown:
        yield stmt

    # wait for the ALU to present the output data
    while True:
        produced = yield dut.n.valid_o
        if produced:
            break
        yield

    # latch the result and lower ready_i
    result = yield dut.o
    yield dut.n.ready_i.eq(0)

    return result
444
445
def alu_sim(dut):
    """Simulation generator: run a fixed series of operations via run_op.

    Each result is printed and then checked against its expected value.
    """
    # (label, a, b, operation, invert flag, expected result)
    cases = (
        ("alu_sim add", 5, 3, MicrOp.OP_ADD, 0, 8),
        ("alu_sim mul", 2, 3, MicrOp.OP_MUL_L64, 0, 6),
        ("alu_sim add-inv", 5, 3, MicrOp.OP_ADD, 1, 65533),
        # zero-delay path: there is no OP_SUB, so any otherwise-unhandled
        # operation is used to select the subtractor
        ("alu_sim sub", 5, 3, MicrOp.OP_NOP, 0, 2),
        ("alu_sim shr", 13, 2, MicrOp.OP_SHR, 0, 3),
    )
    for label, a, b, op, inv_a, expected in cases:
        result = yield from run_op(dut, a, b, op, inv_a=inv_a)
        print(label, result)
        assert (result == expected)
468
469
def test_alu():
    """Simulate a 16-bit ALU with alu_sim, then dump its RTLIL.

    Writes test_alusim.gtkw, test_alusim.vcd and test_alu.il.
    """
    alu = ALU(width=16)
    write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
    processes = {"sync": alu_sim(alu)}
    run_simulation(alu, processes, vcd_name='test_alusim.vcd')

    il_text = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as il_file:
        il_file.write(il_text)
478
479
def test_alu_parallel():
    """Exercise a 16-bit ALU with independent producer/consumer processes.

    Unlike test_alu, inputs are sent without waiting for the previous
    result; the consumer checks that results arrive in order.  Writes
    test_alu_parallel.gtkw and test_alu_parallel.vcd.
    """
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
                   pysim=is_engine_pysim())

    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0):
        # present the input data and assert valid_i
        drive = (
            dut.a.eq(a),
            dut.b.eq(b),
            dut.op.insn_type.eq(op),
            dut.op.invert_in.eq(inv_a),
            dut.p.valid_i.eq(1),
        )
        for stmt in drive:
            yield stmt
        yield
        # wait for ready_o to be asserted
        while True:
            accepted = yield dut.p.ready_o
            if accepted:
                break
            yield
        # clear the input data and negate valid_i.
        # when send is called again straight away, no transition is
        # visible (the signals never actually get negated)
        release = (
            dut.p.valid_i.eq(0),
            dut.a.eq(0),
            dut.b.eq(0),
            dut.op.insn_type.eq(0),
            dut.op.invert_in.eq(0),
        )
        for stmt in release:
            yield stmt

    def receive():
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while True:
            produced = yield dut.n.valid_o
            if produced:
                break
            yield
        # read the result
        result = yield dut.o
        # negate ready_i.
        # when receive is called again straight away, no transition is
        # visible (the signal never actually gets negated)
        yield dut.n.ready_i.eq(0)
        return result

    def expect(expected):
        # receive one result and check it
        result = yield from receive()
        assert (result == expected)

    def producer():
        # send a few test cases, interspersed with wait states.
        # note: this test does not wait for a result to be ready before
        # presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64)
        # (~5) + 3, i.e. add with the first operand bit-inverted
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
        yield
        # 5 - 3 (a zero-delay operation)
        yield from send(5, 3, MicrOp.OP_NOP)
        yield
        yield
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)
        # sign extend 13
        yield from send(13, 2, MicrOp.OP_EXTS)
        # sign extend -128 (8 bits)
        yield from send(0x80, 2, MicrOp.OP_EXTS)

    def consumer():
        # receive and check results, interspersed with wait states.
        # the consumer is not in step with the producer, but the order
        # of the results is preserved
        yield
        # 5 + 3 = 8
        yield from expect(8)
        # 2 * 3 = 6
        yield from expect(6)
        yield
        yield
        # (~5) + 3 = -3, which reads back as 65533 unsigned (16 bits)
        yield from expect(65533)
        # 5 - 3 = 2 (zero-delay): this and the previous result are
        # received back-to-back (check the output waveform to see this)
        yield from expect(2)
        yield
        yield
        # 13 >> 2 = 3
        yield from expect(3)
        # sign extend 13 = 13
        yield from expect(13)
        # sign extend -128 (8 bits) = -128 (16 bits)
        yield from expect(0xFF80)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    with sim.write_vcd("test_alu_parallel.vcd"):
        sim.run()
589
590
def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
                   pysim=True, width=16):
    """Common function to write the GTKWave documents for this module.

    Parameters:
        gtkw_name: name of the .gtkw file to write; the matching VCD name
            is derived by replacing '.gtkw' with '.vcd'.
        clk_period: clock period, passed through to write_gtkw.
        sub_module: name of the submodule holding the DUT, or None when
            the DUT is the top-level module.
        pysim: selects the trace name used for op__insn_type (without a
            bit range when true, with '[6:0]' otherwise).
        width: data width of the ALU being traced.  Defaults to 16, which
            was previously hard-coded into the i1/i2/alu_o trace names.
    """
    # single-bit traces (like 'clk' or 'valid_i' below) carry no range
    bus = '[%d:0]' % (width - 1) if width > 1 else ''
    gtkwave_desc = [
        'clk',
        'i1' + bus,
        'i2' + bus,
        'op__insn_type' if pysim else 'op__insn_type[6:0]',
        'op__invert_in',
        'valid_i',
        'ready_o',
        'valid_o',
        'ready_i',
        'alu_o' + bus,
    ]
    # determine the module name of the DUT
    module = 'top'
    if sub_module is not None:
        module = nmigen_sim_top_module + sub_module
    vcd_name = gtkw_name.replace('.gtkw', '.vcd')
    write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
               loc=__file__, clk_period=clk_period, base='signed')
613
614
if __name__ == "__main__":
    # run the sequential test first, then the producer/consumer test
    for run_test in (test_alu, test_alu_parallel):
        run_test()

    # kept for reference: RTLIL dump of the branch ALU
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)