split out Popcount into separate module: visually it interferes with readability...
[soc.git] / src / soc / fu / logical / popcount.py
1 # Popcount
2 from nmigen import (Elaboratable, Module, Signal, Cat, Mux, Const)
3
4
def array_of(count, bitwidth):
    """Return a list of `count` new reset-less Signals, each `bitwidth` wide.

    Signals are named ``pop_<bitwidth>_<index>``, with the index running
    from 0 to ``count - 1`` in list order.
    """
    return [
        Signal(bitwidth, reset_less=True,
               name=f"pop_{bitwidth}_{i}")
        for i in range(count)
    ]
11
12
class Popcount(Elaboratable):
    """Combinatorial population count of a 64-bit value.

    Ports (all 64-bit, reset-less):

    * ``a``        - input whose set bits are counted
    * ``b``        - declared for callers; not read by ``elaborate()``
    * ``data_len`` - selects how the counts are packed into ``o``:
      1 gives eight per-byte counts (popcntb), 4 gives two
      per-32-bit-word counts (popcntw), any other value gives a single
      count over all 64 bits (popcntd)
    * ``o``        - packed result
    """

    def __init__(self):
        self.a = Signal(64, reset_less=True)
        self.b = Signal(64, reset_less=True)
        self.data_len = Signal(64, reset_less=True)
        self.o = Signal(64, reset_less=True)

    def elaborate(self, platform):
        """Build and return the Module implementing the adder tree."""
        m = Module()
        comb = m.d.comb
        a, data_len, o = self.a, self.data_len, self.o

        # starting from a, perform successive addition-reductions,
        # creating arrays big enough to store the sums at each level.
        # each entry is (number of sums, bit-width of each sum):
        # 32x 2-bit (each the sum of two 1-bit inputs), 16x 3-bit, etc.
        work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 7)]
        pc = [a]
        for count, bw in work:
            pc.append(array_of(count, bw))
        pc8 = pc[3]      # 8 counts, one per 8-bit group, 4 bits each (popcntb)
        pc32 = pc[5]     # 2 counts, one per 32-bit group, 6 bits each (popcntw)
        popcnt = pc[-1]  # 1 count over all 64 bits, 7 bits wide (popcntd)

        # cascade-tree of adds: entry i of each level is the sum of
        # entries 2i and 2i+1 of the level below
        for idx, (count, _bw) in enumerate(work):
            src, dst = pc[idx], pc[idx+1]
            for i in range(count):
                stt, end = i*2, i*2+1
                # each operand gets an explicit zero bit appended on top
                # before the add
                comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
                                  Cat(src[end], Const(0, 1)))

        # decode operation length
        with m.If(data_len == 1):
            # popcntb - pack 8x 4-bit answers into output, one per byte
            for i in range(8):
                comb += o[i*8:(i+1)*8].eq(pc8[i])
        with m.Elif(data_len == 4):
            # popcntw - pack 2x 6-bit answers into output, one per word
            for i in range(2):
                comb += o[i*32:(i+1)*32].eq(pc32[i])
        with m.Else():
            # popcntd - put 1x 7-bit answer into output
            comb += o.eq(popcnt[0])

        return m