from nmigen import (Elaboratable, Module, Signal, Cat, Mux, Const)
def array_of(count, bitwidth):
    """Build a list of ``count`` independent Signals of width ``bitwidth``.

    Each Signal is created reset-less and is named ``pop_{bitwidth}_{i}``,
    where ``i`` is its position in the returned list.

    Args:
        count: number of Signals to create.
        bitwidth: width, in bits, passed to each Signal.

    Returns:
        A list of ``count`` Signals, in index order.
    """
    # NOTE(review): the accumulator initialisation, the loop over ``i`` and
    # the return statement were absent from the extracted source; they are
    # reconstructed from the surviving ``res.append(Signal(...))`` call and
    # its use of ``i`` in the name -- confirm against the upstream file.
    return [
        Signal(bitwidth, reset_less=True, name=f"pop_{bitwidth}_{i}")
        for i in range(count)
    ]
class Popcount(Elaboratable):
    """Population count built as a cascading tree of pairwise adders.

    Starting from the individual bits of ``a``, each level adds adjacent
    pairs of the previous level's partial sums, halving the number of
    entries and widening each one by a bit, until a single total remains.
    ``data_len`` selects which level of the tree is packed into ``o``.

    Attributes (all 64-bit, reset-less Signals):
        a: value whose set bits are counted.
        b: declared here but not read by ``elaborate``.
        data_len: selector; 1 picks the eight per-byte counts, 4 picks the
            two per-word counts, any other value picks the full count.
            (presumably the operand length in bytes -- verify with caller)
        o: result.
    """

    def __init__(self):
        # NOTE(review): the ``def __init__`` header was absent from the
        # extracted source; the four assignments below survived intact.
        self.a = Signal(64, reset_less=True)
        self.b = Signal(64, reset_less=True)
        self.data_len = Signal(64, reset_less=True)
        self.o = Signal(64, reset_less=True)

    def elaborate(self, platform):
        """Construct and return the combinatorial Module for the popcount.

        Args:
            platform: accepted for the Elaboratable interface; not used.

        Returns:
            The Module holding the adder tree and the output selection.
        """
        # NOTE(review): the Module/comb setup, the seeding of ``pc`` with
        # ``a``, the loop headers, the final Else and the return were absent
        # from the extracted source. They are reconstructed from how ``m``,
        # ``comb``, ``pc``, ``i``, ``stt`` and ``end`` are used in the
        # surviving statements -- confirm against the upstream file.
        m = Module()
        comb = m.d.comb
        a, data_len, o = self.a, self.data_len, self.o

        # starting from a, perform successive addition-reductions,
        # creating arrays big enough to store the sums, each time
        pc = [a]
        # (entries, bits per entry): QTY 32 2-bit (to take 2x 1-bit sums) etc.
        work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 7)]
        for count, bw in work:
            pc.append(array_of(count, bw))
        pc8 = pc[3]      # 8 entries, 4 bits each: one count per byte (popcntb)
        pc32 = pc[5]     # 2 entries, 6 bits each: one count per word (popcntw)
        popcnt = pc[-1]  # 1 entry, 7 bits: count over all 64 bits (popcntd)

        # cascade-tree of adds: entry i of a level is the sum of entries
        # 2*i and 2*i+1 of the level before it
        for idx, (count, _bw) in enumerate(work):
            src, dst = pc[idx], pc[idx + 1]
            for i in range(count):
                stt, end = i * 2, i * 2 + 1
                # each operand is extended by one zero bit before the add
                comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
                                  Cat(src[end], Const(0, 1)))

        # decode operation length
        with m.If(data_len == 1):
            # popcntb - place each 4-bit byte count in its own 8-bit slot
            for i, byte_count in enumerate(pc8):
                comb += o[i * 8:(i + 1) * 8].eq(byte_count)
        with m.Elif(data_len == 4):
            # popcntw - place each 6-bit word count in its own 32-bit slot
            for i, word_count in enumerate(pc32):
                comb += o[i * 32:(i + 1) * 32].eq(word_count)
        with m.Else():
            # popcntd - put the single 7-bit total into the output
            comb += o.eq(popcnt[0])

        return m