remove enough chars to get slide onto 1 page, pospopcount.s
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_ddffirst / pospopcount.s
# Positional popcount loop (Power ISA with SVP64 prefixes): consumes the input
# in chunks of up to 8 bytes, bit-transposes each chunk, popcounts the
# transposed bytes and adds them into a vector of running totals.
# Registers as used below:
#   r3    in: count moved to CTR; then overwritten with VL by the first setvl
#   r4    in: load address, updated by the post-increment load
#   r6    scratch: the bytes of the current chunk, packed at element width 8
#   r8    scratch: r6 after the bit transpose
#   r24.. scratch: popcounts for the current chunk
#   r16.. in/out: running totals. Never initialised in this listing --
#         presumably zeroed by the caller, TODO confirm
1 mtspr 9, 3 # move r3 to CTR (SPR 9): the loop counter
2 setvl 3,0,8,0,1,1 # MVL=8, VL=r3=MIN(MVL,CTR): a final chunk may be shorter than 8
3 # load VL bytes (updating the r4 address) at element width 8 (dw=8)
4 addi 6, 0, 0 # zero all 64 bits of r6 first, so bytes not loaded when VL<8 stay 0
5 sv.lbzu/pi/dw=8 *6, 1(4)
6 # gather performs a bit transpose of r6 into r8 (gets us to positional form)
7 gbbd 8,6
8 # bits are now grouped by position: popcount them and add to the totals
9 setvl 0,0,8,0,1,1 # force MVL=VL=8, independent of how many bytes were loaded
10 sv.popcntd/sw=8 *24,*8 # popcount with 8-bit source elements (sw=8) from r8 into r24..
11 sv.add *16,*16,*24 # r16.. += r24.. : accumulate in results
12 # branch back if CTR is non-zero; works even though VL is 8 at this point
# NOTE(review): -0x28 is a hard-coded relative displacement, so adding or
# removing any instruction inside the loop requires recomputing it. It
# presumably lands on the first setvl so VL is re-derived from CTR and r6 is
# re-zeroed on every pass -- verify against the assembled instruction sizes.
13 sv.bc/all 16, *0, -0x28 # reduce CTR by VL