c8347927f930cd08dfe62ce3b550b6892a3e1f4e
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_ddffirst / pospopcount.s
1
# Positional-popcount loop (going by the file name, pospopcount.s).
# Each pass loads up to 8 bytes, bit-transposes them with gbbd, popcounts
# the transposed values and adds the counts into the vector at *16.
# NOTE(review): register roles on entry are inferred from the comments
# below (r3 = number of bytes, r4 = source address, *16 = running totals)
# and are not established by this file alone - confirm against the caller.
2 mtspr 9, 3 # copy r3 into CTR (SPR 9); CTR is what the final branch tests
3 setvl 3,0,8,0,1,1 # set MVL=8, VL=r3=MIN(MVL,CTR)
4 # load VL bytes (updating the address in r4), packed as 8-bit elements (dw=8)
5 addi 6, 0, 0 # zero all 64 bits of r6 (presumably so bytes left unwritten when VL<8 read as zero - TODO confirm)
6 sv.lbzu/pi/dw=8 *6, 1(4) # TODO: should be /lf here as well
7 # gbbd performs the bit transpose (which is what makes the counts positional)
8 gbbd 8,6
9 # the bits are now transposed: popcount them and add to the running totals
10 setvl 0,0,8,0,1,1 # set MVL=VL=8 (full vector length, independent of CTR)
11 sv.popcntd/sw=8 *24,*8 # popcount each transposed 8-bit source element (sw=8) into *24
12 sv.add *16,*16,*24 # accumulate this pass's counts into the totals at *16
13 # branch back while CTR is still non-zero; this works even though VL=8 here
# NOTE(review): -0x28 is a hard-coded relative offset that appears to target
# the first setvl above - re-check it if any instruction is added or removed.
14 sv.bc/all 16, *0, -0x28 # reduce CTR by VL and stop once it goes negative