src/openpower/decoder/isa/test_caller_svp64_maxloc.py

   1 """Implementation of FORTRAN MAXLOC SVP64
   2 Copyright (C) 2022,2023 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   3 Licensed under the LGPLv3+
   4 Funded by NLnet NGI-ASSURE under EU grant agreement No 957073.
   5 * https://nlnet.nl/project/Libre-SOC-OpenPOWER-ISA
   6 * https://bugs.libre-soc.org/show_bug.cgi?id=676
   7 * https://libre-soc.org/openpower/sv/cookbook/fortran_maxloc/
   8 """
   9
  10 import unittest
  11 from copy import deepcopy
  12
  13 from nmutil.formaltest import FHDLTestCase
  14 from openpower.decoder.isa.caller import SVP64State
  15 from openpower.decoder.isa.test_caller import run_tst
  16 from openpower.decoder.selectable_int import SelectableInt
  17 from openpower.simulator.program import Program
  18 from openpower.insndb.asm import SVP64Asm
  19 from openpower.util import log
  20
  21
  22
  23 def cmpd(x, y):
  24     class CRfield:
  25         def __repr__(self):
  26             return "<lt %d gt %d eq %d>" % (self.lt, self.gt, self.eq)
  27         def __int__(self):
  28             return (CRf.lt<<3) | (CRf.gt<<2) | (CRf.eq<<1)
  29     CRf = CRfield()
  30     CRf.lt = x < y
  31     CRf.gt = x > y
  32     CRf.eq = x == y
  33     return CRf
  34
  35
  36 # example sv.minmax/ff=lt 0, 1, *10, 5
  37 # see https://bugs.libre-soc.org/show_bug.cgi?id=1183#c3
  38 def sv_maxu(gpr, vl, ra, rb, rt):
  39     CR0, i = None, 0
  40     while i < vl:
  41         CR0 = cmpd(gpr[ra+i], gpr[rb])
  42         log("sv_maxss test", i, gpr[ra + i], gpr[rb], CR0, int(CR0))
  43         gpr[rt] = gpr[ra+i] if CR0.lt else gpr[rb]
  44         if not CR0.gt:
  45             break
  46         i += 1
  47     return i, CR0 # new VL
  48
  49
  50 class DDFFirstTestCase(FHDLTestCase):
  51
  52     def _check_regs(self, sim, expected):
  53         for i in range(32):
  54             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
  55
  56     def test_sv_maxloc_1(self):
  57         self.sv_maxloc([1,2,3,4])
  58
  59     def tst_sv_maxloc_2(self):
  60         self.sv_maxloc([3,4,1,0])
  61
  62     def tst_sv_maxloc_3(self):
  63         self.sv_maxloc([2,9,8,0])
  64
  65     def tst_sv_maxloc_4(self):
  66         self.sv_maxloc([2,1,3,0])
  67
  68     def sv_maxloc(self, ra):
  69         """
  70             m, nm, i, n = 0, 0, 0, len(a)
  71             while (i<n):
  72                 while (i<n and a[i]<=m) : i += 1
  73                 while (i<n and a[i] > m): m, nm, i = a[i], i, i+1
  74             return nm
  75         """
  76
  77         lst = SVP64Asm(["sv.minmax./ff=le 4, *10, 4, 1" # scalar RB=RT
  78                 "mtspr 9, 3",               # move r3 to CTR
  79                 # VL = MIN(CTR,MAXVL=8)
  80                 "setvl 3,0,8,0,1,1",        # set MVL=8, VL=MIN(MVL,CTR)
  81                 # load VL bytes (update r4 addr) but compressed (dw=8)
  82                 "addi 6, 0, 0",             # initialise r6 to zero
  83                 "sv.lbzu/pi/dw=8 *6, 1(4)", # should be /lf here as well
  84                 # gather performs the transpose (which gets us to positional..)
  85                 "gbbd 8,6",
  86                 # now those bits have been turned around, popcount and sum them
  87                 "setvl 0,0,8,0,1,1",        # set MVL=VL=8
  88                 "sv.popcntd/sw=8 *24,*8",   # do the (now transposed) popcount
  89                 "sv.add *16,*16,*24",       # and accumulate in results
  90                 # branch back if CTR still non-zero. works even though VL=8
  91                 "sv.bc/all 16, *0, -0x28", # reduce CTR by VL and stop if -ve
  92                         ])
  93         lst = list(lst)
  94
  95         # SVSTATE
  96         svstate = SVP64State()
  97         vl = len(ra)  # VL is length of array ra
  98         svstate.vl = vl  # VL
  99         svstate.maxvl = vl  # MAXVL
 100         print("SVSTATE", bin(svstate.asint()))
 101
 102         gprs = [0] * 32
 103         gprs[4] =  rb # (RT&RB) accumulator in r4
 104         for i, ra in enumerate(ra): # vector in ra starts at r10
 105             gprs[10+i] = ra
 106             log("maxu ddff", i, gprs[10+i])
 107
 108         cr_res = [0]*8
 109         res = deepcopy(gprs)
 110
 111         expected_vl = sv_maxu(res, cr_res, vl, 10, 4, 4)
 112         log("sv_maxu", expected_vl, cr_res)
 113
 114         with Program(lst, bigendian=False) as program:
 115             sim = self.run_tst_program(program, initial_regs=gprs,
 116                                        svstate=svstate)
 117             for i in range(4):
 118                 val = sim.gpr(i).value
 119                 res.append(val)
 120                 cr_res.append(0)
 121                 log("i", i, val)
 122             # confirm that the results are as expected
 123
 124             for i, v in enumerate(cr_res[:vl]):
 125                 crf = sim.crl[i].get_range().value
 126                 log("crf", i, res[i], bin(crf), bin(int(v)))
 127                 self.assertEqual(crf, int(v))
 128
 129             for i, v in enumerate(res):
 130                 self.assertEqual(v, res[i])
 131
 132             self.assertEqual(sim.svstate.vl, expected_vl)
 133             self.assertEqual(sim.svstate.maxvl, 4)
 134             self.assertEqual(sim.svstate.srcstep, 0)
 135             self.assertEqual(sim.svstate.dststep, 0)
 136
 137     def run_tst_program(self, prog, initial_regs=None,
 138                         svstate=None,
 139                         initial_mem=None,
 140                         initial_fprs=None):
 141         if initial_regs is None:
 142             initial_regs = [0] * 32
 143         simulator = run_tst(prog, initial_regs, mem=initial_mem,
 144                             initial_fprs=initial_fprs,
 145                             svstate=svstate)
 146
 147         print("GPRs")
 148         simulator.gpr.dump()
 149         print("FPRs")
 150         simulator.fpr.dump()
 151
 152         return simulator
 153
 154
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()