bug 1244: add maxloc.s and maxloc.py
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 12 Feb 2024 17:46:55 +0000 (17:46 +0000)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 12 Feb 2024 17:46:55 +0000 (17:46 +0000)
conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex
conferences/fosdem2024/fosdem2024_ddffirst/maxloc.py [new file with mode: 0644]
conferences/fosdem2024/fosdem2024_ddffirst/maxloc.s [new file with mode: 0644]

index 30c79c5f91fa402e4fa499d96a4ef33ac4f11952..3317021a89dc363359d71efa3d41085becfebaa8 100644 (file)
@@ -260,6 +260,7 @@ for (i = 0; i < VL; i++)
     \item http://libre-soc.org/
        \item https://nlnet.nl/project/Libre-SOC-OpenPOWER-ISA
 \item https://bugs.libre-soc.org/show\_bug.cgi?id=676
+\item https://bugs.libre-soc.org/show\_bug.cgi?id=1244
 \item https://libre-soc.org/openpower/sv/cookbook/fortran\_maxloc
     \item https://libre-soc.org/nlnet/\#faq
   \end{itemize}
diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/maxloc.py b/conferences/fosdem2024/fosdem2024_ddffirst/maxloc.py
new file mode 100644 (file)
index 0000000..031ffc3
--- /dev/null
@@ -0,0 +1,5 @@
+
+m,nm,i,n = 0,0,0,len(a)
+while (i<n):
+  while (i<n and a[i]<=m): i += 1
+  while (i<n and a[i]> m): m,nm,i = a[i],i,i+1
diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/maxloc.s b/conferences/fosdem2024/fosdem2024_ddffirst/maxloc.s
new file mode 100644 (file)
index 0000000..f94ebb7
--- /dev/null
@@ -0,0 +1,15 @@
+# while (i<n)
+setvl 2,0,4,0,1,1            # set MVL=4, VL=MIN(MVL,CTR)
+#    while (i<n and a[i]<=m) : i += 1
+sv.cmp/ff=gt/m=ge *0,0,*10,4 # truncates VL to min
+sv.creqv *16,*16,*16         # set mask on already-tested
+setvl 2,0,4,0,1,1            # set MVL=4, VL=MIN(MVL,CTR)
+mtcrf 128, 0                 # clear CR0 (in case VL=0?)
+#    while (i<n and a[i]>m):
+sv.minmax./ff=le/m=ge/mr 4,*10,4,1 # r4 accumulator
+crternlogi 0,1,2,127         # test greater/equal or VL=0
+sv.crand *19,*16,0           # clear if CR0.eq=0
+#      nm = i (count masked bits. could use crweirds here)
+sv.svstep/mr/m=so 1,0,6,1    # svstep: get vector dststep
+sv.creqv *16,*16,*16         # set mask on already-tested
+bc 12,0, -0x40               # CR0 lt bit clear, branch back