bug 1244: update maxloc slides after finding shorter algorithm

author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 13 Feb 2024 12:15:05 +0000 (12:15 +0000)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 13 Feb 2024 12:15:05 +0000 (12:15 +0000)
diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex b/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex

index f196881e6a73e487ded1bfc6f9b52e46c03e1f37..49e37798cf32501352e374b3a9d2fc2c08fdd6d5 100644 (file)
--- a/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex
+++ b/conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex
@@ -221,14 +221,15 @@ for (i = 0; i < VL; i++)
  
         \begin{itemize}
                 \item FORTRAN MAXLOC - find the index of largest number
-               \item notoriously difficult to optimally implement for SIMD
+                     notoriously difficult to optimally implement for SIMD
                 \item algorithms include \textit{depth-first} recursive
                       descent (!) mapreduce-style, offsetting the
                       locally-computed largest index (plus value) which
                       are then tested in upper level(s)
-               \item SVP64 through Data-Dependent Fail-First can perform
-                         each of the two key while-loop tests with
-                         \textit{single instructions}.
+               \item SVP64: note below the sv.cmp (first while-loop),
+               sv.minmax. (second while-loop) and the sv.crnand which
+               by Predicate masking is 3-in 1-out CR ops
+               not the usual 2-in 1-out
                 \item There is however quite a bit of "housekeeping".
                         Full analysis: \\
         https://libre-soc.org/openpower/sv/cookbook/fortran\_maxloc
@@ -238,7 +239,6 @@ for (i = 0; i < VL; i++)
  \frame{\frametitle{maxloc assembler}
         
         \lstinputlisting[language={}]{maxloc.s}
-       
  }
  
  \frame{\frametitle{Summary}
diff --git a/conferences/fosdem2024/fosdem2024_ddffirst/maxloc.s b/conferences/fosdem2024/fosdem2024_ddffirst/maxloc.s

index 2639c112d8ffbe3de4ce8a1b17ea951d12342891..a343ab93153415837a670f160c43ffe565c76c66 100644 (file)
--- a/conferences/fosdem2024/fosdem2024_ddffirst/maxloc.s
+++ b/conferences/fosdem2024/fosdem2024_ddffirst/maxloc.s
@@ -12,4 +12,4 @@ sv.crnand/m=lt/zz *19,*16,0 # SO=~LT, if CR0.eq=0
  #   nm = i: count masked bits. could use crweirds
  sv.svstep/mr/m=so 1,0,6,1 # get vector dststep
  sv.creqv *16,*16,*16  # set mask on already-tested
-bc 12,0,-0x40         # CR0 lt clear, branch back
+bc 12,0,-0x3c         # CR0 lt clear, branch back
author	Luke Kenneth Casson Leighton <lkcl@lkcl.net>
	Tue, 13 Feb 2024 12:15:05 +0000 (12:15 +0000)
committer	Luke Kenneth Casson Leighton <lkcl@lkcl.net>
	Tue, 13 Feb 2024 12:15:05 +0000 (12:15 +0000)
conferences/fosdem2024/fosdem2024_ddffirst/fosdem2024_ddffirst.tex		patch | blob | history
conferences/fosdem2024/fosdem2024_ddffirst/maxloc.s		patch | blob | history