bug 1244: update maxloc slides after finding shorter algorithm
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_ddffirst / fosdem2024_ddffirst.tex
index f196881e6a73e487ded1bfc6f9b52e46c03e1f37..49e37798cf32501352e374b3a9d2fc2c08fdd6d5 100644 (file)
@@ -221,14 +221,15 @@ for (i = 0; i < VL; i++)
 
        \begin{itemize}
                \item FORTRAN MAXLOC - find the index of largest number
-               \item notoriously difficult to optimally implement for SIMD
+                     (notoriously difficult to optimally implement for SIMD)
                \item algorithms include \textit{depth-first} recursive
                      descent (!) mapreduce-style, offsetting the
                      locally-computed largest index (plus value) which
                      are then tested in upper level(s)
-               \item SVP64 through Data-Dependent Fail-First can perform
-                         each of the two key while-loop tests with
-                         \textit{single instructions}.
+               \item SVP64: note below the sv.cmp (first while-loop),
+               sv.minmax. (second while-loop) and the sv.crnand, which
+               through Predicate masking is a 3-in 1-out CR op,
+               not the usual 2-in 1-out
                \item There is however quite a bit of "housekeeping".
                        Full analysis: \\
        https://libre-soc.org/openpower/sv/cookbook/fortran\_maxloc
@@ -238,7 +239,6 @@ for (i = 0; i < VL; i++)
 \frame{\frametitle{maxloc assembler}
        
        \lstinputlisting[language={}]{maxloc.s}
-       
 }
 
 \frame{\frametitle{Summary}