From 088a12b91495dc31a9ec5f31157d245f32da4493 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sat, 26 May 2018 12:40:42 +0100 Subject: [PATCH] feedback from rogier bruisse --- simple_v_extension/simple_v_chennai_2018.tex | 137 ++++++++++--------- 1 file changed, 70 insertions(+), 67 deletions(-) diff --git a/simple_v_extension/simple_v_chennai_2018.tex b/simple_v_extension/simple_v_chennai_2018.tex index e185c27f5..5466364b4 100644 --- a/simple_v_extension/simple_v_chennai_2018.tex +++ b/simple_v_extension/simple_v_chennai_2018.tex @@ -28,7 +28,7 @@ \begin{itemize} \item The Designers of RISC-V\vspace{15pt} \item The RVV Working Group and contributors\vspace{15pt} - \item Jacob Bachmeyer, Xan Phung, Chuanhua Chang,\\ + \item Allen Baum, Jacob Bachmeyer, Xan Phung, Chuanhua Chang,\\ Guy Lemurieux, Jonathan Neuschafer, Roger Bruisse, and others\vspace{15pt} \item ISA-Dev Group Members\vspace{10pt} @@ -59,8 +59,8 @@ \begin{itemize} \item Extremely powerful (extensible to 256 registers)\vspace{10pt} \item Supports polymorphism, several datatypes (inc. 
FP16)\vspace{10pt} - \item Requires a separate Register File\vspace{10pt} - \item Can be implemented as a separate pipeline\vspace{10pt} + \item Requires a separate Register File (32 w/ext to 256)\vspace{10pt} + \item Implemented as a separate pipeline (no impact on scalar)\vspace{10pt} \end{itemize} However...\vspace{10pt} \begin{itemize} @@ -97,8 +97,8 @@ \begin{itemize} \item memcpy becomes much smaller (higher bang-per-buck)\vspace{10pt} \item context-switch (LOAD/STORE multiple): 1-2 instructions\vspace{10pt} + \item Compressed instrs further reduce I-cache (etc.)\vspace{10pt} \item greatly-reduced I-cache load (and less reads)\vspace{10pt} - \item parallelisation of C further reduces I-cache (etc.)\vspace{10pt} \end{itemize} Note:\vspace{10pt} \begin{itemize} @@ -191,8 +191,10 @@ \frame{\frametitle{What's the deal / juice / score?} \begin{itemize} - \item Standard Register File(s) overloaded with "vector span"\vspace{10pt} - \item Element width and type concepts remain same as RVV\vspace{10pt} + \item Standard Register File(s) overloaded with "vector span"\\ + (see pseudocode slides for examples) + \item Element width and type concepts remain same as RVV\\ + (CSRs are used to "interpret" elements in registers) \item CSRs are key-value tables (overlaps allowed)\vspace{10pt} \end{itemize} Key differences from RVV:\vspace{10pt} @@ -205,6 +207,68 @@ } +\begin{frame}[fragile] +\frametitle{ADD pseudocode (or trap, or actual hardware loop)} + +\begin{semiverbatim} +function op_add(rd, rs1, rs2, predr) # add not VADD! +  int i, id=0, irs1=0, irs2=0; +  for (i=0; i < MIN(VL, vectorlen[rd]); i++) +   if (ireg[predr] & 1< 1; -s2 = vectorlen[src2] > 1; -for (int i = 0; i < VL; ++i) - preg[rs3] |= 1 << cmp(s1 ? reg[src1+i] : reg[src1], - s2 ? 
reg[src2+i] : reg[src2]); -\end{semiverbatim} - - \begin{itemize} - \item SIMD slightly more complex (case above is elwidth = default) - \item If s1 and s2 both scalars, Standard branch occurs - \item Predication stored in integer regfile as a bitfield - \item Scalar-vector and vector-vector supported - \end{itemize} -\end{frame} - -\begin{frame}[fragile] -\frametitle{VLD/VLD.S/VLD.X (or trap, or actual hardware loop)} - -\begin{semiverbatim} -if (unit-strided) stride = elsize; -else stride = areg[as2]; // constant-strided -for (int i = 0; i < VL; ++i) - if (preg_enabled[rd] && ([!]preg[rd] & 1<