Been looking into SIMD and Cray Vectors, started adding explanatory content
authorAndrey Miroshnikov <andrey@technepisteme.xyz>
Fri, 17 Jun 2022 00:02:11 +0000 (01:02 +0100)
committerAndrey Miroshnikov <andrey@technepisteme.xyz>
Fri, 17 Jun 2022 00:02:11 +0000 (01:02 +0100)
svp64-primer/acronyms.tex [new file with mode: 0644]
svp64-primer/img/simd_axb.jpg [new file with mode: 0644]
svp64-primer/img/vl_reg_n.jpg [new file with mode: 0644]
svp64-primer/references.bib [new file with mode: 0644]
svp64-primer/summary.tex
svp64-primer/svp64-primer.tex

diff --git a/svp64-primer/acronyms.tex b/svp64-primer/acronyms.tex
new file mode 100644 (file)
index 0000000..0d08537
--- /dev/null
@@ -0,0 +1,8 @@
+\section{List of Acronyms}
+\begin{acronym}
+       \acro{CPU}{Central Processing Unit}
+       \acro{ISA}{Instruction Set Architecture}
+       \acro{DAXPY}{double-precision aX plus Y}
+       \acro{SIMD}{Single Instruction Multiple Data}
+       \acro{SV}{Simple Vectorisation}
+\end{acronym}
\ No newline at end of file
diff --git a/svp64-primer/img/simd_axb.jpg b/svp64-primer/img/simd_axb.jpg
new file mode 100644 (file)
index 0000000..ae18d92
Binary files /dev/null and b/svp64-primer/img/simd_axb.jpg differ
diff --git a/svp64-primer/img/vl_reg_n.jpg b/svp64-primer/img/vl_reg_n.jpg
new file mode 100644 (file)
index 0000000..6f239bb
Binary files /dev/null and b/svp64-primer/img/vl_reg_n.jpg differ
diff --git a/svp64-primer/references.bib b/svp64-primer/references.bib
new file mode 100644 (file)
index 0000000..da6b544
--- /dev/null
@@ -0,0 +1,90 @@
+@online{SIMD_HARM,
+       author = {David Patterson and Andrew Waterman},
+       ALTeditor = {editor},
+       title = {SIMD Instructions Considered Harmful},
+       date = {2017-09-18},
+       url = {https://www.sigarch.org/simd-instructions-considered-harmful/},
+       OPTsubtitle = {subtitle},
+       OPTtitleaddon = {titleaddon},
+       OPTlanguage = {English},
+       OPTversion = {version},
+       OPTnote = {note},
+       OPTorganization = {organization},
+       OPTdate = {date},
+       OPTmonth = {month},
+       OPTyear = {year},
+       OPTaddendum = {addendum},
+       OPTpubstate = {pubstate},
+       OPTurldate = {16-06-2022},
+}
+
+@online{SIMD_HPC,
+       author = {João M. P. Cardoso and José Gabriel F. Coutinho and Pedro C. Diniz},
+       ALTeditor = {editor},
+       title = {High-performance embedded computing},
+       date = {2017},
+       url = {https://www.sciencedirect.com/topics/computer-science/single-instruction-multiple-data},
+       OPTsubtitle = {subtitle},
+       OPTtitleaddon = {titleaddon},
+       OPTlanguage = {English},
+       OPTversion = {version},
+       OPTnote = {note},
+       OPTorganization = {organization},
+       OPTdate = {date},
+       OPTmonth = {month},
+       OPTyear = {year},
+       OPTaddendum = {addendum},
+       OPTpubstate = {pubstate},
+       OPTurldate = {urldate},
+}
+
+@online{SIMD_WASM,
+       author = {Nick Lewycky},
+       ALTeditor = {editor},
+       title = {WebAssembly and SIMD},
+       date = {2019-07-31},
+       url = {https://medium.com/wasmer/webassembly-and-simd-13badb9bf1a8},
+       OPTsubtitle = {subtitle},
+       OPTtitleaddon = {titleaddon},
+       OPTlanguage = {language},
+       OPTversion = {version},
+       OPTnote = {note},
+       OPTorganization = {organization},
+       OPTdate = {date},
+       OPTmonth = {month},
+       OPTyear = {year},
+       OPTaddendum = {addendum},
+       OPTpubstate = {pubstate},
+       OPTurldate = {urldate},
+}
+
+@manual{riscv-v-spec,
+       ALTauthor = {author},
+       ALTeditor = {editor},
+       title = {RISC-V "V" Vector Extension},
+       date = {date},
+       OPTsubtitle = {subtitle},
+       OPTtitleaddon = {titleaddon},
+       OPTlanguage = {language},
+       OPTedition = {edition},
+       OPTtype = {type},
+       OPTseries = {series},
+       OPTnumber = {number},
+       OPTversion = {version},
+       OPTnote = {note},
+       OPTorganization = {organization},
+       OPTpublisher = {publisher},
+       OPTlocation = {location},
+       OPTisbn = {isbn},
+       OPTchapter = {2},
+       OPTpages = {8},
+       OPTpagetotal = {111},
+       OPTaddendum = {addendum},
+       OPTpubstate = {pubstate},
+       OPTdoi = {doi},
+       OPTeprint = {eprint},
+       OPTeprintclass = {eprintclass},
+       OPTeprinttype = {eprinttype},
+       OPTurl = {https://github.com/riscv/riscv-v-spec/releases/download/v1.0/riscv-v-spec-1.0.pdf},
+       OPTurldate = {20-09-2021},
+}
index 41824537fbd6fb6dece719055ce45a96fcdfcf0b..8b8150156ed909998b7ff960a8a4b2ff2eaea4bb 100644 (file)
@@ -1,21 +1,68 @@
 \section{Summary}
 Specification for hardware for-loop that ONLY uses scalar instructions
 
+\subsection{What is SIMD?}
+\textit{(for clarity only 64-bit registers will be discussed here, however 128-, 256-, and 512-bit implementations also exist)}
 
-Existing SIMD technologies are complex [examples], and 
-https://www.sigarch.org/simd-instructions-considered-harmful/
-(actually much worse)
-AVX512 example requires setup instructions (and data organisation), which means real code baloons. At the start/end need to deal with memory boundaries.
+\ac{SIMD} is a way of partitioning existing \ac{CPU} registers of 64-bit length into smaller 8-, 16-, or 32-bit pieces \cite{SIMD_HARM}\cite{SIMD_HPC}. These partitions can then be operated on simultaneously, with the initial values and results being stored as entire 64-bit registers. The SIMD instruction opcode includes the data width and the operation to perform.\par
 
+\begin{figure}[h]
+       \includegraphics[width=\linewidth]{simd_axb}
+       \caption{SIMD multiplication}
+       \label{simd_axb}
+\end{figure}
 
+This method can have a huge advantage for rapid processing of vector-type data (image/video, physics simulations, cryptography, etc.)\cite{SIMD_WASM}, and thus on paper is very attractive compared to scalar-only instructions.\par
+
+SIMD registers are of a fixed length and thus, to achieve greater performance, CPU architects typically increase the width of registers (to 128-, 256-, 512-bit etc.) for more partitions.\par
+Additionally, binary compatibility is an important feature, and thus each doubling of the SIMD register width also expands the instruction set. The number of instructions quickly balloons, and this can be seen in popular \acp{ISA}, for example IA-32 expanding from 80 to about 1400 instructions since 1978\cite{SIMD_HARM}.\par
+
+\subsection{Vector Architectures}
+An older alternative exists to utilise data parallelism---vector architectures. Vector CPUs collect operands from the main memory, and store them in large, sequential vector registers.\par
+
+Pipelined execution units then perform parallel computations on these vector registers. The result vector is then broken up into individual results which are sent back into the main memory.\par
+
+A simple vector processor might operate on one element at a time, however as the operations are independent by definition \textbf{(where is this from?)}, a processor could be made to compute all of the vector's elements simultaneously.\par
+
+Typically, today's vector processors can execute two, four, or eight 64-bit elements per clock cycle\cite{SIMD_HARM}. Such processors can also deal with (in hardware) fringe cases where the vector length is not a multiple of the number of elements. The element data width is variable (just like in SIMD). Fig.~\ref{vl_reg_n} shows the relationship between number of elements, data width and register vector length.
+
+\begin{figure}[h]
+       \includegraphics[width=\linewidth]{vl_reg_n}
+       \caption{Vector length, data width, number of elements}
+       \label{vl_reg_n}
+\end{figure}
+
+The RISC-V Vector extension supports a VL of up to $2^{16}$ or $65536$ bits, which can fit 1024 64-bit words~\cite{riscv-v-spec}.
+
+\subsection{Comparison Between SIMD and Vector}
+\textit{(Need to add more here, use example from \cite{SIMD_HARM}?)}
+
+\subsubsection{Code Example}
+\begin{verbatim}
+test test
+\end{verbatim}
+
+\subsection{Shortfalls of SIMD}
+The following are just some of the reasons why SIMD is unsustainable as the number of instructions increases:
+\begin{itemize}
+       \item Hardware design, ASIC routing etc.
+       \item Compiler design
+       \item Documentation of the ISA
+       \item Manual coding and optimisation
+       \item Time to support the platform
+\end{itemize}
+
+\subsection{Simple Vectorisation}
+\ac{SV} is an extension to a scalar ISA, designed to be as simple as possible, with no dedicated vector instructions. Effectively, it is a hardware for-loop.
+
+\subsubsection{Prefix 64 - SVP64}
 
 SVP64 is a specification designed to rival existing SIMD implementations by:
 \begin{itemize}
-       \item Simplify hardware design
+       \item Simplifying the hardware design
        \item Reducing maintenance overhead
-       \item Simplify manual assembler hand optimisation
        \item Easier for compilers, coders, documentation
-       \item Time to support platform is a fraction of conventional SIMD (Less money R\&D, faster to deliver)
+       \item Time to support platform is a fraction of conventional SIMD (Less money on R\&D, faster to deliver)
 \end{itemize}
 
 - Intel SIMD is designed to be more capable and has more features, and thus has a greater complexity (?)
index 8a5fef1c1310ce878951aa33a657be06d81f01a6..260e6ae33b913ba7a07e468d50ccbd74846b00fa 100644 (file)
@@ -1,5 +1,8 @@
 \documentclass[a4paper, 10pt]{article}
 \usepackage[utf8]{inputenc}
+\usepackage[printonlyused,withpage]{acronym}
+\usepackage{graphicx}
+\graphicspath{ {./img/} }
 
 \title{(DRAFT) SVP64 Primer}
 
@@ -8,7 +11,11 @@
 \begin{document}
 \maketitle
 
+
+\input{acronyms}
 \input{summary}
 %\input{...}
 
+\bibliography{references}
+\bibliographystyle{ieeetr}
 \end{document}