(no commit message)
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_bigint / fosdem2024_bigint.tex
1 % Copyright 2024 Jacob Lifshay
2
3 \documentclass{beamer}
4 \usepackage{beamerthemesplit}
5 \usetheme{default}
6 \usepackage[english]{babel}
7 \usepackage{tikz}
8 \usepackage{minted}
9 \usemintedstyle{monokai}
10 \definecolor{codebg}{rgb}{0.1,0.09,0.08}
11 \newminted[codeenv]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
12 \newmintinline[codeinline]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
13 \usepackage{hyperref}
14
15 \title[Fast Big-Integer Arithmetic on SVP64 ...]{
16 Fast Big-Integer Arithmetic on SVP64 at up to 256-bits/cycle and beyond
17 }
18
19 \author{Jacob R. Lifshay}
20
21 \date{FOSDEM 2024}
22
23 \logo{\includegraphics[height=0.5cm]{../../../images/lsoclogo.png}}
24
25 \begin{document}
26
27 \begin{frame}
28 \titlepage
29 \end{frame}
30
31 % TODO: add intro describing why people should want this -- what problem this solves.
32 % also, what I want them to do with this knowledge
33
34 \begin{frame}[fragile]
35 \frametitle{What is SVP64?}
36 \begin{itemize}
37 \item Vectorization Extension for PowerISA developed by \href{https://libre-soc.org}{Libre-SOC}
38 \pause
39 \item Basically, a way to modify nearly any PowerISA instruction to run it in a HW loop.
40 \pause \\
41 \medskip
42 Simple Example:
43 \begin{codeenv}
44 setvl 0, 0, 3, 0, 1, 1 # makes stuff run 3 times
45 sv.add *r3, *r15, r12 # adds 3 times
46 @\pause@
47 # expands to:
48 add r3, r15, r12 # no * means r12 doesn't increment
49 add r4, r16, r12 # * means r3 and r15 increment
50 add r5, r17, r12
51 \end{codeenv}
52 \end{itemize}
53 \end{frame}
54
55 \begin{frame}[fragile]
56 \frametitle{Big-Integer Addition on SVP64}
57 How can we use SVP64 to add 256-bit integers?
58 \pause
59 \begin{codeenv}
60 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
61 addic r0, r0, 0 # clear CA (carry flag)
62 sv.adde *r4, *r4, *r8 # carry-propagating add
63 @\pause@
64 # expands to:
65 addic r0, r0, 0 # clear CA (carry flag)
66 adde r4, r4, r8
67 adde r5, r5, r9
68 adde r6, r6, r10
69 adde r7, r7, r11
70 \end{codeenv}
71 \end{frame}
72
73 \begin{frame}[fragile]
74 \frametitle{Big-Integer Addition on SVP64}
75 How can we use SVP64 to add 256-bit integers?
76 \bigbreak
77 \input{sv.adde.dia-tex}
78 \end{frame}
79
80 \begin{frame}
81 \frametitle{Big-Integer Addition on an example CPU}
82 Disclaimer:
83 SVP64 is designed for everything from tiny CPUs to big, fast CPUs; this example shows only a hypothetical big, fast CPU design.
84 \end{frame}
85
86 \begin{frame}
87 \frametitle{Big-Integer Addition on an example CPU}
88 \input{bigint-add-pipe.dia-tex}
89 \end{frame}
90
91 \begin{frame}[fragile]
92 \frametitle{Big-Integer Multiply on SVP64}
93 How can we use SVP64 to multiply a 64-bit integer by a 256-bit integer?
94 \pause
95 \begin{itemize}
96 \item new instruction: \codeinline{maddedu RT, RA, RB, RC}
97 \pause
98 \item $64 \times 64 + 64 \rightarrow 128$-bit Multiply-Add
99 \pause
100 \item Semantics as used in this presentation (somewhat simplified):
101 \begin{codeenv}
102 result = (RA * RB) + RC
103 RT = LSB_HALF(result)
104 RC = MSB_HALF(result)
105 \end{codeenv}
106 \end{itemize}
107 \end{frame}
108
109 \begin{frame}[fragile]
110 \frametitle{Big-Integer Multiply on SVP64}
111 How can we use SVP64 to multiply a 64-bit integer by a 256-bit integer?
112 \pause
113 \begin{codeenv}
114 # 64-bit input in r3
115 # 256-bit input in r20-23
116 # 320-bit output in r4-8
117 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
118 li r8, 0 # clear carry register
119 sv.maddedu *r4, r3, *r20, r8 # carrying multiply
120 @\pause@
121 # expands to:
122 li r8, 0
123 maddedu r4, r3, r20, r8
124 maddedu r5, r3, r21, r8
125 maddedu r6, r3, r22, r8
126 maddedu r7, r3, r23, r8
127 \end{codeenv}
128 \end{frame}
129
130 \begin{frame}
131 \frametitle{Big-Integer Multiply on SVP64}
132 \input{sv.maddedu.dia-tex}
133 \end{frame}
134
135 \begin{frame}[fragile]
136 \frametitle{Big-Integer Multiply on an example CPU}
137 \begin{overprint}
138 \onslide<1>
139 \codeinline{sv.maddld *r4, *r8, *r16, *r20 # mul-add}
140 \onslide<2>
141 \codeinline{sv.maddedu *r4, r3, *r20, r8 # carrying multiply}
142 \end{overprint}
143 \bigbreak
144 \input{bigint-mul-pipe.dia-tex}
145 \end{frame}
146
147 \begin{frame}[fragile]
148 \begin{itemize}
149 \item Discussion: \url{https://lists.libre-soc.org}
150 \item IRC \#libre-soc on OFTC or Libera
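151 % work around broken escaping of the hash character in \href: use the four-argument \hyperref{URL}{category}{name}{text} form, which links to the URL followed by a hash, the category, a dot, and the name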
152 \item Matrix \hyperref{https://matrix.to/\#/\#_oftc_}{libre-soc:matrix}{org}{\#\_oftc\_\#libre-soc:matrix.org}
153 \item \url{https://libre-soc.org/}
154 \item Thanks to NLnet for funding this: \url{https://nlnet.nl/assure}
155 \item \url{https://libre-soc.org/nlnet/\#faq}
156 \end{itemize}
157 \end{frame}
158
159 \end{document}