fosdem2024_bigint: remove test.dia
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_bigint / fosdem2024_bigint.tex
1 % Copyright 2024 Jacob Lifshay
2
3 \documentclass{beamer}
4 \usepackage{beamerthemesplit}
5 \usetheme{default}
6 \usepackage[english]{babel}
7 \usepackage{tikz}
8 \usepackage{minted}
9 \usemintedstyle{monokai}
10 \definecolor{codebg}{rgb}{0.1,0.09,0.08}
11 \newminted[codeenv]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
12 \newmintinline[codeinline]{python3}{escapeinside=@@,fontsize=\small,bgcolor=codebg}
13
14 \title[Fast Big-Integer Arithmetic on SVP64 ...]{
15 Fast Big-Integer Arithmetic on SVP64 at up to 256-bits/cycle and beyond
16 }
17
18 \author{Jacob R. Lifshay}
19
20 \date{FOSDEM 2024}
21
22 \logo{\includegraphics[height=0.5cm]{../../../images/lsoclogo.png}}
23
24 \begin{document}
25
26 \begin{frame}
27 \titlepage
28 \end{frame}
29
30 \begin{frame}[fragile]
31 \frametitle{What is SVP64?}
32 \begin{itemize}
33 \item Vectorization Extension for PowerISA developed by \href{https://libre-soc.org}{Libre-SOC}
34 \pause
35 \item Basically, a way to modify nearly any PowerISA instruction to run it in a HW loop.
36 \pause \\
37 \medskip
38 Simple Example:
39 \begin{codeenv}
40 setvl 0, 0, 3, 0, 1, 1 # makes stuff run 3 times
41 sv.add *r3, *r15, r12 # adds 3 times
42 @\pause@
43 # expands to:
44 add r3, r15, r12 # no * means r12 doesn't increment
45 add r4, r16, r12 # * means r3 and r15 increment
46 add r5, r17, r12
47 \end{codeenv}
48 \end{itemize}
49 \end{frame}
50
51 \begin{frame}[fragile]
52 \frametitle{Big-Integer Addition on SVP64}
53 How can we use SVP64 to add 256-bit integers?
54 \pause
55 \begin{codeenv}
56 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
57 addic r0, r0, 0 # clear CA (carry flag)
58 sv.adde *r4, *r4, *r8 # carry-propagating add
59 @\pause@
60 # expands to:
61 addic r0, r0, 0 # clear CA (carry flag)
62 adde r4, r4, r8
63 adde r5, r5, r9
64 adde r6, r6, r10
65 adde r7, r7, r11
66 \end{codeenv}
67 \end{frame}
68
69 \begin{frame}[fragile]
70 \frametitle{Big-Integer Addition on SVP64}
71 How can we use SVP64 to add 256-bit integers?
72 \medbreak
73 \input{sv.adde.dia-tex}
74 \end{frame}
75
76 \begin{frame}
77 \frametitle{Big-Integer Addition on an example CPU}
78 Disclaimer:
79 SVP64 is designed for everything from tiny CPUs to big, fast ones; this example only shows a hypothetical big, fast CPU design.
80 \end{frame}
81
82 \begin{frame}
83 \frametitle{Big-Integer Addition on an example CPU}
84 \input{bigint-add-pipe.dia-tex}
85 \end{frame}
86
87 \begin{frame}[fragile]
88 \frametitle{Big-Integer Multiply on SVP64}
89 How can we use SVP64 to multiply a 64-bit integer by a 256-bit integer?
90 \pause
91 \begin{itemize}
92 \item new instruction: \codeinline{maddedu RT, RA, RB, RC}
93 \pause
94 \item $64 \times 64 + 64 \rightarrow 128$-bit Multiply-Add
95 \pause
96 \item Semantics as used in this presentation (somewhat simplified):
97 \begin{codeenv}
98 result = (RA * RB) + RC
99 RT = LSB_HALF(result)
100 RC = MSB_HALF(result)
101 \end{codeenv}
102 \end{itemize}
103 \end{frame}
104
105 \begin{frame}[fragile]
106 \frametitle{Big-Integer Multiply on SVP64}
107 How can we use SVP64 to multiply a 64-bit integer by a 256-bit integer?
108 \pause
109 \begin{codeenv}
110 # 64-bit input in r3
111 # 256-bit input in r20-23
112 # 320-bit output in r4-8
113 setvl 0, 0, 4, 0, 1, 1 # makes stuff run 4 times
114 li r8, 0 # clear carry register
115 sv.maddedu *r4, r3, *r20, r8 # carrying multiply
116 @\pause@
117 # expands to:
118 li r8, 0
119 maddedu r4, r3, r20, r8
120 maddedu r5, r3, r21, r8
121 maddedu r6, r3, r22, r8
122 maddedu r7, r3, r23, r8
123 \end{codeenv}
124 \end{frame}
125
126 \begin{frame}
127 \end{frame}
128
129 \end{document}