1 % Copyright 2024 Jacob Lifshay
4 \usepackage{beamerthemesplit
}
6 \usepackage[english
]{babel
}
9 \usemintedstyle{monokai
}
10 \definecolor{codebg
}{rgb
}{0.1,
0.09,
0.08}
11 \newminted[codeenv
]{python3
}{escapeinside=@@,fontsize=
\small,bgcolor=codebg
}
12 \newmintinline[codeinline
]{python3
}{escapeinside=@@,fontsize=
\small,bgcolor=codebg
}
15 \title[Fast Big-Integer Arithmetic on SVP64 ...
]{
16 Fast Big-Integer Arithmetic on SVP64 at up to
256-bits/cycle and beyond
19 \author{Jacob R. Lifshay
}
23 \logo{\includegraphics[height=
0.5cm
]{../../../images/lsoclogo.png
}}
31 % TODO: add intro describing why people should want this -- what problem this solves.
32 % also, what I want them to do with this knowledge
34 \begin{frame
}[fragile
]
35 \frametitle{What is SVP64?
}
37 \item Vectorization Extension for PowerISA developed by
\href{https://libre-soc.org
}{Libre-SOC
}
39 \item Basically, a way to modify nearly any PowerISA instruction to run it in a HW loop.
44 setvl
0,
0,
3,
0,
1,
1 # makes stuff run
3 times
45 sv.add *r3, *r15, r12 # adds
3 times
48 add r3, r15, r12 # no * means r12 doesn't increment
49 add r4, r16, r12 # * means r3 and r15 increment
55 \begin{frame
}[fragile
]
56 \frametitle{Big-Integer Addition on SVP64
}
57 How can we use SVP64 to add
256-bit integers?
60 setvl
0,
0,
4,
0,
1,
1 # makes stuff run
4 times
61 addic r0, r0,
0 # clear CA (carry flag)
62 sv.adde *r4, *r4, *r8 # carry-propagating add
65 addic r0, r0,
0 # clear CA (carry flag)
73 \begin{frame
}[fragile
]
74 \frametitle{Big-Integer Addition on SVP64
}
75 How can we use SVP64 to add
256-bit integers?
77 \input{sv.adde.dia-tex
}
81 \frametitle{Big-Integer Addition on an example CPU
}
83 SVP64 is designed for everything from tiny to big and fast CPUs; this example only shows a hypothetical big and fast CPU design
87 \frametitle{Big-Integer Addition on an example CPU
}
88 \input{bigint-add-pipe.dia-tex
}
91 \begin{frame
}[fragile
]
92 \frametitle{Big-Integer Multiply on SVP64
}
93 How can we use SVP64 to multiply a 64-bit by a 256-bit integer?
96 \item new instruction:
\codeinline{maddedu RT, RA, RB, RC
}
98 \item $
64 \times 64 +
64 \rightarrow 128$-bit Multiply-Add
100 \item Semantics as used in this presentation (somewhat simplified):
102 result = (RA * RB) + RC
103 RT = LSB_HALF(result)
104 RC = MSB_HALF(result)
109 \begin{frame
}[fragile
]
110 \frametitle{Big-Integer Multiply on SVP64
}
111 How can we use SVP64 to multiply a 64-bit by a 256-bit integer?
115 #
256-bit input in r20-
23
116 #
320-bit output in r4-
8
117 setvl
0,
0,
4,
0,
1,
1 # makes stuff run
4 times
118 li r8,
0 # clear carry register
119 sv.maddedu *r4, r3, *r20, r8 # carrying multiply
123 maddedu r4, r3, r20, r8
124 maddedu r5, r3, r21, r8
125 maddedu r6, r3, r22, r8
126 maddedu r7, r3, r23, r8
131 \frametitle{Big-Integer Multiply on SVP64
}
132 \input{sv.maddedu.dia-tex
}
135 \begin{frame
}[fragile
]
136 \frametitle{Big-Integer Multiply on an example CPU
}
139 \codeinline{sv.maddld *r4, *r8, *r16, *r20 # mul-add
}
141 \codeinline{sv.maddedu *r4, r3, *r20, r8 # carrying multiply
}
144 \input{bigint-mul-pipe.dia-tex
}
147 \begin{frame
}[fragile
]
149 \item Discussion:
\url{https://lists.libre-soc.org
}
150 \item IRC \#libre-soc on OFTC or Libera
151 % workaround for broken escaping in the \href command: the four-argument \hyperref{URL}{category}{name}{text} form links to URL#category.name
152 \item Matrix
\hyperref{https://matrix.to/\#/\#_oftc_
}{libre-soc:matrix
}{org
}{\#
\_oftc\_\#libre-soc:matrix.org
}
153 \item \url{https://libre-soc.org/
}
154 \item Thanks to NLnet for funding this:
\url{https://nlnet.nl/assure
}
155 \item \url{https://libre-soc.org/nlnet/\#faq
}