04805956b4ec69c52215cd79a878d57512640706
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_ddffirst / fosdem2024_ddffirst.tex
1 \documentclass[slidestop]{beamer}
2 \usepackage{beamerthemesplit}
3 \usepackage{graphics}
4 \usepackage{pstricks}
5 \usepackage{pgffor}
6 \usepackage{listings}
7
8 \graphicspath{{./}}
9
10 \title{Data-Dependent-Fail-First}
11 \author{Luke Kenneth Casson Leighton and Shriya Sharma}
12
13
14 \begin{document}
15
16 \frame{
17 \begin{center}
18 \huge{The Libre-SOC Hybrid 3D CPU}\\
19 \vspace{32pt}
20 \Large{Data-Dependent-Fail-First}\\
21
22 \vspace{24pt}
23 \Large{FOSDEM2024}\\
24 \vspace{16pt}
25 \large{Sponsored by NLnet's PET Programme}\\
26 \vspace{6pt}
27 \large{\today}
28 \end{center}
29 }
30
31
32 \frame{\frametitle{Why another SoC?}
33
34 \begin{itemize}
35 \item Intel Management Engine, Apple QA issues, Spectre\vspace{6pt}
36 \item Endless proprietary drivers, ``simplest'' solution: \\
37 License proprietary hard macros (with proprietary firmware)\\
38 Adversely affects product development cost\\
39 due to opaque driver bugs (Samsung S3C6410 / S5PC100)
40 \vspace{6pt}
41 \item Alternative: Intel and Valve-Steam collaboration\\
42 ``Most productive business meeting ever!''\\
43 https://tinyurl.com/valve-steam-intel
44 \vspace{6pt}
45 \item Because for 30 years I Always Wanted To Design A CPU
46 \vspace{6pt}
47 \item Ultimately it is a strategic \textit{business} objective to
48 develop entirely Libre hardware, firmware and drivers.
49 \end{itemize}
50 }
51
52
53
54 \frame{\frametitle{How can you help?}
55
56 \vspace{5pt}
57
58 \begin{itemize}
59 \item Start here! https://libre-soc.org \\
60 Mailing lists https://lists.libre-soc.org \\
61 IRC: \#libre-soc on Libera.Chat \\
62 etc. etc. (it's a Libre project, go figure) \\
63 \vspace{3pt}
64 \item Can I get paid? Yes! NLnet funded\\
65 See https://libre-soc.org/nlnet/\#faq \\
66 \vspace{3pt}
67 \item Also profit-sharing in any commercial ventures \\
68 \vspace{3pt}
69 \item How many opportunities to develop Libre SoCs exist,\\
70 and actually get paid for it?
71 \vspace{3pt}
72 \item I'm not a developer, how can I help?\\
73 - Plenty of research needed, artwork, website \\
74 - Help find customers and OEMs willing to commit (LOI)
75 \end{itemize}
76 }
77
78
79
80 \frame{\frametitle{What goes into a typical SoC?}
81 \vspace{9pt}
82 \begin{itemize}
83 \item 15 to 20mm BGA package: 2.5 to 5 watt power consumption\\
84 heat sink normally not required (simplifies overall design)
85 \vspace{3pt}
86 \item Fully-integrated peripherals (not Northbridge/Southbridge)\\
87 USB, HDMI, RGB/TTL, SD/MMC, I2C, UART, SPI, GPIO etc. etc.
88 \vspace{3pt}
89 \item Built-in GPU (shared memory bus, 3rd party licensed) \vspace{3pt}
90 \item Built-in VPU (likewise, proprietary)\vspace{3pt}
91 \item Target price between \$2.50 and \$30 depending on market\\
92 Radically different from IBM POWER9 Core (200 Watt)
93 \vspace{3pt}
94 \item We're doing the same, just with a hybrid architecture.\\
95 CPU == GPU == VPU
96 \end{itemize}
97 }
98
99
100
101 \begin{frame}[fragile]
102 \frametitle{Simple-V CMPI in a nutshell}
103
104 \begin{semiverbatim}
105 function op\_cmpi(BA, RA, SI) # cmpi not vector-cmpi!
106 (assuming you know power-isa)
107  int i, id=0, ira=0;
108  for (i = 0; i < VL; i++)
109   CR[BA+id] <= compare(ireg[RA+ira], SI);
110 if (reg\_is\_vectorised[BA] ) \{ id += 1; \}
111 if (reg\_is\_vectorised[RA])  \{ ira += 1; \}
112 \end{semiverbatim}
113
114 \begin{itemize}
115 \item Above is oversimplified: predication etc. left out
116 \item Scalar-scalar and scalar-vector and vector-vector now all in one
117 \item OoO may choose to push CMPIs into instr. queue (v. busy!)
118 \end{itemize}
119 \end{frame}
120
121
122 \frame{\frametitle{Load/Store Fault-First}
123
124 \begin{itemize}
125 \item Problem: vector load and store can cause a page fault
126 \item Solution: a protocol that allows optional load/store
127 \item instruction \textit{requests} a number of elements
128 \item instruction \textit{informs} the number actually loaded
129 \item first element load/store is not optional (cannot fail)
130 \item ARM SVE: https://arxiv.org/pdf/1803.06185.pdf
131 \item more: wikipedia Vector processor page: Fault/Fail First
132 \vspace{10pt}
133 \item Load/Store is Memory to/from Register, what about
134 Register to Register?
135 \item Register-to-register: ``Data-Dependent Fail-First.''
136 \item Z80 LDIR: Mem-Register, CPIR: Register-Register
137 \end{itemize}
138 }
139
140 \begin{frame}[fragile]
141 \frametitle{Data-Dependent-Fail-First in a nutshell}
142
143 \begin{semiverbatim}
144 function op\_cmpi(BA, RA, SI) # cmpi not vector-cmpi!
145 int i, id=0, ira=0;
146 for (i = 0; i < VL; i++)
147 CR[BA+id] <= compare(ireg[RA+ira], SI);
148 if test (CR[BA+id]) == FAIL: \{ VL = i + 1; break \}
149 if (reg\_is\_vectorised[BA] ) \{ id += 1; \}
150 if (reg\_is\_vectorised[RA])  \{ ira += 1; \}
151 \end{semiverbatim}
152
153 \begin{itemize}
154 \item Parallelism still perfectly possible
155 ("hold" writing results until sequential post-analysis
156 carried out. Best done with OoO)
157 \item VL truncation can be inclusive or exclusive
158 (include or exclude a NULL pointer or a
159 string-end character, or overflow result)
160 \item \textit{Truncation can be to zero Vector Length}
161 \end{itemize}
162 \end{frame}
163
164 \frame{\frametitle{Power ISA v3.1 vstribr}
165
166 \lstinputlisting[language={}]{vstribr.txt}
167
168 \begin{itemize}
169 \item ironically this hard-coded instruction is
170 identical to general-purpose Simple-V DD-FFirst...
171 \end{itemize}
172
173 }
174
175 \frame{\frametitle{maxloc}
176 \begin{itemize}
177 \item TODO
178 \end{itemize}
179 }
180
181 \frame{\frametitle{Pospopcount}
182
183 \begin{itemize}
184 \item Positional popcount counts, for each bit-position, how many values in an array of input values have that bit set to 1.
185 \item Notoriously difficult to do in SIMD assembler: typically 550 lines
186 \item https://github.com/clausecker/pospop
187
188 \end{itemize}
189
190 \lstinputlisting[language={}]{pospopcount.c}
191
192
193 }
194
195 \frame{\frametitle{Pospopcount}
196
197 \begin{center}
198 \includegraphics[width=0.5\textwidth]{pospopcount.png}
199 \end{center}
200 \begin{itemize}
201 \item The challenge is to perform an appropriate transpose of the data (the CPU can only work on registers, horizontally),
202 in blocks that suit the processor and the ISA capacity.
203
204
205 \end{itemize}
206 }
207
208 \frame{\frametitle{Pospopcount}
209
210 \begin{center}
211 \includegraphics[width=0.6\textwidth]{array_popcnt.png}
212 \end{center}
213
214 \begin{itemize}
215
216 \item The draft gbbd instruction implements the transpose (shown above),
217 preparing the data to use the standard popcount instruction.
218 (gbbd is based on Power ISA vgbbd)
219
220 \end{itemize}
221
222 }
223
224 \frame{\frametitle{Pospopcount.s}
225
226
227 \lstinputlisting[language={}]{pospopcount.s}
228
229 }
230
231
232 \frame{\frametitle{strncpy}
233
234 \lstinputlisting[language={}]{strncpy.c}
235 \begin{itemize}
236 \item TODO
237 \end{itemize}
238 }
239
240
241
242 \frame{\frametitle{strncpy assembler}
243
244 \lstinputlisting[language={}]{strncpy.s}
245
246 }
247
248 \frame{\frametitle{linked-list walking}
249 \begin{itemize}
250 \item TODO
251 \end{itemize}
252 }
253 \frame{\frametitle{Summary}
254
255 \begin{itemize}
256 \item Goal is to create a mass-volume low-power embedded SoC suitable
257 for use in netbooks, chromebooks, tablets, smartphones, IoT SBCs.
258 \item No way we could implement a project of this magnitude without
259 nmigen (being able to use python OO to HDL)
260 \item Collaboration with OpenPOWER Foundation and Members absolutely
261 essential. No short-cuts. Standards to be developed and ratified
262 so that everyone benefits.
263 \item Riding the wave of huge stability of OpenPOWER ecosystem
264 \item Greatly simplified open 3D and Video drivers reduce product
265 development costs for customers
266 \item It also happens to be fascinating, deeply rewarding, technically
267 challenging, and funded by NLnet
268
269 \end{itemize}
270 }
271
272
273 \frame{
274 \begin{center}
275 {\Huge The end\vspace{12pt}\\
276 Thank you\vspace{12pt}\\
277 Questions?\vspace{12pt}
278 }
279 \end{center}
280
281 \begin{itemize}
282 \item Discussion: http://lists.libre-soc.org
283 \item Libera.Chat IRC \#libre-soc
284 \item http://libre-soc.org/
285 \item http://nlnet.nl/PET
286 \item https://libre-soc.org/nlnet/\#faq
287 \end{itemize}
288 }
289
290
291 \end{document}