bug 1244: add pospopcount.png converted from SVG
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_ddffirst / fosdem2024_ddffirst.tex
1 \documentclass[slidestop]{beamer}
2 \usepackage{beamerthemesplit}
3 \usepackage{graphicx}
4 \usepackage{pstricks}
5 \usepackage{pgffor}
6 \usepackage{listings}
7
8 \graphicspath{{./}}
9
10 \title{Data-Dependent-Fail-First}
11 \author{Luke Kenneth Casson Leighton and Shriya Sharma}
12
13
14 \begin{document}
15
16 \frame{
17 \begin{center}
18 \huge{The Libre-SOC Hybrid 3D CPU}\\
19 \vspace{32pt}
20 \Large{Data-Dependent-Fail-First}\\
21
22 \vspace{24pt}
23 \Large{FOSDEM2024}\\
24 \vspace{16pt}
25 \large{Sponsored by NLnet's PET Programme}\\
26 \vspace{6pt}
27 \large{\today}
28 \end{center}
29 }
30
31
32 \frame{\frametitle{Why another SoC?}
33
34 \begin{itemize}
35 \item Intel Management Engine, Apple QA issues, Spectre\vspace{6pt}
36 \item Endless proprietary drivers, ``simplest'' solution: \\
37 License proprietary hard macros (with proprietary firmware)\\
38 Adversely affects product development cost\\
39 due to opaque driver bugs (Samsung S3C6410 / S5P100)
40 \vspace{6pt}
41 \item Alternative: Intel and Valve-Steam collaboration\\
42 ``Most productive business meeting ever!''\\
43 https://tinyurl.com/valve-steam-intel
44 \vspace{6pt}
45 \item Because for 30 years I Always Wanted To Design A CPU
46 \vspace{6pt}
47 \item Ultimately it is a strategic \textit{business} objective to
48 develop entirely Libre hardware, firmware and drivers.
49 \end{itemize}
50 }
51
52
53
54 \frame{\frametitle{How can you help?}
55
56 \vspace{5pt}
57
58 \begin{itemize}
59 \item Start here! https://libre-soc.org \\
60 Mailing lists https://lists.libre-soc.org \\
61 IRC Libera.Chat \#libre-soc \\
62 etc. etc. (it's a Libre project, go figure) \\
63 \vspace{3pt}
64 \item Can I get paid? Yes! NLnet funded\\
65 See https://libre-soc.org/nlnet/\#faq \\
66 \vspace{3pt}
67 \item Also profit-sharing in any commercial ventures \\
68 \vspace{3pt}
69 \item How many opportunities to develop Libre SoCs exist,\\
70 and actually get paid for it?
71 \vspace{3pt}
72 \item I'm not a developer, how can I help?\\
73 - Plenty of research needed, artwork, website \\
74 - Help find customers and OEMs willing to commit (LOI)
75 \end{itemize}
76 }
77
78
79
80 \frame{\frametitle{What goes into a typical SoC?}
81 \vspace{9pt}
82 \begin{itemize}
83 \item 15 to 20mm BGA package: 2.5 to 5 watt power consumption\\
84 heat sink normally not required (simplifies overall design)
85 \vspace{3pt}
86 \item Fully-integrated peripherals (not Northbridge/Southbridge)\\
87 USB, HDMI, RGB/TTL, SD/MMC, I2C, UART, SPI, GPIO etc. etc.
88 \vspace{3pt}
89 \item Built-in GPU (shared memory bus, 3rd party licensed) \vspace{3pt}
90 \item Built-in VPU (likewise, proprietary)\vspace{3pt}
91 \item Target price between \$2.50 and \$30 depending on market\\
92 Radically different from IBM POWER9 Core (200 Watt)
93 \vspace{3pt}
94 \item We're doing the same, just with a hybrid architecture.\\
95 CPU == GPU == VPU
96 \end{itemize}
97 }
98
99
100
101 \frame{\frametitle{Simple SBC-style SoC}
102
103 \begin{center}
104 \includegraphics[width=0.6\textwidth]{pospopcount.png}
105 \end{center}
106
107 }
108
109
110
111
112 \begin{frame}[fragile]
113 \frametitle{Simple-V CMPI in a nutshell}
114
115 \begin{semiverbatim}
116 function op\_cmpi(BA, RA, SI) # cmpi not vector-cmpi!
117 (assuming you know power-isa)
118  int i, id=0, ira=0;
119  for (i = 0; i < VL; i++)
120   CR[BA+id] <= compare(ireg[RA+ira], SI);
121   if (reg\_is\_vectorised[BA] ) \{ id += 1; \}
122   if (reg\_is\_vectorised[RA])  \{ ira += 1; \}
123 \end{semiverbatim}
124
125 \begin{itemize}
126 \item Above is oversimplified: predication etc. left out
127 \item Scalar-scalar and scalar-vector and vector-vector now all in one
128 \item OoO may choose to push CMPIs into instr. queue (v. busy!)
129 \end{itemize}
130 \end{frame}
131
132 \frame{\frametitle{Load/Store Fault-First}
133
134 \begin{itemize}
135 \item Problem: vector load and store can cause a page fault
136 \item Solution: a protocol that allows optional load/store
137 \item instruction \textit{requests} a number of elements
138 \item instruction \textit{informs} the number actually loaded
139 \item first load/store is not optional
140 \end{itemize}
141 }
142
143 \begin{frame}[fragile]
144 \frametitle{Data-Dependent Fail-First}
145
146 \begin{semiverbatim}
147 function op\_cmpi(BA, RA, SI) # cmpi not vector-cmpi!
148  int i, id=0, ira=0;
149  for (i = 0; i < VL; i++)
150   CR[BA+id] <= compare(ireg[RA+ira], SI);
151   if (reg\_is\_vectorised[BA] ) \{ id += 1; \}
152   if (reg\_is\_vectorised[RA])  \{ ira += 1; \}
153 \end{semiverbatim}
154
155 \begin{itemize}
156 \item Above is oversimplified: predication etc. left out
157 \item Scalar-scalar and scalar-vector and vector-vector now all in one
158 \item OoO may choose to push CMPIs into instr. queue (v. busy!)
159 \end{itemize}
160 \end{frame}
161
162
163 \frame{\frametitle{Additional Simple-V features}
164
165 \begin{itemize}
166 \item ``fail-on-first'' (POWER9 VSX strncpy segfaults on boundary!)
167 \item ``Twin Predication'' (covers VSPLAT, VGATHER, VSCATTER, VINDEX etc.)
168 \item SVP64: extensive ``tag'' (Vector context) augmentation
169 \item ``Context propagation'': a VLIW-like context. Allows contexts
170 to be repeatedly applied.
171 Effectively a ``hardware compression algorithm'' for ISAs.
172 \item Ultimate goal: cut down I-Cache usage, cuts down on power
173 \item Typical GPU has its own I-Cache and small shaders.\\
174 \textit{We are a Hybrid CPU/GPU: I-Cache is not separate!}
175 \item Needs to go through OpenPOWER Foundation `approval'
176 \end{itemize}
177 }
178
179 \frame{\frametitle{maxloc}
180 \begin{itemize}
181 \item TODO
182 \end{itemize}
183 }
184
185 \frame{\frametitle{Pospopcount.c}
186
187 Positional popcount counts, for each bit-position, how many values in an array of input values have that bit set to 1.
188
189 \lstinputlisting[language={}]{pospopcount.c}
190
191 }
192 \frame{\frametitle{Pospopcount.s}
193
194
195 \lstinputlisting[language={}]{pospopcount.s}
196
197 }
198
199
200 \frame{\frametitle{strncpy}
201
202 \begin{itemize}
203 \item TODO
204 \end{itemize}
205 }
206
207 \frame{\frametitle{strncpy assembler}
208
209 \lstinputlisting[language={}]{strncpy.s}
210
211 }
212
213 \frame{\frametitle{linked-list walking}
214 \begin{itemize}
215 \item TODO
216 \end{itemize}
217 }
218 \frame{\frametitle{Summary}
219
220 \begin{itemize}
221 \item Goal is to create a mass-volume low-power embedded SoC suitable
222 for use in netbooks, chromebooks, tablets, smartphones, IoT SBCs.
223 \item No way we could implement a project of this magnitude without
224 nmigen (being able to use python OO to HDL)
225 \item Collaboration with OpenPOWER Foundation and Members absolutely
226 essential. No short-cuts. Standards to be developed and ratified
227 so that everyone benefits.
228 \item Riding the wave of huge stability of OpenPOWER ecosystem
229 \item Greatly simplified open 3D and Video drivers reduces product
230 development costs for customers
231 \item It also happens to be fascinating, deeply rewarding, technically
232 challenging, and funded by NLnet
233
234 \end{itemize}
235 }
236
237
238 \frame{
239 \begin{center}
240 {\Huge The end\vspace{12pt}\\
241 Thank you\vspace{12pt}\\
242 Questions?\vspace{12pt}
243 }
244 \end{center}
245
246 \begin{itemize}
247 \item Discussion: http://lists.libre-soc.org
248 \item Libera.Chat IRC \#libre-soc
249 \item http://libre-soc.org/
250 \item http://nlnet.nl/PET
251 \item https://libre-soc.org/nlnet/\#faq
252 \end{itemize}
253 }
254
255
256 \end{document}