Bug 1244: removed additional simple-V features
[libreriscv.git] / conferences / fosdem2024 / fosdem2024_ddffirst / fosdem2024_ddffirst.tex
1 \documentclass[slidestop]{beamer}
2 \usepackage{beamerthemesplit}
3 \usepackage{graphics}
4 \usepackage{pstricks}
5 \usepackage{pgffor}
6 \usepackage{listings}
7
8 \graphicspath{{./}}
9
10 \title{Data-Dependent-Fail-First}
11 \author{Luke Kenneth Casson Leighton and Shriya Sharma}
12
13
14 \begin{document}
15
16 \frame{
17 \begin{center}
18 \huge{The Libre-SOC Hybrid 3D CPU}\\
19 \vspace{32pt}
20 \Large{Data-Dependent-Fail-First}\\
21
22 \vspace{24pt}
23 \Large{FOSDEM2024}\\
24 \vspace{16pt}
25 \large{Sponsored by NLnet's PET Programme}\\
26 \vspace{6pt}
27 \large{\today}
28 \end{center}
29 }
30
31
32 \frame{\frametitle{Why another SoC?}
33
34 \begin{itemize}
35 \item Intel Management Engine, Apple QA issues, Spectre\vspace{6pt}
36 \item Endless proprietary drivers, ``simplest'' solution: \\
37 License proprietary hard macros (with proprietary firmware)\\
38 Adversely affects product development cost\\
39 due to opaque driver bugs (Samsung S3C6410 / S5P100)
40 \vspace{6pt}
41 \item Alternative: Intel and Valve-Steam collaboration\\
42 ``Most productive business meeting ever!''\\
43 https://tinyurl.com/valve-steam-intel
44 \vspace{6pt}
45 \item Because for 30 years I Always Wanted To Design A CPU
46 \vspace{6pt}
47 \item Ultimately it is a strategic \textit{business} objective to
48 develop entirely Libre hardware, firmware and drivers.
49 \end{itemize}
50 }
51
52
53
54 \frame{\frametitle{How can you help?}
55
56 \vspace{5pt}
57
58 \begin{itemize}
59 \item Start here! https://libre-soc.org \\
60 Mailing lists https://lists.libre-soc.org \\
61 IRC Freenode \#libre-soc \\
62 etc. etc. (it's a Libre project, go figure) \\
63 \vspace{3pt}
64 \item Can I get paid? Yes! NLnet funded\\
65 See https://libre-soc.org/nlnet/\#faq \\
66 \vspace{3pt}
67 \item Also profit-sharing in any commercial ventures \\
68 \vspace{3pt}
69 \item How many opportunities to develop Libre SoCs exist,\\
70 and actually get paid for it?
71 \vspace{3pt}
72 \item I'm not a developer, how can I help?\\
73 - Plenty of research needed, artwork, website \\
74 - Help find customers and OEMs willing to commit (LOI)
75 \end{itemize}
76 }
77
78
79
80 \frame{\frametitle{What goes into a typical SoC?}
81 \vspace{9pt}
82 \begin{itemize}
83 \item 15 to 20mm BGA package: 2.5 to 5 watt power consumption\\
84 heat sink normally not required (simplifies overall design)
85 \vspace{3pt}
86 \item Fully-integrated peripherals (not Northbridge/Southbridge)\\
87 USB, HDMI, RGB/TTL, SD/MMC, I2C, UART, SPI, GPIO etc. etc.
88 \vspace{3pt}
89 \item Built-in GPU (shared memory bus, 3rd party licensed) \vspace{3pt}
90 \item Built-in VPU (likewise, proprietary)\vspace{3pt}
91 \item Target price between \$2.50 and \$30 depending on market\\
92 Radically different from IBM POWER9 Core (200 Watt)
93 \vspace{3pt}
94 \item We're doing the same, just with a hybrid architecture.\\
95 CPU == GPU == VPU
96 \end{itemize}
97 }
98
99
100
101 \frame{\frametitle{Simple SBC-style SoC}
102
103 \begin{center}
104 \includegraphics[width=0.6\textwidth]{pospopcount.png}
105 \end{center}
106
107 }
108
109
110
111
112 \begin{frame}[fragile]
113 \frametitle{Simple-V CMPI in a nutshell}
114
115 \begin{semiverbatim}
116 function op\_cmpi(BA, RA, SI) # cmpi not vector-cmpi!
117  # (assuming you know the Power ISA)
118  int i, id=0, ira=0;
119  for (i = 0; i < VL; i++)
120   CR[BA+id] <= compare(ireg[RA+ira], SI);
121   if (reg\_is\_vectorised[BA] ) \{ id += 1; \}
122   if (reg\_is\_vectorised[RA])  \{ ira += 1; \}
123 \end{semiverbatim}
124
125 \begin{itemize}
126 \item Above is oversimplified: predication etc. left out
127 \item Scalar-scalar and scalar-vector and vector-vector now all in one
128 \item OoO may choose to push CMPIs into instr. queue (v. busy!)
129 \end{itemize}
130 \end{frame}
131
132
133 \frame{\frametitle{Load/Store Fault-First}
134
135 \begin{itemize}
136 \item Problem: vector load and store can cause a page fault
137 \item Solution: a protocol that allows optional load/store
138 \item instruction \textit{requests} a number of elements
139 \item instruction \textit{informs} the number actually loaded
140 \item first element load/store is not optional (cannot fail)
141 \item ARM SVE: https://arxiv.org/pdf/1803.06185.pdf
142 \item more: Wikipedia ``Vector processor'' page: Fault/Fail First
143 \vspace{10pt}
144 \item Load/Store is Memory to/from Register, what about
145 Register to Register?
146 \item Register-to-register: ``Data-Dependent Fail-First.''
147 \item Z80 LDIR: Mem-Register, CPIR: Register-Register
148 \end{itemize}
149 }
150
151 \begin{frame}[fragile]
152 \frametitle{Data-Dependent-Fail-First in a nutshell}
153
154 \begin{semiverbatim}
155 function op\_cmpi(BA, RA, SI) # cmpi not vector-cmpi!
156  int i, id=0, ira=0;
157  for (i = 0; i < VL; i++)
158   CR[BA+id] <= compare(ireg[RA+ira], SI);
159   if (reg\_is\_vectorised[BA] ) \{ id += 1; \}
160   if (reg\_is\_vectorised[RA])  \{ ira += 1; \}
161   if test (CR[BA+id]) == FAIL: \{ VL = i + 1; break \}
162 \end{semiverbatim}
163
164 \begin{itemize}
165 \item Parallelism still perfectly possible
166 (``hold'' writing results until sequential post-analysis
167 carried out. Best done with OoO)
168 \item VL truncation can be inclusive or exclusive
169 (include or exclude a NULL pointer or a
170 string-end character, or overflow result)
171 \item \textit{Truncation can be to zero Vector Length}
172 \end{itemize}
173 \end{frame}
174
175
176
177 \frame{\frametitle{maxloc}
178 \begin{itemize}
179 \item TODO
180 \end{itemize}
181 }
182
183 \frame{\frametitle{Pospopcount}
184
185 \begin{itemize}
186 \item Positional popcount totals, for each bit-position, the number of values in an input array that have that bit set to 1.
187 \item Notoriously difficult to do in SIMD assembler: typically 550 lines
188 \end{itemize}
189
190 \lstinputlisting[language={}]{pospopcount.c}
191
192 }
193 \frame{\frametitle{Pospopcount.s}
194
195
196 \lstinputlisting[language={}]{pospopcount.s}
197
198 }
199
200
201 \frame{\frametitle{strncpy}
202
203 \begin{itemize}
204 \item TODO
205 \end{itemize}
206 }
207
208 \frame{\frametitle{strncpy assembler}
209
210 \lstinputlisting[language={}]{strncpy.s}
211
212 }
213
214 \frame{\frametitle{linked-list walking}
215 \begin{itemize}
216 \item TODO
217 \end{itemize}
218 }
219 \frame{\frametitle{Summary}
220
221 \begin{itemize}
222 \item Goal is to create a mass-volume low-power embedded SoC suitable
223 for use in netbooks, chromebooks, tablets, smartphones, IoT SBCs.
224 \item No way we could implement a project of this magnitude without
225 nmigen (being able to use Python OO to generate HDL)
226 \item Collaboration with OpenPOWER Foundation and Members absolutely
227 essential. No short-cuts. Standards to be developed and ratified
228 so that everyone benefits.
229 \item Riding the wave of huge stability of OpenPOWER ecosystem
230 \item Greatly simplified open 3D and Video drivers reduce product
231 development costs for customers
232 \item It also happens to be fascinating, deeply rewarding, technically
233 challenging, and funded by NLnet
234
235 \end{itemize}
236 }
237
238
239 \frame{
240 \begin{center}
241 {\Huge The end\vspace{12pt}\\
242 Thank you\vspace{12pt}\\
243 Questions?\vspace{12pt}
244 }
245 \end{center}
246
247 \begin{itemize}
248 \item Discussion: http://lists.libre-soc.org
249 \item Freenode IRC \#libre-soc
250 \item http://libre-soc.org/
251 \item http://nlnet.nl/PET
252 \item https://libre-soc.org/nlnet/\#faq
253 \end{itemize}
254 }
255
256
257 \end{document}