add slides
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 12 Jul 2018 01:41:29 +0000 (02:41 +0100)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 12 Jul 2018 01:41:36 +0000 (02:41 +0100)
shakti/m_class/libre_riscv_chennai_2018.tex
shakti/m_class/video_decode.mdwn [new file with mode: 0644]

index 9453f1625d300bc3f52a03c238b18bfd959104c9..607d3f9963970986a8eec0fb84eda3ec6d6d4d69 100644 (file)
 }
 
 
+\frame{\frametitle{Challenging Stuff [2] - Video Decode Engine}
+
+ \begin{itemize}
+   \item Richard Herveille's Video Core Blocks\\
+           https://opencores.org/project/video\_systems
+   \item Symbiotic EDA MP4 decoder in FPGA
+   \item H.264 seems to have been done...\\
+         https://github.com/adsc-hls/synthesizable\_h264
+   \item Really needs SIMD (or better, not-SIMD)\\
+         {http://libre-riscv.org/simple\_v\_extension/}
+   \item Definitely needs xBitManip (parallelised by Simple-V)\\
+         https://github.com/cliffordwolf/xbitmanip
+  \end{itemize}
+   {\it SIMD is insane. $O(N^6)$ opcode proliferation.  See\\
+     https://www.sigarch.org/simd-instructions-considered-harmful/ \\
+     (1): P-Ext designed for Audio. (2): Investigate RI5CY's SIMD
+   }
+}
+
+
+\frame{\frametitle{Challenging Stuff [3] - 3D GPU.  Sigh.}
+
+ \begin{itemize}
+   \item Actual requirements quite modest: 30MP/s, 100MT/s, 5GFLOPS,
+         but power/area is crucial ($2\,\mathrm{mm}^2$ @ 40nm)
+   \item Nyuzi, MIAOW, GPLGPU (Number Nine), OGP.
+   \item Nyuzi based on Larrabee. Jeff Bush really helpful.
+   \item MIAOW is an OpenCL engine.  GPLGPU is fixed-function
+   \item Nyuzi lessons: Software-only rendering not enough.
+            Getting through L1 cache takes most power. Fixed functions
+            such as parallel FP-Quad to ARGB Pixel, and Z-Buffer
+            needed.
+   \item Fallback is GC800 (\$250k) {\it contact me if you can do better!}
+  \end{itemize}
+   {\it Jacob Bachmeyer's Cache-control proposal turns L1 Cache into
+   scratchpad RAM.  RVV is just too heavy (sorry!); Simple-V is much
+   more light-weight and flexible.
+   }
+}
+
+
 \frame{\frametitle{TODO}
 
  \begin{itemize}
diff --git a/shakti/m_class/video_decode.mdwn b/shakti/m_class/video_decode.mdwn
new file mode 100644 (file)
index 0000000..bc209f8
--- /dev/null
@@ -0,0 +1,4 @@
+# Video Decode blocks
+
+* https://github.com/adsc-hls/synthesizable_h264
+* https://opencores.org/project/video_systems