c1d3f2d6138b210228e0c4911478b2dd81e77a67
[mesa.git] / src / gallium / drivers / swr / rasterizer / archrast / archrast.cpp
1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.cpp
24 *
25 * @brief Implementation for archrast.
26 *
27 ******************************************************************************/
28 #include <atomic>
29 #include <map>
30
31 #include "common/os.h"
32 #include "archrast/archrast.h"
33 #include "archrast/eventmanager.h"
34 #include "gen_ar_event.hpp"
35 #include "gen_ar_eventhandlerfile.hpp"
36
37 namespace ArchRast
38 {
//////////////////////////////////////////////////////////////////////////
/// @brief Pass/fail counters for the depth (Z) and stencil tests, split
///        by whether the test ran early or late. Every counter starts
///        at zero.
struct DepthStencilStats
{
    // Depth test.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
52
/// @brief Clipper counters.
///
/// Members are zero-initialized so that a default-constructed instance
/// starts from a known state, matching the other stats structs in this
/// file.
struct CStats
{
    uint32_t trivialRejectCount = 0;
    uint32_t trivialAcceptCount = 0;
    uint32_t mustClipCount      = 0;
};
59
/// @brief Tessellation counters.
struct TEStats
{
    /// @todo Rename to numPatches. This currently assumes one patch per
    ///       primitive, which is fine as long as that assumption holds.
    uint32_t inputPrims{0};
};
65
/// @brief Geometry shader counters.
///
/// Members are zero-initialized so that a default-constructed instance
/// starts from a known state, matching the other stats structs in this
/// file.
struct GSStateInfo
{
    uint32_t inputPrimCount     = 0;
    uint32_t primGeneratedCount = 0;
    uint32_t vertsInput         = 0;
};
72
/// @brief Rasterizer counters.
struct RastStats
{
    uint32_t rasterTiles{0};
};
77
/// @brief Primitive culling counters.
struct CullStats
{
    uint32_t degeneratePrimCount{0};
    uint32_t backfacePrimCount{0};
};
83
/// @brief Alpha test / alpha blend counters.
struct AlphaStats
{
    uint32_t alphaTestCount{0};
    uint32_t alphaBlendCount{0};
};
89
90
91 //////////////////////////////////////////////////////////////////////////
92 /// @brief Event handler that handles API thread events. This is shared
93 /// between the API and its caller (e.g. driver shim) but typically
94 /// there is only a single API thread per context. So you can save
95 /// information in the class to be used for other events.
96 class EventHandlerApiStats : public EventHandlerFile
97 {
98 public:
99 EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)
100 {
101 #if defined(_WIN32)
102 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
103 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
104 // exists, this will attempt to copy it the first time we get here to package it with
105 // the stats. Otherwise, the user would need to specify the events.proto location when
106 // parsing the stats in post.
107 std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;
108 eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;
109 eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)
110 << "\\events.proto" << std::ends;
111
112 // If event.proto already exists, we're done; else do the copy
113 struct stat buf; // Use a Posix stat for file existence check
114 if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0)
115 {
116 // Now check to make sure the events.proto source exists
117 if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)
118 {
119 std::ifstream srcFile;
120 srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);
121 if (srcFile.is_open())
122 {
123 // Just do a binary buffer copy
124 std::ofstream dstFile;
125 dstFile.open(eventsProtoDstFilename.str().c_str(), std::ios::binary);
126 dstFile << srcFile.rdbuf();
127 dstFile.close();
128 }
129 srcFile.close();
130 }
131 }
132 #endif
133 }
134
135 virtual void Handle(const DrawInstancedEvent& event)
136 {
137 DrawInfoEvent e(event.data.drawId,
138 ArchRast::Instanced,
139 event.data.topology,
140 event.data.numVertices,
141 0,
142 0,
143 event.data.startVertex,
144 event.data.numInstances,
145 event.data.startInstance,
146 event.data.tsEnable,
147 event.data.gsEnable,
148 event.data.soEnable,
149 event.data.soTopology,
150 event.data.splitId);
151
152 EventHandlerFile::Handle(e);
153 }
154
155 virtual void Handle(const DrawIndexedInstancedEvent& event)
156 {
157 DrawInfoEvent e(event.data.drawId,
158 ArchRast::IndexedInstanced,
159 event.data.topology,
160 0,
161 event.data.numIndices,
162 event.data.indexOffset,
163 event.data.baseVertex,
164 event.data.numInstances,
165 event.data.startInstance,
166 event.data.tsEnable,
167 event.data.gsEnable,
168 event.data.soEnable,
169 event.data.soTopology,
170 event.data.splitId);
171
172 EventHandlerFile::Handle(e);
173 }
174 };
175
176 //////////////////////////////////////////////////////////////////////////
177 /// @brief Event handler that handles worker thread events. There is one
178 /// event handler per thread. The python script will need to sum
179 /// up counters across all of the threads.
180 class EventHandlerWorkerStats : public EventHandlerFile
181 {
182 public:
183 EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)
184 {
185 memset(mShaderStats, 0, sizeof(mShaderStats));
186 }
187
188 virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
189 {
190 // earlyZ test compute
191 mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
192 mDSSingleSample.earlyZTestFailCount +=
193 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
194
195 // earlyStencil test compute
196 mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
197 mDSSingleSample.earlyStencilTestFailCount +=
198 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
199
200 // earlyZ test single and multi sample
201 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
202 mDSCombined.earlyZTestFailCount +=
203 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
204
205 // earlyStencil test single and multi sample
206 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
207 mDSCombined.earlyStencilTestFailCount +=
208 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
209
210 mNeedFlush = true;
211 }
212
213 virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
214 {
215 // earlyZ test compute
216 mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
217 mDSSampleRate.earlyZTestFailCount +=
218 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
219
220 // earlyStencil test compute
221 mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
222 mDSSampleRate.earlyStencilTestFailCount +=
223 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
224
225 // earlyZ test single and multi sample
226 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
227 mDSCombined.earlyZTestFailCount +=
228 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
229
230 // earlyStencil test single and multi sample
231 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
232 mDSCombined.earlyStencilTestFailCount +=
233 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
234
235 mNeedFlush = true;
236 }
237
238 virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
239 {
240 // earlyZ test compute
241 mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
242 mDSNullPS.earlyZTestFailCount +=
243 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
244
245 // earlyStencil test compute
246 mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
247 mDSNullPS.earlyStencilTestFailCount +=
248 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
249 mNeedFlush = true;
250 }
251
252 virtual void Handle(const LateDepthStencilInfoSingleSample& event)
253 {
254 // lateZ test compute
255 mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
256 mDSSingleSample.lateZTestFailCount +=
257 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
258
259 // lateStencil test compute
260 mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
261 mDSSingleSample.lateStencilTestFailCount +=
262 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
263
264 // lateZ test single and multi sample
265 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
266 mDSCombined.lateZTestFailCount +=
267 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
268
269 // lateStencil test single and multi sample
270 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
271 mDSCombined.lateStencilTestFailCount +=
272 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
273
274 mNeedFlush = true;
275 }
276
277 virtual void Handle(const LateDepthStencilInfoSampleRate& event)
278 {
279 // lateZ test compute
280 mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
281 mDSSampleRate.lateZTestFailCount +=
282 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
283
284 // lateStencil test compute
285 mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
286 mDSSampleRate.lateStencilTestFailCount +=
287 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
288
289 // lateZ test single and multi sample
290 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
291 mDSCombined.lateZTestFailCount +=
292 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
293
294 // lateStencil test single and multi sample
295 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
296 mDSCombined.lateStencilTestFailCount +=
297 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
298
299 mNeedFlush = true;
300 }
301
302 virtual void Handle(const LateDepthStencilInfoNullPS& event)
303 {
304 // lateZ test compute
305 mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
306 mDSNullPS.lateZTestFailCount +=
307 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
308
309 // lateStencil test compute
310 mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
311 mDSNullPS.lateStencilTestFailCount +=
312 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
313 mNeedFlush = true;
314 }
315
316 virtual void Handle(const EarlyDepthInfoPixelRate& event)
317 {
318 // earlyZ test compute
319 mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
320 mDSPixelRate.earlyZTestFailCount +=
321 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
322 mNeedFlush = true;
323 }
324
325
326 virtual void Handle(const LateDepthInfoPixelRate& event)
327 {
328 // lateZ test compute
329 mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
330 mDSPixelRate.lateZTestFailCount +=
331 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
332 mNeedFlush = true;
333 }
334
335
336 virtual void Handle(const ClipInfoEvent& event)
337 {
338 mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
339 mClipper.trivialRejectCount +=
340 event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
341 mClipper.trivialAcceptCount +=
342 _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
343 }
344
345 void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)
346 {
347 pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted;
348 pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted;
349 pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted;
350 pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted;
351 pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted;
352 pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted;
353 pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted;
354 pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed;
355 pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted;
356 pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted;
357 pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted;
358 pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted;
359 }
360
361 virtual void Handle(const VSStats& event)
362 {
363 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
364 UpdateStats(&mShaderStats[SHADER_VERTEX], pStats);
365 }
366
367 virtual void Handle(const GSStats& event)
368 {
369 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
370 UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats);
371 }
372
373 virtual void Handle(const DSStats& event)
374 {
375 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
376 UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats);
377 }
378
379 virtual void Handle(const HSStats& event)
380 {
381 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
382 UpdateStats(&mShaderStats[SHADER_HULL], pStats);
383 }
384
385 virtual void Handle(const PSStats& event)
386 {
387 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
388 UpdateStats(&mShaderStats[SHADER_PIXEL], pStats);
389 mNeedFlush = true;
390 }
391
392 virtual void Handle(const CSStats& event)
393 {
394 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
395 UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats);
396 mNeedFlush = true;
397 }
398
399 // Flush cached events for this draw
400 virtual void FlushDraw(uint32_t drawId)
401 {
402 if (mNeedFlush == false)
403 return;
404
405 EventHandlerFile::Handle(PSInfo(drawId,
406 mShaderStats[SHADER_PIXEL].numInstExecuted,
407 mShaderStats[SHADER_PIXEL].numSampleExecuted,
408 mShaderStats[SHADER_PIXEL].numSampleLExecuted,
409 mShaderStats[SHADER_PIXEL].numSampleBExecuted,
410 mShaderStats[SHADER_PIXEL].numSampleCExecuted,
411 mShaderStats[SHADER_PIXEL].numSampleCLZExecuted,
412 mShaderStats[SHADER_PIXEL].numSampleCDExecuted,
413 mShaderStats[SHADER_PIXEL].numGather4Executed,
414 mShaderStats[SHADER_PIXEL].numGather4CExecuted,
415 mShaderStats[SHADER_PIXEL].numGather4CPOExecuted,
416 mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted,
417 mShaderStats[SHADER_PIXEL].numLodExecuted));
418 EventHandlerFile::Handle(CSInfo(drawId,
419 mShaderStats[SHADER_COMPUTE].numInstExecuted,
420 mShaderStats[SHADER_COMPUTE].numSampleExecuted,
421 mShaderStats[SHADER_COMPUTE].numSampleLExecuted,
422 mShaderStats[SHADER_COMPUTE].numSampleBExecuted,
423 mShaderStats[SHADER_COMPUTE].numSampleCExecuted,
424 mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted,
425 mShaderStats[SHADER_COMPUTE].numSampleCDExecuted,
426 mShaderStats[SHADER_COMPUTE].numGather4Executed,
427 mShaderStats[SHADER_COMPUTE].numGather4CExecuted,
428 mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted,
429 mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted,
430 mShaderStats[SHADER_COMPUTE].numLodExecuted));
431
432 // singleSample
433 EventHandlerFile::Handle(EarlyZSingleSample(
434 drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
435 EventHandlerFile::Handle(LateZSingleSample(
436 drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
437 EventHandlerFile::Handle(
438 EarlyStencilSingleSample(drawId,
439 mDSSingleSample.earlyStencilTestPassCount,
440 mDSSingleSample.earlyStencilTestFailCount));
441 EventHandlerFile::Handle(
442 LateStencilSingleSample(drawId,
443 mDSSingleSample.lateStencilTestPassCount,
444 mDSSingleSample.lateStencilTestFailCount));
445
446 // sampleRate
447 EventHandlerFile::Handle(EarlyZSampleRate(
448 drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
449 EventHandlerFile::Handle(LateZSampleRate(
450 drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
451 EventHandlerFile::Handle(
452 EarlyStencilSampleRate(drawId,
453 mDSSampleRate.earlyStencilTestPassCount,
454 mDSSampleRate.earlyStencilTestFailCount));
455 EventHandlerFile::Handle(LateStencilSampleRate(drawId,
456 mDSSampleRate.lateStencilTestPassCount,
457 mDSSampleRate.lateStencilTestFailCount));
458
459 // combined
460 EventHandlerFile::Handle(
461 EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
462 EventHandlerFile::Handle(
463 LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
464 EventHandlerFile::Handle(EarlyStencil(drawId,
465 mDSCombined.earlyStencilTestPassCount,
466 mDSCombined.earlyStencilTestFailCount));
467 EventHandlerFile::Handle(LateStencil(drawId,
468 mDSCombined.lateStencilTestPassCount,
469 mDSCombined.lateStencilTestFailCount));
470
471 // pixelRate
472 EventHandlerFile::Handle(EarlyZPixelRate(
473 drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
474 EventHandlerFile::Handle(LateZPixelRate(
475 drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
476
477
478 // NullPS
479 EventHandlerFile::Handle(
480 EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
481 EventHandlerFile::Handle(EarlyStencilNullPS(
482 drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
483
484 // Rasterized Subspans
485 EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
486
487 // Alpha Subspans
488 EventHandlerFile::Handle(
489 AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
490
491 // Primitive Culling
492 EventHandlerFile::Handle(
493 CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
494
495 mDSSingleSample = {};
496 mDSSampleRate = {};
497 mDSCombined = {};
498 mDSPixelRate = {};
499 mDSNullPS = {};
500
501 rastStats = {};
502 mCullStats = {};
503 mAlphaStats = {};
504
505 mShaderStats[SHADER_PIXEL] = {};
506 mShaderStats[SHADER_COMPUTE] = {};
507
508 mNeedFlush = false;
509 }
510
511 virtual void Handle(const FrontendDrawEndEvent& event)
512 {
513 // Clipper
514 EventHandlerFile::Handle(ClipperEvent(event.data.drawId,
515 mClipper.trivialRejectCount,
516 mClipper.trivialAcceptCount,
517 mClipper.mustClipCount));
518
519 // Tesselator
520 EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
521
522 // Geometry Shader
523 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
524 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
525 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
526
527 EventHandlerFile::Handle(VSInfo(event.data.drawId,
528 mShaderStats[SHADER_VERTEX].numInstExecuted,
529 mShaderStats[SHADER_VERTEX].numSampleExecuted,
530 mShaderStats[SHADER_VERTEX].numSampleLExecuted,
531 mShaderStats[SHADER_VERTEX].numSampleBExecuted,
532 mShaderStats[SHADER_VERTEX].numSampleCExecuted,
533 mShaderStats[SHADER_VERTEX].numSampleCLZExecuted,
534 mShaderStats[SHADER_VERTEX].numSampleCDExecuted,
535 mShaderStats[SHADER_VERTEX].numGather4Executed,
536 mShaderStats[SHADER_VERTEX].numGather4CExecuted,
537 mShaderStats[SHADER_VERTEX].numGather4CPOExecuted,
538 mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted,
539 mShaderStats[SHADER_VERTEX].numLodExecuted));
540 EventHandlerFile::Handle(HSInfo(event.data.drawId,
541 mShaderStats[SHADER_HULL].numInstExecuted,
542 mShaderStats[SHADER_HULL].numSampleExecuted,
543 mShaderStats[SHADER_HULL].numSampleLExecuted,
544 mShaderStats[SHADER_HULL].numSampleBExecuted,
545 mShaderStats[SHADER_HULL].numSampleCExecuted,
546 mShaderStats[SHADER_HULL].numSampleCLZExecuted,
547 mShaderStats[SHADER_HULL].numSampleCDExecuted,
548 mShaderStats[SHADER_HULL].numGather4Executed,
549 mShaderStats[SHADER_HULL].numGather4CExecuted,
550 mShaderStats[SHADER_HULL].numGather4CPOExecuted,
551 mShaderStats[SHADER_HULL].numGather4CPOCExecuted,
552 mShaderStats[SHADER_HULL].numLodExecuted));
553 EventHandlerFile::Handle(DSInfo(event.data.drawId,
554 mShaderStats[SHADER_DOMAIN].numInstExecuted,
555 mShaderStats[SHADER_DOMAIN].numSampleExecuted,
556 mShaderStats[SHADER_DOMAIN].numSampleLExecuted,
557 mShaderStats[SHADER_DOMAIN].numSampleBExecuted,
558 mShaderStats[SHADER_DOMAIN].numSampleCExecuted,
559 mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted,
560 mShaderStats[SHADER_DOMAIN].numSampleCDExecuted,
561 mShaderStats[SHADER_DOMAIN].numGather4Executed,
562 mShaderStats[SHADER_DOMAIN].numGather4CExecuted,
563 mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted,
564 mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted,
565 mShaderStats[SHADER_DOMAIN].numLodExecuted));
566 EventHandlerFile::Handle(GSInfo(event.data.drawId,
567 mShaderStats[SHADER_GEOMETRY].numInstExecuted,
568 mShaderStats[SHADER_GEOMETRY].numSampleExecuted,
569 mShaderStats[SHADER_GEOMETRY].numSampleLExecuted,
570 mShaderStats[SHADER_GEOMETRY].numSampleBExecuted,
571 mShaderStats[SHADER_GEOMETRY].numSampleCExecuted,
572 mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted,
573 mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted,
574 mShaderStats[SHADER_GEOMETRY].numGather4Executed,
575 mShaderStats[SHADER_GEOMETRY].numGather4CExecuted,
576 mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted,
577 mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted,
578 mShaderStats[SHADER_GEOMETRY].numLodExecuted));
579
580 mShaderStats[SHADER_VERTEX] = {};
581 mShaderStats[SHADER_HULL] = {};
582 mShaderStats[SHADER_DOMAIN] = {};
583 mShaderStats[SHADER_GEOMETRY] = {};
584
585 // Reset Internal Counters
586 mClipper = {};
587 mTS = {};
588 mGS = {};
589 }
590
591 virtual void Handle(const GSPrimInfo& event)
592 {
593 mGS.inputPrimCount += event.data.inputPrimCount;
594 mGS.primGeneratedCount += event.data.primGeneratedCount;
595 mGS.vertsInput += event.data.vertsInput;
596 }
597
598 virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; }
599
600 virtual void Handle(const RasterTileCount& event)
601 {
602 rastStats.rasterTiles += event.data.rasterTiles;
603 }
604
605 virtual void Handle(const CullInfoEvent& event)
606 {
607 mCullStats.degeneratePrimCount += _mm_popcnt_u32(
608 event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
609 mCullStats.backfacePrimCount += _mm_popcnt_u32(
610 event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
611 }
612
613 virtual void Handle(const AlphaInfoEvent& event)
614 {
615 mAlphaStats.alphaTestCount += event.data.alphaTestEnable;
616 mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
617 }
618
619 protected:
620 bool mNeedFlush;
621 // Per draw stats
622 DepthStencilStats mDSSingleSample = {};
623 DepthStencilStats mDSSampleRate = {};
624 DepthStencilStats mDSPixelRate = {};
625 DepthStencilStats mDSCombined = {};
626 DepthStencilStats mDSNullPS = {};
627 DepthStencilStats mDSOmZ = {};
628 CStats mClipper = {};
629 TEStats mTS = {};
630 GSStateInfo mGS = {};
631 RastStats rastStats = {};
632 CullStats mCullStats = {};
633 AlphaStats mAlphaStats = {};
634
635 SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
636
637 };
638
639 static EventManager* FromHandle(HANDLE hThreadContext)
640 {
641 return reinterpret_cast<EventManager*>(hThreadContext);
642 }
643
644 // Construct an event manager and associate a handler with it.
645 HANDLE CreateThreadContext(AR_THREAD type)
646 {
647 // Can we assume single threaded here?
648 static std::atomic<uint32_t> counter(0);
649 uint32_t id = counter.fetch_add(1);
650
651 EventManager* pManager = new EventManager();
652
653 if (pManager)
654 {
655 EventHandlerFile* pHandler = nullptr;
656
657 if (type == AR_THREAD::API)
658 {
659 pHandler = new EventHandlerApiStats(id);
660 pManager->Attach(pHandler);
661 pHandler->Handle(ThreadStartApiEvent());
662 }
663 else
664 {
665 pHandler = new EventHandlerWorkerStats(id);
666 pManager->Attach(pHandler);
667 pHandler->Handle(ThreadStartWorkerEvent());
668 }
669
670 pHandler->MarkHeader();
671
672 return pManager;
673 }
674
675 SWR_INVALID("Failed to register thread.");
676 return nullptr;
677 }
678
679 void DestroyThreadContext(HANDLE hThreadContext)
680 {
681 EventManager* pManager = FromHandle(hThreadContext);
682 SWR_ASSERT(pManager != nullptr);
683
684 delete pManager;
685 }
686
687 // Dispatch event for this thread.
688 void Dispatch(HANDLE hThreadContext, const Event& event)
689 {
690 if (event.IsEnabled())
691 {
692 EventManager* pManager = reinterpret_cast<EventManager*>(hThreadContext);
693 SWR_ASSERT(pManager != nullptr);
694 pManager->Dispatch(event);
695 }
696 }
697
698 // Flush for this thread.
699 void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
700 {
701 EventManager* pManager = FromHandle(hThreadContext);
702 SWR_ASSERT(pManager != nullptr);
703
704 pManager->FlushDraw(drawId);
705 }
706 } // namespace ArchRast