1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation for archrast.
27 ******************************************************************************/
31 #include "common/os.h"
32 #include "archrast/archrast.h"
33 #include "archrast/eventmanager.h"
34 #include "gen_ar_event.hpp"
35 #include "gen_ar_eventhandlerfile.hpp"
//////////////////////////////////////////////////////////////////////////
/// @brief Tallies of depth (Z) and stencil test outcomes, kept separately
///        for the early and the late test stage. Every counter starts at 0.
struct DepthStencilStats
{
    // Depth test outcomes.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test outcomes.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
55 uint32_t trivialRejectCount
;
56 uint32_t trivialAcceptCount
;
57 uint32_t mustClipCount
;
62 uint32_t inputPrims
= 0;
//@todo: Change this to numPatches. Assumes 1 patch per prim; if that holds, this is fine.
68 uint32_t inputPrimCount
;
69 uint32_t primGeneratedCount
;
75 uint32_t rasterTiles
= 0;
80 uint32_t degeneratePrimCount
= 0;
81 uint32_t backfacePrimCount
= 0;
86 uint32_t alphaTestCount
= 0;
87 uint32_t alphaBlendCount
= 0;
91 //////////////////////////////////////////////////////////////////////////
92 /// @brief Event handler that handles API thread events. This is shared
93 /// between the API and its caller (e.g. driver shim) but typically
94 /// there is only a single API thread per context. So you can save
95 /// information in the class to be used for other events.
96 class EventHandlerApiStats
: public EventHandlerFile
99 EventHandlerApiStats(uint32_t id
) : EventHandlerFile(id
)
102 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
103 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
104 // exists, this will attempt to copy it the first time we get here to package it with
105 // the stats. Otherwise, the user would need to specify the events.proto location when
106 // parsing the stats in post.
107 std::stringstream eventsProtoSrcFilename
, eventsProtoDstFilename
;
108 eventsProtoSrcFilename
<< KNOB_DEBUG_OUTPUT_DIR
<< "\\events.proto" << std::ends
;
109 eventsProtoDstFilename
<< mOutputDir
.substr(0, mOutputDir
.size() - 1)
110 << "\\events.proto" << std::ends
;
112 // If event.proto already exists, we're done; else do the copy
113 struct stat buf
; // Use a Posix stat for file existence check
114 if (!stat(eventsProtoDstFilename
.str().c_str(), &buf
) == 0)
116 // Now check to make sure the events.proto source exists
117 if (stat(eventsProtoSrcFilename
.str().c_str(), &buf
) == 0)
119 std::ifstream srcFile
;
120 srcFile
.open(eventsProtoSrcFilename
.str().c_str(), std::ios::binary
);
121 if (srcFile
.is_open())
123 // Just do a binary buffer copy
124 std::ofstream dstFile
;
125 dstFile
.open(eventsProtoDstFilename
.str().c_str(), std::ios::binary
);
126 dstFile
<< srcFile
.rdbuf();
135 virtual void Handle(const DrawInstancedEvent
& event
)
137 DrawInfoEvent
e(event
.data
.drawId
,
140 event
.data
.numVertices
,
143 event
.data
.startVertex
,
144 event
.data
.numInstances
,
145 event
.data
.startInstance
,
149 event
.data
.soTopology
,
152 EventHandlerFile::Handle(e
);
155 virtual void Handle(const DrawIndexedInstancedEvent
& event
)
157 DrawInfoEvent
e(event
.data
.drawId
,
158 ArchRast::IndexedInstanced
,
161 event
.data
.numIndices
,
162 event
.data
.indexOffset
,
163 event
.data
.baseVertex
,
164 event
.data
.numInstances
,
165 event
.data
.startInstance
,
169 event
.data
.soTopology
,
172 EventHandlerFile::Handle(e
);
176 //////////////////////////////////////////////////////////////////////////
177 /// @brief Event handler that handles worker thread events. There is one
178 /// event handler per thread. The python script will need to sum
179 /// up counters across all of the threads.
180 class EventHandlerWorkerStats
: public EventHandlerFile
183 EventHandlerWorkerStats(uint32_t id
) : EventHandlerFile(id
), mNeedFlush(false)
185 memset(mShaderStats
, 0, sizeof(mShaderStats
));
188 virtual void Handle(const EarlyDepthStencilInfoSingleSample
& event
)
190 // earlyZ test compute
191 mDSSingleSample
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
192 mDSSingleSample
.earlyZTestFailCount
+=
193 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
195 // earlyStencil test compute
196 mDSSingleSample
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
197 mDSSingleSample
.earlyStencilTestFailCount
+=
198 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
200 // earlyZ test single and multi sample
201 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
202 mDSCombined
.earlyZTestFailCount
+=
203 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
205 // earlyStencil test single and multi sample
206 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
207 mDSCombined
.earlyStencilTestFailCount
+=
208 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
213 virtual void Handle(const EarlyDepthStencilInfoSampleRate
& event
)
215 // earlyZ test compute
216 mDSSampleRate
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
217 mDSSampleRate
.earlyZTestFailCount
+=
218 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
220 // earlyStencil test compute
221 mDSSampleRate
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
222 mDSSampleRate
.earlyStencilTestFailCount
+=
223 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
225 // earlyZ test single and multi sample
226 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
227 mDSCombined
.earlyZTestFailCount
+=
228 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
230 // earlyStencil test single and multi sample
231 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
232 mDSCombined
.earlyStencilTestFailCount
+=
233 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
238 virtual void Handle(const EarlyDepthStencilInfoNullPS
& event
)
240 // earlyZ test compute
241 mDSNullPS
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
242 mDSNullPS
.earlyZTestFailCount
+=
243 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
245 // earlyStencil test compute
246 mDSNullPS
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
247 mDSNullPS
.earlyStencilTestFailCount
+=
248 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
252 virtual void Handle(const LateDepthStencilInfoSingleSample
& event
)
254 // lateZ test compute
255 mDSSingleSample
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
256 mDSSingleSample
.lateZTestFailCount
+=
257 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
259 // lateStencil test compute
260 mDSSingleSample
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
261 mDSSingleSample
.lateStencilTestFailCount
+=
262 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
264 // lateZ test single and multi sample
265 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
266 mDSCombined
.lateZTestFailCount
+=
267 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
269 // lateStencil test single and multi sample
270 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
271 mDSCombined
.lateStencilTestFailCount
+=
272 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
277 virtual void Handle(const LateDepthStencilInfoSampleRate
& event
)
279 // lateZ test compute
280 mDSSampleRate
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
281 mDSSampleRate
.lateZTestFailCount
+=
282 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
284 // lateStencil test compute
285 mDSSampleRate
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
286 mDSSampleRate
.lateStencilTestFailCount
+=
287 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
289 // lateZ test single and multi sample
290 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
291 mDSCombined
.lateZTestFailCount
+=
292 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
294 // lateStencil test single and multi sample
295 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
296 mDSCombined
.lateStencilTestFailCount
+=
297 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
302 virtual void Handle(const LateDepthStencilInfoNullPS
& event
)
304 // lateZ test compute
305 mDSNullPS
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
306 mDSNullPS
.lateZTestFailCount
+=
307 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
309 // lateStencil test compute
310 mDSNullPS
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
311 mDSNullPS
.lateStencilTestFailCount
+=
312 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
316 virtual void Handle(const EarlyDepthInfoPixelRate
& event
)
318 // earlyZ test compute
319 mDSPixelRate
.earlyZTestPassCount
+= event
.data
.depthPassCount
;
320 mDSPixelRate
.earlyZTestFailCount
+=
321 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
326 virtual void Handle(const LateDepthInfoPixelRate
& event
)
328 // lateZ test compute
329 mDSPixelRate
.lateZTestPassCount
+= event
.data
.depthPassCount
;
330 mDSPixelRate
.lateZTestFailCount
+=
331 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
336 virtual void Handle(const ClipInfoEvent
& event
)
338 mClipper
.mustClipCount
+= _mm_popcnt_u32(event
.data
.clipMask
);
339 mClipper
.trivialRejectCount
+=
340 event
.data
.numInvocations
- _mm_popcnt_u32(event
.data
.validMask
);
341 mClipper
.trivialAcceptCount
+=
342 _mm_popcnt_u32(event
.data
.validMask
& ~event
.data
.clipMask
);
345 void UpdateStats(SWR_SHADER_STATS
* pStatTotals
, const SWR_SHADER_STATS
* pStatUpdate
)
347 pStatTotals
->numInstExecuted
+= pStatUpdate
->numInstExecuted
;
348 pStatTotals
->numSampleExecuted
+= pStatUpdate
->numSampleExecuted
;
349 pStatTotals
->numSampleLExecuted
+= pStatUpdate
->numSampleLExecuted
;
350 pStatTotals
->numSampleBExecuted
+= pStatUpdate
->numSampleBExecuted
;
351 pStatTotals
->numSampleCExecuted
+= pStatUpdate
->numSampleCExecuted
;
352 pStatTotals
->numSampleCLZExecuted
+= pStatUpdate
->numSampleCLZExecuted
;
353 pStatTotals
->numSampleCDExecuted
+= pStatUpdate
->numSampleCDExecuted
;
354 pStatTotals
->numGather4Executed
+= pStatUpdate
->numGather4Executed
;
355 pStatTotals
->numGather4CExecuted
+= pStatUpdate
->numGather4CExecuted
;
356 pStatTotals
->numGather4CPOExecuted
+= pStatUpdate
->numGather4CPOExecuted
;
357 pStatTotals
->numGather4CPOCExecuted
+= pStatUpdate
->numGather4CPOCExecuted
;
358 pStatTotals
->numLodExecuted
+= pStatUpdate
->numLodExecuted
;
361 virtual void Handle(const VSStats
& event
)
363 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
364 UpdateStats(&mShaderStats
[SHADER_VERTEX
], pStats
);
367 virtual void Handle(const GSStats
& event
)
369 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
370 UpdateStats(&mShaderStats
[SHADER_GEOMETRY
], pStats
);
373 virtual void Handle(const DSStats
& event
)
375 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
376 UpdateStats(&mShaderStats
[SHADER_DOMAIN
], pStats
);
379 virtual void Handle(const HSStats
& event
)
381 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
382 UpdateStats(&mShaderStats
[SHADER_HULL
], pStats
);
385 virtual void Handle(const PSStats
& event
)
387 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
388 UpdateStats(&mShaderStats
[SHADER_PIXEL
], pStats
);
392 virtual void Handle(const CSStats
& event
)
394 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
395 UpdateStats(&mShaderStats
[SHADER_COMPUTE
], pStats
);
399 // Flush cached events for this draw
400 virtual void FlushDraw(uint32_t drawId
)
402 if (mNeedFlush
== false)
405 EventHandlerFile::Handle(PSInfo(drawId
,
406 mShaderStats
[SHADER_PIXEL
].numInstExecuted
,
407 mShaderStats
[SHADER_PIXEL
].numSampleExecuted
,
408 mShaderStats
[SHADER_PIXEL
].numSampleLExecuted
,
409 mShaderStats
[SHADER_PIXEL
].numSampleBExecuted
,
410 mShaderStats
[SHADER_PIXEL
].numSampleCExecuted
,
411 mShaderStats
[SHADER_PIXEL
].numSampleCLZExecuted
,
412 mShaderStats
[SHADER_PIXEL
].numSampleCDExecuted
,
413 mShaderStats
[SHADER_PIXEL
].numGather4Executed
,
414 mShaderStats
[SHADER_PIXEL
].numGather4CExecuted
,
415 mShaderStats
[SHADER_PIXEL
].numGather4CPOExecuted
,
416 mShaderStats
[SHADER_PIXEL
].numGather4CPOCExecuted
,
417 mShaderStats
[SHADER_PIXEL
].numLodExecuted
));
418 EventHandlerFile::Handle(CSInfo(drawId
,
419 mShaderStats
[SHADER_COMPUTE
].numInstExecuted
,
420 mShaderStats
[SHADER_COMPUTE
].numSampleExecuted
,
421 mShaderStats
[SHADER_COMPUTE
].numSampleLExecuted
,
422 mShaderStats
[SHADER_COMPUTE
].numSampleBExecuted
,
423 mShaderStats
[SHADER_COMPUTE
].numSampleCExecuted
,
424 mShaderStats
[SHADER_COMPUTE
].numSampleCLZExecuted
,
425 mShaderStats
[SHADER_COMPUTE
].numSampleCDExecuted
,
426 mShaderStats
[SHADER_COMPUTE
].numGather4Executed
,
427 mShaderStats
[SHADER_COMPUTE
].numGather4CExecuted
,
428 mShaderStats
[SHADER_COMPUTE
].numGather4CPOExecuted
,
429 mShaderStats
[SHADER_COMPUTE
].numGather4CPOCExecuted
,
430 mShaderStats
[SHADER_COMPUTE
].numLodExecuted
));
433 EventHandlerFile::Handle(EarlyZSingleSample(
434 drawId
, mDSSingleSample
.earlyZTestPassCount
, mDSSingleSample
.earlyZTestFailCount
));
435 EventHandlerFile::Handle(LateZSingleSample(
436 drawId
, mDSSingleSample
.lateZTestPassCount
, mDSSingleSample
.lateZTestFailCount
));
437 EventHandlerFile::Handle(
438 EarlyStencilSingleSample(drawId
,
439 mDSSingleSample
.earlyStencilTestPassCount
,
440 mDSSingleSample
.earlyStencilTestFailCount
));
441 EventHandlerFile::Handle(
442 LateStencilSingleSample(drawId
,
443 mDSSingleSample
.lateStencilTestPassCount
,
444 mDSSingleSample
.lateStencilTestFailCount
));
447 EventHandlerFile::Handle(EarlyZSampleRate(
448 drawId
, mDSSampleRate
.earlyZTestPassCount
, mDSSampleRate
.earlyZTestFailCount
));
449 EventHandlerFile::Handle(LateZSampleRate(
450 drawId
, mDSSampleRate
.lateZTestPassCount
, mDSSampleRate
.lateZTestFailCount
));
451 EventHandlerFile::Handle(
452 EarlyStencilSampleRate(drawId
,
453 mDSSampleRate
.earlyStencilTestPassCount
,
454 mDSSampleRate
.earlyStencilTestFailCount
));
455 EventHandlerFile::Handle(LateStencilSampleRate(drawId
,
456 mDSSampleRate
.lateStencilTestPassCount
,
457 mDSSampleRate
.lateStencilTestFailCount
));
460 EventHandlerFile::Handle(
461 EarlyZ(drawId
, mDSCombined
.earlyZTestPassCount
, mDSCombined
.earlyZTestFailCount
));
462 EventHandlerFile::Handle(
463 LateZ(drawId
, mDSCombined
.lateZTestPassCount
, mDSCombined
.lateZTestFailCount
));
464 EventHandlerFile::Handle(EarlyStencil(drawId
,
465 mDSCombined
.earlyStencilTestPassCount
,
466 mDSCombined
.earlyStencilTestFailCount
));
467 EventHandlerFile::Handle(LateStencil(drawId
,
468 mDSCombined
.lateStencilTestPassCount
,
469 mDSCombined
.lateStencilTestFailCount
));
472 EventHandlerFile::Handle(EarlyZPixelRate(
473 drawId
, mDSPixelRate
.earlyZTestPassCount
, mDSPixelRate
.earlyZTestFailCount
));
474 EventHandlerFile::Handle(LateZPixelRate(
475 drawId
, mDSPixelRate
.lateZTestPassCount
, mDSPixelRate
.lateZTestFailCount
));
479 EventHandlerFile::Handle(
480 EarlyZNullPS(drawId
, mDSNullPS
.earlyZTestPassCount
, mDSNullPS
.earlyZTestFailCount
));
481 EventHandlerFile::Handle(EarlyStencilNullPS(
482 drawId
, mDSNullPS
.earlyStencilTestPassCount
, mDSNullPS
.earlyStencilTestFailCount
));
484 // Rasterized Subspans
485 EventHandlerFile::Handle(RasterTiles(drawId
, rastStats
.rasterTiles
));
488 EventHandlerFile::Handle(
489 AlphaEvent(drawId
, mAlphaStats
.alphaTestCount
, mAlphaStats
.alphaBlendCount
));
492 EventHandlerFile::Handle(
493 CullEvent(drawId
, mCullStats
.backfacePrimCount
, mCullStats
.degeneratePrimCount
));
495 mDSSingleSample
= {};
505 mShaderStats
[SHADER_PIXEL
] = {};
506 mShaderStats
[SHADER_COMPUTE
] = {};
511 virtual void Handle(const FrontendDrawEndEvent
& event
)
514 EventHandlerFile::Handle(ClipperEvent(event
.data
.drawId
,
515 mClipper
.trivialRejectCount
,
516 mClipper
.trivialAcceptCount
,
517 mClipper
.mustClipCount
));
520 EventHandlerFile::Handle(TessPrims(event
.data
.drawId
, mTS
.inputPrims
));
523 EventHandlerFile::Handle(GSInputPrims(event
.data
.drawId
, mGS
.inputPrimCount
));
524 EventHandlerFile::Handle(GSPrimsGen(event
.data
.drawId
, mGS
.primGeneratedCount
));
525 EventHandlerFile::Handle(GSVertsInput(event
.data
.drawId
, mGS
.vertsInput
));
527 EventHandlerFile::Handle(VSInfo(event
.data
.drawId
,
528 mShaderStats
[SHADER_VERTEX
].numInstExecuted
,
529 mShaderStats
[SHADER_VERTEX
].numSampleExecuted
,
530 mShaderStats
[SHADER_VERTEX
].numSampleLExecuted
,
531 mShaderStats
[SHADER_VERTEX
].numSampleBExecuted
,
532 mShaderStats
[SHADER_VERTEX
].numSampleCExecuted
,
533 mShaderStats
[SHADER_VERTEX
].numSampleCLZExecuted
,
534 mShaderStats
[SHADER_VERTEX
].numSampleCDExecuted
,
535 mShaderStats
[SHADER_VERTEX
].numGather4Executed
,
536 mShaderStats
[SHADER_VERTEX
].numGather4CExecuted
,
537 mShaderStats
[SHADER_VERTEX
].numGather4CPOExecuted
,
538 mShaderStats
[SHADER_VERTEX
].numGather4CPOCExecuted
,
539 mShaderStats
[SHADER_VERTEX
].numLodExecuted
));
540 EventHandlerFile::Handle(HSInfo(event
.data
.drawId
,
541 mShaderStats
[SHADER_HULL
].numInstExecuted
,
542 mShaderStats
[SHADER_HULL
].numSampleExecuted
,
543 mShaderStats
[SHADER_HULL
].numSampleLExecuted
,
544 mShaderStats
[SHADER_HULL
].numSampleBExecuted
,
545 mShaderStats
[SHADER_HULL
].numSampleCExecuted
,
546 mShaderStats
[SHADER_HULL
].numSampleCLZExecuted
,
547 mShaderStats
[SHADER_HULL
].numSampleCDExecuted
,
548 mShaderStats
[SHADER_HULL
].numGather4Executed
,
549 mShaderStats
[SHADER_HULL
].numGather4CExecuted
,
550 mShaderStats
[SHADER_HULL
].numGather4CPOExecuted
,
551 mShaderStats
[SHADER_HULL
].numGather4CPOCExecuted
,
552 mShaderStats
[SHADER_HULL
].numLodExecuted
));
553 EventHandlerFile::Handle(DSInfo(event
.data
.drawId
,
554 mShaderStats
[SHADER_DOMAIN
].numInstExecuted
,
555 mShaderStats
[SHADER_DOMAIN
].numSampleExecuted
,
556 mShaderStats
[SHADER_DOMAIN
].numSampleLExecuted
,
557 mShaderStats
[SHADER_DOMAIN
].numSampleBExecuted
,
558 mShaderStats
[SHADER_DOMAIN
].numSampleCExecuted
,
559 mShaderStats
[SHADER_DOMAIN
].numSampleCLZExecuted
,
560 mShaderStats
[SHADER_DOMAIN
].numSampleCDExecuted
,
561 mShaderStats
[SHADER_DOMAIN
].numGather4Executed
,
562 mShaderStats
[SHADER_DOMAIN
].numGather4CExecuted
,
563 mShaderStats
[SHADER_DOMAIN
].numGather4CPOExecuted
,
564 mShaderStats
[SHADER_DOMAIN
].numGather4CPOCExecuted
,
565 mShaderStats
[SHADER_DOMAIN
].numLodExecuted
));
566 EventHandlerFile::Handle(GSInfo(event
.data
.drawId
,
567 mShaderStats
[SHADER_GEOMETRY
].numInstExecuted
,
568 mShaderStats
[SHADER_GEOMETRY
].numSampleExecuted
,
569 mShaderStats
[SHADER_GEOMETRY
].numSampleLExecuted
,
570 mShaderStats
[SHADER_GEOMETRY
].numSampleBExecuted
,
571 mShaderStats
[SHADER_GEOMETRY
].numSampleCExecuted
,
572 mShaderStats
[SHADER_GEOMETRY
].numSampleCLZExecuted
,
573 mShaderStats
[SHADER_GEOMETRY
].numSampleCDExecuted
,
574 mShaderStats
[SHADER_GEOMETRY
].numGather4Executed
,
575 mShaderStats
[SHADER_GEOMETRY
].numGather4CExecuted
,
576 mShaderStats
[SHADER_GEOMETRY
].numGather4CPOExecuted
,
577 mShaderStats
[SHADER_GEOMETRY
].numGather4CPOCExecuted
,
578 mShaderStats
[SHADER_GEOMETRY
].numLodExecuted
));
580 mShaderStats
[SHADER_VERTEX
] = {};
581 mShaderStats
[SHADER_HULL
] = {};
582 mShaderStats
[SHADER_DOMAIN
] = {};
583 mShaderStats
[SHADER_GEOMETRY
] = {};
585 // Reset Internal Counters
591 virtual void Handle(const GSPrimInfo
& event
)
593 mGS
.inputPrimCount
+= event
.data
.inputPrimCount
;
594 mGS
.primGeneratedCount
+= event
.data
.primGeneratedCount
;
595 mGS
.vertsInput
+= event
.data
.vertsInput
;
598 virtual void Handle(const TessPrimCount
& event
) { mTS
.inputPrims
+= event
.data
.primCount
; }
600 virtual void Handle(const RasterTileCount
& event
)
602 rastStats
.rasterTiles
+= event
.data
.rasterTiles
;
605 virtual void Handle(const CullInfoEvent
& event
)
607 mCullStats
.degeneratePrimCount
+= _mm_popcnt_u32(
608 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.degeneratePrimMask
));
609 mCullStats
.backfacePrimCount
+= _mm_popcnt_u32(
610 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.backfacePrimMask
));
613 virtual void Handle(const AlphaInfoEvent
& event
)
615 mAlphaStats
.alphaTestCount
+= event
.data
.alphaTestEnable
;
616 mAlphaStats
.alphaBlendCount
+= event
.data
.alphaBlendEnable
;
622 DepthStencilStats mDSSingleSample
= {};
623 DepthStencilStats mDSSampleRate
= {};
624 DepthStencilStats mDSPixelRate
= {};
625 DepthStencilStats mDSCombined
= {};
626 DepthStencilStats mDSNullPS
= {};
627 DepthStencilStats mDSOmZ
= {};
628 CStats mClipper
= {};
630 GSStateInfo mGS
= {};
631 RastStats rastStats
= {};
632 CullStats mCullStats
= {};
633 AlphaStats mAlphaStats
= {};
635 SWR_SHADER_STATS mShaderStats
[NUM_SHADER_TYPES
];
639 static EventManager
* FromHandle(HANDLE hThreadContext
)
641 return reinterpret_cast<EventManager
*>(hThreadContext
);
644 // Construct an event manager and associate a handler with it.
645 HANDLE
CreateThreadContext(AR_THREAD type
)
647 // Can we assume single threaded here?
648 static std::atomic
<uint32_t> counter(0);
649 uint32_t id
= counter
.fetch_add(1);
651 EventManager
* pManager
= new EventManager();
655 EventHandlerFile
* pHandler
= nullptr;
657 if (type
== AR_THREAD::API
)
659 pHandler
= new EventHandlerApiStats(id
);
660 pManager
->Attach(pHandler
);
661 pHandler
->Handle(ThreadStartApiEvent());
665 pHandler
= new EventHandlerWorkerStats(id
);
666 pManager
->Attach(pHandler
);
667 pHandler
->Handle(ThreadStartWorkerEvent());
670 pHandler
->MarkHeader();
675 SWR_INVALID("Failed to register thread.");
679 void DestroyThreadContext(HANDLE hThreadContext
)
681 EventManager
* pManager
= FromHandle(hThreadContext
);
682 SWR_ASSERT(pManager
!= nullptr);
687 // Dispatch event for this thread.
688 void Dispatch(HANDLE hThreadContext
, const Event
& event
)
690 if (event
.IsEnabled())
692 EventManager
* pManager
= reinterpret_cast<EventManager
*>(hThreadContext
);
693 SWR_ASSERT(pManager
!= nullptr);
694 pManager
->Dispatch(event
);
698 // Flush for this thread.
699 void FlushDraw(HANDLE hThreadContext
, uint32_t drawId
)
701 EventManager
* pManager
= FromHandle(hThreadContext
);
702 SWR_ASSERT(pManager
!= nullptr);
704 pManager
->FlushDraw(drawId
);
706 } // namespace ArchRast