10 #include <type_traits>
// Benchmark configuration values.
// NOTE(review): the enclosing type declaration is not visible in this excerpt; these are
// presumably the members of the `Config` type used by the harness below -- confirm.
//
// Optional 32-bit thread count; default-constructed, so it starts out empty.
15 std::optional
<std::uint32_t> thread_count
;
// Optional 64-bit iteration count; default-constructed, so it starts out empty.
16 std::optional
<std::uint64_t> iteration_count
;
// Defaults to 0. Going by the name, presumably the base-2 logarithm of the number of
// memory locations -- confirm against the code that reads it.
17 std::uint32_t log2_memory_location_count
= 0;
// Defaults to 0. Going by the name, presumably the base-2 logarithm of a stride -- confirm.
18 std::uint32_t log2_stride
= 0;
// Compile-time constant 28. Going by the name, presumably the upper bound on
// log2_memory_location_count + log2_stride; the check itself is not visible here.
19 static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride
= 28;
// Forward declaration so that BenchHarnessBase can name the template as a friend.
22 template <typename Fn
, typename Input
> class BenchHarness
;
// Non-template base of BenchHarness<Fn, Input>. It declares base_run(), which receives the
// per-thread work as a plain function pointer.
// NOTE(review): braces and access specifiers are not visible in this excerpt, so the
// visibility of each member below cannot be determined from here.
24 class BenchHarnessBase
// Every BenchHarness specialization is a friend of this class.
26 template <typename Fn
, typename Input
> friend class BenchHarness
;
// Type-erased (shared_ptr<void>) handle.
// NOTE(review): presumably holds the value returned by get_thread_cache() -- confirm.
29 std::shared_ptr
<void> thread_cache
;
// ThreadCache is granted friend access; its definition is not visible in this excerpt.
31 friend class ThreadCache
;
// Declaration only (implementation not visible here). Takes the Config by value and a
// function pointer `fn`; `fn` receives a BenchHarnessBase pointer, an iteration count and
// a thread number.
32 void base_run(Config config
,
33 void (*fn
)(BenchHarnessBase
*bench_harness_base
,
34 std::uint64_t iteration_count
,
35 std::uint32_t thread_num
));
// Static function returning a type-erased shared_ptr; implementation not visible here.
38 static std::shared_ptr
<void> get_thread_cache();
// Harness binding a callable `Fn` to an input value `Input`. It derives privately from
// BenchHarnessBase and hands the per-iteration loop to base_run() as a captureless lambda.
// NOTE(review): braces and the member declarations are not visible in this excerpt.
41 template <typename Fn
, typename Input
>
42 class BenchHarness final
: private BenchHarnessBase
// Takes both arguments by value and moves them into the `fn` and `input` members.
49 BenchHarness(Fn fn
, Input input
)
50 : fn(std::move(fn
)), input(std::move(input
))
// Forwards `config` to base_run() together with a captureless lambda, which therefore
// matches base_run()'s function-pointer parameter.
53 void run(Config config
)
55 base_run(config
, [](BenchHarnessBase
*bench_harness_base
,
56 std::uint64_t iteration_count
,
57 std::uint32_t thread_num
) {
// The lambda captures nothing, so the harness is recovered by down-casting the base
// pointer it is handed.
58 auto self
= static_cast<BenchHarness
*>(bench_harness_base
);
60 // copy for repeatability, also so the optimization barrier is applied to the copy
62 auto input
= self
->input
;
// Calls the benchmarked callable iteration_count times with (input, i, thread_num).
63 for (std::uint64_t i
= 0; i
< iteration_count
; i
++)
65 // optimization barrier
// Empty asm statement that takes the address of the local `input` as an input operand and
// declares a "memory" clobber.
66 asm("" : : "r"(std::addressof(input
)) : "memory");
// Compile-time branch on whether invoking the callable yields void.
// NOTE(review): the trait is evaluated with `Input` (an rvalue), while the calls below
// pass the lvalue `input`; for a callable that takes `Input &` the two disagree --
// consider `Input &` here.
68 if constexpr (std::is_void_v
<std::invoke_result_t
<
69 Fn
&, Input
, decltype(i
),
70 decltype(thread_num
)>>)
// void result: nothing to keep, just call.
// NOTE(review): the lambda's capture list is empty, so how `fn` is brought into scope is
// not visible in this excerpt.
72 fn(input
, i
, thread_num
);
// non-void result: store it in a local so its address can be given to the barrier below.
76 auto output
= fn(input
, i
, thread_num
);
78 // optimization barrier
// Same empty-asm form as above, this time on the address of `output`.
79 asm("" : : "r"(std::addressof(output
)) : "memory");
// Type-erased callable that takes a Config by value and returns void. The constructor
// stores a lambda here and run() invokes it.
90 std::function
<void(Config config
)> m_run
;
93 template <typename Fn
, typename Input
>
94 explicit Benchmark(Fn fn
, Input input
, std::string name
)
95 : m_name(std::move(name
)), m_run([fn
, input
](Config config
) {
96 return BenchHarness(std::move(fn
), std::move(input
)).run(config
);
100 void run(Config config
)
102 return m_run(config
);
// Const accessor returning a reference to a const std::string.
// NOTE(review): the body is not visible in this excerpt; presumably it returns the
// `m_name` member set by the constructor -- confirm.
104 const std::string
&name() const
110 template <typename Fn
, typename Input
, typename
... NameParts
>
111 void push_bench(std::vector
<Benchmark
> &benches
, Fn fn
, Input input
,
112 NameParts
&&...name_parts
)
114 std::ostringstream os
;
115 (os
<< ... << std::forward
<NameParts
>(name_parts
));
116 benches
.push_back(Benchmark(std::move(fn
), std::move(input
), os
.str()));