10 #include <type_traits>
// Benchmark configuration values.
// NOTE(review): the enclosing type declaration is not visible in this
// excerpt; these are presumably the data members of `Config`, which the
// harness below takes as `const Config &` -- confirm.

/// Optional thread count; empty by default. How an empty value is resolved
/// is decided by code outside this excerpt.
std::optional<std::uint32_t> thread_count;

/// Optional iteration count; empty by default. How an empty value is
/// resolved is decided by code outside this excerpt.
std::optional<std::uint64_t> iteration_count;

/// Defaults to 0. Presumably log2 of the number of memory locations used by
/// a benchmark -- confirm against the code that reads it.
std::uint32_t log2_memory_location_count{0};

/// Defaults to 0. Presumably log2 of the stride between memory locations --
/// confirm against the code that reads it.
std::uint32_t log2_stride{0};

/// Compile-time constant 28. Judging by the name it bounds
/// `log2_memory_location_count + log2_stride`; the check itself is not in
/// this excerpt.
static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride{28};

/// Defaults to false. Presumably selects JSON-formatted output -- confirm.
bool use_json{false};
23 template <typename Fn
, typename Input
> class BenchHarness
;
25 class BenchHarnessBase
27 template <typename Fn
, typename Input
> friend class BenchHarness
;
30 std::shared_ptr
<void> thread_cache
;
32 friend class ThreadCache
;
33 void base_run(const Config
&config
,
34 void (*fn
)(BenchHarnessBase
*bench_harness_base
,
35 std::uint64_t iteration_count
,
36 std::uint32_t thread_num
));
39 static std::shared_ptr
<void> get_thread_cache();
42 template <typename Fn
, typename Input
>
43 class BenchHarness final
: private BenchHarnessBase
50 BenchHarness(Fn fn
, Input input
)
51 : fn(std::move(fn
)), input(std::move(input
))
54 void run(const Config
&config
)
56 base_run(config
, [](BenchHarnessBase
*bench_harness_base
,
57 std::uint64_t iteration_count
,
58 std::uint32_t thread_num
) {
59 auto self
= static_cast<BenchHarness
*>(bench_harness_base
);
61 // copy for repeatability, also so optimization barrier is on copy,
63 auto input
= self
->input
;
64 for (std::uint64_t i
= 0; i
< iteration_count
; i
++)
66 // optimization barrier
67 asm("" : : "r"(std::addressof(input
)) : "memory");
69 if constexpr (std::is_void_v
<std::invoke_result_t
<
70 Fn
&, Input
, decltype(i
),
71 decltype(thread_num
)>>)
73 fn(input
, i
, thread_num
);
77 auto output
= fn(input
, i
, thread_num
);
79 // optimization barrier
80 asm("" : : "r"(std::addressof(output
)) : "memory");
91 std::function
<void(const Config
&config
)> m_run
;
94 template <typename Fn
, typename Input
>
95 explicit Benchmark(Fn fn
, Input input
, std::string name
)
96 : m_name(std::move(name
)), m_run([fn
, input
](const Config
&config
) {
97 return BenchHarness(std::move(fn
), std::move(input
)).run(config
);
101 void run(const Config
&config
)
103 return m_run(config
);
105 const std::string
&name() const
111 template <typename Fn
, typename Input
, typename
... NameParts
>
112 void push_bench(std::vector
<Benchmark
> &benches
, Fn fn
, Input input
,
113 NameParts
&&...name_parts
)
115 std::ostringstream os
;
116 (os
<< ... << std::forward
<NameParts
>(name_parts
));
117 benches
.push_back(Benchmark(std::move(fn
), std::move(input
), os
.str()));