10 #include <string_view>
11 #include <type_traits>
16 std::optional
<std::uint32_t> thread_count
;
17 std::optional
<std::uint64_t> iteration_count
;
18 std::uint32_t log2_memory_location_count
= 0;
19 std::uint32_t log2_stride
= 0;
20 static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride
= 28;
21 bool use_json
= false;
22 operator JsonValue() const
24 return JsonValue::Object
{
25 {"thread_count", thread_count
},
26 {"iteration_count", iteration_count
},
27 {"log2_memory_location_count", log2_memory_location_count
},
28 {"log2_stride", log2_stride
},
29 {"use_json", use_json
},
// Forward declaration so BenchHarnessBase can name this template in its
// friend declaration before the full definition appears.
template <typename Fn, typename Input>
class BenchHarness;
36 struct BenchmarkResult
38 BenchmarkResult() = default;
39 virtual ~BenchmarkResult() = default;
40 virtual void print() const = 0;
41 virtual operator JsonValue() const = 0;
44 class BenchHarnessBase
46 template <typename Fn
, typename Input
> friend class BenchHarness
;
49 std::shared_ptr
<void> thread_cache
;
51 friend class ThreadCache
;
52 std::shared_ptr
<BenchmarkResult
> base_run(
53 const Config
&config
, const std::string
&name
,
54 void (*fn
)(BenchHarnessBase
*bench_harness_base
,
55 std::uint64_t iteration_count
, std::uint32_t thread_num
));
58 static std::shared_ptr
<void> get_thread_cache();
61 template <typename Fn
, typename Input
>
62 class BenchHarness final
: private BenchHarnessBase
69 BenchHarness(Fn fn
, Input input
)
70 : fn(std::move(fn
)), input(std::move(input
))
73 std::shared_ptr
<BenchmarkResult
> run(const Config
&config
,
74 const std::string
&name
)
78 [](BenchHarnessBase
*bench_harness_base
,
79 std::uint64_t iteration_count
, std::uint32_t thread_num
) {
80 auto self
= static_cast<BenchHarness
*>(bench_harness_base
);
82 // copy for repeatability, also so optimization barrier is on
84 auto input
= self
->input
;
85 for (std::uint64_t i
= 0; i
< iteration_count
; i
++)
87 // optimization barrier
88 asm("" : : "r"(std::addressof(input
)) : "memory");
90 if constexpr (std::is_void_v
<std::invoke_result_t
<
91 Fn
&, Input
, decltype(i
),
92 decltype(thread_num
)>>)
94 fn(input
, i
, thread_num
);
98 auto output
= fn(input
, i
, thread_num
);
100 // optimization barrier
101 asm("" : : "r"(std::addressof(output
)) : "memory");
108 class Benchmark final
112 std::function
<std::shared_ptr
<BenchmarkResult
>(const Config
&config
,
113 const std::string
&name
)>
117 template <typename Fn
, typename Input
>
118 explicit Benchmark(Fn fn
, Input input
, std::string name
)
119 : m_name(std::move(name
)),
120 m_run([fn
, input
](const Config
&config
, const std::string
&name
) {
121 return BenchHarness(std::move(fn
), std::move(input
))
126 std::shared_ptr
<BenchmarkResult
> run(const Config
&config
) const
128 return m_run(config
, m_name
);
130 const std::string
&name() const
136 template <typename Fn
, typename Input
, typename
... NameParts
>
137 void push_bench(std::vector
<Benchmark
> &benches
, Fn fn
, Input input
,
138 NameParts
&&...name_parts
)
140 std::ostringstream os
;
141 (os
<< ... << std::forward
<NameParts
>(name_parts
));
142 benches
.push_back(Benchmark(std::move(fn
), std::move(input
), os
.str()));