10 #include <string_view>
11 #include <type_traits>
16 std::optional
<std::uint32_t> thread_count
;
17 std::optional
<std::uint64_t> iteration_count
;
18 std::optional
<double> target_duration
;
19 std::uint32_t log2_memory_location_count
= 0;
20 std::uint32_t log2_stride
= 0;
21 static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride
= 28;
22 bool use_json
= false;
23 operator JsonValue() const
25 return JsonValue::Object
{
26 {"thread_count", thread_count
},
27 {"iteration_count", iteration_count
},
28 {"target_duration", target_duration
},
29 {"log2_memory_location_count", log2_memory_location_count
},
30 {"log2_stride", log2_stride
},
31 {"use_json", use_json
},
/// Forward declaration of the typed harness template that is defined later in
/// this file.
template <typename Fn, typename Input>
class BenchHarness;
38 struct BenchmarkResult
40 BenchmarkResult() = default;
41 virtual ~BenchmarkResult() = default;
42 virtual void print() const = 0;
43 virtual operator JsonValue() const = 0;
46 class BenchHarnessBase
48 template <typename Fn
, typename Input
> friend class BenchHarness
;
51 std::shared_ptr
<void> thread_cache
;
53 friend class ThreadCache
;
54 std::shared_ptr
<BenchmarkResult
> base_run(
55 const Config
&config
, const std::string
&name
,
56 void (*fn
)(BenchHarnessBase
*bench_harness_base
,
57 std::uint64_t iteration_count
, std::uint32_t thread_num
));
60 static std::shared_ptr
<void> get_thread_cache();
63 template <typename Fn
, typename Input
>
64 class BenchHarness final
: private BenchHarnessBase
71 BenchHarness(Fn fn
, Input input
)
72 : fn(std::move(fn
)), input(std::move(input
))
75 std::shared_ptr
<BenchmarkResult
> run(const Config
&config
,
76 const std::string
&name
)
80 [](BenchHarnessBase
*bench_harness_base
,
81 std::uint64_t iteration_count
, std::uint32_t thread_num
) {
82 auto self
= static_cast<BenchHarness
*>(bench_harness_base
);
84 // copy for repeatability, also so optimization barrier is on
86 auto input
= self
->input
;
87 for (std::uint64_t i
= 0; i
< iteration_count
; i
++)
89 // optimization barrier
90 asm("" : : "r"(std::addressof(input
)) : "memory");
92 if constexpr (std::is_void_v
<std::invoke_result_t
<
93 Fn
&, Input
, decltype(i
),
94 decltype(thread_num
)>>)
96 fn(input
, i
, thread_num
);
100 auto output
= fn(input
, i
, thread_num
);
102 // optimization barrier
103 asm("" : : "r"(std::addressof(output
)) : "memory");
110 class Benchmark final
114 std::function
<std::shared_ptr
<BenchmarkResult
>(const Config
&config
,
115 const std::string
&name
)>
119 template <typename Fn
, typename Input
>
120 explicit Benchmark(Fn fn
, Input input
, std::string name
)
121 : m_name(std::move(name
)),
122 m_run([fn
, input
](const Config
&config
, const std::string
&name
) {
123 return BenchHarness(std::move(fn
), std::move(input
))
128 std::shared_ptr
<BenchmarkResult
> run(const Config
&config
) const
130 return m_run(config
, m_name
);
132 const std::string
&name() const
138 template <typename Fn
, typename Input
, typename
... NameParts
>
139 void push_bench(std::vector
<Benchmark
> &benches
, Fn fn
, Input input
,
140 NameParts
&&...name_parts
)
142 std::ostringstream os
;
143 (os
<< ... << std::forward
<NameParts
>(name_parts
));
144 benches
.push_back(Benchmark(std::move(fn
), std::move(input
), os
.str()));