aacd27ae491f9bc540f777f237e56fa49b1bc3f7
[benchmarks.git] / src / harness.h
1 #pragma once
2
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
12
/// Parameters passed by value to a benchmark run (see Benchmark::run and
/// BenchHarness::run).
///
/// NOTE(review): the code that interprets these fields is not in this header,
/// so the per-field notes below are inferred from names and types only.
struct Config final
{
    /// Optional thread count; starts out empty.
    /// Presumably the harness picks its own value when empty -- TODO confirm.
    std::optional<std::uint32_t> thread_count = std::nullopt;

    /// Optional iteration count; starts out empty.
    /// Presumably the harness picks its own value when empty -- TODO confirm.
    std::optional<std::uint64_t> iteration_count = std::nullopt;

    /// Defaults to 0. Presumably log2 of the number of memory locations used
    /// by a benchmark -- TODO confirm against the harness implementation.
    std::uint32_t log2_memory_location_count{0};

    /// Defaults to 0. Presumably log2 of the stride between memory locations
    /// -- TODO confirm against the harness implementation.
    std::uint32_t log2_stride{0};

    /// Compile-time constant 28. Judging by the name, an upper bound on
    /// log2_memory_location_count + log2_stride; nothing in this header
    /// enforces it.
    static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride{28};
};
21
22 template <typename Fn, typename Input> class BenchHarness;
23
24 class BenchHarnessBase
25 {
26 template <typename Fn, typename Input> friend class BenchHarness;
27
28 private:
29 std::shared_ptr<void> thread_cache;
30 class ThreadCache;
31 friend class ThreadCache;
32 void base_run(Config config,
33 void (*fn)(BenchHarnessBase *bench_harness_base,
34 std::uint64_t iteration_count,
35 std::uint32_t thread_num));
36
37 public:
38 static std::shared_ptr<void> get_thread_cache();
39 };
40
41 template <typename Fn, typename Input>
42 class BenchHarness final : private BenchHarnessBase
43 {
44 private:
45 Fn fn;
46 Input input;
47
48 public:
49 BenchHarness(Fn fn, Input input)
50 : fn(std::move(fn)), input(std::move(input))
51 {
52 }
53 void run(Config config)
54 {
55 base_run(config, [](BenchHarnessBase *bench_harness_base,
56 std::uint64_t iteration_count,
57 std::uint32_t thread_num) {
58 auto self = static_cast<BenchHarness *>(bench_harness_base);
59 auto &fn = self->fn;
60 // copy for repeatability, also so optimization barrier is on copy,
61 // not self
62 auto input = self->input;
63 for (std::uint64_t i = 0; i < iteration_count; i++)
64 {
65 // optimization barrier
66 asm("" : : "r"(std::addressof(input)) : "memory");
67
68 if constexpr (std::is_void_v<std::invoke_result_t<
69 Fn &, Input, decltype(i),
70 decltype(thread_num)>>)
71 {
72 fn(input, i, thread_num);
73 }
74 else
75 {
76 auto output = fn(input, i, thread_num);
77
78 // optimization barrier
79 asm("" : : "r"(std::addressof(output)) : "memory");
80 }
81 }
82 });
83 }
84 };
85
86 class Benchmark final
87 {
88 private:
89 std::string m_name;
90 std::function<void(Config config)> m_run;
91
92 public:
93 template <typename Fn, typename Input>
94 explicit Benchmark(Fn fn, Input input, std::string name)
95 : m_name(std::move(name)), m_run([fn, input](Config config) {
96 return BenchHarness(std::move(fn), std::move(input)).run(config);
97 })
98 {
99 }
100 void run(Config config)
101 {
102 return m_run(config);
103 }
104 const std::string &name() const
105 {
106 return m_name;
107 }
108 };
109
110 template <typename Fn, typename Input, typename... NameParts>
111 void push_bench(std::vector<Benchmark> &benches, Fn fn, Input input,
112 NameParts &&...name_parts)
113 {
114 std::ostringstream os;
115 (os << ... << std::forward<NameParts>(name_parts));
116 benches.push_back(Benchmark(std::move(fn), std::move(input), os.str()));
117 }