b9ab39d38709d1ae217f317e009040c781ab1696
[benchmarks.git] / src / harness.h
1 #pragma once
2
#include "json.h"
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
13
14 struct Config final
15 {
16 std::optional<std::uint32_t> thread_count;
17 std::optional<std::uint64_t> iteration_count;
18 std::uint32_t log2_memory_location_count = 0;
19 std::uint32_t log2_stride = 0;
20 static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride = 28;
21 bool use_json = false;
22 operator JsonValue() const
23 {
24 return JsonValue::Object{
25 {"thread_count", thread_count},
26 {"iteration_count", iteration_count},
27 {"log2_memory_location_count", log2_memory_location_count},
28 {"log2_stride", log2_stride},
29 {"use_json", use_json},
30 };
31 }
32 };
33
34 template <typename Fn, typename Input> class BenchHarness;
35
/// Abstract interface for the outcome of one benchmark run.
///
/// Concrete result types are defined elsewhere; this header only fixes the
/// two ways a result can be consumed: printed, or converted to `JsonValue`.
struct BenchmarkResult
{
    BenchmarkResult() = default;
    /// Virtual so results can be destroyed through a base pointer/reference.
    virtual ~BenchmarkResult() = default;
    /// Prints the result; destination and format are chosen by the
    /// implementing class.
    virtual void print() const = 0;
    /// Converts the result to its JSON representation.
    virtual operator JsonValue() const = 0;
};
43
44 class BenchHarnessBase
45 {
46 template <typename Fn, typename Input> friend class BenchHarness;
47
48 private:
49 std::shared_ptr<void> thread_cache;
50 class ThreadCache;
51 friend class ThreadCache;
52 std::shared_ptr<BenchmarkResult> base_run(
53 const Config &config, const std::string &name,
54 void (*fn)(BenchHarnessBase *bench_harness_base,
55 std::uint64_t iteration_count, std::uint32_t thread_num));
56
57 public:
58 static std::shared_ptr<void> get_thread_cache();
59 };
60
61 template <typename Fn, typename Input>
62 class BenchHarness final : private BenchHarnessBase
63 {
64 private:
65 Fn fn;
66 Input input;
67
68 public:
69 BenchHarness(Fn fn, Input input)
70 : fn(std::move(fn)), input(std::move(input))
71 {
72 }
73 std::shared_ptr<BenchmarkResult> run(const Config &config,
74 const std::string &name)
75 {
76 return base_run(
77 config, name,
78 [](BenchHarnessBase *bench_harness_base,
79 std::uint64_t iteration_count, std::uint32_t thread_num) {
80 auto self = static_cast<BenchHarness *>(bench_harness_base);
81 auto &fn = self->fn;
82 // copy for repeatability, also so optimization barrier is on
83 // copy, not self
84 auto input = self->input;
85 for (std::uint64_t i = 0; i < iteration_count; i++)
86 {
87 // optimization barrier
88 asm("" : : "r"(std::addressof(input)) : "memory");
89
90 if constexpr (std::is_void_v<std::invoke_result_t<
91 Fn &, Input, decltype(i),
92 decltype(thread_num)>>)
93 {
94 fn(input, i, thread_num);
95 }
96 else
97 {
98 auto output = fn(input, i, thread_num);
99
100 // optimization barrier
101 asm("" : : "r"(std::addressof(output)) : "memory");
102 }
103 }
104 });
105 }
106 };
107
108 class Benchmark final
109 {
110 private:
111 std::string m_name;
112 std::function<std::shared_ptr<BenchmarkResult>(const Config &config,
113 const std::string &name)>
114 m_run;
115
116 public:
117 template <typename Fn, typename Input>
118 explicit Benchmark(Fn fn, Input input, std::string name)
119 : m_name(std::move(name)),
120 m_run([fn, input](const Config &config, const std::string &name) {
121 return BenchHarness(std::move(fn), std::move(input))
122 .run(config, name);
123 })
124 {
125 }
126 std::shared_ptr<BenchmarkResult> run(const Config &config) const
127 {
128 return m_run(config, m_name);
129 }
130 const std::string &name() const
131 {
132 return m_name;
133 }
134 };
135
136 template <typename Fn, typename Input, typename... NameParts>
137 void push_bench(std::vector<Benchmark> &benches, Fn fn, Input input,
138 NameParts &&...name_parts)
139 {
140 std::ostringstream os;
141 (os << ... << std::forward<NameParts>(name_parts));
142 benches.push_back(Benchmark(std::move(fn), std::move(input), os.str()));
143 }