add --target-duration option
[benchmarks.git] / src / harness.h
1 #pragma once
2
#include "json.h"
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
13
14 struct Config final
15 {
16 std::optional<std::uint32_t> thread_count;
17 std::optional<std::uint64_t> iteration_count;
18 std::optional<double> target_duration;
19 std::uint32_t log2_memory_location_count = 0;
20 std::uint32_t log2_stride = 0;
21 static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride = 28;
22 bool use_json = false;
23 operator JsonValue() const
24 {
25 return JsonValue::Object{
26 {"thread_count", thread_count},
27 {"iteration_count", iteration_count},
28 {"target_duration", target_duration},
29 {"log2_memory_location_count", log2_memory_location_count},
30 {"log2_stride", log2_stride},
31 {"use_json", use_json},
32 };
33 }
34 };
35
// Forward declaration of the harness template; its definition appears later
// in this header, and BenchHarnessBase names it as a friend.
template <typename Fn, typename Input>
class BenchHarness;
37
38 struct BenchmarkResult
39 {
40 BenchmarkResult() = default;
41 virtual ~BenchmarkResult() = default;
42 virtual void print() const = 0;
43 virtual operator JsonValue() const = 0;
44 };
45
46 class BenchHarnessBase
47 {
48 template <typename Fn, typename Input> friend class BenchHarness;
49
50 private:
51 std::shared_ptr<void> thread_cache;
52 class ThreadCache;
53 friend class ThreadCache;
54 std::shared_ptr<BenchmarkResult> base_run(
55 const Config &config, const std::string &name,
56 void (*fn)(BenchHarnessBase *bench_harness_base,
57 std::uint64_t iteration_count, std::uint32_t thread_num));
58
59 public:
60 static std::shared_ptr<void> get_thread_cache();
61 };
62
63 template <typename Fn, typename Input>
64 class BenchHarness final : private BenchHarnessBase
65 {
66 private:
67 Fn fn;
68 Input input;
69
70 public:
71 BenchHarness(Fn fn, Input input)
72 : fn(std::move(fn)), input(std::move(input))
73 {
74 }
75 std::shared_ptr<BenchmarkResult> run(const Config &config,
76 const std::string &name)
77 {
78 return base_run(
79 config, name,
80 [](BenchHarnessBase *bench_harness_base,
81 std::uint64_t iteration_count, std::uint32_t thread_num) {
82 auto self = static_cast<BenchHarness *>(bench_harness_base);
83 auto &fn = self->fn;
84 // copy for repeatability, also so optimization barrier is on
85 // copy, not self
86 auto input = self->input;
87 for (std::uint64_t i = 0; i < iteration_count; i++)
88 {
89 // optimization barrier
90 asm("" : : "r"(std::addressof(input)) : "memory");
91
92 if constexpr (std::is_void_v<std::invoke_result_t<
93 Fn &, Input, decltype(i),
94 decltype(thread_num)>>)
95 {
96 fn(input, i, thread_num);
97 }
98 else
99 {
100 auto output = fn(input, i, thread_num);
101
102 // optimization barrier
103 asm("" : : "r"(std::addressof(output)) : "memory");
104 }
105 }
106 });
107 }
108 };
109
110 class Benchmark final
111 {
112 private:
113 std::string m_name;
114 std::function<std::shared_ptr<BenchmarkResult>(const Config &config,
115 const std::string &name)>
116 m_run;
117
118 public:
119 template <typename Fn, typename Input>
120 explicit Benchmark(Fn fn, Input input, std::string name)
121 : m_name(std::move(name)),
122 m_run([fn, input](const Config &config, const std::string &name) {
123 return BenchHarness(std::move(fn), std::move(input))
124 .run(config, name);
125 })
126 {
127 }
128 std::shared_ptr<BenchmarkResult> run(const Config &config) const
129 {
130 return m_run(config, m_name);
131 }
132 const std::string &name() const
133 {
134 return m_name;
135 }
136 };
137
138 template <typename Fn, typename Input, typename... NameParts>
139 void push_bench(std::vector<Benchmark> &benches, Fn fn, Input input,
140 NameParts &&...name_parts)
141 {
142 std::ostringstream os;
143 (os << ... << std::forward<NameParts>(name_parts));
144 benches.push_back(Benchmark(std::move(fn), std::move(input), os.str()));
145 }