start adding JSON output -- it's easier to use data in that format
[benchmarks.git] / src / harness.h
1 #pragma once
2
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
12
/// Settings handed (by value) to every benchmark run.
struct Config final
{
    /// Optional thread count; empty by default.
    std::optional<std::uint32_t> thread_count{};

    /// Optional iteration count; empty by default.
    std::optional<std::uint64_t> iteration_count{};

    /// Base-2 logarithm of the memory location count; defaults to 0.
    std::uint32_t log2_memory_location_count{0};

    /// Base-2 logarithm of the stride; defaults to 0.
    std::uint32_t log2_stride{0};

    /// Constant 28. Judging by the name it bounds
    /// `log2_memory_location_count + log2_stride`; the check itself is not
    /// visible in this header -- TODO confirm where it is enforced.
    static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride = 28;

    /// Off by default. Presumably selects JSON-formatted output; the code
    /// that reads this flag lives outside this header.
    bool use_json{false};
};
22
23 template <typename Fn, typename Input> class BenchHarness;
24
25 class BenchHarnessBase
26 {
27 template <typename Fn, typename Input> friend class BenchHarness;
28
29 private:
30 std::shared_ptr<void> thread_cache;
31 class ThreadCache;
32 friend class ThreadCache;
33 void base_run(Config config,
34 void (*fn)(BenchHarnessBase *bench_harness_base,
35 std::uint64_t iteration_count,
36 std::uint32_t thread_num));
37
38 public:
39 static std::shared_ptr<void> get_thread_cache();
40 };
41
42 template <typename Fn, typename Input>
43 class BenchHarness final : private BenchHarnessBase
44 {
45 private:
46 Fn fn;
47 Input input;
48
49 public:
50 BenchHarness(Fn fn, Input input)
51 : fn(std::move(fn)), input(std::move(input))
52 {
53 }
54 void run(Config config)
55 {
56 base_run(config, [](BenchHarnessBase *bench_harness_base,
57 std::uint64_t iteration_count,
58 std::uint32_t thread_num) {
59 auto self = static_cast<BenchHarness *>(bench_harness_base);
60 auto &fn = self->fn;
61 // copy for repeatability, also so optimization barrier is on copy,
62 // not self
63 auto input = self->input;
64 for (std::uint64_t i = 0; i < iteration_count; i++)
65 {
66 // optimization barrier
67 asm("" : : "r"(std::addressof(input)) : "memory");
68
69 if constexpr (std::is_void_v<std::invoke_result_t<
70 Fn &, Input, decltype(i),
71 decltype(thread_num)>>)
72 {
73 fn(input, i, thread_num);
74 }
75 else
76 {
77 auto output = fn(input, i, thread_num);
78
79 // optimization barrier
80 asm("" : : "r"(std::addressof(output)) : "memory");
81 }
82 }
83 });
84 }
85 };
86
87 class Benchmark final
88 {
89 private:
90 std::string m_name;
91 std::function<void(Config config)> m_run;
92
93 public:
94 template <typename Fn, typename Input>
95 explicit Benchmark(Fn fn, Input input, std::string name)
96 : m_name(std::move(name)), m_run([fn, input](Config config) {
97 return BenchHarness(std::move(fn), std::move(input)).run(config);
98 })
99 {
100 }
101 void run(Config config)
102 {
103 return m_run(config);
104 }
105 const std::string &name() const
106 {
107 return m_name;
108 }
109 };
110
111 template <typename Fn, typename Input, typename... NameParts>
112 void push_bench(std::vector<Benchmark> &benches, Fn fn, Input input,
113 NameParts &&...name_parts)
114 {
115 std::ostringstream os;
116 (os << ... << std::forward<NameParts>(name_parts));
117 benches.push_back(Benchmark(std::move(fn), std::move(input), os.str()));
118 }