src/harness.h

   1 #pragma once
   2
#include "json.h"
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
  13
  14 struct Config final
  15 {
  16     std::optional<std::uint32_t> thread_count;
  17     std::optional<std::uint64_t> iteration_count;
  18     std::optional<double> target_duration;
  19     std::uint32_t log2_memory_location_count = 0;
  20     std::uint32_t log2_stride = 0;
  21     static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride = 28;
  22     bool use_json = false;
  23     operator JsonValue() const
  24     {
  25         return JsonValue::Object{
  26             {"thread_count", thread_count},
  27             {"iteration_count", iteration_count},
  28             {"target_duration", target_duration},
  29             {"log2_memory_location_count", log2_memory_location_count},
  30             {"log2_stride", log2_stride},
  31             {"use_json", use_json},
  32         };
  33     }
  34 };
  35
  36 template <typename Fn, typename Input> class BenchHarness;
  37
  38 struct BenchmarkResult
  39 {
  40     BenchmarkResult() = default;
  41     virtual ~BenchmarkResult() = default;
  42     virtual void print() const = 0;
  43     virtual operator JsonValue() const = 0;
  44 };
  45
  46 class BenchHarnessBase
  47 {
  48     template <typename Fn, typename Input> friend class BenchHarness;
  49
  50   private:
  51     std::shared_ptr<void> thread_cache;
  52     class ThreadCache;
  53     friend class ThreadCache;
  54     std::shared_ptr<BenchmarkResult> base_run(
  55         const Config &config, const std::string &name,
  56         void (*fn)(BenchHarnessBase *bench_harness_base,
  57                    std::uint64_t iteration_count, std::uint32_t thread_num));
  58
  59   public:
  60     static std::shared_ptr<void> get_thread_cache();
  61 };
  62
  63 template <typename Fn, typename Input>
  64 class BenchHarness final : private BenchHarnessBase
  65 {
  66   private:
  67     Fn fn;
  68     Input input;
  69
  70   public:
  71     BenchHarness(Fn fn, Input input)
  72         : fn(std::move(fn)), input(std::move(input))
  73     {
  74     }
  75     std::shared_ptr<BenchmarkResult> run(const Config &config,
  76                                          const std::string &name)
  77     {
  78         return base_run(
  79             config, name,
  80             [](BenchHarnessBase *bench_harness_base,
  81                std::uint64_t iteration_count, std::uint32_t thread_num) {
  82                 auto self = static_cast<BenchHarness *>(bench_harness_base);
  83                 auto &fn = self->fn;
  84                 // copy for repeatability, also so optimization barrier is on
  85                 // copy, not self
  86                 auto input = self->input;
  87                 for (std::uint64_t i = 0; i < iteration_count; i++)
  88                 {
  89                     // optimization barrier
  90                     asm("" : : "r"(std::addressof(input)) : "memory");
  91
  92                     if constexpr (std::is_void_v<std::invoke_result_t<
  93                                       Fn &, Input, decltype(i),
  94                                       decltype(thread_num)>>)
  95                     {
  96                         fn(input, i, thread_num);
  97                     }
  98                     else
  99                     {
 100                         auto output = fn(input, i, thread_num);
 101
 102                         // optimization barrier
 103                         asm("" : : "r"(std::addressof(output)) : "memory");
 104                     }
 105                 }
 106             });
 107     }
 108 };
 109
 110 class Benchmark final
 111 {
 112   private:
 113     std::string m_name;
 114     std::function<std::shared_ptr<BenchmarkResult>(const Config &config,
 115                                                    const std::string &name)>
 116         m_run;
 117
 118   public:
 119     template <typename Fn, typename Input>
 120     explicit Benchmark(Fn fn, Input input, std::string name)
 121         : m_name(std::move(name)),
 122           m_run([fn, input](const Config &config, const std::string &name) {
 123               return BenchHarness(std::move(fn), std::move(input))
 124                   .run(config, name);
 125           })
 126     {
 127     }
 128     std::shared_ptr<BenchmarkResult> run(const Config &config) const
 129     {
 130         return m_run(config, m_name);
 131     }
 132     const std::string &name() const
 133     {
 134         return m_name;
 135     }
 136 };
 137
 138 template <typename Fn, typename Input, typename... NameParts>
 139 void push_bench(std::vector<Benchmark> &benches, Fn fn, Input input,
 140                 NameParts &&...name_parts)
 141 {
 142     std::ostringstream os;
 143     (os << ... << std::forward<NameParts>(name_parts));
 144     benches.push_back(Benchmark(std::move(fn), std::move(input), os.str()));
 145 }