src/harness.h

   1 #pragma once
   2
   #include "json.h"
   #include <cstdint>
   #include <functional>
   #include <memory>
   #include <optional>
   #include <sstream>
   #include <string>
   #include <string_view>
   #include <type_traits>
   #include <utility>
   #include <vector>
  13
  14 struct Config final
  15 {
  16     std::optional<std::uint32_t> thread_count;
  17     std::optional<std::uint64_t> iteration_count;
  18     std::uint32_t log2_memory_location_count = 0;
  19     std::uint32_t log2_stride = 0;
  20     static constexpr std::uint32_t max_sum_log2_mem_loc_count_and_stride = 28;
  21     bool use_json = false;
  22     operator JsonValue() const
  23     {
  24         return JsonValue::Object{
  25             {"thread_count", thread_count},
  26             {"iteration_count", iteration_count},
  27             {"log2_memory_location_count", log2_memory_location_count},
  28             {"log2_stride", log2_stride},
  29             {"use_json", use_json},
  30         };
  31     }
  32 };
  33
  34 template <typename Fn, typename Input> class BenchHarness;
  35
  36 struct BenchmarkResult
  37 {
  38     BenchmarkResult() = default;
  39     virtual ~BenchmarkResult() = default;
  40     virtual void print() const = 0;
  41     virtual operator JsonValue() const = 0;
  42 };
  43
  44 class BenchHarnessBase
  45 {
  46     template <typename Fn, typename Input> friend class BenchHarness;
  47
  48   private:
  49     std::shared_ptr<void> thread_cache;
  50     class ThreadCache;
  51     friend class ThreadCache;
  52     std::shared_ptr<BenchmarkResult> base_run(
  53         const Config &config, const std::string &name,
  54         void (*fn)(BenchHarnessBase *bench_harness_base,
  55                    std::uint64_t iteration_count, std::uint32_t thread_num));
  56
  57   public:
  58     static std::shared_ptr<void> get_thread_cache();
  59 };
  60
  61 template <typename Fn, typename Input>
  62 class BenchHarness final : private BenchHarnessBase
  63 {
  64   private:
  65     Fn fn;
  66     Input input;
  67
  68   public:
  69     BenchHarness(Fn fn, Input input)
  70         : fn(std::move(fn)), input(std::move(input))
  71     {
  72     }
  73     std::shared_ptr<BenchmarkResult> run(const Config &config,
  74                                          const std::string &name)
  75     {
  76         return base_run(
  77             config, name,
  78             [](BenchHarnessBase *bench_harness_base,
  79                std::uint64_t iteration_count, std::uint32_t thread_num) {
  80                 auto self = static_cast<BenchHarness *>(bench_harness_base);
  81                 auto &fn = self->fn;
  82                 // copy for repeatability, also so optimization barrier is on
  83                 // copy, not self
  84                 auto input = self->input;
  85                 for (std::uint64_t i = 0; i < iteration_count; i++)
  86                 {
  87                     // optimization barrier
  88                     asm("" : : "r"(std::addressof(input)) : "memory");
  89
  90                     if constexpr (std::is_void_v<std::invoke_result_t<
  91                                       Fn &, Input, decltype(i),
  92                                       decltype(thread_num)>>)
  93                     {
  94                         fn(input, i, thread_num);
  95                     }
  96                     else
  97                     {
  98                         auto output = fn(input, i, thread_num);
  99
 100                         // optimization barrier
 101                         asm("" : : "r"(std::addressof(output)) : "memory");
 102                     }
 103                 }
 104             });
 105     }
 106 };
 107
 108 class Benchmark final
 109 {
 110   private:
 111     std::string m_name;
 112     std::function<std::shared_ptr<BenchmarkResult>(const Config &config,
 113                                                    const std::string &name)>
 114         m_run;
 115
 116   public:
 117     template <typename Fn, typename Input>
 118     explicit Benchmark(Fn fn, Input input, std::string name)
 119         : m_name(std::move(name)),
 120           m_run([fn, input](const Config &config, const std::string &name) {
 121               return BenchHarness(std::move(fn), std::move(input))
 122                   .run(config, name);
 123           })
 124     {
 125     }
 126     std::shared_ptr<BenchmarkResult> run(const Config &config) const
 127     {
 128         return m_run(config, m_name);
 129     }
 130     const std::string &name() const
 131     {
 132         return m_name;
 133     }
 134 };
 135
 136 template <typename Fn, typename Input, typename... NameParts>
 137 void push_bench(std::vector<Benchmark> &benches, Fn fn, Input input,
 138                 NameParts &&...name_parts)
 139 {
 140     std::ostringstream os;
 141     (os << ... << std::forward<NameParts>(name_parts));
 142     benches.push_back(Benchmark(std::move(fn), std::move(input), os.str()));
 143 }