start adding JSON output -- it's easier to use data in that format
[benchmarks.git] / src / json.h
1 #pragma once
2
3 #include <charconv>
4 #include <cmath>
5 #include <cstddef>
6 #include <cstdint>
7 #include <cstdio>
8 #include <cstdlib>
9 #include <initializer_list>
10 #include <ios>
11 #include <memory>
12 #include <optional>
13 #include <ostream>
14 #include <string>
15 #include <string_view>
16 #include <system_error>
17 #include <unordered_map>
18 #include <variant>
19 #include <vector>
20
struct JsonValue;

/// JSON string; its bytes are interpreted as WTF-8 when written
/// (see `JsonValue::JsonStringDecoder`)
using JsonString = std::string;
/// JSON number
using JsonFloat = double;
/// JSON `null`
using JsonNull = std::nullptr_t;
/// JSON array
using JsonArray = std::vector<JsonValue>;
/// JSON object, stored as a list of key/value pairs; members are written in
/// list order and duplicate keys are not checked for
using JsonMap = std::vector<std::pair<std::string, JsonValue>>;

/// One JSON value: a string, a number, `null`, an array or an object.
///
/// Arrays and objects are owned through `std::unique_ptr`, which makes
/// `JsonValue` move-only.
struct JsonValue final
{
    std::variant<JsonString,
                 JsonFloat,
                 JsonNull,
                 std::unique_ptr<JsonArray>,
                 std::unique_ptr<JsonMap>>
        value;

    /// construct a JSON `null`
    constexpr JsonValue() noexcept : value(nullptr)
    {
    }
    /// construct a JSON `null`
    constexpr JsonValue(JsonNull) noexcept : value(nullptr)
    {
    }
    /// construct a JSON number
    constexpr JsonValue(JsonFloat value) noexcept : value(value)
    {
    }
    /// construct a JSON string, taking over the contents of `value`
    JsonValue(JsonString value) noexcept : value(std::move(value))
    {
    }
    /// construct a JSON array from an already allocated array
    JsonValue(std::unique_ptr<JsonArray> value) noexcept
        : value(std::move(value))
    {
    }
    /// construct a JSON array, moving `value` into a new allocation.
    ///
    /// deliberately not `noexcept`: the allocation can throw
    /// `std::bad_alloc`, which would otherwise call `std::terminate`.
    JsonValue(JsonArray value)
        : value(std::make_unique<JsonArray>(std::move(value)))
    {
    }
    /// construct a JSON object from an already allocated map
    JsonValue(std::unique_ptr<JsonMap> value) noexcept
        : value(std::move(value))
    {
    }
    /// construct a JSON object, moving `value` into a new allocation.
    ///
    /// deliberately not `noexcept`: the allocation can throw
    /// `std::bad_alloc`, which would otherwise call `std::terminate`.
    JsonValue(JsonMap value)
        : value(std::make_unique<JsonMap>(std::move(value)))
    {
    }

    /// decode a JsonString from WTF-8 to the encoding logically used in JSON:
    /// WTF-16 aka. potentially ill-formed UTF-16
    ///
    /// https://simonsapin.github.io/wtf-8/
    class JsonStringDecoder final
    {
    private:
        /// input bytes that have not been decoded yet
        std::string_view remaining;
        /// true when the code point decoded by the previous call to `next()`
        /// was a lead surrogate (U+D800 to U+DBFF)
        bool last_was_lead_surrogate = false;
        /// second half of a surrogate pair, handed out by the following call
        /// to `next()`
        std::optional<std::uint16_t> trail_surrogate;

    public:
        /// construct a decoder over the empty string
        constexpr JsonStringDecoder() noexcept : JsonStringDecoder("")
        {
        }
        /// construct a decoder over `json_string`; only a view is kept, so
        /// the viewed bytes must outlive the decoder
        constexpr explicit JsonStringDecoder(
            std::string_view json_string) noexcept
            : remaining(json_string)
        {
        }

    private:
        /// reset the decoder to the empty state, then throw
        /// `std::ios_base::failure`
        [[noreturn]] void throw_wtf8_err()
        {
            *this = JsonStringDecoder();
            throw std::ios_base::failure("Invalid WTF-8");
        }

    public:
        /// decode the next 16-bit code unit.
        ///
        /// Code points of U+10000 and above are returned as two code units
        /// (lead surrogate, then trail surrogate) over two consecutive calls.
        ///
        /// @return the next code unit, or `std::nullopt` at the end of input
        /// @throws std::ios_base::failure if the input is not valid WTF-8
        constexpr std::optional<std::uint16_t> next()
        {
            if (trail_surrogate)
            {
                const std::uint16_t pending = *trail_surrogate;
                trail_surrogate = std::nullopt;
                return pending;
            }
            if (remaining.empty())
            {
                return std::nullopt;
            }
            // inclusive range of byte values accepted for a continuation byte
            struct ByteRange final
            {
                std::uint8_t min;
                std::uint8_t max;
            };
            constexpr ByteRange any_continuation{0x80, 0xBF};
            auto take_byte = [&]() -> std::uint8_t {
                if (remaining.empty())
                {
                    // sequence is cut off by the end of the input
                    throw_wtf8_err();
                }
                const std::uint8_t byte = remaining[0];
                remaining.remove_prefix(1);
                return byte;
            };
            const std::uint8_t lead = take_byte();
            std::uint32_t code_point = 0;
            int continuation_count = 0;
            // Only the first continuation byte ever has a narrowed range; the
            // narrowing rejects overlong encodings (0xE0, 0xF0) and code
            // points above U+10FFFF (0xF4).
            ByteRange allowed = any_continuation;
            if (lead < 0x80)
            {
                code_point = lead;
            }
            else if (lead < 0xC2)
            {
                // stray continuation byte, or overlong 2-byte encoding
                throw_wtf8_err();
            }
            else if (lead < 0xE0)
            {
                code_point = lead & 0x1F;
                continuation_count = 1;
            }
            else if (lead < 0xF0)
            {
                code_point = lead & 0xF;
                continuation_count = 2;
                if (lead == 0xE0)
                {
                    allowed.min = 0xA0;
                }
            }
            else if (lead <= 0xF4)
            {
                code_point = lead & 0x7;
                continuation_count = 3;
                if (lead == 0xF0)
                {
                    allowed.min = 0x90;
                }
                else if (lead == 0xF4)
                {
                    allowed.max = 0x8F;
                }
            }
            else
            {
                throw_wtf8_err();
            }
            for (int i = 0; i < continuation_count; i++)
            {
                const std::uint8_t byte = take_byte();
                if (byte < allowed.min || byte > allowed.max)
                {
                    throw_wtf8_err();
                }
                code_point = (code_point << 6) | (byte & 0x3Fu);
                allowed = any_continuation;
            }
            const bool is_trail_surrogate =
                code_point >= 0xDC00 && code_point <= 0xDFFF;
            if (last_was_lead_surrogate && is_trail_surrogate)
            {
                // got lead surrogate followed by trail surrogate --
                // invalid in WTF-8
                throw_wtf8_err();
            }
            last_was_lead_surrogate =
                code_point >= 0xD800 && code_point <= 0xDBFF;
            if (code_point >= 0x10000)
            {
                // supplementary code point: split into a surrogate pair
                const std::uint32_t offset = code_point - 0x10000;
                trail_surrogate =
                    static_cast<std::uint16_t>(0xDC00 + (offset & 0x3FF));
                return static_cast<std::uint16_t>(0xD800 + (offset >> 10));
            }
            return static_cast<std::uint16_t>(code_point);
        }
    };

    /// serialize this value as JSON text without any whitespace.
    ///
    /// `write_fn` is called with consecutive `std::string_view` pieces; the
    /// concatenation of all pieces is the output.
    ///
    /// - strings are written as pure ASCII: everything outside 0x20 to 0x7E
    ///   is escaped, using `\uXXXX` where no short escape exists.
    /// - finite numbers are written by `std::to_chars`, i.e. as the shortest
    ///   text that parses back to the same `double`, independent of the
    ///   current C locale.
    /// - NOTE: non-finite numbers are written as `NaN`, `Infinity` and
    ///   `-Infinity`, which are not part of the JSON grammar.
    /// - a null array/object pointer is written as `null`.
    ///
    /// @throws std::ios_base::failure if a string or object key is not valid
    ///     WTF-8; part of the output may already have been passed to
    ///     `write_fn` by then.
    template <typename WriteStringView>
    void write(WriteStringView &&write_fn) const
    {
        struct Visitor final
        {
            WriteStringView &write_fn;
            void emit(std::string_view str)
            {
                write_fn(str);
            }
            void emit(char ch)
            {
                write_fn(std::string_view(&ch, 1));
            }
            void operator()(const JsonString &text)
            {
                static constexpr char hex_digits[] = "0123456789ABCDEF";
                emit('"');
                JsonStringDecoder decoder(text);
                while (auto unit_opt = decoder.next())
                {
                    const std::uint16_t unit = *unit_opt;
                    switch (unit)
                    {
                    case '"':
                    case '\\':
                        emit('\\');
                        emit(static_cast<char>(unit));
                        break;
                    case '\b':
                        emit("\\b");
                        break;
                    case '\f':
                        emit("\\f");
                        break;
                    case '\n':
                        emit("\\n");
                        break;
                    case '\r':
                        emit("\\r");
                        break;
                    case '\t':
                        emit("\\t");
                        break;
                    default:
                        if (unit >= 0x20 && unit <= 0x7E)
                        {
                            emit(static_cast<char>(unit));
                        }
                        else
                        {
                            // four hex digits, most significant first
                            const char escape[] = {
                                '\\',
                                'u',
                                hex_digits[unit >> 12],
                                hex_digits[(unit >> 8) & 0xF],
                                hex_digits[(unit >> 4) & 0xF],
                                hex_digits[unit & 0xF],
                            };
                            emit(std::string_view(escape, sizeof(escape)));
                        }
                        break;
                    }
                }
                emit('"');
            }
            void operator()(JsonFloat number)
            {
                if (std::isnan(number))
                {
                    emit("NaN");
                    return;
                }
                if (std::signbit(number))
                {
                    // also covers negative zero and negative infinity
                    emit('-');
                    number = -number;
                }
                if (std::isinf(number))
                {
                    emit("Infinity");
                    return;
                }
                if (number == 0)
                {
                    emit('0');
                    return;
                }
                // the shortest round-trip text of a double is far shorter
                // than this buffer
                char digits[32];
                const std::to_chars_result result =
                    std::to_chars(digits, digits + sizeof(digits), number);
                if (result.ec != std::errc())
                {
                    throw std::ios_base::failure(
                        "Failed to format JSON number");
                }
                emit(std::string_view(
                    digits, static_cast<std::size_t>(result.ptr - digits)));
            }
            void operator()(JsonNull)
            {
                emit("null");
            }
            void operator()(const std::unique_ptr<JsonArray> &items)
            {
                if (!items)
                {
                    // no array to dereference
                    emit("null");
                    return;
                }
                emit('[');
                std::string_view separator{};
                for (const JsonValue &item : *items)
                {
                    emit(separator);
                    separator = ",";
                    std::visit(*this, item.value);
                }
                emit(']');
            }
            void operator()(const std::unique_ptr<JsonMap> &members)
            {
                if (!members)
                {
                    // no map to dereference
                    emit("null");
                    return;
                }
                emit('{');
                std::string_view separator{};
                for (const auto &[key, member] : *members)
                {
                    emit(separator);
                    separator = ",";
                    // keys are escaped exactly like string values
                    (*this)(key);
                    emit(':');
                    std::visit(*this, member.value);
                }
                emit('}');
            }
        };
        Visitor visitor{write_fn};
        std::visit(visitor, value);
    }

    /// write `self` to `os` as JSON text, see `write()`
    friend std::ostream &operator<<(std::ostream &os, const JsonValue &self)
    {
        self.write([&](std::string_view str) { os << str; });
        return os;
    }
};