Revert "reduce LHS for RSQRT by a factor of fract_width and"
[ieee754fpu.git] / src / ieee754 / div_rem_sqrt_rsqrt / core.py
1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3 """ Core of the div/rem/sqrt/rsqrt pipeline.
4
5 Special case handling, input/output conversion, and muxid handling are handled
6 outside of these classes.
7
8 Algorithms based on ``algorithm.FixedUDivRemSqrtRSqrt``.
9
10 Formulas solved are:
11 * div/rem:
12 ``dividend == quotient_root * divisor_radicand``
13 * sqrt/rem:
14 ``divisor_radicand == quotient_root * quotient_root``
15 * rsqrt/rem:
16 ``1 == quotient_root * quotient_root * divisor_radicand``
17
18 The remainder is the left-hand-side of the comparison minus the
19 right-hand-side of the comparison in the above formulas.
20 """
21 from nmigen import (Elaboratable, Module, Signal, Const, Mux, Cat, Array)
22 from nmigen.lib.coding import PriorityEncoder
23 import enum
24
25
class DivPipeCoreConfig:
    """ Configuration for core of the div/rem/sqrt/rsqrt pipeline.

    :attribute bit_width: base bit-width.
    :attribute fract_width: base fract-width. Specifies location of base-2
        radix point.
    :attribute log2_radix: number of bits of ``quotient_root`` that should be
        computed per pipeline stage.
    """

    def __init__(self, bit_width, fract_width, log2_radix):
        """ Store the three configuration values, unmodified. """
        self.bit_width = bit_width
        self.fract_width = fract_width
        self.log2_radix = log2_radix

    def __repr__(self):
        """ Return ``DivPipeCoreConfig(bit_width, fract_width, log2_radix)``.
        """
        return "DivPipeCoreConfig({}, {}, {})".format(
            self.bit_width, self.fract_width, self.log2_radix)

    @property
    def n_stages(self):
        """ Get the number of ``DivPipeCoreCalculateStage`` needed.

        For positive integer settings this is ``bit_width / log2_radix``
        rounded up, so a final stage exists for any left-over bits.
        """
        bits_per_stage = self.log2_radix
        return (self.bit_width + bits_per_stage - 1) // bits_per_stage
51
52
class DivPipeCoreOperation(enum.Enum):
    """ Operation for ``DivPipeCore``.

    :attribute UDivRem: unsigned divide/remainder.
    :attribute SqrtRem: square-root/remainder.
    :attribute RSqrtRem: reciprocal-square-root/remainder.
    """

    UDivRem = 0
    SqrtRem = 1
    RSqrtRem = 2

    def __int__(self):
        """ Return the member's integer encoding (its ``value``). """
        return self.value

    @classmethod
    def create_signal(cls, *, src_loc_at=0, **kwargs):
        """ Create a signal that can contain a ``DivPipeCoreOperation``.

        :param src_loc_at: number of extra stack frames to skip; one more is
            added here to account for this method's own frame.
        :param kwargs: forwarded unchanged to ``Signal``.
        :returns: a ``Signal`` whose range covers every member's encoding
            and whose decoder renders a value as ``str(cls(value))``.
        """
        encodings = [int(member) for member in cls]
        lowest = min(encodings)
        # NOTE(review): if ``max`` is an exclusive bound, ``+ 1`` would
        # already cover the highest encoding; ``+ 2`` is kept as-is so the
        # resulting signal is unchanged -- confirm before tightening.
        upper_bound = max(encodings) + 2
        # Signal() must be called directly from this frame so that
        # ``src_loc_at`` refers to the caller of ``create_signal``.
        return Signal(min=lowest,
                      max=upper_bound,
                      src_loc_at=(src_loc_at + 1),
                      decoder=lambda v: str(cls(v)),
                      **kwargs)


# Short alias for ``DivPipeCoreOperation``.
DP = DivPipeCoreOperation
80
81
class DivPipeCoreInputData:
    """ input data type for ``DivPipeCore``.

    :attribute core_config: ``DivPipeCoreConfig`` instance describing the
        configuration to be used.
    :attribute dividend: dividend for div/rem. Signal with a bit-width of
        ``core_config.bit_width + core_config.fract_width`` and a fract-width
        of ``core_config.fract_width * 2`` bits.
    :attribute divisor_radicand: divisor for div/rem and radicand for
        sqrt/rsqrt. Signal with a bit-width of ``core_config.bit_width`` and a
        fract-width of ``core_config.fract_width`` bits.
    :attribute operation: the ``DivPipeCoreOperation`` to be computed.
    """

    def __init__(self, core_config, reset_less=True):
        """ Create a ``DivPipeCoreInputData`` instance.

        :param core_config: the ``DivPipeCoreConfig`` giving signal widths.
        :param reset_less: passed through to every member signal.
        """
        self.core_config = core_config
        base_width = core_config.bit_width
        dividend_width = base_width + core_config.fract_width
        # Signals are assigned straight to attributes so that their names
        # are derived from the attribute names.
        self.dividend = Signal(dividend_width, reset_less=reset_less)
        self.divisor_radicand = Signal(base_width, reset_less=reset_less)
        self.operation = DP.create_signal(reset_less=reset_less)

    def __iter__(self):
        """ Yield the member signals: dividend, divisor_radicand, operation.
        """
        for name in ("dividend", "divisor_radicand", "operation"):
            yield getattr(self, name)

    def eq(self, rhs):
        """ Build the list of assignments copying ``rhs`` into ``self``.

        :param rhs: object exposing ``dividend``, ``divisor_radicand`` and
            ``operation`` attributes.
        :returns: list with one ``.eq()`` result per member, in that order.
        """
        assignments = []
        assignments.append(self.dividend.eq(rhs.dividend))
        assignments.append(self.divisor_radicand.eq(rhs.divisor_radicand))
        assignments.append(self.operation.eq(rhs.operation))
        return assignments
117
118
class DivPipeCoreInterstageData:
    """ interstage data type for ``DivPipeCore``.

    :attribute core_config: ``DivPipeCoreConfig`` instance describing the
        configuration to be used.
    :attribute divisor_radicand: divisor for div/rem and radicand for
        sqrt/rsqrt. Signal with a bit-width of ``core_config.bit_width`` and a
        fract-width of ``core_config.fract_width`` bits.
    :attribute operation: the ``DivPipeCoreOperation`` to be computed.
    :attribute quotient_root: the quotient or root part of the result of the
        operation. Signal with a bit-width of ``core_config.bit_width`` and a
        fract-width of ``core_config.fract_width`` bits.
    :attribute root_times_radicand: ``quotient_root * divisor_radicand``.
        Signal with a bit-width of ``core_config.bit_width * 2`` and a
        fract-width of ``core_config.fract_width * 2`` bits.
    :attribute compare_lhs: The left-hand-side of the comparison in the
        equation to be solved. Signal with a bit-width of
        ``core_config.bit_width * 3`` and a fract-width of
        ``core_config.fract_width * 3`` bits.
    :attribute compare_rhs: The right-hand-side of the comparison in the
        equation to be solved. Signal with a bit-width of
        ``core_config.bit_width * 3`` and a fract-width of
        ``core_config.fract_width * 3`` bits.
    """

    # Member signal names, in the order used by ``__iter__`` and ``eq``.
    _MEMBER_NAMES = ("divisor_radicand",
                     "operation",
                     "quotient_root",
                     "root_times_radicand",
                     "compare_lhs",
                     "compare_rhs")

    def __init__(self, core_config, reset_less=True):
        """ Create a ``DivPipeCoreInterstageData`` instance.

        :param core_config: the ``DivPipeCoreConfig`` giving signal widths.
        :param reset_less: passed through to every member signal.
        """
        self.core_config = core_config
        single = core_config.bit_width
        double = core_config.bit_width * 2
        triple = core_config.bit_width * 3
        # Signals are assigned straight to attributes so that their names
        # are derived from the attribute names.
        self.divisor_radicand = Signal(single, reset_less=reset_less)
        self.operation = DP.create_signal(reset_less=reset_less)
        self.quotient_root = Signal(single, reset_less=reset_less)
        self.root_times_radicand = Signal(double, reset_less=reset_less)
        self.compare_lhs = Signal(triple, reset_less=reset_less)
        self.compare_rhs = Signal(triple, reset_less=reset_less)

    def __iter__(self):
        """ Yield each member signal in declaration order. """
        for name in self._MEMBER_NAMES:
            yield getattr(self, name)

    def eq(self, rhs):
        """ Build the list of assignments copying ``rhs`` into ``self``.

        :param rhs: object exposing the same six member attributes.
        :returns: list with one ``.eq()`` result per member, in
            declaration order.
        """
        return [getattr(self, name).eq(getattr(rhs, name))
                for name in self._MEMBER_NAMES]
176
177
class DivPipeCoreOutputData:
    """ output data type for ``DivPipeCore``.

    :attribute core_config: ``DivPipeCoreConfig`` instance describing the
        configuration to be used.
    :attribute quotient_root: the quotient or root part of the result of the
        operation. Signal with a bit-width of ``core_config.bit_width`` and a
        fract-width of ``core_config.fract_width`` bits.
    :attribute remainder: the remainder part of the result of the operation.
        Signal with a bit-width of ``core_config.bit_width * 3`` and a
        fract-width of ``core_config.fract_width * 3`` bits.
    """

    def __init__(self, core_config, reset_less=True):
        """ Create a ``DivPipeCoreOutputData`` instance.

        :param core_config: the ``DivPipeCoreConfig`` giving signal widths.
        :param reset_less: passed through to both member signals.
        """
        self.core_config = core_config
        width = core_config.bit_width
        # Signals are assigned straight to attributes so that their names
        # are derived from the attribute names.
        self.quotient_root = Signal(width, reset_less=reset_less)
        self.remainder = Signal(width * 3, reset_less=reset_less)

    def __iter__(self):
        """ Yield the member signals: quotient_root, then remainder. """
        for name in ("quotient_root", "remainder"):
            yield getattr(self, name)

    def eq(self, rhs):
        """ Build the list of assignments copying ``rhs`` into ``self``.

        :param rhs: object exposing ``quotient_root`` and ``remainder``.
        :returns: two-element list of ``.eq()`` results, in that order.
        """
        copy_root = self.quotient_root.eq(rhs.quotient_root)
        copy_remainder = self.remainder.eq(rhs.remainder)
        return [copy_root, copy_remainder]
209
210
class DivPipeCoreSetupStage(Elaboratable):
    """ Setup Stage of the core of the div/rem/sqrt/rsqrt pipeline.

    Converts a ``DivPipeCoreInputData`` into the initial
    ``DivPipeCoreInterstageData``: the partial results start at zero and
    ``compare_lhs`` is loaded according to the requested operation.
    """

    def __init__(self, core_config):
        """ Create a ``DivPipeCoreSetupStage`` instance.

        :param core_config: the ``DivPipeCoreConfig`` to build for.
        """
        self.core_config = core_config
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ Get the input spec for this pipeline stage. """
        return DivPipeCoreInputData(self.core_config)

    def ospec(self):
        """ Get the output spec for this pipeline stage. """
        return DivPipeCoreInterstageData(self.core_config)

    def setup(self, m, i):
        """ Pipeline stage setup.

        Registers this stage as submodule ``div_pipe_core_setup`` of ``m``
        and combinatorially connects ``i`` to this stage's input.
        """
        m.submodules.div_pipe_core_setup = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ Pipeline stage process: return the output data (``i`` unused).
        """
        return self.o

    def elaborate(self, platform):
        """ Elaborate into ``Module``. """
        m = Module()
        fract_width = self.core_config.fract_width
        inp = self.i
        out = self.o

        # pass the divisor/radicand through; no result bits are known yet
        m.d.comb += [
            out.divisor_radicand.eq(inp.divisor_radicand),
            out.quotient_root.eq(0),
            out.root_times_radicand.eq(0),
        ]

        # load the left-hand side of the equation being solved
        with m.If(inp.operation == int(DP.UDivRem)):
            m.d.comb += out.compare_lhs.eq(inp.dividend << fract_width)
        with m.Elif(inp.operation == int(DP.SqrtRem)):
            m.d.comb += out.compare_lhs.eq(
                inp.divisor_radicand << (fract_width * 2))
        with m.Else():  # DivPipeCoreOperation.RSqrtRem
            # the constant 1, shifted left by fract_width * 3 bits
            m.d.comb += out.compare_lhs.eq(1 << (fract_width * 3))

        m.d.comb += [
            out.compare_rhs.eq(0),
            out.operation.eq(inp.operation),
        ]

        return m
259
260
class Trial(Elaboratable):
    """ Compute the candidate ``compare_rhs`` for one trial digit value.

    Given the running ``compare_rhs`` and the fixed ``trial_bits`` value,
    ``trial_compare_rhs`` is driven with (``fw`` = ``fract_width``,
    ``cs`` = ``current_shift``, ``tb`` = ``trial_bits``):

    * UDivRem:
      ``compare_rhs + ((divisor_radicand * tb) << (fw + cs))``
    * SqrtRem:
      ``compare_rhs + ((quotient_root * tb) << (fw + cs + 1))
      + ((tb * tb) << (fw + cs * 2))``
    * otherwise (RSqrtRem):
      ``compare_rhs + ((root_times_radicand * tb) << (cs + 1))
      + ((divisor_radicand * tb * tb) << (cs * 2))``
    """

    def __init__(self, core_config, trial_bits, current_shift, log2_radix):
        """ Create a ``Trial`` instance.

        :param core_config: the ``DivPipeCoreConfig`` giving signal widths.
        :param trial_bits: the constant digit value this trial evaluates.
        :param current_shift: bit position of the digit being determined.
        :param log2_radix: bit-width used for the ``trial_bits`` constant.
        """
        self.core_config = core_config
        self.trial_bits = trial_bits
        self.current_shift = current_shift
        self.log2_radix = log2_radix
        bw = core_config.bit_width
        # Signals are assigned straight to attributes so that their names
        # are derived from the attribute names.
        self.divisor_radicand = Signal(bw, reset_less=True)
        self.quotient_root = Signal(bw, reset_less=True)
        self.root_times_radicand = Signal(bw * 2, reset_less=True)
        self.compare_rhs = Signal(bw * 3, reset_less=True)
        self.trial_compare_rhs = Signal(bw * 3, reset_less=True)
        self.operation = DP.create_signal(reset_less=True)

    def elaborate(self, platform):
        """ Elaborate into ``Module``. """
        m = Module()

        bit_width = self.core_config.bit_width
        fract_width = self.core_config.fract_width
        shift = self.current_shift
        digit = self.trial_bits
        digit_width = self.log2_radix

        trial_bits_sig = Const(digit, digit_width)
        trial_bits_sqrd_sig = Const(digit * digit, digit_width * 2)

        tblen = bit_width + digit_width
        tblen2 = bit_width + digit_width * 2

        # NOTE: locals bound to Signal() keep their names on purpose; the
        # signal name is taken from the variable it is assigned to.
        dr_times_trial_bits_sqrd = Signal(tblen2, reset_less=True)
        m.d.comb += dr_times_trial_bits_sqrd.eq(
            self.divisor_radicand * trial_bits_sqrd_sig)

        # UDivRem
        with m.If(self.operation == int(DP.UDivRem)):
            dr_times_trial_bits = Signal(tblen, reset_less=True)
            m.d.comb += dr_times_trial_bits.eq(
                self.divisor_radicand * trial_bits_sig)

            div_rhs = (self.compare_rhs
                       + (dr_times_trial_bits << (fract_width + shift)))
            m.d.comb += self.trial_compare_rhs.eq(div_rhs)

        # SqrtRem
        with m.Elif(self.operation == int(DP.SqrtRem)):
            qr_times_trial_bits = Signal((tblen + 1) * 2, reset_less=True)
            m.d.comb += qr_times_trial_bits.eq(
                self.quotient_root * trial_bits_sig)

            linear_term = qr_times_trial_bits << (fract_width + shift + 1)
            square_term = trial_bits_sqrd_sig << (fract_width + shift * 2)
            sqrt_rhs = (self.compare_rhs + linear_term) + square_term
            m.d.comb += self.trial_compare_rhs.eq(sqrt_rhs)

        # RSqrtRem
        with m.Else():
            rr_times_trial_bits = Signal((tblen + 1) * 3, reset_less=True)
            m.d.comb += rr_times_trial_bits.eq(
                self.root_times_radicand * trial_bits_sig)

            linear_term = rr_times_trial_bits << (shift + 1)
            square_term = dr_times_trial_bits_sqrd << (shift * 2)
            rsqrt_rhs = (self.compare_rhs + linear_term) + square_term
            m.d.comb += self.trial_compare_rhs.eq(rsqrt_rhs)

        return m
338
339
class DivPipeCoreCalculateStage(Elaboratable):
    """ Calculate Stage of the core of the div/rem/sqrt/rsqrt pipeline.

    Each stage determines up to ``core_config.log2_radix`` further bits of
    ``quotient_root``, starting from the most-significant end in stage 0.
    It does so by evaluating one ``Trial`` per possible digit value and
    keeping the largest digit whose trial right-hand side does not exceed
    ``compare_lhs``.
    """

    def __init__(self, core_config, stage_index):
        """ Create a ``DivPipeCoreCalculateStage`` instance.

        :param core_config: the ``DivPipeCoreConfig`` to build for.
        :param stage_index: position of this stage; must lie in
            ``range(core_config.n_stages)``.
        """
        self.core_config = core_config
        assert stage_index in range(core_config.n_stages)
        self.stage_index = stage_index
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ Get the input spec for this pipeline stage. """
        return DivPipeCoreInterstageData(self.core_config)

    def ospec(self):
        """ Get the output spec for this pipeline stage. """
        return DivPipeCoreInterstageData(self.core_config)

    def setup(self, m, i):
        """ Pipeline stage setup.

        Registers this stage as submodule
        ``div_pipe_core_calculate_<stage_index>`` of ``m`` and
        combinatorially connects ``i`` to this stage's input.
        """
        setattr(m.submodules,
                f"div_pipe_core_calculate_{self.stage_index}",
                self)
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ Pipeline stage process: return the output data (``i`` unused).
        """
        return self.o

    def elaborate(self, platform):
        """ Elaborate into ``Module``. """
        m = Module()
        cfg = self.core_config
        inp = self.i
        out = self.o

        # copy invariant inputs to outputs (for next stage)
        m.d.comb += [
            out.divisor_radicand.eq(inp.divisor_radicand),
            out.operation.eq(inp.operation),
            out.compare_lhs.eq(inp.compare_lhs),
        ]

        # constants: bits still undetermined when this stage starts, the
        # number of bits handled here (the last stage may get fewer than
        # cfg.log2_radix), and the position of those bits in quotient_root
        bits_remaining = cfg.bit_width - self.stage_index * cfg.log2_radix
        log2_radix = min(cfg.log2_radix, bits_remaining)
        assert log2_radix > 0
        current_shift = bits_remaining - log2_radix
        radix = 1 << log2_radix

        # one Trial submodule per candidate digit value; pfl collects the
        # per-trial pass flags (unary priority)
        trial_compare_rhs_values = []
        pfl = []
        for trial_bits in range(radix):
            trial = Trial(cfg, trial_bits, current_shift, log2_radix)
            setattr(m.submodules, "trial%d" % trial_bits, trial)

            m.d.comb += [
                trial.divisor_radicand.eq(inp.divisor_radicand),
                trial.quotient_root.eq(inp.quotient_root),
                trial.root_times_radicand.eq(inp.root_times_radicand),
                trial.compare_rhs.eq(inp.compare_rhs),
                trial.operation.eq(inp.operation),
            ]

            # remember the trial output for the final selection
            trial_compare_rhs_values.append(trial.trial_compare_rhs)

            # compare against the [invariant] lhs. Every trial adds only
            # non-negative terms scaled by trial_bits to compare_rhs, so
            # trial_compare_rhs never shrinks as trial_bits increases.
            pass_flag = Signal(name=f"pass_flag_{trial_bits}",
                               reset_less=True)
            m.d.comb += pass_flag.eq(
                inp.compare_lhs >= trial.trial_compare_rhs)
            pfl.append(pass_flag)

        # Cat all the pass flags together (easier to handle, below)
        pass_flags = Signal(radix, reset_less=True)
        m.d.comb += pass_flags.eq(Cat(*pfl))

        # convert pass_flags (unary priority) to next_bits (binary index)
        #
        # Assumes that for each set bit in pass_flags, all previous bits
        # are also set.
        #
        # Assumes that pass_flags[0] is always set (since
        # compare_lhs >= compare_rhs is a pipeline invariant).
        #
        # The encoder sees the inverted flags, so pe.o is the index of the
        # first failing trial and pe.n is asserted when no trial failed.
        m.submodules.pe = pe = PriorityEncoder(radix)
        next_bits = Signal(log2_radix, reset_less=True)
        m.d.comb += pe.i.eq(~pass_flags)
        with m.If(~pe.n):
            # highest passing digit is the one just below the first failure
            m.d.comb += next_bits.eq(pe.o-1)
        with m.Else():
            # every trial passed: take the largest digit
            m.d.comb += next_bits.eq(radix-1)

        # get the highest passing rhs trial (indexed by next_bits)
        rhs_by_digit = Array(trial_compare_rhs_values)
        m.d.comb += out.compare_rhs.eq(rhs_by_digit[next_bits])

        # create outputs for next phase
        radicand_step = (inp.divisor_radicand * next_bits) << current_shift
        m.d.comb += out.root_times_radicand.eq(
            inp.root_times_radicand + radicand_step)
        m.d.comb += out.quotient_root.eq(
            inp.quotient_root | (next_bits << current_shift))
        return m
443
444
class DivPipeCoreFinalStage(Elaboratable):
    """ Final Stage of the core of the div/rem/sqrt/rsqrt pipeline.

    Converts the last ``DivPipeCoreInterstageData`` into a
    ``DivPipeCoreOutputData``: ``quotient_root`` is passed through and the
    remainder is ``compare_lhs - compare_rhs``.
    """

    def __init__(self, core_config):
        """ Create a ``DivPipeCoreFinalStage`` instance.

        :param core_config: the ``DivPipeCoreConfig`` to build for.
        """
        self.core_config = core_config
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ Get the input spec for this pipeline stage. """
        return DivPipeCoreInterstageData(self.core_config)

    def ospec(self):
        """ Get the output spec for this pipeline stage. """
        return DivPipeCoreOutputData(self.core_config)

    def setup(self, m, i):
        """ Pipeline stage setup.

        Registers this stage as submodule ``div_pipe_core_final`` of ``m``
        and combinatorially connects ``i`` to this stage's input.
        """
        m.submodules.div_pipe_core_final = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ Pipeline stage process: return the output data (``i`` unused).
        """
        return self.o

    def elaborate(self, platform):
        """ Elaborate into ``Module``. """
        m = Module()
        inp = self.i
        out = self.o

        # remainder is the left-hand side minus the right-hand side of the
        # comparison for the operation that was computed
        remainder = inp.compare_lhs - inp.compare_rhs
        m.d.comb += [
            out.quotient_root.eq(inp.quotient_root),
            out.remainder.eq(remainder),
        ]

        return m