# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
from nmigen import Module, Signal, Cat, Mux
from nmigen.lib.coding import PriorityEncoder
from nmigen.cli import main, verilog

from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
from fpbase import MultiShiftRMerge
#from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """ Base class for one named state of the adder's state machine.

        state_from is the name of the state; FPADD uses it as the
        argument to m.State() when building the FSM.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the inputs dict and exposes each entry as an attribute
        """
        self.inputs = inputs
        for k, v in inputs.items():
            setattr(self, k, v)

    def set_outputs(self, outputs):
        """ stores the outputs dict and exposes each entry as an attribute
        """
        self.outputs = outputs
        for k, v in outputs.items():
            setattr(self, k, v)
class FPGetOpMod:
    """ Combinatorial operand decoder.

        out_decode is set when both in_op.ack and in_op.stb are set;
        only then is in_op.v decoded into out_op.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = FPNumIn(self.in_op, width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
        #m.submodules.get_op_in = self.in_op
        m.submodules.get_op_out = self.out_op
        with m.If(self.out_decode):
            m.d.comb += [
                self.out_op.decode(self.in_op.v),
            ]
        return m
class FPGetOp(FPState):
    """ gets operand

        Wraps an FPGetOpMod: once the module signals out_decode the
        decoded operand is latched into out_op and the FSM moves on to
        out_state; until then in_op.ack is asserted.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = FPNumIn(in_op, width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs
        """
        # the submodule is registered under this state's own name
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += self.mod.in_op.copy(in_op)
        m.d.comb += self.out_op.v.eq(self.mod.out_op.v)
        m.d.comb += self.out_decode.eq(self.mod.out_decode)

    def action(self, m):
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.in_op.ack.eq(0),
                self.out_op.copy(self.mod.out_op)
            ]
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        out_do_z is set whenever one of the special cases below matched,
        in which case out_z carries the result.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_z, out_do_z):
        """ links module to inputs and outputs
        """
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)
        #m.d.comb += out_z.v.eq(self.out_z.v)
        m.d.comb += out_do_z.eq(self.out_do_z)

    def elaborate(self, platform):
        m = Module()

        m.submodules.sc_in_a = self.in_a
        m.submodules.sc_in_b = self.in_b
        m.submodules.sc_out_z = self.out_z

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)

        # if a is NaN or b is NaN return NaN
        with m.If(self.in_a.is_nan | self.in_b.is_nan):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(self.in_a.is_inf):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.inf(self.in_a.s)
            # if a is inf and signs don't match return NaN
            with m.If(self.in_b.exp_128 & s_nomatch):
                m.d.comb += self.out_z.nan(0)

        # if b is inf return inf
        with m.Elif(self.in_b.is_inf):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.inf(self.in_b.s)

        # NOTE(review): in the three create() calls below the trailing
        # exponent/mantissa arguments (mantissa slice [3:-1]) were restored
        # from context -- confirm against FPNumOut.create.

        # if a is zero and b zero return signed-a/b
        with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
                                          self.in_b.e,
                                          self.in_b.m[3:-1])

        # if a is zero return b
        with m.Elif(self.in_a.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
                                          self.in_b.m[3:-1])

        # if b is zero return a
        with m.Elif(self.in_b.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
                                          self.in_a.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.zero(0)

        # Denormalised Number checks
        with m.Else():
            m.d.comb += self.out_do_z.eq(0)

        return m
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM state "special_cases": when the module flags a special case
        its result is latched and the FSM jumps to "put_z"; otherwise
        processing continues with "denormalise".
    """

    def __init__(self, width):
        FPState.__init__(self, "special_cases")
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def action(self, m):
        with m.If(self.out_do_z):
            # only take the output
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
class FPAddDeNormMod(FPState):
    """ Combinatorial denormalisation of both operands.

        Each output starts as a copy of its input.  When the input's
        exp_n127 flag is set the exponent is replaced by N126,
        otherwise the top mantissa bit is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b
        # hmmm, don't like repeating identical code
        m.d.comb += self.out_a.copy(self.in_a)
        with m.If(self.in_a.exp_n127):
            m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
        with m.Else():
            m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit

        m.d.comb += self.out_b.copy(self.in_b)
        with m.If(self.in_b.exp_n127):
            m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit b exponent
        with m.Else():
            m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit

        return m
class FPAddDeNorm(FPState):
    """ FSM state "denormalise": registers the outputs of FPAddDeNormMod
        and moves on to "align".
    """

    def __init__(self, width):
        FPState.__init__(self, "denormalise")
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self.mod
        m.d.comb += self.mod.in_a.copy(in_a)
        m.d.comb += self.mod.in_b.copy(in_b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
        m.d.sync += self.out_a.copy(self.mod.out_a)
        m.d.sync += self.out_b.copy(self.mod.out_b)
class FPAddAlignMultiMod(FPState):
    """ Multi-cycle exponent alignment step.

        Compares the exponents of in_a and in_b and applies shift_down
        to whichever operand has the smaller exponent; exp_eq is set
        when the exponents are already equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: pass both operands through, exponents not (yet) equal
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)

        return m
class FPAddAlignMulti(FPState):
    """ FSM state "align" (multi-cycle variant): registers the outputs
        of FPAddAlignMultiMod on every cycle and only advances to
        "add_0" once the module reports exp_eq.
    """

    def __init__(self, width):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += self.mod.in_a.copy(in_a)
        m.d.comb += self.mod.in_b.copy(in_b)
        #m.d.comb += self.out_a.copy(self.mod.out_a)
        #m.d.comb += self.out_b.copy(self.mod.out_b)
        m.d.comb += self.exp_eq.eq(self.mod.exp_eq)

    def action(self, m):
        m.d.sync += self.out_a.copy(self.mod.out_a)
        m.d.sync += self.out_b.copy(self.mod.out_b)
        with m.If(self.exp_eq):
            m.next = "add_0"
class FPAddAlignSingleMod:
    """ Single-cycle exponent alignment using one shared multi-bit
        right-shifter (MultiShiftRMerge).
    """

    def __init__(self, width):
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.in_a.e), True)
        msr = MultiShiftRMerge(self.in_a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        # exponent differences in both directions, plus their comparisons
        m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
        m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
        m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
        m.d.comb += egz.eq(self.in_a.e > self.in_b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # exponent of a greater than b: shift b down
        with m.If(egz):
            m.d.comb += [t_inp.copy(self.in_b),
                         tdiff.eq(ediff),
                         self.out_b.copy(t_out),
                         self.out_b.s.eq(self.in_b.s), # whoops forgot sign
                        ]
        # exponent of b greater than a: shift a down
        with m.Elif(elz):
            m.d.comb += [t_inp.copy(self.in_a),
                         tdiff.eq(ediffr),
                         self.out_a.copy(t_out),
                         self.out_a.s.eq(self.in_a.s), # whoops forgot sign
                        ]
        return m
class FPAddAlignSingle(FPState):
    """ FSM state "align" (single-cycle variant): registers the outputs
        of FPAddAlignSingleMod and moves straight on to "add_0".
    """

    def __init__(self, width):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += self.mod.in_a.copy(in_a)
        m.d.comb += self.mod.in_b.copy(in_b)

    def action(self, m):
        # NOTE: could be done as comb
        m.d.sync += self.out_a.copy(self.mod.out_a)
        m.d.sync += self.out_b.copy(self.mod.out_b)
        m.next = "add_0"
class FPAddStage0Mod:
    """ Combinatorial mantissa add/subtract.

        out_tot receives the sum (same signs) or the difference of the
        one-bit-extended mantissas (differing signs, larger minus
        smaller); out_z takes in_a's exponent and the resulting sign.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
                     mge.eq(self.in_a.m >= self.in_b.m),
                     am0.eq(Cat(self.in_a.m, 0)),
                     bm0.eq(Cat(self.in_b.m, 0))
                    ]
        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            m.d.comb += [
                self.out_tot.eq(am0 + bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # a mantissa greater than b, use a
        with m.Elif(mge):
            m.d.comb += [
                self.out_tot.eq(am0 - bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # b mantissa greater than a, use b
        with m.Else():
            m.d.comb += [
                self.out_tot.eq(bm0 - am0),
                self.out_z.s.eq(self.in_b.s)
            ]
        return m
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width):
        FPState.__init__(self, "add_0")
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self.mod

        m.d.comb += self.mod.in_a.copy(in_a)
        m.d.comb += self.mod.in_b.copy(in_b)

    def action(self, m):
        m.next = "add_1"
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.d.sync += self.out_tot.eq(self.mod.out_tot)
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        # tot[27] gets set when the sum overflows. shift result down
        with m.If(self.in_tot[-1]):
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[4:]),
                self.out_of.m0.eq(self.in_tot[4]),
                self.out_of.guard.eq(self.in_tot[3]),
                self.out_of.round_bit.eq(self.in_tot[2]),
                self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
                self.out_z.e.eq(self.in_z.e + 1)
            ]
        # top bit of tot clear: take the result one bit lower, unshifted
        with m.Else():
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[3:]),
                self.out_of.m0.eq(self.in_tot[3]),
                self.out_of.guard.eq(self.in_tot[2]),
                self.out_of.round_bit.eq(self.in_tot[1]),
                self.out_of.sticky.eq(self.in_tot[0])
            ]
        return m
class FPAddStage1(FPState):
    """ FSM state "add_1": registers the outputs of FPAddStage1Mod,
        raises norm_stb and moves on to "normalise_1".
    """

    def __init__(self, width):
        FPState.__init__(self, "add_1")
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self.mod

        m.d.comb += self.mod.in_z.copy(in_z)
        m.d.comb += self.mod.in_tot.eq(in_tot)

        # sets to zero when not in add1 state
        m.d.sync += self.norm_stb.eq(0)

    def action(self, m):
        m.submodules.add1_out_overflow = self.out_of
        m.d.sync += self.out_of.copy(self.mod.out_of)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
class FPNorm1ModSingle:
    """ Single-cycle normalisation.

        in_select chooses between (in_z, in_of) and (temp_z, temp_of) as
        the value to normalise.  The mantissa is shifted up by a clamped
        leading-zero count (exponent decreased), or shifted down through
        the multi-shifter (exponent increased).  out_norm is always 0
        here, i.e. no further iteration is requested.
    """

    def __init__(self, width):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(0) # loop-end condition

        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),     # guard
                self.out_of.round_bit.eq(temp_s[0]), # round
            ]
        # increase exponent
        with m.Elif(increase):
            # NOTE(review): temp_m is one bit wider than the shifter
            # (mwid+1 vs mwid) -- confirm the truncation is intended.
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                # the low bits are taken from the shifter output: temp_s
                # is only driven in the decrease branch, so reading it
                # here would always give zero.
                self.out_of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(msr.m[2]),     # guard
                self.out_of.round_bit.eq(msr.m[1]), # round
                self.out_of.sticky.eq(msr.m[0]),    # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
class FPNorm1ModMulti:
    """ Multi-cycle normalisation: shifts by a single bit per pass.

        in_select chooses between (in_z, in_of) and (temp_z, temp_of) as
        the value to normalise.  out_norm is set while a shift (in
        either direction) was still required on this pass.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end

        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1(FPState):
    """ FSM state "normalise_1".

        Wraps FPNorm1ModSingle (single_cycle=True) or FPNorm1ModMulti.
        The state stays active while the module reports out_norm,
        feeding the registered temp_z/temp_of back in; once out_norm is
        clear it records roundz and moves on to "round".
    """

    def __init__(self, width, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        if single_cycle:
            self.mod = FPNorm1ModSingle(width)
        else:
            self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self.mod

        m.d.comb += self.mod.in_z.copy(in_z)
        m.d.comb += self.mod.in_of.copy(in_of)

        m.d.comb += self.mod.in_select.eq(self.in_accept)
        m.d.comb += self.mod.temp_z.copy(self.temp_z)
        m.d.comb += self.mod.temp_of.copy(self.temp_of)

        m.d.comb += self.out_z.copy(self.mod.out_z)
        m.d.comb += self.out_norm.eq(self.mod.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):

        # accept a fresh input only when strobed and not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.copy(self.mod.out_of)
        m.d.sync += self.temp_z.copy(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPRoundMod:
    """ Combinatorial rounding: when in_roundz is set the mantissa is
        incremented, and if the mantissa was all 1s the exponent is
        incremented as well.
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_roundz):
            m.d.comb += self.out_z.m.eq(self.in_z.m + 1) # mantissa rounds up
            with m.If(self.in_z.m == self.in_z.m1s): # all 1s
                m.d.comb += self.out_z.e.eq(self.in_z.e + 1) # exponent up
        return m
class FPRound(FPState):
    """ FSM state "round": registers the output of FPRoundMod and moves
        on to "corrections".
    """

    def __init__(self, width):
        FPState.__init__(self, "round")
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self.mod

        m.d.comb += self.mod.in_z.copy(in_z)
        m.d.comb += self.mod.in_roundz.eq(roundz)

    def action(self, m):
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial exponent correction: out_z is a copy of in_z,
        except that a denormalised in_z gets its exponent set to N127.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of
        FPCorrectionsMod and moves on to "pack".
    """

    def __init__(self, width):
        FPState.__init__(self, "corrections")
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self.mod
        m.d.comb += self.mod.in_z.copy(in_z)

    def action(self, m):
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "pack"
class FPPackMod:
    """ Combinatorial packing of the result: an overflowed in_z becomes
        a signed infinity, anything else is created from in_z's sign,
        exponent and mantissa.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def elaborate(self, platform):
        m = Module()
        m.submodules.pack_in_z = self.in_z
        with m.If(self.in_z.is_overflowed):
            m.d.comb += self.out_z.inf(self.in_z.s)
        with m.Else():
            m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e,
                                          self.in_z.m)
        return m
class FPPack(FPState):
    """ FSM state "pack": registers the packed value produced by
        FPPackMod and moves on to "pack_put_z".
    """

    def __init__(self, width):
        FPState.__init__(self, "pack")
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self.mod
        m.d.comb += self.mod.in_z.copy(in_z)

    def action(self, m):
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ Output state: copies in_z.v to out_z.v and holds out_z.stb high
        until out_z.ack is seen, then clears stb and returns to "get_a".
    """

    def __init__(self, state, in_z, out_z):
        FPState.__init__(self, state)
        self.in_z = in_z
        self.out_z = out_z

    def action(self, m):
        m.d.sync += [
            self.out_z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.stb & self.out_z.ack):
            m.d.sync += self.out_z.stb.eq(0)
            m.next = "get_a"
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
class FPADD(FPBase):
    """ Floating-point adder, built as a finite state machine whose
        states are the FPState objects registered via add_state().

        width:        bit-width passed to every operand and stage
        single_cycle: selects FPAddAlignSingle (True) or FPAddAlignMulti
                      (False) for the alignment state
    """

    def __init__(self, width, single_cycle=False):
        self.width = width
        self.single_cycle = single_cycle

        self.in_a = FPOp(width)
        self.in_b = FPOp(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ registers a state and returns it, so that creation and
            registration can be written as a single expression
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.in_a = self.in_a
        m.submodules.in_b = self.in_b
        m.submodules.out_z = self.out_z

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      self.in_a, self.width))
        geta.setup(m, self.in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "special_cases",
                                      self.in_b, self.width))
        getb.setup(m, self.in_b)
        b = getb.out_op

        sc = self.add_state(FPAddSpecialCases(self.width))
        sc.mod.setup(m, a, b, sc.out_z, sc.out_do_z)
        m.submodules.specialcases = sc.mod

        dn = self.add_state(FPAddDeNorm(self.width))
        dn.setup(m, a, b)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width))
            alm.setup(m, dn.out_a, dn.out_b)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width))
            alm.setup(m, dn.out_a, dn.out_b)

        add0 = self.add_state(FPAddStage0(self.width))
        add0.setup(m, alm.out_a, alm.out_b)

        add1 = self.add_state(FPAddStage1(self.width))
        add1.setup(m, add0.out_tot, add0.out_z)

        n1 = self.add_state(FPNorm1(self.width))
        n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb)

        rn = self.add_state(FPRound(self.width))
        rn.setup(m, n1.out_z, n1.out_roundz)

        cor = self.add_state(FPCorrections(self.width))
        cor.setup(m, rn.out_z)

        pa = self.add_state(FPPack(self.width))
        pa.setup(m, cor.out_z)

        # normal result path: output the packed value
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z))

        # special-case path: output the value chosen by special_cases
        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z))

        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
if __name__ == "__main__":
    # build a 32-bit single-cycle-align adder and hand it to nmigen's CLI
    alu = FPADD(width=32, single_cycle=True)
    main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \