1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
9 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
10 from fpbase
import MultiShiftRMerge
11 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """ base class for one named state of the adder state machine

        state_from is the name the state is registered under.
    """
    def __init__(self, state_from):
        self.state_from = state_from

    def _expose(self, signals):
        # make every entry of the dict reachable as self.<key>
        for name in signals:
            setattr(self, name, signals[name])

    def set_inputs(self, inputs):
        """ records the inputs dict and publishes each entry as an attribute
        """
        self.inputs = inputs
        self._expose(inputs)

    def set_outputs(self, outputs):
        """ records the outputs dict and publishes each entry as an attribute
        """
        self.outputs = outputs
        self._expose(outputs)
def __init__(self, width):
    """ creates the operand port, its decoded number and the decode flag

        width: bit-width passed on to FPOp and FPNumIn
    """
    # raw operand port (provides the stb / ack / v used in elaborate)
    self.in_op = FPOp(width)
    # decoded form of the operand, built on top of the raw port
    self.out_op = FPNumIn(self.in_op, width)
    # high while the operand is being decoded
    self.out_decode = Signal(reset_less=True)
def setup(self, m, in_op, out_op, out_decode):
    """ links module to inputs and outputs
    """
    # one combinatorial batch: operand in, decoded value and flag out
    m.d.comb += [
        self.in_op.copy(in_op),
        out_op.v.eq(self.out_op.v),
        out_decode.eq(self.out_decode),
    ]
def elaborate(self, platform):
    """ decodes the operand while it is both acknowledged and strobed
    """
    m = Module()
    op = self.in_op
    m.d.comb += self.out_decode.eq((op.ack) & (op.stb))
    #m.submodules.get_op_in = self.in_op
    m.submodules.get_op_out = self.out_op
    with m.If(self.out_decode):
        m.d.comb += [
            self.out_op.decode(op.v),
        ]
    return m
53 class FPGetOp(FPState
):
57 def __init__(self
, in_state
, out_state
, in_op
, width
):
58 FPState
.__init
__(self
, in_state
)
59 self
.out_state
= out_state
60 self
.mod
= FPGetOpMod(width
)
62 self
.out_op
= FPNumIn(in_op
, width
)
63 self
.out_decode
= Signal(reset_less
=True)
66 with m
.If(self
.out_decode
):
67 m
.next
= self
.out_state
70 self
.out_op
.copy(self
.mod
.out_op
)
73 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
76 class FPGetOpB(FPState
):
def __init__(self, in_b, width):
    """ state "get_b": keeps the B operand port and its decoded number
    """
    FPState.__init__(self, "get_b")
    # keep the port: it is read again when the state runs
    self.in_b = in_b
    self.b = FPNumIn(in_b, width)
86 self
.get_op(m
, self
.in_b
, self
.b
, "special_cases")
89 class FPAddSpecialCasesMod
:
90 """ special cases: NaNs, infs, zeros, denormalised
91 NOTE: some of these are unique to add. see "Special Operations"
92 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width):
    """ creates the two operands, the result and the "result valid" flag

        width: bit-width passed on to FPNumBase / FPNumOut
    """
    # operands under test
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # result, only meaningful when out_do_z is raised
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, in_a, in_b, out_z, out_do_z):
    """ links module to inputs and outputs

        out_z is accepted but not driven here: its assignment is
        commented out below.
    """
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
        #out_z.v.eq(self.out_z.v),
        out_do_z.eq(self.out_do_z),
    ]
109 def elaborate(self
, platform
):
112 m
.submodules
.sc_in_a
= self
.in_a
113 m
.submodules
.sc_in_b
= self
.in_b
114 m
.submodules
.sc_out_z
= self
.out_z
117 m
.d
.comb
+= s_nomatch
.eq(self
.in_a
.s
!= self
.in_b
.s
)
120 m
.d
.comb
+= m_match
.eq(self
.in_a
.m
== self
.in_b
.m
)
122 # if a is NaN or b is NaN return NaN
123 with m
.If(self
.in_a
.is_nan | self
.in_b
.is_nan
):
124 m
.d
.comb
+= self
.out_do_z
.eq(1)
125 m
.d
.comb
+= self
.out_z
.nan(0)
127 # XXX WEIRDNESS for FP16 non-canonical NaN handling
130 ## if a is zero and b is NaN return -b
131 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
132 # m.d.comb += self.out_do_z.eq(1)
133 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
135 ## if b is zero and a is NaN return -a
136 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
137 # m.d.comb += self.out_do_z.eq(1)
138 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
140 ## if a is -zero and b is NaN return -b
141 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
142 # m.d.comb += self.out_do_z.eq(1)
143 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
145 ## if b is -zero and a is NaN return -a
146 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
147 # m.d.comb += self.out_do_z.eq(1)
148 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
150 # if a is inf return inf (or NaN)
151 with m
.Elif(self
.in_a
.is_inf
):
152 m
.d
.comb
+= self
.out_do_z
.eq(1)
153 m
.d
.comb
+= self
.out_z
.inf(self
.in_a
.s
)
154 # if a is inf and signs don't match return NaN
155 with m
.If(self
.in_b
.exp_128
& s_nomatch
):
156 m
.d
.comb
+= self
.out_z
.nan(0)
158 # if b is inf return inf
159 with m
.Elif(self
.in_b
.is_inf
):
160 m
.d
.comb
+= self
.out_do_z
.eq(1)
161 m
.d
.comb
+= self
.out_z
.inf(self
.in_b
.s
)
163 # if a is zero and b zero return signed-a/b
164 with m
.Elif(self
.in_a
.is_zero
& self
.in_b
.is_zero
):
165 m
.d
.comb
+= self
.out_do_z
.eq(1)
166 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
& self
.in_b
.s
,
170 # if a is zero return b
171 with m
.Elif(self
.in_a
.is_zero
):
172 m
.d
.comb
+= self
.out_do_z
.eq(1)
173 m
.d
.comb
+= self
.out_z
.create(self
.in_b
.s
, self
.in_b
.e
,
176 # if b is zero return a
177 with m
.Elif(self
.in_b
.is_zero
):
178 m
.d
.comb
+= self
.out_do_z
.eq(1)
179 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
, self
.in_a
.e
,
182 # if a equal to -b return zero (+ve zero)
183 with m
.Elif(s_nomatch
& m_match
& (self
.in_a
.e
== self
.in_b
.e
)):
184 m
.d
.comb
+= self
.out_do_z
.eq(1)
185 m
.d
.comb
+= self
.out_z
.zero(0)
187 # Denormalised Number checks
189 m
.d
.comb
+= self
.out_do_z
.eq(0)
194 class FPAddSpecialCases(FPState
):
195 """ special cases: NaNs, infs, zeros, denormalised
196 NOTE: some of these are unique to add. see "Special Operations"
197 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width):
    """ state "special_cases": wraps FPAddSpecialCasesMod

        width: bit-width passed on to the module and to FPNumOut
    """
    FPState.__init__(self, "special_cases")
    # combinatorial logic doing the actual special-case detection
    self.mod = FPAddSpecialCasesMod(width)
    # this state's own result and "use the result" flag
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
207 with m
.If(self
.out_do_z
):
208 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
211 m
.next
= "denormalise"
class FPAddDeNormMod(FPState):
    """ combinatorial denormalisation of both operands

        each output is a copy of its input, except that an exponent
        flagged by exp_n127 is replaced with N126; otherwise the top
        mantissa bit is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, out_a, out_b):
        """ links module to inputs and outputs
        """
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
            out_a.copy(self.out_a),
            out_b.copy(self.out_b),
        ]

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b
        # identical treatment for a and for b: loop rather than repeat
        for src, dst in ((self.in_a, self.out_a), (self.in_b, self.out_b)):
            m.d.comb += dst.copy(src)
            with m.If(src.exp_n127):
                m.d.comb += dst.e.eq(src.N126) # limit exponent
            with m.Else():
                m.d.comb += dst.m[-1].eq(1) # set top mantissa bit

        return m
252 class FPAddDeNorm(FPState
):
def __init__(self, width):
    """ state "denormalise": wraps FPAddDeNormMod

        width: bit-width passed on to the module and to FPNumBase
    """
    FPState.__init__(self, "denormalise")
    self.mod = FPAddDeNormMod(width)
    # denormalised copies of the two operands
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
261 # Denormalised Number checks
263 m
.d
.sync
+= self
.a
.copy(self
.out_a
)
264 m
.d
.sync
+= self
.b
.copy(self
.out_b
)
class FPAddAlignMultiMod(FPState):
    """ one alignment step: shifts whichever operand has the smaller
        exponent down once, and raises exp_eq when the exponents match
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_a, out_b, exp_eq):
        """ links module to inputs and outputs
        """
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
            out_a.copy(self.out_a),
            out_b.copy(self.out_b),
            exp_eq.eq(self.exp_eq),
        ]

    def elaborate(self, platform):
        m = Module()

        #m.submodules.align_in_a = self.in_a
        #m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through untouched
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.copy(self.in_a),
            self.out_b.copy(self.in_b),
        ]
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)

        return m
318 class FPAddAlignMulti(FPState
):
def __init__(self, width):
    """ state "align" (multi-cycle variant): wraps FPAddAlignMultiMod

        width: bit-width passed on to the module and to FPNumIn
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # operands after the current alignment step
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # set once both exponents are equal
    self.exp_eq = Signal(reset_less=True)
328 m
.d
.sync
+= self
.a
.copy(self
.out_a
)
329 m
.d
.sync
+= self
.b
.copy(self
.out_b
)
330 with m
.If(self
.exp_eq
):
class FPAddAlignSingleMod:
    """ single-cycle operand alignment using one shared multi-bit shifter
    """

    def __init__(self, width):
        # kept because elaborate builds its temporaries from it
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, out_a, out_b):
        """ links module to inputs and outputs
        """
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
            out_a.copy(self.out_a),
            out_b.copy(self.out_b),
        ]

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.in_a.e), True)
        msr = MultiShiftRMerge(self.in_a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences in both directions, plus the comparisons
        m.d.comb += [
            ediff.eq(self.in_a.e - self.in_b.e),
            ediffr.eq(self.in_b.e - self.in_a.e),
            elz.eq(self.in_a.e < self.in_b.e),
            egz.eq(self.in_a.e > self.in_b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # exponent of a greater than b: shift b down
        with m.If(egz):
            m.d.comb += [
                t_inp.copy(self.in_b),
                tdiff.eq(ediff),
                self.out_b.copy(t_out),
                self.out_b.s.eq(self.in_b.s), # whoops forgot sign
            ]
        # exponent of b greater than a: shift a down
        with m.Elif(elz):
            m.d.comb += [
                t_inp.copy(self.in_a),
                tdiff.eq(ediffr),
                self.out_a.copy(t_out),
                self.out_a.s.eq(self.in_a.s), # whoops forgot sign
            ]
        return m
416 class FPAddAlignSingle(FPState
):
def __init__(self, width):
    """ state "align" (single-cycle variant): wraps FPAddAlignSingleMod

        width: bit-width passed on to the module and to FPNumIn
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width)
    # aligned operands produced by the module
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
425 m
.d
.sync
+= self
.a
.copy(self
.out_a
)
426 m
.d
.sync
+= self
.b
.copy(self
.out_b
)
class FPAddStage0Mod:
    """ combinatorial add/subtract of the two aligned mantissas
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        # four bits wider than the result mantissa
        tot_width = self.out_z.m_width + 4
        self.out_tot = Signal(tot_width, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        # result exponent is always taken from a
        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += [
            seq.eq(self.in_a.s == self.in_b.s),
            mge.eq(self.in_a.m >= self.in_b.m),
            am0.eq(Cat(self.in_a.m, 0)),
            bm0.eq(Cat(self.in_b.m, 0)),
        ]
        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            m.d.comb += [
                self.out_tot.eq(am0 + bm0),
                self.out_z.s.eq(self.in_a.s),
            ]
        # a mantissa greater than b, use a
        with m.Elif(mge):
            m.d.comb += [
                self.out_tot.eq(am0 - bm0),
                self.out_z.s.eq(self.in_a.s),
            ]
        # b mantissa greater than a, use b
        with m.Else():
            m.d.comb += [
                self.out_tot.eq(bm0 - am0),
                self.out_z.s.eq(self.in_b.s),
            ]
        return m
478 class FPAddStage0(FPState
):
479 """ First stage of add. covers same-sign (add) and subtract
480 special-casing when mantissas are greater or equal, to
481 give greatest accuracy.
def __init__(self, width):
    """ state "add_0": wraps FPAddStage0Mod

        width: bit-width passed on to the module and to FPNumBase
    """
    FPState.__init__(self, "add_0")
    self.mod = FPAddStage0Mod(width)
    self.out_z = FPNumBase(width, False)
    # same width as the module's total: result mantissa plus four bits
    tot_width = self.out_z.m_width + 4
    self.out_tot = Signal(tot_width, reset_less=True)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs
    """
    mod = self.mod
    m.submodules.add0 = mod

    # feed both operands into the combinatorial module
    m.d.comb += [
        mod.in_a.copy(in_a),
        mod.in_b.copy(in_b),
    ]
500 # NOTE: these could be done as combinatorial (merge add0+add1)
501 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
502 m
.d
.sync
+= self
.out_tot
.eq(self
.mod
.out_tot
)
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        # four bits wider than the mantissa
        tot_width = self.in_z.m_width + 4
        self.in_tot = Signal(tot_width, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        tot = self.in_tot
        of = self.out_of
        m.d.comb += self.out_z.copy(self.in_z)
        # tot[27] gets set when the sum overflows. shift result down
        with m.If(tot[-1]):
            m.d.comb += [
                self.out_z.m.eq(tot[4:]),
                of.m0.eq(tot[4]),
                of.guard.eq(tot[3]),
                of.round_bit.eq(tot[2]),
                of.sticky.eq(tot[1] | tot[0]),
                self.out_z.e.eq(self.in_z.e + 1),
            ]
        # tot[27] zero case: mantissa and overflow bits sit one lower
        with m.Else():
            m.d.comb += [
                self.out_z.m.eq(tot[3:]),
                of.m0.eq(tot[3]),
                of.guard.eq(tot[2]),
                of.round_bit.eq(tot[1]),
                of.sticky.eq(tot[0]),
            ]
        return m
546 class FPAddStage1(FPState
):
def __init__(self, width):
    """ state "add_1": wraps FPAddStage1Mod

        width: bit-width passed on to the module and to FPNumBase
    """
    FPState.__init__(self, "add_1")
    self.mod = FPAddStage1Mod(width)
    # results of the stage: number plus overflow bits
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe handed on to the normalisation state
    self.norm_stb = Signal()
def setup(self, m, in_tot, in_z):
    """ links module to inputs and outputs
    """
    mod = self.mod
    m.submodules.add1 = mod

    m.d.comb += [
        mod.in_z.copy(in_z),
        mod.in_tot.eq(in_tot),
    ]

    m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
566 m
.submodules
.add1_out_overflow
= self
.out_of
567 m
.d
.sync
+= self
.out_of
.copy(self
.mod
.out_of
)
568 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
569 m
.d
.sync
+= self
.norm_stb
.eq(1)
570 m
.next
= "normalise_1"
class FPNorm1ModSingle:
    """ single-cycle normalisation

        decrease: mantissa MSB is zero and the exponent is above the
        minimum; the mantissa is shifted up by a leading-zero count
        (limited so the exponent stays in range).
        increase: exponent is below the minimum; the mantissa is shifted
        down through the multi-bit shifter.
    """

    def __init__(self, width):
        # kept because elaborate builds its muxed input from it
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # muxed input: either the fresh in_z/in_of or the temp copies
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the shifter is mwid wide but temp_m in the
        # increase branch is mwid+1 wide - confirm against
        # MultiShiftRMerge that no mantissa bit is lost.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(0) # loop-end condition
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),     # guard
                self.out_of.round_bit.eq(temp_s[0]), # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                # the overflow bits must come from the shifter output:
                # temp_s is only driven in the (mutually exclusive)
                # decrease branch, so reading it here always gave zero.
                self.out_of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                # overflow in bits 0..2: got shifted too
                self.out_of.guard.eq(msr.m[2]),     # guard
                self.out_of.round_bit.eq(msr.m[1]), # round
                self.out_of.sticky.eq(msr.m[0]),    # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
class FPNorm1ModMulti:
    """ one-bit-per-pass normalisation: out_norm stays raised for as
        long as another increase or decrease pass is needed
    """

    def __init__(self, width, single_cycle=True):
        # kept because elaborate builds its muxed input from it
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        # muxed input: either the fresh in_z/in_of or the temp copies
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()

        # register everything, in the same order as always
        for name, sub in (("norm1_out_z", self.out_z),
                          ("norm1_out_overflow", self.out_of),
                          ("norm1_temp_z", self.temp_z),
                          ("norm1_temp_of", self.temp_of),
                          ("norm1_in_z", self.in_z),
                          ("norm1_in_overflow", self.in_of),
                          ("norm1_insel_z", in_z),
                          ("norm1_insel_overflow", in_of)):
            setattr(m.submodules, name, sub)

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += [in_z.copy(self.in_z), in_of.copy(self.in_of)]
        with m.Else():
            m.d.comb += [in_z.copy(self.temp_z), in_of.copy(self.temp_of)]
        # initialise out from in (overridden below)
        m.d.comb += [self.out_z.copy(in_z), self.out_of.copy(in_of)]
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += [
            decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126),
            increase.eq(in_z.exp_lt_n126),
            self.out_norm.eq(decrease | increase), # loop-end
        ]
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit),
            ]

        return m
741 class FPNorm1(FPState
):
def __init__(self, width, single_cycle=True):
    """ state "normalise_1"

        width: bit-width passed on to the module and to FPNumBase
        single_cycle: picks FPNorm1ModSingle (True) or FPNorm1ModMulti
    """
    FPState.__init__(self, "normalise_1")
    mod_kls = FPNorm1ModSingle if single_cycle else FPNorm1ModMulti
    self.mod = mod_kls(width)
    # handshake with the preceding stage
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)
    # intermediate values fed back into the module
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()
    # results
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
def setup(self, m, in_z, in_of, norm_stb):
    """ links module to inputs and outputs
    """
    mod = self.mod
    m.submodules.normalise_1 = mod

    m.d.comb += [
        # incoming number and overflow bits
        mod.in_z.copy(in_z),
        mod.in_of.copy(in_of),
        # input selection and the fed-back intermediates
        mod.in_select.eq(self.in_accept),
        mod.temp_z.copy(self.temp_z),
        mod.temp_of.copy(self.temp_of),
        # module results
        self.out_z.copy(mod.out_z),
        self.out_norm.eq(mod.out_norm),
        # strobe from the previous stage
        self.stb.eq(norm_stb),
    ]
    m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state
778 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
779 m
.d
.sync
+= self
.temp_of
.copy(self
.mod
.out_of
)
780 m
.d
.sync
+= self
.temp_z
.copy(self
.out_z
)
781 with m
.If(self
.out_norm
):
782 with m
.If(self
.in_accept
):
787 m
.d
.sync
+= self
.ack
.eq(0)
789 # normalisation not required (or done).
791 m
.d
.sync
+= self
.ack
.eq(1)
792 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
def __init__(self, width):
    """ creates the round-up request, the input and the rounded output

        width: bit-width passed on to FPNumBase
    """
    # when set, the mantissa is incremented
    self.in_roundz = Signal(reset_less=True)
    self.in_z = FPNumBase(width, False)
    self.out_z = FPNumBase(width, False)
def elaborate(self, platform):
    """ copies in_z to out_z, adding one to the mantissa on in_roundz
    """
    m = Module()
    z_in, z_out = self.in_z, self.out_z
    m.d.comb += z_out.copy(z_in)
    with m.If(self.in_roundz):
        m.d.comb += z_out.m.eq(z_in.m + 1) # mantissa rounds up
        with m.If(z_in.m == z_in.m1s): # all 1s
            m.d.comb += z_out.e.eq(z_in.e + 1) # exponent up
    return m
812 class FPRound(FPState
):
def __init__(self, width):
    """ state "round": wraps FPRoundMod

        width: bit-width passed on to the module and to FPNumBase
    """
    FPState.__init__(self, "round")
    self.mod = FPRoundMod(width)
    # rounded result of this state
    self.out_z = FPNumBase(width)
def setup(self, m, in_z, roundz):
    """ links module to inputs and outputs
    """
    mod = self.mod
    m.submodules.roundz = mod

    m.d.comb += [
        mod.in_z.copy(in_z),
        mod.in_roundz.eq(roundz),
    ]
828 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
829 m
.next
= "corrections"
class FPCorrectionsMod:
    """ copies in_z to out_z, replacing the exponent with N127 when the
        input reports is_denormalised
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def elaborate(self, platform):
        m = Module()
        z_in, z_out = self.in_z, self.out_z
        m.submodules.corr_in_z = z_in
        m.submodules.corr_out_z = z_out
        m.d.comb += z_out.copy(z_in)
        with m.If(z_in.is_denormalised):
            m.d.comb += z_out.e.eq(z_in.N127)

        # overflow handling is left disabled here:
        #        with m.If(self.in_z.is_overflowed):
        #            m.d.comb += self.out_z.inf(self.in_z.s)
        #            m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e, self.in_z.m)
        return m
853 class FPCorrections(FPState
):
def __init__(self, width):
    """ state "corrections": wraps FPCorrectionsMod

        width: bit-width passed on to the module and to FPNumBase
    """
    FPState.__init__(self, "corrections")
    self.mod = FPCorrectionsMod(width)
    # corrected result of this state
    self.out_z = FPNumBase(width)
def setup(self, m, in_z):
    """ links module to inputs and outputs
    """
    mod = self.mod
    m.submodules.corrections = mod
    m.d.comb += mod.in_z.copy(in_z)
867 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
def __init__(self, width):
    """ creates the number to pack and the packed result

        width: bit-width passed on to FPNumOut
    """
    self.in_z = FPNumOut(width, False)
    self.out_z = FPNumOut(width, False)
def elaborate(self, platform):
    """ emits infinity (keeping the sign) on overflow, otherwise builds
        the result from the input's sign, exponent and mantissa
    """
    m = Module()
    z_in = self.in_z
    m.submodules.pack_in_z = z_in
    with m.If(z_in.is_overflowed):
        m.d.comb += self.out_z.inf(z_in.s)
    with m.Else():
        m.d.comb += self.out_z.create(z_in.s, z_in.e, z_in.m)
    return m
887 class FPPack(FPState
):
def __init__(self, width):
    """ state "pack": wraps FPPackMod

        width: bit-width passed on to the module and to FPNumOut
    """
    FPState.__init__(self, "pack")
    self.mod = FPPackMod(width)
    # packed result of this state
    self.out_z = FPNumOut(width, False)
def setup(self, m, in_z):
    """ links module to inputs and outputs
    """
    mod = self.mod
    m.submodules.pack = mod
    m.d.comb += mod.in_z.copy(in_z)
901 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
902 m
.next
= "pack_put_z"
905 class FPPutZ(FPState
):
907 def __init__(self
, state
, in_z
, out_z
):
908 FPState
.__init
__(self
, state
)
914 self
.out_z
.v
.eq(self
.in_z
.v
)
916 with m
.If(self
.out_z
.stb
& self
.out_z
.ack
):
917 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
920 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
925 def __init__(self
, width
, single_cycle
=False):
927 self
.single_cycle
= single_cycle
929 self
.in_a
= FPOp(width
)
930 self
.in_b
= FPOp(width
)
931 self
.out_z
= FPOp(width
)
def add_state(self, state):
    """ registers a state and hands it back

        state: the state object to append to self.states
        returns: the same object, so that callers can write
                 ``x = self.add_state(SomeState(...))`` and go on
                 to configure x
    """
    self.states.append(state)
    return state
939 def get_fragment(self
, platform
=None):
940 """ creates the HDL code-fragment for FPAdd
943 m
.submodules
.in_a
= self
.in_a
944 m
.submodules
.in_b
= self
.in_b
945 m
.submodules
.out_z
= self
.out_z
947 geta
= self
.add_state(FPGetOp("get_a", "get_b",
948 self
.in_a
, self
.width
))
950 geta
.mod
.setup(m
, self
.in_a
, geta
.out_op
, geta
.out_decode
)
951 m
.submodules
.get_a
= geta
.mod
953 getb
= self
.add_state(FPGetOp("get_b", "special_cases",
954 self
.in_b
, self
.width
))
956 getb
.mod
.setup(m
, self
.in_b
, getb
.out_op
, getb
.out_decode
)
957 m
.submodules
.get_b
= getb
.mod
959 sc
= self
.add_state(FPAddSpecialCases(self
.width
))
960 sc
.mod
.setup(m
, a
, b
, sc
.out_z
, sc
.out_do_z
)
961 m
.submodules
.specialcases
= sc
.mod
963 dn
= self
.add_state(FPAddDeNorm(self
.width
))
964 dn
.set_inputs({"a": a
, "b": b
})
965 #dn.set_outputs({"a": a, "b": b}) # XXX outputs same as inputs
966 dn
.mod
.setup(m
, a
, b
, dn
.out_a
, dn
.out_b
)
967 m
.submodules
.denormalise
= dn
.mod
969 if self
.single_cycle
:
970 alm
= self
.add_state(FPAddAlignSingle(self
.width
))
971 alm
.set_inputs({"a": a
, "b": b
})
972 alm
.set_outputs({"a": a
, "b": b
}) # XXX outputs same as inputs
973 alm
.mod
.setup(m
, a
, b
, alm
.out_a
, alm
.out_b
)
975 alm
= self
.add_state(FPAddAlignMulti(self
.width
))
976 alm
.set_inputs({"a": a
, "b": b
})
977 #alm.set_outputs({"a": a, "b": b}) # XXX outputs same as inputs
978 alm
.mod
.setup(m
, a
, b
, alm
.out_a
, alm
.out_b
, alm
.exp_eq
)
979 m
.submodules
.align
= alm
.mod
981 add0
= self
.add_state(FPAddStage0(self
.width
))
982 add0
.setup(m
, alm
.out_a
, alm
.out_b
)
984 add1
= self
.add_state(FPAddStage1(self
.width
))
985 add1
.setup(m
, add0
.out_tot
, add0
.out_z
)
987 n1
= self
.add_state(FPNorm1(self
.width
))
988 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
)
990 rn
= self
.add_state(FPRound(self
.width
))
991 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
)
993 cor
= self
.add_state(FPCorrections(self
.width
))
994 cor
.setup(m
, rn
.out_z
)
996 pa
= self
.add_state(FPPack(self
.width
))
997 pa
.setup(m
, cor
.out_z
)
999 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
))
1001 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
))
1003 with m
.FSM() as fsm
:
1005 for state
in self
.states
:
1006 with m
.State(state
.state_from
):
if __name__ == "__main__":
    # 32-bit adder using the single-cycle alignment stage
    alu = FPADD(width=32, single_cycle=True)
    # expose both operand ports and the result port
    alu_ports = alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()
    main(alu, ports=alu_ports)
1017 # works... but don't use, just do "python fname.py convert -t v"
1018 #print (verilog.convert(alu, ports=[
1019 # ports=alu.in_a.ports() + \
1020 # alu.in_b.ports() + \
1021 # alu.out_z.ports())