# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
9 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
10 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """Base class for one named state of the adder's state machine.

    ``state_from`` is the state name this object handles; the top-level
    adder uses it as the argument to ``m.State(...)``.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """Expose every ``name -> object`` pair in *inputs* as an attribute.

        States later read these as e.g. ``self.a`` / ``self.b``.
        """
        # NOTE(review): the extracted source lost this assignment and the
        # loop body; restored by symmetry with set_outputs -- confirm.
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """Record *outputs* and expose each entry as an attribute."""
        self.outputs = outputs
        # NOTE(review): loop body restored (it was dropped in extraction).
        for name, value in outputs.items():
            setattr(self, name, value)
class FPGetOpMod:
    """Combinatorial module that decodes one incoming operand.

    ``out_decode`` is asserted while both ``ack`` and ``stb`` of the
    operand are high; only then is the raw value decoded into ``out_op``.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = FPNumIn(self.in_op, width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op, out_op, out_decode):
        """Link this module to externally owned inputs and outputs.

        Statements are added to the combinatorial domain of *m*.
        """
        m.d.comb += self.in_op.copy(in_op)
        m.d.comb += out_op.v.eq(self.out_op.v)
        m.d.comb += out_decode.eq(self.out_decode)

    def elaborate(self, platform):
        """Build and return the nMigen module for operand decoding."""
        m = Module()
        m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
        #m.submodules.get_op_in = self.in_op
        m.submodules.get_op_out = self.out_op
        # only decode while the handshake is active
        with m.If(self.out_decode):
            m.d.comb += [
                self.out_op.decode(self.in_op.v),
            ]
        return m
class FPGetOp(FPState):
    """FSM state that waits for one operand and latches its decoded form.

    in_state:  name of this state
    out_state: name of the state entered once the operand is accepted
    in_op:     operand port (provides ``ack``)
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        # NOTE(review): assignment restored; action() reads self.in_op.ack
        # but the extracted source never set the attribute.
        self.in_op = in_op
        self.out_op = FPNumIn(in_op, width)
        self.out_decode = Signal(reset_less=True)

    def action(self, m):
        """Advance to ``out_state`` when decoded, otherwise keep ack high."""
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                # NOTE(review): one list entry was lost in extraction;
                # dropping ack on acceptance is the presumed content.
                self.in_op.ack.eq(0),
                self.out_op.copy(self.mod.out_op)
            ]
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
class FPGetOpB(FPState):
    """FSM state "get_b": fetches operand b, then goes to "special_cases"."""

    def __init__(self, in_b, width):
        FPState.__init__(self, "get_b")
        # NOTE(review): assignment restored; self.in_b is read on the next
        # line and in action() but was never set in the extracted source.
        self.in_b = in_b
        self.b = FPNumIn(self.in_b, width)

    def action(self, m):
        """Delegate to the inherited ``get_op`` helper."""
        self.get_op(m, self.in_b, self.b, "special_cases")
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        ``out_do_z`` is set when one of the special cases produced the
        final result in ``out_z``; it is cleared otherwise.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_z, out_do_z):
        """ links module to inputs and outputs

            (out_z is accepted but currently not wired: see the
            commented-out line below)
        """
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)
        #m.d.comb += out_z.v.eq(self.out_z.v)
        m.d.comb += out_do_z.eq(self.out_do_z)

    def elaborate(self, platform):
        """Build the priority chain of special-case checks."""
        m = Module()

        m.submodules.sc_in_a = self.in_a
        m.submodules.sc_in_b = self.in_b
        m.submodules.sc_out_z = self.out_z

        # NOTE(review): the declarations of s_nomatch and m_match were
        # lost in extraction; plain 1-bit Signals assumed -- confirm.
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)

        m_match = Signal()
        m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)

        # if a is NaN or b is NaN return NaN
        with m.If(self.in_a.is_nan | self.in_b.is_nan):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(self.in_a.is_inf):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.inf(self.in_a.s)
            # if a is inf and signs don't match return NaN
            with m.If(self.in_b.exp_128 & s_nomatch):
                m.d.comb += self.out_z.nan(0)

        # if b is inf return inf
        with m.Elif(self.in_b.is_inf):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.inf(self.in_b.s)

        # NOTE(review): in the three create() calls below the trailing
        # arguments were truncated in the extracted source.  They are
        # filled in as (exponent, m[3:-1]) of the operand being returned;
        # m[3:-1] has the same width as the Cat(...) expressions in the
        # commented-out code above.  Confirm against upstream.

        # if a is zero and b zero return signed-a/b
        with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
                                          self.in_b.e,
                                          self.in_b.m[3:-1])

        # if a is zero return b
        with m.Elif(self.in_a.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
                                          self.in_b.m[3:-1])

        # if b is zero return a
        with m.Elif(self.in_b.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
                                          self.in_a.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.zero(0)

        # Denormalised Number checks
        # (no special case matched: the normal add pipeline must run)
        with m.Else():
            m.d.comb += self.out_do_z.eq(0)

        return m
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM state "special_cases": wraps FPAddSpecialCasesMod.
    """

    def __init__(self, width):
        FPState.__init__(self, "special_cases")
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def action(self, m):
        """Latch the special-case result, or continue to "denormalise"."""
        with m.If(self.out_do_z):
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
            # NOTE(review): transition lost in extraction; "put_z" is the
            # state the top level creates around this object's out_z.
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
class FPAddDeNormMod(FPState):
    """Combinatorial denormalisation of both operands.

    For each operand: if its exponent flag ``exp_n127`` is set, the
    exponent is replaced by ``N126``; otherwise the top mantissa bit is
    set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, out_a, out_b):
        """ links module to inputs and outputs
        """
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)
        m.d.comb += out_a.copy(self.out_a)
        m.d.comb += out_b.copy(self.out_b)

    def elaborate(self, platform):
        """Build and return the denormalisation module."""
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b
        # hmmm, don't like repeating identical code
        m.d.comb += self.out_a.copy(self.in_a)
        with m.If(self.in_a.exp_n127):
            m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
        with m.Else():
            m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit

        m.d.comb += self.out_b.copy(self.in_b)
        with m.If(self.in_b.exp_n127):
            m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit b exponent
        with m.Else():
            m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit

        return m
class FPAddDeNorm(FPState):
    """FSM state "denormalise": registers the denormalised operands."""

    def __init__(self, width):
        FPState.__init__(self, "denormalise")
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def action(self, m):
        """Copy out_a/out_b back into a/b and move on.

        ``self.a`` / ``self.b`` are provided through ``set_inputs``.
        """
        # Denormalised Number checks
        # NOTE(review): transition lost in extraction; "align" is the
        # name both alignment states register under.
        m.next = "align"
        m.d.sync += self.a.copy(self.out_a)
        m.d.sync += self.b.copy(self.out_b)
class FPAddAlignMultiMod(FPState):
    """One alignment step: shift the smaller-exponent operand down once.

    ``exp_eq`` is asserted when both exponents already match.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_a, out_b, exp_eq):
        """ links module to inputs and outputs
        """
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)
        m.d.comb += out_a.copy(self.out_a)
        m.d.comb += out_b.copy(self.out_b)
        m.d.comb += exp_eq.eq(self.exp_eq)

    def elaborate(self, platform):
        """Build and return the multi-cycle alignment module."""
        m = Module()

        #m.submodules.align_in_a = self.in_a
        #m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through unchanged
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
class FPAddAlignMulti(FPState):
    """FSM state "align" (multi-cycle): loops until the exponents match."""

    def __init__(self, width):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def action(self, m):
        """Register this step's result; leave once ``exp_eq`` is set."""
        m.d.sync += self.a.copy(self.out_a)
        m.d.sync += self.b.copy(self.out_b)
        with m.If(self.exp_eq):
            # NOTE(review): body lost in extraction; "add_0" is the name
            # of the add stage that follows alignment.
            m.next = "add_0"
class FPAddAlignSingleMod:
    """Single-cycle operand alignment using one shared variable shifter."""

    def __init__(self, width):
        # NOTE(review): assignment restored; elaborate() reads self.width.
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, out_a, out_b):
        """ links module to inputs and outputs
        """
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)
        m.d.comb += out_a.copy(self.out_a)
        m.d.comb += out_b.copy(self.out_b)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out

        # signed exponent differences (both directions) and the one in use
        ediff = Signal((len(self.in_a.e), True), reset_less=True)
        ediffr = Signal((len(self.in_a.e), True), reset_less=True)
        tdiff = Signal((len(self.in_a.e), True), reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
        m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
        m.d.comb += elz.eq(ediff < 0)
        m.d.comb += egz.eq(ediff > 0)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        # only one shifter (muxed)
        m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # NOTE(review): the tdiff assignments in both branches were lost
        # in extraction; restored because tdiff is otherwise never driven
        # and ediffr otherwise never used.
        # exponent of a greater than b: shift b down
        with m.If(egz):
            m.d.comb += [t_inp.copy(self.in_b),
                         tdiff.eq(ediff),
                         self.out_b.copy(t_out),
                         self.out_b.s.eq(self.in_b.s), # whoops forgot sign
                        ]
        # exponent of b greater than a: shift a down
        with m.Elif(elz):
            m.d.comb += [t_inp.copy(self.in_a),
                         tdiff.eq(ediffr),
                         self.out_a.copy(t_out),
                         self.out_a.s.eq(self.in_a.s), # whoops forgot sign
                        ]
        return m
class FPAddAlignSingle(FPState):
    """FSM state "align" (single-cycle variant)."""

    def __init__(self, width):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def action(self, m):
        """Register the aligned operands and proceed to the add stage."""
        m.d.sync += self.a.copy(self.out_a)
        m.d.sync += self.b.copy(self.out_b)
        # NOTE(review): transition lost in extraction; "add_0" assumed.
        m.next = "add_0"
class FPAddStage0Mod:
    """Adds or subtracts the aligned mantissas into ``out_tot``.

    Same signs: mantissas are added.  Different signs: the smaller
    mantissa is subtracted from the larger and the result takes the sign
    of the larger.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def elaborate(self, platform):
        """Build and return the first add-stage module."""
        m = Module()
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
                     mge.eq(self.in_a.m >= self.in_b.m),
                     am0.eq(Cat(self.in_a.m, 0)),
                     bm0.eq(Cat(self.in_b.m, 0))
                    ]
        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            m.d.comb += [
                self.out_tot.eq(am0 + bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # a mantissa greater than b, use a
        with m.Elif(mge):
            m.d.comb += [
                self.out_tot.eq(am0 - bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # b mantissa greater than a, use b
        with m.Else():
            m.d.comb += [
                self.out_tot.eq(bm0 - am0),
                self.out_z.s.eq(self.in_b.s)
            ]
        return m
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width):
        FPState.__init__(self, "add_0")
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self.mod

        m.d.comb += self.mod.in_a.copy(in_a)
        m.d.comb += self.mod.in_b.copy(in_b)

    def action(self, m):
        """Register the stage-0 result and proceed to "add_1"."""
        # NOTE(review): transition lost in extraction; "add_1" is the
        # name of the following stage.
        m.next = "add_1"
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.d.sync += self.out_tot.eq(self.mod.out_tot)
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)

        The top bit of in_tot (in_tot[-1]) selects between shifting the
        total down by one (exponent + 1) or taking it as-is.  The low
        bits become the guard / round / sticky overflow flags.
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """Build and return the second add-stage module."""
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        # tot[27] gets set when the sum overflows. shift result down
        with m.If(self.in_tot[-1]):
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[4:]),
                self.out_of.m0.eq(self.in_tot[4]),
                self.out_of.guard.eq(self.in_tot[3]),
                self.out_of.round_bit.eq(self.in_tot[2]),
                self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
                self.out_z.e.eq(self.in_z.e + 1)
            ]
        # tot[27] zero case: no shift, exponent unchanged
        with m.Else():
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[3:]),
                self.out_of.m0.eq(self.in_tot[3]),
                self.out_of.guard.eq(self.in_tot[2]),
                self.out_of.round_bit.eq(self.in_tot[1]),
                self.out_of.sticky.eq(self.in_tot[0])
            ]
        return m
class FPAddStage1(FPState):
    """FSM state "add_1": registers stage-1 output and strobes the normaliser."""

    def __init__(self, width):
        FPState.__init__(self, "add_1")
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self.mod

        m.d.comb += self.mod.in_z.copy(in_z)
        m.d.comb += self.mod.in_tot.eq(in_tot)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

    def action(self, m):
        """Latch results, raise ``norm_stb`` and go to "normalise_1"."""
        m.submodules.add1_out_overflow = self.out_of
        m.d.sync += self.out_of.copy(self.mod.out_of)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
class FPNorm1Mod:
    """Normalisation logic: adjusts mantissa/exponent of the sum.

    ``in_select`` chooses between the fresh input (in_z/in_of) and the
    fed-back intermediate (temp_z/temp_of).  ``out_norm`` signals that
    another normalisation pass is required.
    """

    def __init__(self, width, single_cycle=True):
        self.single_cycle = single_cycle
        # NOTE(review): assignment restored; elaborate() reads self.width.
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """Build and return the normalisation module."""
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        # NOTE(review): declaration restored; in_of is used below but its
        # definition was lost in extraction.
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        ediff_n126 = Signal((len(in_z.e), True), reset_less=True)
        #smr = FPNumShiftMultiRight(in_z, ediff_n126, in_z.m_width+2)
        #m.submodules.norm1_smr = smr

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        if not self.single_cycle:
            m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        else:
            m.d.comb += self.out_norm.eq(increase) # loop-end condition
        # decrease exponent
        with m.If(decrease):
            if not self.single_cycle:
                m.d.comb += [
                    self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                    self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                    self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                    self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                    self.out_of.round_bit.eq(0),        # reset round bit
                    self.out_of.m0.eq(in_of.guard),
                ]
            else:
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(in_z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                # NOTE(review): the Mux's third argument was truncated in
                # extraction; exp_sub_n126 restored so that the result is
                # the smaller of the two, as the comment above requires.
                limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                             in_z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                    self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    self.out_of.guard.eq(temp_s[1]),     # guard
                    self.out_of.round_bit.eq(temp_s[0]), # round
                ]
        # increase exponent
        with m.Elif(increase):
            if self.single_cycle:
                m.d.comb += [
                    self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                    self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                    self.out_of.guard.eq(in_z.m[0]),
                    self.out_of.m0.eq(in_z.m[1]),
                    self.out_of.round_bit.eq(in_of.guard),
                    self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
                ]
            else:
                # NOTE(review): the `else:` header is inferred and the
                # statements that followed the line below were lost in
                # extraction and are NOT reconstructed here.  Recover
                # them from upstream before relying on this branch.
                m.d.comb += [
                    ediff_n126.eq(in_z.N126 - in_z.e),
                ]

        return m
class FPNorm1(FPState):
    """FSM state "normalise_1": iterates FPNorm1Mod until done.

    ``in_accept`` (stb and not ack) selects the fresh input on the first
    pass; later passes feed temp_z/temp_of back into the module.
    """

    def __init__(self, width):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1Mod(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self.mod

        m.d.comb += self.mod.in_z.copy(in_z)
        m.d.comb += self.mod.in_of.copy(in_of)

        m.d.comb += self.mod.in_select.eq(self.in_accept)
        m.d.comb += self.mod.temp_z.copy(self.temp_z)
        m.d.comb += self.mod.temp_of.copy(self.temp_of)

        m.d.comb += self.out_z.copy(self.mod.out_z)
        m.d.comb += self.out_norm.eq(self.mod.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """Run one normalisation pass; go to "round" when finished."""
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.copy(self.mod.out_of)
        m.d.sync += self.temp_z.copy(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                # NOTE(review): this branch's body was lost in
                # extraction; acknowledging the input is presumed.
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            # NOTE(review): transition lost in extraction; "round" is
            # the name of the following state.
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPRoundMod:
    """Combinatorial rounding: optionally increments the mantissa."""

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def elaborate(self, platform):
        """Build and return the rounding module."""
        m = Module()
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_roundz):
            m.d.comb += self.out_z.m.eq(self.in_z.m + 1) # mantissa rounds up
            with m.If(self.in_z.m == self.in_z.m1s): # all 1s
                m.d.comb += self.out_z.e.eq(self.in_z.e + 1) # exponent up
        return m
class FPRound(FPState):
    """FSM state "round": registers the rounded result."""

    def __init__(self, width):
        FPState.__init__(self, "round")
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self.mod

        m.d.comb += self.mod.in_z.copy(in_z)
        m.d.comb += self.mod.in_roundz.eq(roundz)

    def action(self, m):
        """Latch the module output and proceed to "corrections"."""
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """Post-rounding correction: replaces the exponent of a denormalised
    result with ``N127``."""

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def elaborate(self, platform):
        """Build and return the corrections module."""
        m = Module()
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)

        # (disabled here: overflow / create handling)
        #        with m.If(self.in_z.is_overflowed):
        #            m.d.comb += self.out_z.inf(self.in_z.s)
        #            m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e, self.in_z.m)

        return m
class FPCorrections(FPState):
    """FSM state "corrections": registers the corrected result."""

    def __init__(self, width):
        FPState.__init__(self, "corrections")
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self.mod
        m.d.comb += self.mod.in_z.copy(in_z)

    def action(self, m):
        """Latch the module output and proceed to packing."""
        m.d.sync += self.out_z.copy(self.mod.out_z)
        # NOTE(review): transition lost in extraction; "pack" is the
        # name of the following state.
        m.next = "pack"
class FPPackMod:
    """Packs sign/exponent/mantissa into the output value.

    An overflowed input is replaced by infinity of the same sign.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def elaborate(self, platform):
        """Build and return the packing module."""
        m = Module()
        m.submodules.pack_in_z = self.in_z
        with m.If(self.in_z.is_overflowed):
            m.d.comb += self.out_z.inf(self.in_z.s)
        with m.Else():
            m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e, self.in_z.m)
        return m
class FPPack(FPState):
    """FSM state "pack": registers the packed result value."""

    def __init__(self, width):
        FPState.__init__(self, "pack")
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self.mod
        m.d.comb += self.mod.in_z.copy(in_z)

    def action(self, m):
        """Latch the packed value and proceed to "pack_put_z"."""
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """FSM state that presents a result on the output port.

    state: name of this state
    in_z:  source of the value (its ``v`` is copied)
    out_z: output port with ``v`` / ``stb`` / ``ack``
    """

    def __init__(self, state, in_z, out_z):
        FPState.__init__(self, state)
        # NOTE(review): both assignments restored; action() reads these
        # attributes but the extracted source never set them.
        self.in_z = in_z
        self.out_z = out_z

    def action(self, m):
        """Drive the output value and strobe until it is acknowledged."""
        m.d.sync += [
            self.out_z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.stb & self.out_z.ack):
            m.d.sync += self.out_z.stb.eq(0)
            # NOTE(review): transition lost in extraction; returning to
            # the first state ("get_a") is presumed -- confirm.
            m.next = "get_a"
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
class FPADD:
    """Top-level floating-point adder built as a finite state machine.

    width:        operand width passed to every stage
    single_cycle: selects the single-cycle alignment stage instead of the
                  multi-cycle one
    """

    def __init__(self, width, single_cycle=False):
        # NOTE(review): self.width and self.states restored; both are
        # read below but their assignments were lost in extraction.
        self.width = width
        self.single_cycle = single_cycle

        self.in_a = FPOp(width)
        self.in_b = FPOp(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """Register *state* with the FSM and return it.

        The return value is required: get_fragment() immediately uses
        the result of every add_state() call.
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.in_a = self.in_a
        m.submodules.in_b = self.in_b
        m.submodules.out_z = self.out_z

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      self.in_a, self.width))
        # NOTE(review): `a` / `b` bindings restored (used by every later
        # stage); presumed to be the decoded operands -- confirm.
        a = geta.out_op
        geta.mod.setup(m, self.in_a, geta.out_op, geta.out_decode)
        m.submodules.get_a = geta.mod

        getb = self.add_state(FPGetOp("get_b", "special_cases",
                                      self.in_b, self.width))
        b = getb.out_op
        getb.mod.setup(m, self.in_b, getb.out_op, getb.out_decode)
        m.submodules.get_b = getb.mod

        sc = self.add_state(FPAddSpecialCases(self.width))
        sc.mod.setup(m, a, b, sc.out_z, sc.out_do_z)
        m.submodules.specialcases = sc.mod

        dn = self.add_state(FPAddDeNorm(self.width))
        dn.set_inputs({"a": a, "b": b})
        #dn.set_outputs({"a": a, "b": b}) # XXX outputs same as inputs
        dn.mod.setup(m, a, b, dn.out_a, dn.out_b)
        m.submodules.denormalise = dn.mod

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width))
            alm.set_inputs({"a": a, "b": b})
            alm.set_outputs({"a": a, "b": b}) # XXX outputs same as inputs
            alm.mod.setup(m, a, b, alm.out_a, alm.out_b)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width))
            alm.set_inputs({"a": a, "b": b})
            #alm.set_outputs({"a": a, "b": b}) # XXX outputs same as inputs
            alm.mod.setup(m, a, b, alm.out_a, alm.out_b, alm.exp_eq)
        m.submodules.align = alm.mod

        add0 = self.add_state(FPAddStage0(self.width))
        add0.setup(m, alm.out_a, alm.out_b)

        add1 = self.add_state(FPAddStage1(self.width))
        add1.setup(m, add0.out_tot, add0.out_z)

        n1 = self.add_state(FPNorm1(self.width))
        n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb)

        rn = self.add_state(FPRound(self.width))
        rn.setup(m, n1.out_z, n1.out_roundz)

        cor = self.add_state(FPCorrections(self.width))
        cor.setup(m, rn.out_z)

        pa = self.add_state(FPPack(self.width))
        pa.setup(m, cor.out_z)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z))

        # NOTE(review): the FSM wrapper, the per-state action() call and
        # the final return were lost in extraction and are restored here.
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
if __name__ == "__main__":
    # build a 32-bit single-cycle-align adder and hand it to the nMigen CLI
    alu = FPADD(width=32, single_cycle=True)
    main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())

945 # works... but don't use, just do "python fname.py convert -t v"
946 #print (verilog.convert(alu, ports=[
947 # ports=alu.in_a.ports() + \
948 # alu.in_b.ports() + \