1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
9 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
10 from fpbase
import MultiShiftRMerge
, Trigger
11 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """ Base class for one named state of the adder FSM.

        state_from is the name this state is registered under (it is the
        argument later handed to m.State()).
    """
    # NOTE(review): rebuilt from an extract with dropped lines.  The two
    # setattr() loop bodies and the `self.inputs` assignment are inferred
    # (the loops were left without a body; set_outputs stores its dict so
    # set_inputs is made to agree) -- confirm against upstream.

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the inputs dict and exposes each entry as an attribute
        """
        self.inputs = inputs                # (inferred)
        for name, value in inputs.items():
            setattr(self, name, value)      # (inferred)

    def set_outputs(self, outputs):
        """ stores the outputs dict and exposes each entry as an attribute
        """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)      # (inferred)
class FPGetOpMod:
    """ Passes a handshaked operand through: out_decode is ack & stb of
        in_op, and while it is set out_op follows in_op.v.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.  The class
    # header (name taken from the FPGetOpMod(width) call in FPGetOp; base
    # class unknown, none assumed), `m = Module()`, the list brackets with
    # their `m.d.comb` domain, and `return m` are inferred -- confirm
    # against upstream.

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()                                    # (inferred)
        m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
        m.submodules.get_op_in = self.in_op
        #m.submodules.get_op_out = self.out_op
        with m.If(self.out_decode):
            m.d.comb += [                               # (inferred, domain assumed)
                self.out_op.eq(self.in_op.v),
            ]
        return m                                        # (inferred)
# FPGetOp: FSM state wrapping an FPGetOpMod.  setup() registers self.mod as a
# submodule named after self.state_from, copies in_op into the module and
# mirrors the module's out_decode.  The trailing statements move to
# self.out_state when out_decode is set.
# NOTE(review): original lines 47-49, 54, 57, 60, 65-66, 69-70 and 72-73 are
# absent from this extract (no assignment to self.in_op is visible although
# it is read at the end; the method header and list brackets around the last
# statements are also gone).  Restore from upstream before changing logic.
46 class FPGetOp(FPState
):
50 def __init__(self
, in_state
, out_state
, in_op
, width
):
51 FPState
.__init
__(self
, in_state
)
52 self
.out_state
= out_state
53 self
.mod
= FPGetOpMod(width
)
55 self
.out_op
= Signal(width
)
56 self
.out_decode
= Signal(reset_less
=True)
58 def setup(self
, m
, in_op
):
59 """ links module to inputs and outputs
61 setattr(m
.submodules
, self
.state_from
, self
.mod
)
62 m
.d
.comb
+= self
.mod
.in_op
.copy(in_op
)
63 #m.d.comb += self.out_op.eq(self.mod.out_op)
64 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
67 with m
.If(self
.out_decode
):
68 m
.next
= self
.out_state
71 self
.out_op
.eq(self
.mod
.out_op
)
74 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
class FPGet2OpMod(Trigger):
    """ Decodes two raw operands (in_op1, in_op2) into FPNumIn form
        (out_op1, out_op2) while the inherited trigger signal is set.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.  The list
    # brackets (and their `m.d.comb` domain) and `return m` are inferred --
    # confirm against upstream.

    def __init__(self, width):
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        # the Trigger base class builds the module; this adds the decoders
        m = Trigger.elaborate(self, platform)
        #m.submodules.get_op_in = self.in_op
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2
        with m.If(self.trigger):
            m.d.comb += [                               # (inferred, domain assumed)
                self.out_op1.decode(self.in_op1),
                self.out_op2.decode(self.in_op2),
            ]
        return m                                        # (inferred)
# FPGet2Op: FSM state wrapping an FPGet2OpMod (registered as submodule
# "get_ops").  setup() feeds the two raw operands and the strobe into the
# module and exposes its ack / trigger as out_ack / out_decode; in_ack is
# driven from the module's ack as well.  The trailing statements advance to
# self.out_state and copy the decoded operands when out_decode is set.
# NOTE(review): original lines 99-101, 106-107, 113, 116, 124-125, 128-129 and
# 134-135 are absent from this extract (the in_op1/in_op2 constructor
# arguments are not visibly stored, and the method header and list brackets
# around the final statements are missing).  Restore from upstream before
# changing logic.
98 class FPGet2Op(FPState
):
102 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
):
103 FPState
.__init
__(self
, in_state
)
104 self
.out_state
= out_state
105 self
.mod
= FPGet2OpMod(width
)
108 self
.out_op1
= FPNumIn(None, width
)
109 self
.out_op2
= FPNumIn(None, width
)
110 self
.in_stb
= Signal(reset_less
=True)
111 self
.out_ack
= Signal(reset_less
=True)
112 self
.out_decode
= Signal(reset_less
=True)
114 def setup(self
, m
, in_op1
, in_op2
, in_stb
, in_ack
):
115 """ links module to inputs and outputs
117 m
.submodules
.get_ops
= self
.mod
118 m
.d
.comb
+= self
.mod
.in_op1
.eq(in_op1
)
119 m
.d
.comb
+= self
.mod
.in_op2
.eq(in_op2
)
120 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
121 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
122 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
123 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
126 with m
.If(self
.out_decode
):
127 m
.next
= self
.out_state
130 #self.out_op1.v.eq(self.mod.out_op1.v),
131 #self.out_op2.v.eq(self.mod.out_op2.v),
132 self
.out_op1
.copy(self
.mod
.out_op1
),
133 self
.out_op2
.copy(self
.mod
.out_op2
)
136 m
.d
.sync
+= self
.mod
.ack
.eq(1)
# FPAddSpecialCasesMod: combinatorial detection of the add special cases.
# The visible branches set out_do_z and drive out_z for: either operand NaN
# (NaN result); a is inf (inf with a's sign, or NaN when b.exp_128 and the
# signs differ); b is inf; both zero; a zero; b zero; and a == -b (same
# exponent and mantissa, different sign -> +zero).  The final statement
# clears out_do_z.
# NOTE(review): original lines 143-144, 150, 152-153, 157-158, 160-161, 163,
# 168, 170-171, 209-211, 216-217, 222-223, 228, 230 and 232 onward are absent
# from this extract.  In particular the declarations of m, s_nomatch and
# m_match are not visible, and the three out_z.create(...) calls (original
# lines 208, 215, 221) stop mid-argument-list.  Restore from upstream before
# changing logic.
139 class FPAddSpecialCasesMod
:
140 """ special cases: NaNs, infs, zeros, denormalised
141 NOTE: some of these are unique to add. see "Special Operations"
142 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
145 def __init__(self
, width
):
146 self
.in_a
= FPNumBase(width
)
147 self
.in_b
= FPNumBase(width
)
148 self
.out_z
= FPNumOut(width
, False)
149 self
.out_do_z
= Signal(reset_less
=True)
151 def elaborate(self
, platform
):
154 m
.submodules
.sc_in_a
= self
.in_a
155 m
.submodules
.sc_in_b
= self
.in_b
156 m
.submodules
.sc_out_z
= self
.out_z
159 m
.d
.comb
+= s_nomatch
.eq(self
.in_a
.s
!= self
.in_b
.s
)
162 m
.d
.comb
+= m_match
.eq(self
.in_a
.m
== self
.in_b
.m
)
164 # if a is NaN or b is NaN return NaN
165 with m
.If(self
.in_a
.is_nan | self
.in_b
.is_nan
):
166 m
.d
.comb
+= self
.out_do_z
.eq(1)
167 m
.d
.comb
+= self
.out_z
.nan(0)
169 # XXX WEIRDNESS for FP16 non-canonical NaN handling
172 ## if a is zero and b is NaN return -b
173 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
174 # m.d.comb += self.out_do_z.eq(1)
175 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
177 ## if b is zero and a is NaN return -a
178 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
179 # m.d.comb += self.out_do_z.eq(1)
180 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
182 ## if a is -zero and b is NaN return -b
183 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
184 # m.d.comb += self.out_do_z.eq(1)
185 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
187 ## if b is -zero and a is NaN return -a
188 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
189 # m.d.comb += self.out_do_z.eq(1)
190 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
192 # if a is inf return inf (or NaN)
193 with m
.Elif(self
.in_a
.is_inf
):
194 m
.d
.comb
+= self
.out_do_z
.eq(1)
195 m
.d
.comb
+= self
.out_z
.inf(self
.in_a
.s
)
196 # if a is inf and signs don't match return NaN
197 with m
.If(self
.in_b
.exp_128
& s_nomatch
):
198 m
.d
.comb
+= self
.out_z
.nan(0)
200 # if b is inf return inf
201 with m
.Elif(self
.in_b
.is_inf
):
202 m
.d
.comb
+= self
.out_do_z
.eq(1)
203 m
.d
.comb
+= self
.out_z
.inf(self
.in_b
.s
)
205 # if a is zero and b zero return signed-a/b
206 with m
.Elif(self
.in_a
.is_zero
& self
.in_b
.is_zero
):
207 m
.d
.comb
+= self
.out_do_z
.eq(1)
208 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
& self
.in_b
.s
,
212 # if a is zero return b
213 with m
.Elif(self
.in_a
.is_zero
):
214 m
.d
.comb
+= self
.out_do_z
.eq(1)
215 m
.d
.comb
+= self
.out_z
.create(self
.in_b
.s
, self
.in_b
.e
,
218 # if b is zero return a
219 with m
.Elif(self
.in_b
.is_zero
):
220 m
.d
.comb
+= self
.out_do_z
.eq(1)
221 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
, self
.in_a
.e
,
224 # if a equal to -b return zero (+ve zero)
225 with m
.Elif(s_nomatch
& m_match
& (self
.in_a
.e
== self
.in_b
.e
)):
226 m
.d
.comb
+= self
.out_do_z
.eq(1)
227 m
.d
.comb
+= self
.out_z
.zero(0)
229 # Denormalised Number checks
231 m
.d
.comb
+= self
.out_do_z
.eq(0)
# FPID (name taken from its use as a base class and from the
# FPID.__init__(self, id_wid) calls elsewhere in this file): carries an
# optional operation identifier.  in_mid / out_mid are id_wid-bit signals;
# the trailing statement registers in_mid into out_mid on the sync domain
# when self.id_wid is not None.
# NOTE(review): the class header (original line 236) and original lines
# 238-239 and 242-246 are absent from this extract -- that includes the
# assignment of self.id_wid, whatever guards the two Signal creations, and
# the header of the method containing the final `if`.  Restore from upstream
# before changing logic.
237 def __init__(self
, id_wid
):
240 self
.in_mid
= Signal(id_wid
, reset_less
=True)
241 self
.out_mid
= Signal(id_wid
, reset_less
=True)
247 if self
.id_wid
is not None:
248 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
# FPAddSpecialCases: the "special_cases" FSM state.  setup() registers an
# FPAddSpecialCasesMod as submodule "specialcases", copies in_a / in_b into
# it, mirrors its out_do_z and (when ids are enabled) drives self.in_mid.
# When out_do_z is set the module's packed result out_z.v is registered;
# the last visible transition goes to "denormalise".
# NOTE(review): original lines 255-256, 263, 266, 274-276, 279-280 and 282-283
# are absent from this extract (the header of the method holding the final
# statements, the transition taken when out_do_z is set, and the header of
# the branch that selects "denormalise").  Restore from upstream before
# changing logic.
251 class FPAddSpecialCases(FPState
, FPID
):
252 """ special cases: NaNs, infs, zeros, denormalised
253 NOTE: some of these are unique to add. see "Special Operations"
254 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
257 def __init__(self
, width
, id_wid
):
258 FPState
.__init
__(self
, "special_cases")
259 FPID
.__init
__(self
, id_wid
)
260 self
.mod
= FPAddSpecialCasesMod(width
)
261 self
.out_z
= FPNumOut(width
, False)
262 self
.out_do_z
= Signal(reset_less
=True)
264 def setup(self
, m
, in_a
, in_b
, in_mid
):
265 """ links module to inputs and outputs
267 m
.submodules
.specialcases
= self
.mod
268 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
269 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
270 #m.d.comb += self.out_z.v.eq(self.mod.out_z.v)
271 m
.d
.comb
+= self
.out_do_z
.eq(self
.mod
.out_do_z
)
272 if self
.in_mid
is not None:
273 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
277 with m
.If(self
.out_do_z
):
278 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
281 m
.next
= "denormalise"
class FPAddDeNormMod(FPState):
    """ Denormalised-number adjustment for both operands.

        Each output starts as a copy of its input; when the input exponent
        is flagged exp_n127 the exponent is limited to N126, otherwise the
        top mantissa bit is set.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.
    # `m = Module()`, the two `with m.Else():` headers and `return m` are
    # inferred (the "set top mantissa bit" statements are assumed to be the
    # alternative to limiting the exponent) -- confirm against upstream.

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def elaborate(self, platform):
        m = Module()                                    # (inferred)
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b
        # hmmm, don't like repeating identical code
        m.d.comb += self.out_a.copy(self.in_a)
        with m.If(self.in_a.exp_n127):
            m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
        with m.Else():                                  # (inferred)
            m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit

        m.d.comb += self.out_b.copy(self.in_b)
        with m.If(self.in_b.exp_n127):
            m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit b exponent
        with m.Else():                                  # (inferred)
            m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit

        return m                                        # (inferred)
# FPAddDeNorm: the "denormalise" FSM state.  setup() registers an
# FPAddDeNormMod as submodule "denormalise", copies in_a / in_b into it and
# (when ids are enabled) drives self.in_mid.  The trailing statements
# register the module's out_a / out_b on the sync domain.
# NOTE(review): original lines 315, 322, 325, 331-333, 335 and 338-339 are
# absent from this extract (including the header of the method holding the
# final statements and, presumably, the state transition).  Restore from
# upstream before changing logic.
314 class FPAddDeNorm(FPState
, FPID
):
316 def __init__(self
, width
, id_wid
):
317 FPState
.__init
__(self
, "denormalise")
318 FPID
.__init
__(self
, id_wid
)
319 self
.mod
= FPAddDeNormMod(width
)
320 self
.out_a
= FPNumBase(width
)
321 self
.out_b
= FPNumBase(width
)
323 def setup(self
, m
, in_a
, in_b
, in_mid
):
324 """ links module to inputs and outputs
326 m
.submodules
.denormalise
= self
.mod
327 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
328 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
329 if self
.in_mid
is not None:
330 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
334 # Denormalised Number checks
336 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
337 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
class FPAddAlignMultiMod(FPState):
    """ One step of exponent alignment.

        Outputs default to copies of the inputs.  If a's exponent is the
        larger, out_b is in_b shifted down; if b's is the larger, out_a is
        in_a shifted down; if they are equal exp_eq is raised.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.
    # `m = Module()`, the If/Elif/Else headers (chosen from the adjacent
    # comments and the agtb/altb signals) and `return m` are inferred --
    # confirm against upstream.

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()                                    # (inferred)

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through unchanged
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):                                # (inferred)
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):                              # (inferred)
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():                                  # (inferred)
            m.d.comb += self.exp_eq.eq(1)
        return m                                        # (inferred)
# FPAddAlignMulti: the "align" FSM state built on FPAddAlignMultiMod
# (submodule "align").  setup() copies in_a / in_b into the module, mirrors
# its exp_eq and (when ids are enabled) drives self.in_mid.  The trailing
# statements register the module's out_a / out_b and then test exp_eq.
# NOTE(review): original lines 383, 391, 394, 403-405 and 409-411 are absent
# from this extract; the body of the final `with m.If(self.exp_eq)` and the
# header of the method containing it are not visible.  Restore from upstream
# before changing logic.
382 class FPAddAlignMulti(FPState
, FPID
):
384 def __init__(self
, width
, id_wid
):
385 FPID
.__init
__(self
, id_wid
)
386 FPState
.__init
__(self
, "align")
387 self
.mod
= FPAddAlignMultiMod(width
)
388 self
.out_a
= FPNumIn(None, width
)
389 self
.out_b
= FPNumIn(None, width
)
390 self
.exp_eq
= Signal(reset_less
=True)
392 def setup(self
, m
, in_a
, in_b
, in_mid
):
393 """ links module to inputs and outputs
395 m
.submodules
.align
= self
.mod
396 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
397 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
398 #m.d.comb += self.out_a.copy(self.mod.out_a)
399 #m.d.comb += self.out_b.copy(self.mod.out_b)
400 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
401 if self
.in_mid
is not None:
402 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
406 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
407 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
408 with m
.If(self
.exp_eq
):
class FPAddAlignSingleMod:
    """ Single-cycle exponent alignment of A against B (or B against A). """
    # NOTE(review): rebuilt from an extract with dropped lines.  The
    # `self.width` assignment (it is read in elaborate), `m = Module()`,
    # the If/Elif headers (chosen from the adjacent comments and the
    # egz/elz signals), the two `tdiff.eq(...)` entries (tdiff is otherwise
    # never driven and ediff/ediffr otherwise never read) and `return m`
    # are inferred -- confirm against upstream.

    def __init__(self, width):
        self.width = width                              # (inferred)
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()                                    # (inferred)

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.in_a.e), True)
        msr = MultiShiftRMerge(self.in_a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
        m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
        m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
        m.d.comb += egz.eq(self.in_a.e > self.in_b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # exponent of a greater than b: shift b down
        with m.If(egz):                                 # (inferred)
            m.d.comb += [t_inp.copy(self.in_b),
                         tdiff.eq(ediff),               # (inferred)
                         self.out_b.copy(t_out),
                         self.out_b.s.eq(self.in_b.s), # whoops forgot sign
                        ]
        # exponent of b greater than a: shift a down
        with m.Elif(elz):                               # (inferred)
            m.d.comb += [t_inp.copy(self.in_a),
                         tdiff.eq(ediffr),              # (inferred)
                         self.out_a.copy(t_out),
                         self.out_a.s.eq(self.in_a.s), # whoops forgot sign
                        ]
        return m                                        # (inferred)
# FPAddAlignSingle: the "align" FSM state built on FPAddAlignSingleMod
# (submodule "align").  setup() copies in_a / in_b into the module and (when
# ids are enabled) drives self.in_mid.  The trailing statements register the
# module's out_a / out_b on the sync domain.
# NOTE(review): original lines 487, 494, 497, 503-505 and 509-511 are absent
# from this extract (including the header of the method holding the final
# statements and, presumably, the state transition).  Restore from upstream
# before changing logic.
486 class FPAddAlignSingle(FPState
, FPID
):
488 def __init__(self
, width
, id_wid
):
489 FPState
.__init
__(self
, "align")
490 FPID
.__init
__(self
, id_wid
)
491 self
.mod
= FPAddAlignSingleMod(width
)
492 self
.out_a
= FPNumIn(None, width
)
493 self
.out_b
= FPNumIn(None, width
)
495 def setup(self
, m
, in_a
, in_b
, in_mid
):
496 """ links module to inputs and outputs
498 m
.submodules
.align
= self
.mod
499 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
500 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
501 if self
.in_mid
is not None:
502 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
506 # NOTE: could be done as comb
507 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
508 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
class FPAddStage0Mod:
    """ Mantissa add/subtract for two aligned operands.

        out_z takes a's exponent.  Same-sign operands are added; otherwise
        the smaller mantissa is subtracted from the larger and the result
        takes the sign of the larger.  The (one bit wider) sum is out_tot.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.
    # `m = Module()`, the list brackets, the If/Elif/Else headers (chosen
    # from the adjacent comments and the seq/mge signals) and `return m`
    # are inferred -- confirm against upstream.

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def elaborate(self, platform):
        m = Module()                                    # (inferred)
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
                     mge.eq(self.in_a.m >= self.in_b.m),
                     am0.eq(Cat(self.in_a.m, 0)),
                     bm0.eq(Cat(self.in_b.m, 0))
                    ]
        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):                                 # (inferred)
            m.d.comb += [
                self.out_tot.eq(am0 + bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # a mantissa greater than b, use a
        with m.Elif(mge):                               # (inferred)
            m.d.comb += [
                self.out_tot.eq(am0 - bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # b mantissa greater than a, use b
        with m.Else():                                  # (inferred)
            m.d.comb += [
                self.out_tot.eq(bm0 - am0),
                self.out_z.s.eq(self.in_b.s)
            ]
        return m                                        # (inferred)
# FPAddStage0: the "add_0" FSM state built on FPAddStage0Mod (submodule
# "add0").  setup() copies in_a / in_b into the module and (when ids are
# enabled) drives self.in_mid.  The trailing statements register the
# module's out_z and out_tot on the sync domain.
# NOTE(review): original lines 564-565, 572, 575, 581-583 and 587-589 are
# absent from this extract (including the header of the method holding the
# final statements and, presumably, the state transition).  Restore from
# upstream before changing logic.
560 class FPAddStage0(FPState
, FPID
):
561 """ First stage of add. covers same-sign (add) and subtract
562 special-casing when mantissas are greater or equal, to
563 give greatest accuracy.
566 def __init__(self
, width
, id_wid
):
567 FPState
.__init
__(self
, "add_0")
568 FPID
.__init
__(self
, id_wid
)
569 self
.mod
= FPAddStage0Mod(width
)
570 self
.out_z
= FPNumBase(width
, False)
571 self
.out_tot
= Signal(self
.out_z
.m_width
+ 4, reset_less
=True)
573 def setup(self
, m
, in_a
, in_b
, in_mid
):
574 """ links module to inputs and outputs
576 m
.submodules
.add0
= self
.mod
577 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
578 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
579 if self
.in_mid
is not None:
580 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
584 # NOTE: these could be done as combinatorial (merge add0+add1)
585 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
586 m
.d
.sync
+= self
.out_tot
.eq(self
.mod
.out_tot
)
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)

        When the top bit of in_tot is set the mantissa is taken from
        in_tot[4:], the exponent is incremented and the two lowest bits
        are merged into sticky; otherwise the mantissa is in_tot[3:].
    """
    # NOTE(review): rebuilt from an extract with dropped lines.
    # `m = Module()`, the list brackets, the `with m.Else():` header and
    # `return m` are inferred -- confirm against upstream.

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()                                    # (inferred)
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        # tot[27] gets set when the sum overflows. shift result down
        with m.If(self.in_tot[-1]):
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[4:]),
                self.out_of.m0.eq(self.in_tot[4]),
                self.out_of.guard.eq(self.in_tot[3]),
                self.out_of.round_bit.eq(self.in_tot[2]),
                self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
                self.out_z.e.eq(self.in_z.e + 1)
            ]
        # top bit clear: no shift needed
        with m.Else():                                  # (inferred)
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[3:]),
                self.out_of.m0.eq(self.in_tot[3]),
                self.out_of.guard.eq(self.in_tot[2]),
                self.out_of.round_bit.eq(self.in_tot[1]),
                self.out_of.sticky.eq(self.in_tot[0])
            ]
        return m                                        # (inferred)
# FPAddStage1: the "add_1" FSM state built on FPAddStage1Mod (submodule
# "add1"; the out_of Overflow is registered as "add1_out_overflow").
# setup() copies in_z / in_tot into the module, clears norm_stb by default on
# the sync domain and (when ids are enabled) drives self.in_mid.  The trailing
# statements register the module's out_of / out_z, raise norm_stb and move to
# "normalise_1".
# NOTE(review): original lines 632, 640, 643, 646, 649, 651, 654-656 and
# 661-662 are absent from this extract (including the header of the method
# holding the final statements).  Restore from upstream before changing
# logic.
631 class FPAddStage1(FPState
, FPID
):
633 def __init__(self
, width
, id_wid
):
634 FPState
.__init
__(self
, "add_1")
635 FPID
.__init
__(self
, id_wid
)
636 self
.mod
= FPAddStage1Mod(width
)
637 self
.out_z
= FPNumBase(width
, False)
638 self
.out_of
= Overflow()
639 self
.norm_stb
= Signal()
641 def setup(self
, m
, in_tot
, in_z
, in_mid
):
642 """ links module to inputs and outputs
644 m
.submodules
.add1
= self
.mod
645 m
.submodules
.add1_out_overflow
= self
.out_of
647 m
.d
.comb
+= self
.mod
.in_z
.copy(in_z
)
648 m
.d
.comb
+= self
.mod
.in_tot
.eq(in_tot
)
650 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
652 if self
.in_mid
is not None:
653 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
657 m
.d
.sync
+= self
.out_of
.copy(self
.mod
.out_of
)
658 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
659 m
.d
.sync
+= self
.norm_stb
.eq(1)
660 m
.next
= "normalise_1"
class FPNorm1ModSingle:
    """ Single-cycle normalisation.

        decrease: mantissa MSB is zero and the exponent may still go down;
        the mantissa is shifted up by the (limited) leading-zero count and
        the exponent reduced by the same amount.
        increase: exponent is below N126; the mantissa is shifted down
        through the multi-shifter and the exponent raised to match.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.  Inferred:
    # `self.width` (read in elaborate), `m = Module()`, the local
    # `in_of = Overflow()`, the `with m.If(decrease):` header, the final
    # Mux operand (so that limclz is the smaller of pe.o and exp_sub_n126,
    # as the comment above it requires), the list brackets, the trailing
    # `in_z.m` in the second Cat (by analogy with the first), the
    # `msr.inp` connection and `return m` -- confirm against upstream.
    #
    # FIX: in the increase branch the m0/guard/round/sticky outputs read
    # temp_s, which is only driven in the decrease branch; they now read
    # the shifter output msr.m, matching `out_z.m.eq(msr.m[3:])`.

    def __init__(self, width):
        self.width = width                              # (inferred)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, out_z):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += out_z.copy(self.out_z)

    def elaborate(self, platform):
        m = Module()                                    # (inferred)

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()                              # (inferred)
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.copy(self.in_z)
        m.d.comb += in_of.copy(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):                            # (inferred)
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)             # (inferred)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),     # guard
                self.out_of.round_bit.eq(temp_s[0]), # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),                 # (inferred operand)
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                # NOTE(review): temp_m is one bit wider than the mwid the
                # shifter was built with -- confirm intended width.
                msr.inp.eq(temp_m),                     # (inferred)
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                self.out_of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                # overflow in bits 0..2: taken from the shifted value
                self.out_of.guard.eq(msr.m[2]),     # guard
                self.out_of.round_bit.eq(msr.m[1]), # round
                self.out_of.sticky.eq(msr.m[0]),    # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m                                        # (inferred)
class FPNorm1ModMulti:
    """ One normalisation step per evaluation.

        in_select chooses between the fresh inputs (in_z / in_of) and the
        looped-back temporaries (temp_z / temp_of).  out_norm is raised
        while a further decrease or increase step is still required.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.  Inferred:
    # `self.width` (read in elaborate), `m = Module()`, the local
    # `in_of = Overflow()`, the `with m.Else():` of the input select, the
    # `with m.If(decrease):` header, the list brackets and `return m` --
    # confirm against upstream.  single_cycle is accepted but not used by
    # any visible statement.

    def __init__(self, width, single_cycle=True):
        self.width = width                              # (inferred)
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()                                    # (inferred)

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()                              # (inferred)
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():                                  # (inferred)
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):                            # (inferred)
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m                                        # (inferred)
# FPNorm1Single: the "normalise_1" FSM state built on FPNorm1ModSingle.
# setup() delegates the wiring to self.mod.setup(), which links in_z / in_of
# to the module and the module's out_z to self.out_z, and (when ids are
# enabled) drives self.in_mid.  The trailing statement registers the
# module's out_of.roundz into out_roundz.
# NOTE(review): original lines 831, 839, 842, 844, 847-849 and 851-853 are
# absent from this extract (including the header of the method holding the
# final statement and, presumably, the state transition).  single_cycle is
# not used by any visible statement.  Restore from upstream before changing
# logic.
830 class FPNorm1Single(FPState
, FPID
):
832 def __init__(self
, width
, id_wid
, single_cycle
=True):
833 FPID
.__init
__(self
, id_wid
)
834 FPState
.__init
__(self
, "normalise_1")
835 self
.mod
= FPNorm1ModSingle(width
)
836 self
.out_norm
= Signal(reset_less
=True)
837 self
.out_z
= FPNumBase(width
)
838 self
.out_roundz
= Signal(reset_less
=True)
840 def setup(self
, m
, in_z
, in_of
, in_mid
):
841 """ links module to inputs and outputs
843 self
.mod
.setup(m
, in_z
, in_of
, self
.out_z
)
845 if self
.in_mid
is not None:
846 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
850 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
# FPNorm1Multi: the "normalise_1" FSM state built on FPNorm1ModMulti, which
# loops one normalisation step at a time.  setup() hands the wiring to
# self.mod.setup(), mirrors norm_stb into self.stb, clears ack by default and
# (when ids are enabled) drives self.in_mid.  The trailing statements compute
# in_accept as (~ack & stb), register the module outputs into the temp_*
# loop-back registers and update ack / out_roundz depending on out_norm.
# NOTE(review): original lines 855, 868, 871, 875, 878, 881-883, 889-892, 894,
# 896 and 899-900 are absent from this extract -- the body of the
# `with m.If(self.in_accept)` branch and the branch headers that follow it
# are not visible.  Also note that the FPNorm1ModMulti definition visible in
# this file has no setup() method, while setup() here calls it with nine
# arguments.  Restore from upstream before changing logic.
854 class FPNorm1Multi(FPState
, FPID
):
856 def __init__(self
, width
, id_wid
):
857 FPID
.__init
__(self
, id_wid
)
858 FPState
.__init
__(self
, "normalise_1")
859 self
.mod
= FPNorm1ModMulti(width
)
860 self
.stb
= Signal(reset_less
=True)
861 self
.ack
= Signal(reset
=0, reset_less
=True)
862 self
.out_norm
= Signal(reset_less
=True)
863 self
.in_accept
= Signal(reset_less
=True)
864 self
.temp_z
= FPNumBase(width
)
865 self
.temp_of
= Overflow()
866 self
.out_z
= FPNumBase(width
)
867 self
.out_roundz
= Signal(reset_less
=True)
869 def setup(self
, m
, in_z
, in_of
, norm_stb
, in_mid
):
870 """ links module to inputs and outputs
872 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
873 self
.in_accept
, self
.temp_z
, self
.temp_of
,
874 self
.out_z
, self
.out_norm
)
876 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
877 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
879 if self
.in_mid
is not None:
880 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
884 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
885 m
.d
.sync
+= self
.temp_of
.copy(self
.mod
.out_of
)
886 m
.d
.sync
+= self
.temp_z
.copy(self
.out_z
)
887 with m
.If(self
.out_norm
):
888 with m
.If(self
.in_accept
):
893 m
.d
.sync
+= self
.ack
.eq(0)
895 # normalisation not required (or done).
897 m
.d
.sync
+= self
.ack
.eq(1)
898 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
# FPNorm1ToPack: a "normalise_1" FSM state that also owns an FPRoundMod
# (self.rmod) fed from the normaliser outputs n_out_z / n_out_roundz.
# setup() and the trailing statements mirror FPNorm1Multi, using n_out_z and
# n_out_roundz in place of out_z and out_roundz.
# NOTE(review): original lines 902, 906, 908, 919, 923, 926, 930, 933,
# 936-938, 944-947, 949, 951 and 954 onward are absent from this extract.
# As visible here self.mod is assigned three times in a row (whatever
# selected between the first two assignments is missing), and
# `self.rmod.setup(m, ...)` appears to sit inside __init__, whose visible
# signature has no `m` parameter.  Restore from upstream before changing
# logic.
901 class FPNorm1ToPack(FPState
, FPID
):
903 def __init__(self
, width
, id_wid
, single_cycle
=True):
904 FPID
.__init
__(self
, id_wid
)
905 FPState
.__init
__(self
, "normalise_1")
907 self
.mod
= FPNorm1ModSingle(width
)
909 self
.mod
= FPNorm1ModMulti(width
)
910 self
.mod
= FPNorm1ModMulti(width
)
911 self
.stb
= Signal(reset_less
=True)
912 self
.ack
= Signal(reset
=0, reset_less
=True)
913 self
.out_norm
= Signal(reset_less
=True)
914 self
.in_accept
= Signal(reset_less
=True)
915 self
.temp_z
= FPNumBase(width
)
916 self
.temp_of
= Overflow()
917 self
.n_out_z
= FPNumBase(width
)
918 self
.n_out_roundz
= Signal(reset_less
=True)
920 self
.rmod
= FPRoundMod(width
)
921 self
.out_z
= FPNumBase(width
)
922 self
.rmod
.setup(m
, self
.n_out_z
, self
.n_out_roundz
)
924 def setup(self
, m
, in_z
, in_of
, norm_stb
, in_mid
):
925 """ links module to inputs and outputs
927 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
928 self
.in_accept
, self
.temp_z
, self
.temp_of
,
929 self
.n_out_z
, self
.out_norm
)
931 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
932 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
934 if self
.in_mid
is not None:
935 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
939 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
940 m
.d
.sync
+= self
.temp_of
.copy(self
.mod
.out_of
)
941 m
.d
.sync
+= self
.temp_z
.copy(self
.n_out_z
)
942 with m
.If(self
.out_norm
):
943 with m
.If(self
.in_accept
):
948 m
.d
.sync
+= self
.ack
.eq(0)
950 # normalisation not required (or done).
952 m
.d
.sync
+= self
.ack
.eq(1)
953 m
.d
.sync
+= self
.n_out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPRoundMod:
    """ Rounding: out_z is in_z, with the mantissa incremented when
        in_roundz is set and the exponent incremented too when the
        mantissa was already all ones.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.  The class
    # header (name taken from the FPRoundMod(width) calls elsewhere in this
    # file; base class unknown, none assumed), `m = Module()`, `return m`
    # and the nesting of the all-ones test inside the round-up branch are
    # inferred -- confirm against upstream.

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module as submodule "roundz" and links inputs
        """
        m.submodules.roundz = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_roundz.eq(roundz)

    def elaborate(self, platform):
        m = Module()                                    # (inferred)
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_roundz):
            m.d.comb += self.out_z.m.eq(self.in_z.m + 1) # mantissa rounds up
            with m.If(self.in_z.m == self.in_z.m1s): # all 1s
                m.d.comb += self.out_z.e.eq(self.in_z.e + 1) # exponent up
        return m                                        # (inferred)
# FPRound: the "round" FSM state built on FPRoundMod.  setup() delegates the
# wiring to self.mod.setup() and (when ids are enabled) drives self.in_mid.
# The trailing statements register the module's out_z and move to
# "corrections".
# NOTE(review): original lines 980, 986, 989, 991, 994-996 and 999-1000 are
# absent from this extract (including the header of the method holding the
# final statements).  Restore from upstream before changing logic.
979 class FPRound(FPState
, FPID
):
981 def __init__(self
, width
, id_wid
):
982 FPState
.__init
__(self
, "round")
983 FPID
.__init
__(self
, id_wid
)
984 self
.mod
= FPRoundMod(width
)
985 self
.out_z
= FPNumBase(width
)
987 def setup(self
, m
, in_z
, roundz
, in_mid
):
988 """ links module to inputs and outputs
990 self
.mod
.setup(m
, in_z
, roundz
)
992 if self
.in_mid
is not None:
993 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
997 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
998 m
.next
= "corrections"
class FPCorrectionsMod:
    """ Exponent correction: out_z is in_z, except that the exponent is
        forced to N127 when in_z reports is_denormalised.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.
    # `m = Module()` and `return m` are inferred -- confirm against
    # upstream.

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def elaborate(self, platform):
        m = Module()                                    # (inferred)
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)
        return m                                        # (inferred)
# FPCorrections: the "corrections" FSM state built on FPCorrectionsMod
# (submodule "corrections").  setup() copies in_z into the module and (when
# ids are enabled) drives self.in_mid; action() registers the module's out_z
# on the sync domain.
# NOTE(review): original lines 1018, 1024, 1027, 1032, 1034 and 1036 onward
# are absent from this extract, so action() may be missing statements before
# and after the one shown (no state transition is visible).  Restore from
# upstream before changing logic.
1017 class FPCorrections(FPState
, FPID
):
1019 def __init__(self
, width
, id_wid
):
1020 FPState
.__init
__(self
, "corrections")
1021 FPID
.__init
__(self
, id_wid
)
1022 self
.mod
= FPCorrectionsMod(width
)
1023 self
.out_z
= FPNumBase(width
)
1025 def setup(self
, m
, in_z
, in_mid
):
1026 """ links module to inputs and outputs
1028 m
.submodules
.corrections
= self
.mod
1029 m
.d
.comb
+= self
.mod
.in_z
.copy(in_z
)
1030 if self
.in_mid
is not None:
1031 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1033 def action(self
, m
):
1035 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
class FPPackMod:
    """ Packs the result: out_z becomes a signed infinity when in_z
        reports is_overflowed, otherwise it is created from in_z's sign,
        exponent and mantissa.
    """
    # NOTE(review): rebuilt from an extract with dropped lines.  The class
    # header (name taken from the FPPackMod(width) call in FPPack; base
    # class unknown, none assumed), `m = Module()`, the `with m.Else():`
    # header and `return m` are inferred -- confirm against upstream.

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def elaborate(self, platform):
        m = Module()                                    # (inferred)
        m.submodules.pack_in_z = self.in_z
        with m.If(self.in_z.is_overflowed):
            m.d.comb += self.out_z.inf(self.in_z.s)
        with m.Else():                                  # (inferred)
            m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e,
                                          self.in_z.m)
        return m                                        # (inferred)
# FPPack: the "pack" FSM state built on FPPackMod (submodule "pack").
# setup() copies in_z into the module and (when ids are enabled) drives
# self.in_mid; action() registers the module's packed value out_z.v and moves
# to "pack_put_z".
# NOTE(review): original lines 1056, 1062, 1065, 1070, 1072 and 1075-1076 are
# absent from this extract, so action() may be missing a statement before the
# first one shown.  Restore from upstream before changing logic.
1055 class FPPack(FPState
, FPID
):
1057 def __init__(self
, width
, id_wid
):
1058 FPState
.__init
__(self
, "pack")
1059 FPID
.__init
__(self
, id_wid
)
1060 self
.mod
= FPPackMod(width
)
1061 self
.out_z
= FPNumOut(width
, False)
1063 def setup(self
, m
, in_z
, in_mid
):
1064 """ links module to inputs and outputs
1066 m
.submodules
.pack
= self
.mod
1067 m
.d
.comb
+= self
.mod
.in_z
.copy(in_z
)
1068 if self
.in_mid
is not None:
1069 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1071 def action(self
, m
):
1073 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1074 m
.next
= "pack_put_z"
# FPPutZ: FSM state that presents a result on the out_z handshake.  action()
# forwards in_mid to out_mid when ids are enabled, copies in_z.v to out_z.v,
# and drops out_z.stb once stb and ack are both set; the last visible
# statement raises out_z.stb.
# NOTE(review): original lines 1078, 1081-1082, 1085, 1089, 1091, 1094-1095
# and 1097-1098 are absent from this extract -- no assignment of self.in_z or
# self.out_z is visible although both are read, and the statement wrapping
# the out_z.v copy, any state transition, and the header of the branch that
# raises stb are missing.  Restore from upstream before changing logic.
1077 class FPPutZ(FPState
):
1079 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
):
1080 FPState
.__init
__(self
, state
)
1083 self
.in_mid
= in_mid
1084 self
.out_mid
= out_mid
1086 def action(self
, m
):
1087 if self
.in_mid
is not None:
1088 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1090 self
.out_z
.v
.eq(self
.in_z
.v
)
1092 with m
.If(self
.out_z
.stb
& self
.out_z
.ack
):
1093 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1096 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
class FPADDBaseMod(FPID):
    """ The adder pipeline proper: an FSM whose states are the individual
        add stages, started through in_t and delivering through out_z.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        # both are read back when the stages are constructed
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ registers a state with the FSM and returns it, so that callers
            can write "x = self.add_state(X(...))"
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        # exactly one of the two pipelines may be built: both register
        # states under the same names
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def _add_front_stages(self, m):
        """ adds the stages common to both pipelines: operand fetch, special
            cases, denormalisation, alignment and the two add stages.

            returns (sc, add0, add1), which the later stages connect to.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): assumes FPGet2Op exposes its decoded operands as
        # out_op1 / out_op2 (the names FPGet2OpMod uses) - confirm.
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # only one alignment variant may be instantiated
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        return sc, add0, add1

    def _add_back_stages(self, m, sc, prev):
        """ adds corrections, pack and the two output states.

            * sc: the special-cases state (its out_z feeds "put_z")
            * prev: the state feeding corrections (its out_z and in_mid
                    are used)
        """
        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, prev.out_z, prev.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, prev.in_mid)

        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_longer_fragment(self, m, platform=None):
        """ builds the full-length pipeline (separate norm and round stages)
        """
        sc, add0, add1 = self._add_front_stages(m)

        # only one normalisation variant may be instantiated
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of,
                     add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        self._add_back_stages(m, sc, rn)

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced pipeline (FPNormToPack replaces norm/round)
        """
        sc, add0, add1 = self._add_front_stages(m)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, add1.out_z, add1.out_of,
                 add1.norm_stb, add0.in_mid)

        # there is no round stage in this pipeline, so the id is taken from
        # n1 (the stage that feeds corrections) instead of an undefined "rn".
        # NOTE(review): assumes FPNormToPack provides in_mid like the other
        # stages, and that corrections/pack are still wanted after it -
        # confirm.
        self._add_back_stages(m, sc, n1)
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd": owns an FPADDBaseMod, starts it when an operand
        pair is offered and leaves the state once the result is taken.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the inner adder to the operands, trigger and outputs

            * a, b: operand values
            * add_stb: strobe announcing an operand pair (registered here)
            * in_mid: incoming identifier
            * out_z: output (its .v, .stb and .ack are connected)
            * out_mid: outgoing identifier
        """
        # out_z is not created in __init__: it is supplied by the caller
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ accepts an operand pair, starts the add and waits for z_done
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),   # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                            ]
        with m.Else():
            # done: acknowledge, and make sure the add is not re-triggered.
            # out_mid, out_z.v and out_z.stb are deliberately not assigned
            # here: setup() already drives them combinatorially from the
            # inner module, and a second (sync) driver would conflict.
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0),
                        ]
            # NOTE(review): assumes the enclosing FSM starts at "get_a"
            # (the first state FPADD registers) - confirm.
            m.next = "get_a"
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a) -> FPGetOp (b) -> FPAddBase---> FPAddBaseMod

        where FPAddBaseMod in turn runs
        GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        # width is read back when the states are constructed
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.in_a = FPOp(width)
        self.in_b = FPOp(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ registers a state with the FSM and returns it, so that callers
            can write "x = self.add_state(X(...))"
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.in_a = self.in_a
        m.submodules.in_b = self.in_b
        m.submodules.out_z = self.out_z

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      self.in_a, self.width))
        geta.setup(m, self.in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      self.in_b, self.width))
        getb.setup(m, self.in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        # the decode pulse of the second operand doubles as "pair ready"
        ab.setup(m, a, b, getb.out_decode, self.in_mid,
                 self.out_z, self.out_mid)

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
if __name__ == "__main__":
    # only one design can be handed to the command-line main(): select
    # which one here.
    build_top_level = True
    if build_top_level:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.in_a.ports() + \
                        alu.in_b.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])
    else:
        # NOTE(review): FPADDBase.__init__ does not create out_z (that line
        # is commented out there), so alu.out_z below needs attention
        # before this variant is enabled.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                        alu.in_t.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])
1416 # works... but don't use, just do "python fname.py convert -t v"
1417 #print (verilog.convert(alu, ports=[
1418 # ports=alu.in_a.ports() + \
1419 # alu.in_b.ports() + \
1420 # alu.out_z.ports())