1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 #from fpbase import FPNumShiftMultiRight
20 class FPState(FPBase
):
21 def __init__(self
, state_from
):
22 self
.state_from
= state_from
24 def set_inputs(self
, inputs
):
26 for k
,v
in inputs
.items():
29 def set_outputs(self
, outputs
):
30 self
.outputs
= outputs
31 for k
,v
in outputs
.items():
36 def __init__(self
, width
):
37 self
.in_op
= FPOp(width
)
38 self
.out_op
= Signal(width
)
39 self
.out_decode
= Signal(reset_less
=True)
41 def elaborate(self
, platform
):
43 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
44 m
.submodules
.get_op_in
= self
.in_op
45 #m.submodules.get_op_out = self.out_op
46 with m
.If(self
.out_decode
):
48 self
.out_op
.eq(self
.in_op
.v
),
53 class FPGetOp(FPState
):
57 def __init__(self
, in_state
, out_state
, in_op
, width
):
58 FPState
.__init
__(self
, in_state
)
59 self
.out_state
= out_state
60 self
.mod
= FPGetOpMod(width
)
62 self
.out_op
= Signal(width
)
63 self
.out_decode
= Signal(reset_less
=True)
65 def setup(self
, m
, in_op
):
66 """ links module to inputs and outputs
68 setattr(m
.submodules
, self
.state_from
, self
.mod
)
69 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
70 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
73 with m
.If(self
.out_decode
):
74 m
.next
= self
.out_state
77 self
.out_op
.eq(self
.mod
.out_op
)
80 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
85 def __init__(self
, width
, id_wid
, m_extra
=True):
86 self
.a
= FPNumBase(width
, m_extra
)
87 self
.b
= FPNumBase(width
, m_extra
)
88 self
.mid
= Signal(id_wid
, reset_less
=True)
91 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
94 return [self
.a
, self
.b
, self
.mid
]
99 def __init__(self
, width
, id_wid
):
102 self
.a
= Signal(width
)
103 self
.b
= Signal(width
)
104 self
.mid
= Signal(id_wid
, reset_less
=True)
107 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
110 return [self
.a
, self
.b
, self
.mid
]
113 class FPGet2OpMod(Trigger
):
114 def __init__(self
, width
, id_wid
):
115 Trigger
.__init
__(self
)
118 self
.i
= self
.ispec()
119 self
.o
= self
.ospec()
122 return FPADDBaseData(self
.width
, self
.id_wid
)
125 return FPADDBaseData(self
.width
, self
.id_wid
)
127 def process(self
, i
):
130 def elaborate(self
, platform
):
131 m
= Trigger
.elaborate(self
, platform
)
132 with m
.If(self
.trigger
):
139 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Build the two-operand fetch state.

    in_state is passed to FPState.__init__; out_state is kept on the
    instance as self.out_state.  width and id_wid are forwarded
    unchanged to FPGet2OpMod.
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # the module doing the actual work; self.ospec() reads self.mod,
    # so the module has to exist before the output record is created
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.ospec()

    # single-bit handshake / decode flags
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
153 return self
.mod
.ispec()
156 return self
.mod
.ospec()
158 def trigger_setup(self
, m
, in_stb
, in_ack
):
161 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
162 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
def setup(self, m, i):
    """Link the operand-fetch module to its inputs and outputs.

    Registers self.mod on m as the submodule "get_ops", then wires it
    up combinatorially: i drives the module's input, and the module's
    ack / trigger drive self.out_ack / self.out_decode.
    """
    mod = self.mod
    m.submodules.get_ops = mod
    m.d.comb += [
        mod.i.eq(i),
        self.out_ack.eq(mod.ack),
        self.out_decode.eq(mod.trigger),
    ]
172 def process(self
, i
):
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
180 self
.o
.eq(self
.mod
.o
),
183 m
.d
.sync
+= self
.mod
.ack
.eq(1)
188 def __init__(self
, width
, id_wid
):
189 self
.a
= FPNumBase(width
, True)
190 self
.b
= FPNumBase(width
, True)
191 self
.z
= FPNumOut(width
, False)
192 self
.oz
= Signal(width
, reset_less
=True)
193 self
.out_do_z
= Signal(reset_less
=True)
194 self
.mid
= Signal(id_wid
, reset_less
=True)
197 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
198 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
201 class FPAddSpecialCasesMod
:
202 """ special cases: NaNs, infs, zeros, denormalised
203 NOTE: some of these are unique to add. see "Special Operations"
204 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
207 def __init__(self
, width
, id_wid
):
210 self
.i
= self
.ispec()
211 self
.o
= self
.ospec()
214 return FPADDBaseData(self
.width
, self
.id_wid
)
217 return FPSCData(self
.width
, self
.id_wid
)
219 def setup(self
, m
, i
):
220 """ links module to inputs and outputs
222 m
.submodules
.specialcases
= self
223 m
.d
.comb
+= self
.i
.eq(i
)
225 def process(self
, i
):
228 def elaborate(self
, platform
):
231 m
.submodules
.sc_out_z
= self
.o
.z
233 # decode: XXX really should move to separate stage
234 a1
= FPNumIn(None, self
.width
)
235 b1
= FPNumIn(None, self
.width
)
236 m
.submodules
.sc_decode_a
= a1
237 m
.submodules
.sc_decode_b
= b1
238 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
243 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
246 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
248 # if a is NaN or b is NaN return NaN
249 with m
.If(a1
.is_nan | b1
.is_nan
):
250 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
251 m
.d
.comb
+= self
.o
.z
.nan(0)
253 # XXX WEIRDNESS for FP16 non-canonical NaN handling
256 ## if a is zero and b is NaN return -b
257 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
258 # m.d.comb += self.o.out_do_z.eq(1)
259 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
261 ## if b is zero and a is NaN return -a
262 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
263 # m.d.comb += self.o.out_do_z.eq(1)
264 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
266 ## if a is -zero and b is NaN return -b
267 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
268 # m.d.comb += self.o.out_do_z.eq(1)
269 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
271 ## if b is -zero and a is NaN return -a
272 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
273 # m.d.comb += self.o.out_do_z.eq(1)
274 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
276 # if a is inf return inf (or NaN)
277 with m
.Elif(a1
.is_inf
):
278 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
279 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
280 # if a is inf and signs don't match return NaN
281 with m
.If(b1
.exp_128
& s_nomatch
):
282 m
.d
.comb
+= self
.o
.z
.nan(0)
284 # if b is inf return inf
285 with m
.Elif(b1
.is_inf
):
286 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
287 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
289 # if a is zero and b zero return signed-a/b
290 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
291 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
292 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
294 # if a is zero return b
295 with m
.Elif(a1
.is_zero
):
296 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
297 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
299 # if b is zero return a
300 with m
.Elif(b1
.is_zero
):
301 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
302 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
304 # if a equal to -b return zero (+ve zero)
305 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
306 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
307 m
.d
.comb
+= self
.o
.z
.zero(0)
309 # Denormalised Number checks next, so pass a/b data through
311 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
312 m
.d
.comb
+= self
.o
.a
.eq(a1
)
313 m
.d
.comb
+= self
.o
.b
.eq(b1
)
315 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
316 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
322 def __init__(self
, id_wid
):
325 self
.in_mid
= Signal(id_wid
, reset_less
=True)
326 self
.out_mid
= Signal(id_wid
, reset_less
=True)
332 if self
.id_wid
is not None:
333 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
336 class FPAddSpecialCases(FPState
):
337 """ special cases: NaNs, infs, zeros, denormalised
338 NOTE: some of these are unique to add. see "Special Operations"
339 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the "special_cases" state.

    width and id_wid are both forwarded to FPAddSpecialCasesMod.
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid): passing width
    # alone raised TypeError as soon as this state was constructed.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output record has the same layout as the module's own output
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
348 def setup(self
, m
, i
):
349 """ links module to inputs and outputs
351 self
.mod
.setup(m
, i
, self
.out_do_z
)
352 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
353 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
357 with m
.If(self
.out_do_z
):
360 m
.next
= "denormalise"
363 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
364 """ special cases: NaNs, infs, zeros, denormalised
365 NOTE: some of these are unique to add. see "Special Operations"
366 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
369 def __init__(self
, width
, id_wid
):
370 FPState
.__init
__(self
, "special_cases")
373 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
374 self
.out
= self
.ospec()
377 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
380 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
382 def setup(self
, m
, i
):
383 """ links module to inputs and outputs
385 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
386 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
388 chain
= StageChain([smod
, dmod
])
391 # only needed for break-out (early-out)
392 # self.out_do_z = smod.o.out_do_z
396 def process(self
, i
):
400 # for break-out (early-out)
401 #with m.If(self.out_do_z):
404 m
.d
.sync
+= self
.out
.eq(self
.process(None))
408 class FPAddDeNormMod(FPState
):
410 def __init__(self
, width
, id_wid
):
413 self
.i
= self
.ispec()
414 self
.o
= self
.ospec()
417 return FPSCData(self
.width
, self
.id_wid
)
420 return FPSCData(self
.width
, self
.id_wid
)
422 def process(self
, i
):
425 def setup(self
, m
, i
):
426 """ links module to inputs and outputs
428 m
.submodules
.denormalise
= self
429 m
.d
.comb
+= self
.i
.eq(i
)
431 def elaborate(self
, platform
):
433 m
.submodules
.denorm_in_a
= self
.i
.a
434 m
.submodules
.denorm_in_b
= self
.i
.b
435 m
.submodules
.denorm_out_a
= self
.o
.a
436 m
.submodules
.denorm_out_b
= self
.o
.b
438 with m
.If(~self
.i
.out_do_z
):
439 # XXX hmmm, don't like repeating identical code
440 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
441 with m
.If(self
.i
.a
.exp_n127
):
442 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
444 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
446 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
447 with m
.If(self
.i
.b
.exp_n127
):
448 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
450 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
452 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
453 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
454 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
455 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
460 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Build the "denormalise" state.

    width and id_wid are both forwarded to FPAddDeNormMod; width also
    sizes the two output numbers out_a and out_b.
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid): passing width alone
    # raised TypeError as soon as this state was constructed.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
468 def setup(self
, m
, i
):
469 """ links module to inputs and outputs
473 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
474 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
477 # Denormalised Number checks
481 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the ports of the multi-cycle alignment module.

    in_a / in_b are FPNumBase inputs, out_a / out_b are FPNumIn
    outputs (all built for the given width), and exp_eq is a one-bit
    flag signal.
    """
    # operands coming in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # operands going out
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # single-bit flag
    self.exp_eq = Signal(reset_less=True)
490 def elaborate(self
, platform
):
491 # This one however (single-cycle) will do the shift
496 m
.submodules
.align_in_a
= self
.in_a
497 m
.submodules
.align_in_b
= self
.in_b
498 m
.submodules
.align_out_a
= self
.out_a
499 m
.submodules
.align_out_b
= self
.out_b
501 # NOTE: this does *not* do single-cycle multi-shifting,
502 # it *STAYS* in the align state until exponents match
504 # exponent of a greater than b: shift b down
505 m
.d
.comb
+= self
.exp_eq
.eq(0)
506 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
507 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
508 agtb
= Signal(reset_less
=True)
509 altb
= Signal(reset_less
=True)
510 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
511 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
513 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
514 # exponent of b greater than a: shift a down
516 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
517 # exponents equal: move to next stage.
519 m
.d
.comb
+= self
.exp_eq
.eq(1)
523 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Build the multi-cycle "align" state.

    Only width is used here (for the module and the two output
    numbers); id_wid is accepted but not read by this constructor.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)

    # state-level copies of the module's outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b):
    """Link the alignment module to its inputs and outputs.

    Registers self.mod on m as the submodule "align".  in_a / in_b
    drive the module's inputs and the module's exp_eq drives
    self.exp_eq in the comb domain; the module's out_a / out_b are
    copied into self.out_a / self.out_b in the sync domain.
    """
    mod = self.mod
    m.submodules.align = mod

    # comb domain: operands in, exponent-equal flag out
    m.d.comb += [
        mod.in_a.eq(in_a),
        mod.in_b.eq(in_b),
        self.exp_eq.eq(mod.exp_eq),
    ]

    # sync domain: capture the module's outputs
    m.d.sync += [
        self.out_a.eq(mod.out_a),
        self.out_b.eq(mod.out_b),
    ]
543 with m
.If(self
.exp_eq
):
549 def __init__(self
, width
, id_wid
):
550 self
.a
= FPNumIn(None, width
)
551 self
.b
= FPNumIn(None, width
)
552 self
.z
= FPNumOut(width
, False)
553 self
.out_do_z
= Signal(reset_less
=True)
554 self
.oz
= Signal(width
, reset_less
=True)
555 self
.mid
= Signal(id_wid
, reset_less
=True)
558 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
559 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
562 class FPAddAlignSingleMod
:
564 def __init__(self
, width
, id_wid
):
567 self
.i
= self
.ispec()
568 self
.o
= self
.ospec()
571 return FPSCData(self
.width
, self
.id_wid
)
574 return FPNumIn2Ops(self
.width
, self
.id_wid
)
576 def process(self
, i
):
579 def setup(self
, m
, i
):
580 """ links module to inputs and outputs
582 m
.submodules
.align
= self
583 m
.d
.comb
+= self
.i
.eq(i
)
585 def elaborate(self
, platform
):
586 """ Aligns A against B or B against A, depending on which has the
587 greater exponent. This is done in a *single* cycle using
588 variable-width bit-shift
590 the shifter used here is quite expensive in terms of gates.
591 Mux A or B in (and out) into temporaries, as only one of them
592 needs to be aligned against the other
596 m
.submodules
.align_in_a
= self
.i
.a
597 m
.submodules
.align_in_b
= self
.i
.b
598 m
.submodules
.align_out_a
= self
.o
.a
599 m
.submodules
.align_out_b
= self
.o
.b
601 # temporary (muxed) input and output to be shifted
602 t_inp
= FPNumBase(self
.width
)
603 t_out
= FPNumIn(None, self
.width
)
604 espec
= (len(self
.i
.a
.e
), True)
605 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
606 m
.submodules
.align_t_in
= t_inp
607 m
.submodules
.align_t_out
= t_out
608 m
.submodules
.multishift_r
= msr
610 ediff
= Signal(espec
, reset_less
=True)
611 ediffr
= Signal(espec
, reset_less
=True)
612 tdiff
= Signal(espec
, reset_less
=True)
613 elz
= Signal(reset_less
=True)
614 egz
= Signal(reset_less
=True)
616 # connect multi-shifter to t_inp/out mantissa (and tdiff)
617 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
618 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
619 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
620 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
621 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
623 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
624 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
625 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
626 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
628 # default: A-exp == B-exp, A and B untouched (fall through)
629 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
630 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
631 # only one shifter (muxed)
632 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
633 # exponent of a greater than b: shift b down
634 with m
.If(~self
.i
.out_do_z
):
636 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
639 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
641 # exponent of b greater than a: shift a down
643 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
646 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
649 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
650 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
651 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
652 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
657 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Build the single-cycle "align" state.

    width and id_wid are forwarded to FPAddAlignSingleMod; width also
    sizes the two FPNumIn outputs out_a and out_b.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # state-level output numbers
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
665 def setup(self
, m
, i
):
666 """ links module to inputs and outputs
670 # NOTE: could be done as comb
671 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
672 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
678 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
680 def __init__(self
, width
, id_wid
):
681 FPState
.__init
__(self
, "align")
684 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
685 self
.a1o
= self
.ospec()
688 return FPSCData(self
.width
, self
.id_wid
)
691 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
693 def setup(self
, m
, i
):
694 """ links module to inputs and outputs
697 # chain AddAlignSingle, AddStage0 and AddStage1
698 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
699 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
700 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
702 chain
= StageChain([mod
, a0mod
, a1mod
])
707 def process(self
, i
):
711 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
712 m
.next
= "normalise_1"
715 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the fields of the add-stage-0 output record.

    z        -- FPNumBase(width, False)
    out_do_z -- one-bit signal
    oz       -- signal of `width` bits
    tot      -- signal four bits wider than z's m_width
    mid      -- signal of `id_wid` bits
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # tot is sized from the mantissa width of z, plus four bits
    tot_width = self.z.m_width + 4
    self.tot = Signal(tot_width, reset_less=True)

    self.mid = Signal(id_wid, reset_less=True)
725 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
726 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
729 class FPAddStage0Mod
:
731 def __init__(self
, width
, id_wid
):
734 self
.i
= self
.ispec()
735 self
.o
= self
.ospec()
738 return FPSCData(self
.width
, self
.id_wid
)
741 return FPAddStage0Data(self
.width
, self
.id_wid
)
743 def process(self
, i
):
746 def setup(self
, m
, i
):
747 """ links module to inputs and outputs
749 m
.submodules
.add0
= self
750 m
.d
.comb
+= self
.i
.eq(i
)
752 def elaborate(self
, platform
):
754 m
.submodules
.add0_in_a
= self
.i
.a
755 m
.submodules
.add0_in_b
= self
.i
.b
756 m
.submodules
.add0_out_z
= self
.o
.z
758 # store intermediate tests (and zero-extended mantissas)
759 seq
= Signal(reset_less
=True)
760 mge
= Signal(reset_less
=True)
761 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
762 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
763 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
764 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
765 am0
.eq(Cat(self
.i
.a
.m
, 0)),
766 bm0
.eq(Cat(self
.i
.b
.m
, 0))
768 # same-sign (both negative or both positive) add mantissas
769 with m
.If(~self
.i
.out_do_z
):
770 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
773 self
.o
.tot
.eq(am0
+ bm0
),
774 self
.o
.z
.s
.eq(self
.i
.a
.s
)
776 # a mantissa greater than b, use a
779 self
.o
.tot
.eq(am0
- bm0
),
780 self
.o
.z
.s
.eq(self
.i
.a
.s
)
782 # b mantissa greater than a, use b
785 self
.o
.tot
.eq(bm0
- am0
),
786 self
.o
.z
.s
.eq(self
.i
.b
.s
)
789 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
790 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
791 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
795 class FPAddStage0(FPState
):
796 """ First stage of add. covers same-sign (add) and subtract
797 special-casing when mantissas are greater or equal, to
798 give greatest accuracy.
def __init__(self, width, id_wid):
    """Build the "add_0" state.

    width and id_wid are both forwarded to FPAddStage0Mod; the state's
    output record self.o is created from the module's ospec().
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid): passing width alone
    # raised TypeError as soon as this state was constructed.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
806 def setup(self
, m
, i
):
807 """ links module to inputs and outputs
811 # NOTE: these could be done as combinatorial (merge add0+add1)
812 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
818 class FPAddStage1Data
:
820 def __init__(self
, width
, id_wid
):
821 self
.z
= FPNumBase(width
, False)
822 self
.out_do_z
= Signal(reset_less
=True)
823 self
.oz
= Signal(width
, reset_less
=True)
825 self
.mid
= Signal(id_wid
, reset_less
=True)
828 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
829 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
833 class FPAddStage1Mod(FPState
):
834 """ Second stage of add: preparation for normalisation.
835 detects when tot sum is too big (tot[27] is kinda a carry bit)
838 def __init__(self
, width
, id_wid
):
841 self
.i
= self
.ispec()
842 self
.o
= self
.ospec()
845 return FPAddStage0Data(self
.width
, self
.id_wid
)
848 return FPAddStage1Data(self
.width
, self
.id_wid
)
850 def process(self
, i
):
853 def setup(self
, m
, i
):
854 """ links module to inputs and outputs
856 m
.submodules
.add1
= self
857 m
.submodules
.add1_out_overflow
= self
.o
.of
859 m
.d
.comb
+= self
.i
.eq(i
)
861 def elaborate(self
, platform
):
863 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
864 # tot[-1] (MSB) gets set when the sum overflows. shift result down
865 with m
.If(~self
.i
.out_do_z
):
866 with m
.If(self
.i
.tot
[-1]):
868 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
869 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
870 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
871 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
872 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
873 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
875 # tot[-1] (MSB) zero case
878 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
879 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
880 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
881 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
882 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
885 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
886 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
887 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
892 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Build the "add_1" state.

    width and id_wid are both forwarded to FPAddStage1Mod; width also
    sizes the out_z number.
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid): passing width alone
    # raised TypeError as soon as this state was constructed.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
901 def setup(self
, m
, i
):
902 """ links module to inputs and outputs
906 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
908 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
909 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
910 m
.d
.sync
+= self
.norm_stb
.eq(1)
913 m
.next
= "normalise_1"
916 class FPNormaliseModSingle
:
918 def __init__(self
, width
):
920 self
.in_z
= self
.ispec()
921 self
.out_z
= self
.ospec()
924 return FPNumBase(self
.width
, False)
927 return FPNumBase(self
.width
, False)
929 def setup(self
, m
, i
):
930 """ links module to inputs and outputs
932 m
.submodules
.normalise
= self
933 m
.d
.comb
+= self
.i
.eq(i
)
935 def elaborate(self
, platform
):
938 mwid
= self
.out_z
.m_width
+2
939 pe
= PriorityEncoder(mwid
)
940 m
.submodules
.norm_pe
= pe
942 m
.submodules
.norm1_out_z
= self
.out_z
943 m
.submodules
.norm1_in_z
= self
.in_z
945 in_z
= FPNumBase(self
.width
, False)
947 m
.submodules
.norm1_insel_z
= in_z
948 m
.submodules
.norm1_insel_overflow
= in_of
950 espec
= (len(in_z
.e
), True)
951 ediff_n126
= Signal(espec
, reset_less
=True)
952 msr
= MultiShiftRMerge(mwid
, espec
)
953 m
.submodules
.multishift_r
= msr
955 m
.d
.comb
+= in_z
.eq(self
.in_z
)
956 m
.d
.comb
+= in_of
.eq(self
.in_of
)
957 # initialise out from in (overridden below)
958 m
.d
.comb
+= self
.out_z
.eq(in_z
)
959 m
.d
.comb
+= self
.out_of
.eq(in_of
)
960 # normalisation decrease condition
961 decrease
= Signal(reset_less
=True)
962 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
965 # *sigh* not entirely obvious: count leading zeros (clz)
966 # with a PriorityEncoder: to find from the MSB
967 # we reverse the order of the bits.
968 temp_m
= Signal(mwid
, reset_less
=True)
969 temp_s
= Signal(mwid
+1, reset_less
=True)
970 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
972 # cat round and guard bits back into the mantissa
973 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
974 pe
.i
.eq(temp_m
[::-1]), # inverted
975 clz
.eq(pe
.o
), # count zeros from MSB down
976 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
977 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
978 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
986 def __init__(self
, width
, id_wid
):
987 self
.roundz
= Signal(reset_less
=True)
988 self
.z
= FPNumBase(width
, False)
989 self
.out_do_z
= Signal(reset_less
=True)
990 self
.oz
= Signal(width
, reset_less
=True)
991 self
.mid
= Signal(id_wid
, reset_less
=True)
994 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
995 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
998 class FPNorm1ModSingle
:
1000 def __init__(self
, width
, id_wid
):
1002 self
.id_wid
= id_wid
1003 self
.i
= self
.ispec()
1004 self
.o
= self
.ospec()
1007 return FPAddStage1Data(self
.width
, self
.id_wid
)
1010 return FPNorm1Data(self
.width
, self
.id_wid
)
1012 def setup(self
, m
, i
):
1013 """ links module to inputs and outputs
1015 m
.submodules
.normalise_1
= self
1016 m
.d
.comb
+= self
.i
.eq(i
)
1018 def process(self
, i
):
1021 def elaborate(self
, platform
):
1024 mwid
= self
.o
.z
.m_width
+2
1025 pe
= PriorityEncoder(mwid
)
1026 m
.submodules
.norm_pe
= pe
1029 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1031 m
.submodules
.norm1_out_z
= self
.o
.z
1032 m
.submodules
.norm1_out_overflow
= of
1033 m
.submodules
.norm1_in_z
= self
.i
.z
1034 m
.submodules
.norm1_in_overflow
= self
.i
.of
1037 m
.submodules
.norm1_insel_z
= i
.z
1038 m
.submodules
.norm1_insel_overflow
= i
.of
1040 espec
= (len(i
.z
.e
), True)
1041 ediff_n126
= Signal(espec
, reset_less
=True)
1042 msr
= MultiShiftRMerge(mwid
, espec
)
1043 m
.submodules
.multishift_r
= msr
1045 m
.d
.comb
+= i
.eq(self
.i
)
1046 # initialise out from in (overridden below)
1047 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1048 m
.d
.comb
+= of
.eq(i
.of
)
1049 # normalisation increase/decrease conditions
1050 decrease
= Signal(reset_less
=True)
1051 increase
= Signal(reset_less
=True)
1052 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1053 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1055 with m
.If(~self
.i
.out_do_z
):
1056 with m
.If(decrease
):
1057 # *sigh* not entirely obvious: count leading zeros (clz)
1058 # with a PriorityEncoder: to find from the MSB
1059 # we reverse the order of the bits.
1060 temp_m
= Signal(mwid
, reset_less
=True)
1061 temp_s
= Signal(mwid
+1, reset_less
=True)
1062 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1063 # make sure that the amount to decrease by does NOT
1064 # go below the minimum non-INF/NaN exponent
1065 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1068 # cat round and guard bits back into the mantissa
1069 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1070 pe
.i
.eq(temp_m
[::-1]), # inverted
1071 clz
.eq(limclz
), # count zeros from MSB down
1072 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1073 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1074 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1075 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1076 # overflow in bits 0..1: got shifted too (leave sticky)
1077 of
.guard
.eq(temp_s
[1]), # guard
1078 of
.round_bit
.eq(temp_s
[0]), # round
1081 with m
.Elif(increase
):
1082 temp_m
= Signal(mwid
+1, reset_less
=True)
1084 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1086 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1087 # connect multi-shifter to inp/out mantissa (and ediff)
1089 msr
.diff
.eq(ediff_n126
),
1090 self
.o
.z
.m
.eq(msr
.m
[3:]),
1091 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1092 # overflow in bits 0..1: got shifted too (leave sticky)
1093 of
.guard
.eq(temp_s
[2]), # guard
1094 of
.round_bit
.eq(temp_s
[1]), # round
1095 of
.sticky
.eq(temp_s
[0]), # sticky
1096 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1099 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1100 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1101 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
1106 class FPNorm1ModMulti
:
1108 def __init__(self
, width
, single_cycle
=True):
1110 self
.in_select
= Signal(reset_less
=True)
1111 self
.in_z
= FPNumBase(width
, False)
1112 self
.in_of
= Overflow()
1113 self
.temp_z
= FPNumBase(width
, False)
1114 self
.temp_of
= Overflow()
1115 self
.out_z
= FPNumBase(width
, False)
1116 self
.out_of
= Overflow()
1118 def elaborate(self
, platform
):
1121 m
.submodules
.norm1_out_z
= self
.out_z
1122 m
.submodules
.norm1_out_overflow
= self
.out_of
1123 m
.submodules
.norm1_temp_z
= self
.temp_z
1124 m
.submodules
.norm1_temp_of
= self
.temp_of
1125 m
.submodules
.norm1_in_z
= self
.in_z
1126 m
.submodules
.norm1_in_overflow
= self
.in_of
1128 in_z
= FPNumBase(self
.width
, False)
1130 m
.submodules
.norm1_insel_z
= in_z
1131 m
.submodules
.norm1_insel_overflow
= in_of
1133 # select which of temp or in z/of to use
1134 with m
.If(self
.in_select
):
1135 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1136 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1138 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1139 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1140 # initialise out from in (overridden below)
1141 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1142 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1143 # normalisation increase/decrease conditions
1144 decrease
= Signal(reset_less
=True)
1145 increase
= Signal(reset_less
=True)
1146 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1147 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1148 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1150 with m
.If(decrease
):
1152 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1153 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1154 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1155 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1156 self
.out_of
.round_bit
.eq(0), # reset round bit
1157 self
.out_of
.m0
.eq(in_of
.guard
),
1160 with m
.Elif(increase
):
1162 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1163 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1164 self
.out_of
.guard
.eq(in_z
.m
[0]),
1165 self
.out_of
.m0
.eq(in_z
.m
[1]),
1166 self
.out_of
.round_bit
.eq(in_of
.guard
),
1167 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1173 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """Build the single-cycle "normalise_1" state.

    width and id_wid are both forwarded to FPNorm1ModSingle.
    single_cycle is accepted but not read by this constructor.
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ takes (width, id_wid): passing width
    # alone raised TypeError as soon as this state was constructed.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # self.ospec() reads self.mod, so it must come after the module
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1183 return self
.mod
.ispec()
1186 return self
.mod
.ospec()
def setup(self, m, i):
    """Link the normalisation module to its inputs and outputs.

    All of the wiring is delegated to the module's own setup(), which
    receives m and i unchanged.
    """
    norm_mod = self.mod
    norm_mod.setup(m, i)
1193 def action(self
, m
):
1197 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """Build the multi-cycle "normalise_1" state.

    Only width is used here (for the module and the FPNumBase
    members); id_wid is accepted but not read by this constructor.
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # handshake and control flags (ack explicitly resets to 0)
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # intermediate number and overflow record
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # results
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1211 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1212 """ links module to inputs and outputs
1214 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1215 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1216 self
.out_z
, self
.out_norm
)
1218 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1219 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1221 def action(self
, m
):
1222 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1223 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1224 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1225 with m
.If(self
.out_norm
):
1226 with m
.If(self
.in_accept
):
1231 m
.d
.sync
+= self
.ack
.eq(0)
1233 # normalisation not required (or done).
1235 m
.d
.sync
+= self
.ack
.eq(1)
1236 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalise, round, correct and pack as one combinatorial chain.

        Acts both as the FSM state "normalise_1" and as an
        UnbufferedPipeline whose stage is the object itself.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        # both widths are set before the pipeline constructor runs,
        # as in the original ordering
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        """ same input spec as FPNorm1ModSingle
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ same output spec as FPPackMod
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_types = (FPNorm1ModSingle, FPRoundMod,
                       FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_types]
        StageChain(stages).setup(m, i)

        pmod = stages[-1]
        self.out_z = pmod.ospec()
        self.o = pmod.o

    def process(self, i):
        """ the chain is already wired by setup(): return its final output
        """
        return self.o

    def action(self, m):
        """ register the chain output, then move on to "pack_put_z"
        """
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
class FPRoundData:
    """ Data record passed between the round, corrections and pack stages.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)            # number being processed
        self.out_do_z = Signal(reset_less=True)     # use oz instead of z
        self.oz = Signal(width, reset_less=True)    # bypass value
        self.mid = Signal(id_wid, reset_less=True)  # identifier

    def eq(self, i):
        """ returns the assignments that copy every field of i into self
        """
        copy_z = self.z.eq(i.z)
        copy_flag = self.out_do_z.eq(i.out_do_z)
        copy_oz = self.oz.eq(i.oz)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_flag, copy_oz, copy_mid]
class FPRoundMod:
    """ Combinatorial rounding stage: FPNorm1Data in, FPRoundData out.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ the result is wired in elaborate(): just return the output
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as a submodule and connects its input
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z
        # start from a straight copy, then override below
        m.d.comb += dst.eq(src) # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                m.d.comb += dst.z.m.eq(src.z.m + 1) # mantissa up
                with m.If(src.z.m == src.z.m1s): # all 1s
                    m.d.comb += dst.z.e.eq(src.z.e + 1) # exponent up
        return m
class FPRound(FPState):
    """ FSM state "round": registers the output of FPRoundMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod.__init__ takes (width, id_wid); the id width used to
        # be left out, which fails at construction time.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec of the wrapped rounding module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec of the wrapped rounding module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ unconditional transition to the corrections state
        """
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial corrections stage: FPRoundData in, FPRoundData out.

        Passes the input through, replacing the exponent with the input's
        N127 value when the number is flagged as denormalised.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ the result is wired in elaborate(): just return the output
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        # start from a straight copy, then override the exponent if needed
        m.d.comb += dst.eq(src) # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of FPCorrectionsMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ takes (width, id_wid); the id width
        # used to be left out, which fails at construction time.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec of the wrapped corrections module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec of the wrapped corrections module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ unconditional transition to the pack state
        """
        m.next = "pack"
class FPPackData:
    """ Output record of the pack stage: packed value plus identifier.
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)     # packed result
        self.mid = Signal(id_wid, reset_less=True)  # identifier

    def eq(self, i):
        """ returns the assignments that copy i into self
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ signals making up this record
        """
        return [self.z, self.mid]
class FPPackMod:
    """ Combinatorial pack stage: FPRoundData in, FPPackData out.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ the result is wired in elaborate(): just return the output
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = src.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            # normal path: build the result from sign/exponent/mantissa
            with m.If(src.z.is_overflowed):
                m.d.comb += z.inf(src.z.s)
            with m.Else():
                m.d.comb += z.create(src.z.s, src.z.e, src.z.m)
        with m.Else():
            # bypass path: the value was already decided upstream
            m.d.comb += z.v.eq(src.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
class FPPack(FPState):
    """ FSM state "pack": registers the output of FPPackMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ takes (width, id_wid); the id width used to
        # be left out, which fails at construction time.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec of the wrapped pack module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec of the wrapped pack module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod publishes its result on "o" (an FPPackData whose
        # fields are z and mid); the old code read mod.out_z.v and wrote
        # out_z.v, neither of which exists.
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ unconditional transition to the output state
        """
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z.z with an stb/ack
        handshake, then moves to to_state ("get_ops" by default).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ drive value (and id, if any); drop stb once acknowledged
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        port = self.out_z.z
        m.d.sync += port.v.eq(self.in_z)
        with m.If(port.stb & port.ack):
            # receiver has taken the value
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents a result on out_zs[in_mid] with an stb/ack
        handshake, then moves to to_state ("get_ops" by default).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ drive the selected output; drop stb once acknowledged
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # output port selected by the identifier
        port = self.out_zs[self.in_mid]
        m.d.comb += outz_stb.eq(port.stb)
        m.d.comb += outz_ack.eq(port.ack)
        m.d.sync += port.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # receiver has taken the value
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
class FPOpData:
    """ Output record of the adder: an FPOp result plus identifier.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)                        # result operand
        self.mid = Signal(id_wid, reset_less=True)  # identifier

    def eq(self, i):
        """ returns the assignments that copy i into self
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ objects making up this record
        """
        return [self.z, self.mid]
class FPADDBaseMod:
    """ FSM-based floating-point adder: collects states, then emits one
        FSM state per collected stage.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ records a state for the FSM and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        build = (self.get_compact_fragment if self.compact
                 else self.get_longer_fragment)
        build(m, platform)

        # one FSM state per stage collected by the builder above
        with m.FSM() as fsm:
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ one FSM state per individual stage (non-compact form)

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, which __init__ does not set, and several setup()
            calls pass more arguments than the setup() methods visible in
            this file accept - looks stale, confirm before using
            compact=False.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single- or multi-cycle, wired identically
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: packed result, and special-case result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ reduced form: four chained stages plus one output state
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        StageChain(chainlist, specallocate=True).setup(m, self.i)

        # each chained stage is also an FSM state
        for stage in chainlist:
            self.add_state(stage)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod and handshakes with it.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec of the wrapped adder module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec of the wrapped adder module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires this state to the wrapped module

            * i: input record, copied combinatorially into self.i
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM behaviour while in the fpadd state
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge and move to the output state.  The id and
            # value are already forwarded combinatorially by setup(); the
            # statements that used to follow here read attributes
            # (in_mid, out_mid, out_z) that this class never defines, so
            # they have been removed.
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
class FPADDBasePipe(ControlBase):
    """ Three pipeline stages connected in sequence:
        special-cases/denorm, align/add, normalise-to-pack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are created once, applied in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in: num_rows inputs of FPADDBaseData through a pass-through
        stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # spec factory: a fresh FPADDBaseData on each call
        make_spec = lambda: FPADDBaseData(width, id_wid)
        passthrough = PassThroughStage(make_spec)
        PriorityCombMuxInPipe.__init__(self, passthrough,
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out: num_rows outputs of FPPackData through a pass-through
        stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # spec factory: a fresh FPPackData on each call
        make_spec = lambda: FPPackData(width, id_wid)
        passthrough = PassThroughStage(make_spec)
        CombMuxOutPipe.__init__(self, passthrough, n_len=self.num_rows)
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's own in/out are simply the in/out of the end pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder -> fan-out
        fanin_to_add = self.inpipe.n.connect_to_next(self.fpadd.p)
        add_to_fanout = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += fanin_to_add
        m.d.comb += add_to_fanout

        return m

    def ports(self):
        """ ports gathered from the fan-in and fan-out pipes
        """
        return self._ports
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand pairs / result slots to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # one (in_a, in_b) operand pair per slot
        operand_pairs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # one result port per slot
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ records a state for the FSM and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up here
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                 o.mid, "get_a"))

        # one FSM state per stage collected above
        with m.FSM() as fsm:
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
if __name__ == "__main__":
    if True:
        # reservation-station FPADD: first operand pair, first result, ids
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        alu_ports = (alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=alu_ports)
    else:
        # alternative entry point, currently disabled
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        alu_ports = ([alu.in_a, alu.in_b] +
                     alu.in_t.ports() +
                     alu.out_z.ports() +
                     [alu.in_mid, alu.out_mid])
        main(alu, ports=alu_ports)


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())