1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 #from fpbase import FPNumShiftMultiRight
20 class FPState(FPBase
):
def __init__(self, state_from):
    """ records the state identifier supplied by the caller

        state_from: stored unchanged as self.state_from.  Subclasses
                    in this file pass string names such as
                    "special_cases", "align" or "normalise_1".
    """
    self.state_from = state_from
24 def set_inputs(self
, inputs
):
26 for k
,v
in inputs
.items():
29 def set_outputs(self
, outputs
):
30 self
.outputs
= outputs
31 for k
,v
in outputs
.items():
def __init__(self, width):
    """ creates the operand input and the two output signals

        width: passed to FPOp for the input operand, and used as the
               bit-width of out_op
    """
    # input operand: elaborate() reads its ack, stb and v members
    self.in_op = FPOp(width)
    # output value, width bits wide
    self.out_op = Signal(width)
    # single-bit flag; elaborate() drives it with (in_op.ack & in_op.stb)
    self.out_decode = Signal(reset_less=True)
41 def elaborate(self
, platform
):
43 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
44 m
.submodules
.get_op_in
= self
.in_op
45 #m.submodules.get_op_out = self.out_op
46 with m
.If(self
.out_decode
):
48 self
.out_op
.eq(self
.in_op
.v
),
53 class FPGetOp(FPState
):
57 def __init__(self
, in_state
, out_state
, in_op
, width
):
58 FPState
.__init
__(self
, in_state
)
59 self
.out_state
= out_state
60 self
.mod
= FPGetOpMod(width
)
62 self
.out_op
= Signal(width
)
63 self
.out_decode
= Signal(reset_less
=True)
65 def setup(self
, m
, in_op
):
66 """ links module to inputs and outputs
68 setattr(m
.submodules
, self
.state_from
, self
.mod
)
69 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
70 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
73 with m
.If(self
.out_decode
):
74 m
.next
= self
.out_state
77 self
.out_op
.eq(self
.mod
.out_op
)
80 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ creates two FPNumBase operands (a and b) plus a mid signal

        width:   passed to FPNumBase for both operands
        id_wid:  bit-width of the mid signal
        m_extra: forwarded as the second FPNumBase argument for both
                 operands (default True)
    """
    # both operands are built with exactly the same parameters
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # id that accompanies the operands
    self.mid = Signal(id_wid, reset_less=True)
91 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
94 return [self
.a
, self
.b
, self
.mid
]
99 def __init__(self
, width
, id_wid
):
102 self
.a
= Signal(width
)
103 self
.b
= Signal(width
)
104 self
.mid
= Signal(id_wid
, reset_less
=True)
107 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
110 return [self
.a
, self
.b
, self
.mid
]
113 class FPGet2OpMod(Trigger
):
114 def __init__(self
, width
, id_wid
):
115 Trigger
.__init
__(self
)
118 self
.i
= self
.ispec()
119 self
.o
= self
.ospec()
122 return FPADDBaseData(self
.width
, self
.id_wid
)
125 return FPADDBaseData(self
.width
, self
.id_wid
)
127 def process(self
, i
):
130 def elaborate(self
, platform
):
131 m
= Trigger
.elaborate(self
, platform
)
132 with m
.If(self
.trigger
):
139 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ sets up the two-operand "get" state

        in_state:  name handed to FPState.__init__ (this state)
        out_state: stored as self.out_state
        width:     forwarded to FPGet2OpMod
        id_wid:    forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # the module doing the work; this state's output record has the
    # shape given by the module's ospec()
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # single-bit, reset-less handshake/decode signals
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
152 def setup(self
, m
, i
, in_stb
, in_ack
):
153 """ links module to inputs and outputs
155 m
.submodules
.get_ops
= self
.mod
156 m
.d
.comb
+= self
.mod
.i
.eq(i
)
157 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
158 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
159 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
160 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
163 with m
.If(self
.out_decode
):
164 m
.next
= self
.out_state
167 self
.o
.eq(self
.mod
.o
),
170 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid):
    """ creates the record members: operands a and b, result z,
        the out_do_z flag with its associated oz value, and mid

        width:  passed to FPNumBase / FPNumOut; also the bit-width of oz
        id_wid: bit-width of mid
    """
    # operands: second FPNumBase argument is True for both
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    # result number: second FPNumOut argument is False
    self.z = FPNumOut(width, False)
    # width-bit value and its single-bit companion flag
    self.oz = Signal(width, reset_less=True)
    self.out_do_z = Signal(reset_less=True)
    # id carried along with the data
    self.mid = Signal(id_wid, reset_less=True)
184 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
185 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
188 class FPAddSpecialCasesMod
:
189 """ special cases: NaNs, infs, zeros, denormalised
190 NOTE: some of these are unique to add. see "Special Operations"
191 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
194 def __init__(self
, width
, id_wid
):
197 self
.i
= self
.ispec()
198 self
.o
= self
.ospec()
201 return FPADDBaseData(self
.width
, self
.id_wid
)
204 return FPSCData(self
.width
, self
.id_wid
)
206 def setup(self
, m
, i
):
207 """ links module to inputs and outputs
209 m
.submodules
.specialcases
= self
210 m
.d
.comb
+= self
.i
.eq(i
)
212 def process(self
, i
):
215 def elaborate(self
, platform
):
218 m
.submodules
.sc_out_z
= self
.o
.z
220 # decode: XXX really should move to separate stage
221 a1
= FPNumIn(None, self
.width
)
222 b1
= FPNumIn(None, self
.width
)
223 m
.submodules
.sc_decode_a
= a1
224 m
.submodules
.sc_decode_b
= b1
225 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
230 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
233 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
235 # if a is NaN or b is NaN return NaN
236 with m
.If(a1
.is_nan | b1
.is_nan
):
237 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
238 m
.d
.comb
+= self
.o
.z
.nan(0)
240 # XXX WEIRDNESS for FP16 non-canonical NaN handling
243 ## if a is zero and b is NaN return -b
244 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
245 # m.d.comb += self.o.out_do_z.eq(1)
246 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
248 ## if b is zero and a is NaN return -a
249 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
250 # m.d.comb += self.o.out_do_z.eq(1)
251 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
253 ## if a is -zero and b is NaN return -b
254 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
255 # m.d.comb += self.o.out_do_z.eq(1)
256 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
258 ## if b is -zero and a is NaN return -a
259 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
260 # m.d.comb += self.o.out_do_z.eq(1)
261 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
263 # if a is inf return inf (or NaN)
264 with m
.Elif(a1
.is_inf
):
265 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
266 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
267 # if a is inf and signs don't match return NaN
268 with m
.If(b1
.exp_128
& s_nomatch
):
269 m
.d
.comb
+= self
.o
.z
.nan(0)
271 # if b is inf return inf
272 with m
.Elif(b1
.is_inf
):
273 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
274 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
276 # if a is zero and b zero return signed-a/b
277 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
278 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
279 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
281 # if a is zero return b
282 with m
.Elif(a1
.is_zero
):
283 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
284 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
286 # if b is zero return a
287 with m
.Elif(b1
.is_zero
):
288 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
289 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
291 # if a equal to -b return zero (+ve zero)
292 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
293 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
294 m
.d
.comb
+= self
.o
.z
.zero(0)
296 # Denormalised Number checks next, so pass a/b data through
298 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
299 m
.d
.comb
+= self
.o
.a
.eq(a1
)
300 m
.d
.comb
+= self
.o
.b
.eq(b1
)
302 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
303 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
309 def __init__(self
, id_wid
):
312 self
.in_mid
= Signal(id_wid
, reset_less
=True)
313 self
.out_mid
= Signal(id_wid
, reset_less
=True)
319 if self
.id_wid
is not None:
320 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
323 class FPAddSpecialCases(FPState
):
324 """ special cases: NaNs, infs, zeros, denormalised
325 NOTE: some of these are unique to add. see "Special Operations"
326 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ sets up the "special_cases" state

        width:  forwarded to FPAddSpecialCasesMod
        id_wid: forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ requires (width, id_wid): calling it
    # with width alone raised TypeError, so id_wid is now passed through
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output record shaped by the module's ospec()
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
335 def setup(self
, m
, i
):
336 """ links module to inputs and outputs
338 self
.mod
.setup(m
, i
, self
.out_do_z
)
339 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
340 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
344 with m
.If(self
.out_do_z
):
347 m
.next
= "denormalise"
350 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
351 """ special cases: NaNs, infs, zeros, denormalised
352 NOTE: some of these are unique to add. see "Special Operations"
353 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ builds the special-cases and de-normalise modules and sets this
        object up as a pipeline that is its own stage

        width:  forwarded to both FPAddSpecialCasesMod and FPAddDeNormMod
        id_wid: forwarded to both FPAddSpecialCasesMod and FPAddDeNormMod
    """
    FPState.__init__(self, "special_cases")

    # ispec()/ospec() of this class delegate to smod/dmod, so both are
    # created first (presumably UnbufferedPipeline.__init__ calls them
    # on the stage it is given - confirm in singlepipe)
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.dmod = FPAddDeNormMod(width, id_wid)

    # the pipe is its own stage: self is passed as the stage argument
    UnbufferedPipeline.__init__(self, self)
    self.o = self.ospec()
364 return self
.smod
.ispec()
367 return self
.dmod
.ospec()
369 def setup(self
, m
, i
):
370 """ links module to inputs and outputs
372 # these only needed for break-out (early-out)
373 # out_z = self.smod.ospec()
374 # out_do_z = Signal(reset_less=True)
375 self
.smod
.setup(m
, i
)
376 self
.dmod
.setup(m
, self
.smod
.o
)
377 #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
379 # out_do_z=True, only needed for early-out (split pipeline)
380 #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
381 #m.d.sync += out_z.mid.eq(self.smod.o.mid) # (and mid)
384 m
.d
.comb
+= self
.o
.eq(self
.dmod
.o
)
386 def process(self
, i
):
390 #with m.If(self.out_do_z):
396 class FPAddDeNormMod(FPState
):
398 def __init__(self
, width
, id_wid
):
401 self
.i
= self
.ispec()
402 self
.o
= self
.ospec()
405 return FPSCData(self
.width
, self
.id_wid
)
408 return FPSCData(self
.width
, self
.id_wid
)
410 def setup(self
, m
, i
):
411 """ links module to inputs and outputs
413 m
.submodules
.denormalise
= self
414 m
.d
.comb
+= self
.i
.eq(i
)
416 def elaborate(self
, platform
):
418 m
.submodules
.denorm_in_a
= self
.i
.a
419 m
.submodules
.denorm_in_b
= self
.i
.b
420 m
.submodules
.denorm_out_a
= self
.o
.a
421 m
.submodules
.denorm_out_b
= self
.o
.b
423 with m
.If(~self
.i
.out_do_z
):
424 # XXX hmmm, don't like repeating identical code
425 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
426 with m
.If(self
.i
.a
.exp_n127
):
427 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
429 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
431 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
432 with m
.If(self
.i
.b
.exp_n127
):
433 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
435 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
437 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
438 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
439 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
440 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
445 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "denormalise" state

        width:  forwarded to FPAddDeNormMod and to the two FPNumBase
                outputs
        id_wid: forwarded to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ requires (width, id_wid): calling it with
    # width alone raised TypeError, so id_wid is now passed through
    self.mod = FPAddDeNormMod(width, id_wid)
    # outputs of this state
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
453 def setup(self
, m
, i
):
454 """ links module to inputs and outputs
458 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
459 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
462 # Denormalised Number checks
466 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the in/out operand pairs and the exp_eq flag

        width: passed to every FPNumBase / FPNumIn created here
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs: FPNumIn, with None as the first constructor argument
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # single-bit flag; elaborate() sets it to 0 by default and to 1
    # in its "exponents equal" branch
    self.exp_eq = Signal(reset_less=True)
475 def elaborate(self
, platform
):
476 # This one however (single-cycle) will do the shift
481 m
.submodules
.align_in_a
= self
.in_a
482 m
.submodules
.align_in_b
= self
.in_b
483 m
.submodules
.align_out_a
= self
.out_a
484 m
.submodules
.align_out_b
= self
.out_b
486 # NOTE: this does *not* do single-cycle multi-shifting,
487 # it *STAYS* in the align state until exponents match
489 # exponent of a greater than b: shift b down
490 m
.d
.comb
+= self
.exp_eq
.eq(0)
491 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
492 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
493 agtb
= Signal(reset_less
=True)
494 altb
= Signal(reset_less
=True)
495 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
496 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
498 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
499 # exponent of b greater than a: shift a down
501 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
502 # exponents equal: move to next stage.
504 m
.d
.comb
+= self
.exp_eq
.eq(1)
508 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "align" state around an FPAddAlignMultiMod

        width:  forwarded to FPAddAlignMultiMod and to the FPNumIn
                outputs
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)

    # outputs of this state (setup() copies the module's out_a/out_b
    # into them in the sync domain)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # setup() connects this to the module's exp_eq
    self.exp_eq = Signal(reset_less=True)
517 def setup(self
, m
, in_a
, in_b
):
518 """ links module to inputs and outputs
520 m
.submodules
.align
= self
.mod
521 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
522 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
523 #m.d.comb += self.out_a.eq(self.mod.out_a)
524 #m.d.comb += self.out_b.eq(self.mod.out_b)
525 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
526 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
527 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
530 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ creates the record members: FPNumIn operands a and b, result z,
        the out_do_z flag with its associated oz value, and mid

        width:  passed to FPNumIn / FPNumOut; also the bit-width of oz
        id_wid: bit-width of mid
    """
    # operands: FPNumIn, with None as the first constructor argument
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # result number: second FPNumOut argument is False
    self.z = FPNumOut(width, False)
    # single-bit flag and its width-bit companion value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # id carried along with the data
    self.mid = Signal(id_wid, reset_less=True)
545 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
546 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
549 class FPAddAlignSingleMod
:
551 def __init__(self
, width
, id_wid
):
554 self
.i
= self
.ispec()
555 self
.o
= self
.ospec()
558 return FPSCData(self
.width
, self
.id_wid
)
561 return FPNumIn2Ops(self
.width
, self
.id_wid
)
563 def process(self
, i
):
566 def setup(self
, m
, i
):
567 """ links module to inputs and outputs
569 m
.submodules
.align
= self
570 m
.d
.comb
+= self
.i
.eq(i
)
572 def elaborate(self
, platform
):
573 """ Aligns A against B or B against A, depending on which has the
574 greater exponent. This is done in a *single* cycle using
575 variable-width bit-shift
577 the shifter used here is quite expensive in terms of gates.
578 Mux A or B in (and out) into temporaries, as only one of them
579 needs to be aligned against the other
583 m
.submodules
.align_in_a
= self
.i
.a
584 m
.submodules
.align_in_b
= self
.i
.b
585 m
.submodules
.align_out_a
= self
.o
.a
586 m
.submodules
.align_out_b
= self
.o
.b
588 # temporary (muxed) input and output to be shifted
589 t_inp
= FPNumBase(self
.width
)
590 t_out
= FPNumIn(None, self
.width
)
591 espec
= (len(self
.i
.a
.e
), True)
592 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
593 m
.submodules
.align_t_in
= t_inp
594 m
.submodules
.align_t_out
= t_out
595 m
.submodules
.multishift_r
= msr
597 ediff
= Signal(espec
, reset_less
=True)
598 ediffr
= Signal(espec
, reset_less
=True)
599 tdiff
= Signal(espec
, reset_less
=True)
600 elz
= Signal(reset_less
=True)
601 egz
= Signal(reset_less
=True)
603 # connect multi-shifter to t_inp/out mantissa (and tdiff)
604 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
605 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
606 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
607 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
608 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
610 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
611 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
612 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
613 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
615 # default: A-exp == B-exp, A and B untouched (fall through)
616 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
617 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
618 # only one shifter (muxed)
619 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
620 # exponent of a greater than b: shift b down
621 with m
.If(~self
.i
.out_do_z
):
623 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
626 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
628 # exponent of b greater than a: shift a down
630 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
633 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
636 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
637 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
638 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
639 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
644 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "align" state around an FPAddAlignSingleMod

        width:  forwarded to FPAddAlignSingleMod and to the FPNumIn
                outputs
        id_wid: forwarded to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # outputs of this state: FPNumIn, None as first constructor argument
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
652 def setup(self
, m
, i
):
653 """ links module to inputs and outputs
657 # NOTE: could be done as comb
658 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
659 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
665 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
667 def __init__(self
, width
, id_wid
):
668 FPState
.__init
__(self
, "align")
671 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
672 self
.a1o
= self
.ospec()
675 return FPSCData(self
.width
, self
.id_wid
)
676 #return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
679 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
681 def setup(self
, m
, i
):
682 """ links module to inputs and outputs
685 # chain AddAlignSingle, AddStage0 and AddStage1
686 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
687 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
688 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
690 chain
= StageChain([mod
, a0mod
, a1mod
])
693 m
.d
.comb
+= self
.a1o
.eq(a1mod
.o
)
695 def process(self
, i
):
699 m
.next
= "normalise_1"
702 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ creates the record members: z, out_do_z, oz, tot and mid

        width:  passed to FPNumBase for z; also the bit-width of oz
        id_wid: bit-width of mid
    """
    # result number: second FPNumBase argument is False
    self.z = FPNumBase(width, False)
    # single-bit flag and its width-bit companion value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # total: four bits wider than z's mantissa width (z.m_width)
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    # id carried along with the data
    self.mid = Signal(id_wid, reset_less=True)
712 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
713 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
716 class FPAddStage0Mod
:
718 def __init__(self
, width
, id_wid
):
721 self
.i
= self
.ispec()
722 self
.o
= self
.ospec()
725 return FPSCData(self
.width
, self
.id_wid
)
728 return FPAddStage0Data(self
.width
, self
.id_wid
)
730 def process(self
, i
):
733 def setup(self
, m
, i
):
734 """ links module to inputs and outputs
736 m
.submodules
.add0
= self
737 m
.d
.comb
+= self
.i
.eq(i
)
739 def elaborate(self
, platform
):
741 m
.submodules
.add0_in_a
= self
.i
.a
742 m
.submodules
.add0_in_b
= self
.i
.b
743 m
.submodules
.add0_out_z
= self
.o
.z
745 # store intermediate tests (and zero-extended mantissas)
746 seq
= Signal(reset_less
=True)
747 mge
= Signal(reset_less
=True)
748 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
749 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
750 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
751 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
752 am0
.eq(Cat(self
.i
.a
.m
, 0)),
753 bm0
.eq(Cat(self
.i
.b
.m
, 0))
755 # same-sign (both negative or both positive) add mantissas
756 with m
.If(~self
.i
.out_do_z
):
757 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
760 self
.o
.tot
.eq(am0
+ bm0
),
761 self
.o
.z
.s
.eq(self
.i
.a
.s
)
763 # a mantissa greater than b, use a
766 self
.o
.tot
.eq(am0
- bm0
),
767 self
.o
.z
.s
.eq(self
.i
.a
.s
)
769 # b mantissa greater than a, use b
772 self
.o
.tot
.eq(bm0
- am0
),
773 self
.o
.z
.s
.eq(self
.i
.b
.s
)
776 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
777 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
778 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
782 class FPAddStage0(FPState
):
783 """ First stage of add. covers same-sign (add) and subtract
784 special-casing when mantissas are greater or equal, to
785 give greatest accuracy.
def __init__(self, width, id_wid):
    """ sets up the "add_0" state

        width:  forwarded to FPAddStage0Mod
        id_wid: forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ requires (width, id_wid): calling it with
    # width alone raised TypeError, so id_wid is now passed through
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record shaped by the module's ospec()
    self.o = self.mod.ospec()
793 def setup(self
, m
, i
):
794 """ links module to inputs and outputs
798 # NOTE: these could be done as combinatorial (merge add0+add1)
799 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
805 class FPAddStage1Data
:
807 def __init__(self
, width
, id_wid
):
808 self
.z
= FPNumBase(width
, False)
809 self
.out_do_z
= Signal(reset_less
=True)
810 self
.oz
= Signal(width
, reset_less
=True)
812 self
.mid
= Signal(id_wid
, reset_less
=True)
815 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
816 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
820 class FPAddStage1Mod(FPState
):
821 """ Second stage of add: preparation for normalisation.
822 detects when tot sum is too big (tot[27] is kinda a carry bit)
825 def __init__(self
, width
, id_wid
):
828 self
.i
= self
.ispec()
829 self
.o
= self
.ospec()
832 return FPAddStage0Data(self
.width
, self
.id_wid
)
835 return FPAddStage1Data(self
.width
, self
.id_wid
)
837 def process(self
, i
):
840 def setup(self
, m
, i
):
841 """ links module to inputs and outputs
843 m
.submodules
.add1
= self
844 m
.submodules
.add1_out_overflow
= self
.o
.of
846 m
.d
.comb
+= self
.i
.eq(i
)
848 def elaborate(self
, platform
):
850 #m.submodules.norm1_in_overflow = self.in_of
851 #m.submodules.norm1_out_overflow = self.out_of
852 #m.submodules.norm1_in_z = self.in_z
853 #m.submodules.norm1_out_z = self.out_z
854 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
855 # tot[-1] (MSB) gets set when the sum overflows. shift result down
856 with m
.If(~self
.i
.out_do_z
):
857 with m
.If(self
.i
.tot
[-1]):
859 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
860 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
861 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
862 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
863 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
864 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
866 # tot[-1] (MSB) zero case
869 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
870 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
871 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
872 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
873 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
876 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
877 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
878 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
883 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "add_1" state

        width:  forwarded to FPAddStage1Mod and to the out_z FPNumBase
        id_wid: forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ requires (width, id_wid): calling it with
    # width alone raised TypeError, so id_wid is now passed through
    self.mod = FPAddStage1Mod(width, id_wid)
    # outputs of this state
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe signal; the only Signal here created with default arguments
    self.norm_stb = Signal()
892 def setup(self
, m
, i
):
893 """ links module to inputs and outputs
897 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
899 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
900 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
901 m
.d
.sync
+= self
.norm_stb
.eq(1)
904 m
.next
= "normalise_1"
907 class FPNormaliseModSingle
:
909 def __init__(self
, width
):
911 self
.in_z
= self
.ispec()
912 self
.out_z
= self
.ospec()
915 return FPNumBase(self
.width
, False)
918 return FPNumBase(self
.width
, False)
920 def setup(self
, m
, i
):
921 """ links module to inputs and outputs
923 m
.submodules
.normalise
= self
924 m
.d
.comb
+= self
.i
.eq(i
)
926 def elaborate(self
, platform
):
929 mwid
= self
.out_z
.m_width
+2
930 pe
= PriorityEncoder(mwid
)
931 m
.submodules
.norm_pe
= pe
933 m
.submodules
.norm1_out_z
= self
.out_z
934 m
.submodules
.norm1_in_z
= self
.in_z
936 in_z
= FPNumBase(self
.width
, False)
938 m
.submodules
.norm1_insel_z
= in_z
939 m
.submodules
.norm1_insel_overflow
= in_of
941 espec
= (len(in_z
.e
), True)
942 ediff_n126
= Signal(espec
, reset_less
=True)
943 msr
= MultiShiftRMerge(mwid
, espec
)
944 m
.submodules
.multishift_r
= msr
946 m
.d
.comb
+= in_z
.eq(self
.in_z
)
947 m
.d
.comb
+= in_of
.eq(self
.in_of
)
948 # initialise out from in (overridden below)
949 m
.d
.comb
+= self
.out_z
.eq(in_z
)
950 m
.d
.comb
+= self
.out_of
.eq(in_of
)
951 # normalisation decrease condition
952 decrease
= Signal(reset_less
=True)
953 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
956 # *sigh* not entirely obvious: count leading zeros (clz)
957 # with a PriorityEncoder: to find from the MSB
958 # we reverse the order of the bits.
959 temp_m
= Signal(mwid
, reset_less
=True)
960 temp_s
= Signal(mwid
+1, reset_less
=True)
961 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
963 # cat round and guard bits back into the mantissa
964 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
965 pe
.i
.eq(temp_m
[::-1]), # inverted
966 clz
.eq(pe
.o
), # count zeros from MSB down
967 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
968 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
969 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ creates the record members: roundz, z, out_do_z, oz and mid

        width:  passed to FPNumBase for z; also the bit-width of oz
        id_wid: bit-width of mid
    """
    # single-bit flag
    self.roundz = Signal(reset_less=True)
    # result number: second FPNumBase argument is False
    self.z = FPNumBase(width, False)
    # single-bit flag and its width-bit companion value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # id carried along with the data
    self.mid = Signal(id_wid, reset_less=True)
984 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
985 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
988 class FPNorm1ModSingle
:
990 def __init__(self
, width
, id_wid
):
993 self
.i
= self
.ispec()
994 self
.o
= self
.ospec()
997 return FPAddStage1Data(self
.width
, self
.id_wid
)
1000 return FPNorm1Data(self
.width
, self
.id_wid
)
1002 def setup(self
, m
, i
):
1003 """ links module to inputs and outputs
1005 m
.submodules
.normalise_1
= self
1006 m
.d
.comb
+= self
.i
.eq(i
)
1008 def process(self
, i
):
1011 def elaborate(self
, platform
):
1014 mwid
= self
.o
.z
.m_width
+2
1015 pe
= PriorityEncoder(mwid
)
1016 m
.submodules
.norm_pe
= pe
1019 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1021 m
.submodules
.norm1_out_z
= self
.o
.z
1022 m
.submodules
.norm1_out_overflow
= of
1023 m
.submodules
.norm1_in_z
= self
.i
.z
1024 m
.submodules
.norm1_in_overflow
= self
.i
.of
1027 m
.submodules
.norm1_insel_z
= i
.z
1028 m
.submodules
.norm1_insel_overflow
= i
.of
1030 espec
= (len(i
.z
.e
), True)
1031 ediff_n126
= Signal(espec
, reset_less
=True)
1032 msr
= MultiShiftRMerge(mwid
, espec
)
1033 m
.submodules
.multishift_r
= msr
1035 m
.d
.comb
+= i
.eq(self
.i
)
1036 # initialise out from in (overridden below)
1037 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1038 m
.d
.comb
+= of
.eq(i
.of
)
1039 # normalisation increase/decrease conditions
1040 decrease
= Signal(reset_less
=True)
1041 increase
= Signal(reset_less
=True)
1042 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1043 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1045 with m
.If(~self
.i
.out_do_z
):
1046 with m
.If(decrease
):
1047 # *sigh* not entirely obvious: count leading zeros (clz)
1048 # with a PriorityEncoder: to find from the MSB
1049 # we reverse the order of the bits.
1050 temp_m
= Signal(mwid
, reset_less
=True)
1051 temp_s
= Signal(mwid
+1, reset_less
=True)
1052 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1053 # make sure that the amount to decrease by does NOT
1054 # go below the minimum non-INF/NaN exponent
1055 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1058 # cat round and guard bits back into the mantissa
1059 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1060 pe
.i
.eq(temp_m
[::-1]), # inverted
1061 clz
.eq(limclz
), # count zeros from MSB down
1062 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1063 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1064 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1065 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1066 # overflow in bits 0..1: got shifted too (leave sticky)
1067 of
.guard
.eq(temp_s
[1]), # guard
1068 of
.round_bit
.eq(temp_s
[0]), # round
1071 with m
.Elif(increase
):
1072 temp_m
= Signal(mwid
+1, reset_less
=True)
1074 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1076 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1077 # connect multi-shifter to inp/out mantissa (and ediff)
1079 msr
.diff
.eq(ediff_n126
),
1080 self
.o
.z
.m
.eq(msr
.m
[3:]),
1081 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1082 # overflow in bits 0..1: got shifted too (leave sticky)
1083 of
.guard
.eq(temp_s
[2]), # guard
1084 of
.round_bit
.eq(temp_s
[1]), # round
1085 of
.sticky
.eq(temp_s
[0]), # sticky
1086 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1089 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1090 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1091 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
1096 class FPNorm1ModMulti
:
1098 def __init__(self
, width
, single_cycle
=True):
1100 self
.in_select
= Signal(reset_less
=True)
1101 self
.in_z
= FPNumBase(width
, False)
1102 self
.in_of
= Overflow()
1103 self
.temp_z
= FPNumBase(width
, False)
1104 self
.temp_of
= Overflow()
1105 self
.out_z
= FPNumBase(width
, False)
1106 self
.out_of
= Overflow()
1108 def elaborate(self
, platform
):
1111 m
.submodules
.norm1_out_z
= self
.out_z
1112 m
.submodules
.norm1_out_overflow
= self
.out_of
1113 m
.submodules
.norm1_temp_z
= self
.temp_z
1114 m
.submodules
.norm1_temp_of
= self
.temp_of
1115 m
.submodules
.norm1_in_z
= self
.in_z
1116 m
.submodules
.norm1_in_overflow
= self
.in_of
1118 in_z
= FPNumBase(self
.width
, False)
1120 m
.submodules
.norm1_insel_z
= in_z
1121 m
.submodules
.norm1_insel_overflow
= in_of
1123 # select which of temp or in z/of to use
1124 with m
.If(self
.in_select
):
1125 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1126 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1128 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1129 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1130 # initialise out from in (overridden below)
1131 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1132 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1133 # normalisation increase/decrease conditions
1134 decrease
= Signal(reset_less
=True)
1135 increase
= Signal(reset_less
=True)
1136 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1137 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1138 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1140 with m
.If(decrease
):
1142 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1143 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1144 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1145 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1146 self
.out_of
.round_bit
.eq(0), # reset round bit
1147 self
.out_of
.m0
.eq(in_of
.guard
),
1150 with m
.Elif(increase
):
1152 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1153 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1154 self
.out_of
.guard
.eq(in_z
.m
[0]),
1155 self
.out_of
.m0
.eq(in_z
.m
[1]),
1156 self
.out_of
.round_bit
.eq(in_of
.guard
),
1157 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1163 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ sets up the "normalise_1" state around an FPNorm1ModSingle

        width:        forwarded to FPNorm1ModSingle and to the out_z
                      FPNumBase
        id_wid:       forwarded to FPNorm1ModSingle
        single_cycle: accepted but not used by this constructor
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ requires (width, id_wid): calling it
    # with width alone raised TypeError, so id_wid is now passed through
    self.mod = FPNorm1ModSingle(width, id_wid)
    # this class's ospec() delegates to self.mod, so mod is created first
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1173 return self
.mod
.ispec()
1176 return self
.mod
.ospec()
1178 def setup(self
, m
, i
):
1179 """ links module to inputs and outputs
1181 self
.mod
.setup(m
, i
)
1183 def action(self
, m
):
1187 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ creates the "normalise_1" FSM state (multi-cycle variant)

        * width: passed to FPNorm1ModMulti and to the FPNumBase records
        * id_wid: accepted but not read by this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)
    # handshake: setup() drives stb from norm_stb, and action()
    # computes in_accept as (~ack) & stb
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)
    # temp_z / temp_of are sync-loaded in action() from out_z and
    # from the module's out_of
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()
    self.out_z = FPNumBase(width)
    # sync-loaded in action() from the module's out_of.roundz
    self.out_roundz = Signal(reset_less=True)
1201 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1202 """ links module to inputs and outputs
1204 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1205 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1206 self
.out_z
, self
.out_norm
)
1208 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1209 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1211 def action(self
, m
):
1212 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1213 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1214 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1215 with m
.If(self
.out_norm
):
1216 with m
.If(self
.in_accept
):
1221 m
.d
.sync
+= self
.ack
.eq(0)
1223 # normalisation not required (or done).
1225 m
.d
.sync
+= self
.ack
.eq(1)
1226 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1229 class FPNormToPack(FPState
, UnbufferedPipeline
):
1231 def __init__(self
, width
, id_wid
):
1232 FPState
.__init
__(self
, "normalise_1")
1233 self
.id_wid
= id_wid
1235 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
1238 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1241 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1243 def setup(self
, m
, i
):
1244 """ links module to inputs and outputs
1247 # Normalisation, Rounding Corrections, Pack - in a chain
1248 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1249 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1250 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1251 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1252 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1254 self
.out_z
= pmod
.ospec()
1256 m
.d
.comb
+= self
.out_z
.mid
.eq(pmod
.o
.mid
)
1257 m
.d
.comb
+= self
.out_z
.z
.eq(pmod
.o
.z
) # outputs packed result
1259 def process(self
, i
):
def action(self, m):
    """ selects "pack_put_z" as the next FSM state
    """
    m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ creates the fields of this data record

        * width: passed to FPNumBase for z; bit-width of oz
        * id_wid: bit-width of mid
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)  # single-bit flag
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1275 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1281 def __init__(self
, width
, id_wid
):
1283 self
.id_wid
= id_wid
1284 self
.i
= self
.ispec()
1285 self
.out_z
= self
.ospec()
1288 return FPNorm1Data(self
.width
, self
.id_wid
)
1291 return FPRoundData(self
.width
, self
.id_wid
)
1293 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this object as submodule "roundz" of m and
        combinatorially copies i into self.i
    """
    m.submodules.roundz = self
    m.d.comb += self.i.eq(i)
1300 def elaborate(self
, platform
):
1302 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1303 with m
.If(~self
.i
.out_do_z
):
1304 with m
.If(self
.i
.roundz
):
1305 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
1306 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1307 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1312 class FPRound(FPState
):
def __init__(self, width, id_wid):
    """ creates the "round" FSM state

        * width: passed to FPRoundMod
        * id_wid: passed to FPRoundMod
    """
    FPState.__init__(self, "round")
    # pass id_wid as well as width, matching the
    # FPRoundMod(width, id_wid) call in FPNormToPack.setup
    self.mod = FPRoundMod(width, id_wid)
    # self.mod must exist before ospec() is called
    self.out_z = self.ospec()
1320 return self
.mod
.ispec()
1323 return self
.mod
.ospec()
1325 def setup(self
, m
, i
):
1326 """ links module to inputs and outputs
1328 self
.mod
.setup(m
, i
)
1331 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1332 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """ selects "corrections" as the next FSM state
    """
    m.next = "corrections"
1338 class FPCorrectionsMod
:
1340 def __init__(self
, width
, id_wid
):
1342 self
.id_wid
= id_wid
1343 self
.i
= self
.ispec()
1344 self
.out_z
= self
.ospec()
1347 return FPRoundData(self
.width
, self
.id_wid
)
1350 return FPRoundData(self
.width
, self
.id_wid
)
1352 def process(self
, i
):
1355 def setup(self
, m
, i
):
1356 """ links module to inputs and outputs
1358 m
.submodules
.corrections
= self
1359 m
.d
.comb
+= self
.i
.eq(i
)
1361 def elaborate(self
, platform
):
1363 m
.submodules
.corr_in_z
= self
.i
.z
1364 m
.submodules
.corr_out_z
= self
.out_z
.z
1365 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1366 with m
.If(~self
.i
.out_do_z
):
1367 with m
.If(self
.i
.z
.is_denormalised
):
1368 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1372 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """ creates the "corrections" FSM state

        * width: passed to FPCorrectionsMod
        * id_wid: passed to FPCorrectionsMod
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod.__init__ takes (width, id_wid) with no default
    # for id_wid, so both arguments have to be supplied
    self.mod = FPCorrectionsMod(width, id_wid)
    # self.mod must exist before ospec() is called
    self.out_z = self.ospec()
1380 return self
.mod
.ispec()
1383 return self
.mod
.ospec()
1385 def setup(self
, m
, in_z
):
1386 """ links module to inputs and outputs
1388 self
.mod
.setup(m
, in_z
)
1390 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1391 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1393 def action(self
, m
):
def __init__(self, width, id_wid):
    """ creates the fields of this data record

        * width: bit-width of z
        * id_wid: bit-width of mid
    """
    self.z = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1404 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1407 return [self
.z
, self
.mid
]
1412 def __init__(self
, width
, id_wid
):
1414 self
.id_wid
= id_wid
1415 self
.i
= self
.ispec()
1416 self
.o
= self
.ospec()
1419 return FPRoundData(self
.width
, self
.id_wid
)
1422 return FPPackData(self
.width
, self
.id_wid
)
1424 def process(self
, i
):
1427 def setup(self
, m
, in_z
):
1428 """ links module to inputs and outputs
1430 m
.submodules
.pack
= self
1431 m
.d
.comb
+= self
.i
.eq(in_z
)
1433 def elaborate(self
, platform
):
1435 z
= FPNumOut(self
.width
, False)
1436 m
.submodules
.pack_in_z
= self
.i
.z
1437 m
.submodules
.pack_out_z
= z
1438 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1439 with m
.If(~self
.i
.out_do_z
):
1440 with m
.If(self
.i
.z
.is_overflowed
):
1441 m
.d
.comb
+= z
.inf(self
.i
.z
.s
)
1443 m
.d
.comb
+= z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1445 m
.d
.comb
+= z
.v
.eq(self
.i
.oz
)
1446 m
.d
.comb
+= self
.o
.z
.eq(z
.v
)
1450 class FPPack(FPState
):
def __init__(self, width, id_wid):
    """ creates the "pack" FSM state

        * width: passed to FPPackMod
        * id_wid: passed to FPPackMod
    """
    FPState.__init__(self, "pack")
    # pass id_wid as well as width, matching the
    # FPPackMod(width, id_wid) call in FPNormToPack.setup
    self.mod = FPPackMod(width, id_wid)
    # self.mod must exist before ospec() is called
    self.out_z = self.ospec()
1458 return self
.mod
.ispec()
1461 return self
.mod
.ospec()
1463 def setup(self
, m
, in_z
):
1464 """ links module to inputs and outputs
1466 self
.mod
.setup(m
, in_z
)
1468 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1469 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """ selects "pack_put_z" as the next FSM state
    """
    m.next = "pack_put_z"
1475 class FPPutZ(FPState
):
1477 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1478 FPState
.__init
__(self
, state
)
1479 if to_state
is None:
1480 to_state
= "get_ops"
1481 self
.to_state
= to_state
1484 self
.in_mid
= in_mid
1485 self
.out_mid
= out_mid
1487 def action(self
, m
):
1488 if self
.in_mid
is not None:
1489 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1491 self
.out_z
.z
.v
.eq(self
.in_z
)
1493 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1494 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1495 m
.next
= self
.to_state
1497 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1500 class FPPutZIdx(FPState
):
1502 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1503 FPState
.__init
__(self
, state
)
1504 if to_state
is None:
1505 to_state
= "get_ops"
1506 self
.to_state
= to_state
1508 self
.out_zs
= out_zs
1509 self
.in_mid
= in_mid
1511 def action(self
, m
):
1512 outz_stb
= Signal(reset_less
=True)
1513 outz_ack
= Signal(reset_less
=True)
1514 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1515 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1518 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1520 with m
.If(outz_stb
& outz_ack
):
1521 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1522 m
.next
= self
.to_state
1524 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
def __init__(self, width, id_wid):
    """ creates the fields of this data record

        * width: passed to FPOp for z
        * id_wid: bit-width of mid
    """
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)
1532 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1535 return [self
.z
, self
.mid
]
1540 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1543 * width: bit-width of IEEE754. supported: 16, 32, 64
1544 * id_wid: an identifier that is sync-connected to the input
1545 * single_cycle: True indicates each stage to complete in 1 clock
1546 * compact: True indicates a reduced number of stages
1549 self
.id_wid
= id_wid
1550 self
.single_cycle
= single_cycle
1551 self
.compact
= compact
1553 self
.in_t
= Trigger()
1554 self
.i
= self
.ispec()
1555 self
.o
= self
.ospec()
1560 return FPADDBaseData(self
.width
, self
.id_wid
)
1563 return FPOpData(self
.width
, self
.id_wid
)
1565 def add_state(self
, state
):
1566 self
.states
.append(state
)
1569 def get_fragment(self
, platform
=None):
1570 """ creates the HDL code-fragment for FPAdd
1573 m
.submodules
.out_z
= self
.o
.z
1574 m
.submodules
.in_t
= self
.in_t
1576 self
.get_compact_fragment(m
, platform
)
1578 self
.get_longer_fragment(m
, platform
)
1580 with m
.FSM() as fsm
:
1582 for state
in self
.states
:
1583 with m
.State(state
.state_from
):
1588 def get_longer_fragment(self
, m
, platform
=None):
1590 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1592 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1596 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1597 sc
.setup(m
, a
, b
, self
.in_mid
)
1599 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1600 dn
.setup(m
, a
, b
, sc
.in_mid
)
1602 if self
.single_cycle
:
1603 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1604 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1606 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1607 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1609 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1610 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1612 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1613 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1615 if self
.single_cycle
:
1616 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1617 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1619 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1620 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1622 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1623 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1625 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1626 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1628 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1629 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1631 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1632 pa
.in_mid
, self
.out_mid
))
1634 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1635 pa
.in_mid
, self
.out_mid
))
1637 def get_compact_fragment(self
, m
, platform
=None):
1639 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1640 self
.width
, self
.id_wid
))
1641 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1643 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1646 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1649 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1650 n1
.setup(m
, alm
.a1o
)
1652 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1653 n1
.out_z
.mid
, self
.o
.mid
))
1655 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1656 # sc.o.mid, self.o.mid))
1659 class FPADDBase(FPState
):
1661 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1664 * width: bit-width of IEEE754. supported: 16, 32, 64
1665 * id_wid: an identifier that is sync-connected to the input
1666 * single_cycle: True indicates each stage to complete in 1 clock
1668 FPState
.__init
__(self
, "fpadd")
1670 self
.single_cycle
= single_cycle
1671 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1672 self
.o
= self
.ospec()
1674 self
.in_t
= Trigger()
1675 self
.i
= self
.ispec()
1677 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1678 self
.in_accept
= Signal(reset_less
=True)
1679 self
.add_stb
= Signal(reset_less
=True)
1680 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1683 return self
.mod
.ispec()
1686 return self
.mod
.ospec()
1688 def setup(self
, m
, i
, add_stb
, in_mid
):
1689 m
.d
.comb
+= [self
.i
.eq(i
),
1690 self
.mod
.i
.eq(self
.i
),
1691 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1692 #self.add_stb.eq(add_stb),
1693 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1694 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1695 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1696 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1697 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1698 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1701 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1702 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1703 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1704 #m.d.sync += self.in_t.stb.eq(0)
1706 m
.submodules
.fpadd
= self
.mod
1708 def action(self
, m
):
1710 # in_accept is set on incoming strobe HIGH and ack LOW.
1711 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1713 #with m.If(self.in_t.ack):
1714 # m.d.sync += self.in_t.stb.eq(0)
1715 with m
.If(~self
.z_done
):
1716 # not done: test for accepting an incoming operand pair
1717 with m
.If(self
.in_accept
):
1719 self
.add_ack
.eq(1), # acknowledge receipt...
1720 self
.in_t
.stb
.eq(1), # initiate add
1723 m
.d
.sync
+= [self
.add_ack
.eq(0),
1724 self
.in_t
.stb
.eq(0),
1728 # done: acknowledge, and write out id and value
1729 m
.d
.sync
+= [self
.add_ack
.eq(1),
1736 if self
.in_mid
is not None:
1737 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1740 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1742 # move to output state on detecting z ack
1743 with m
.If(self
.out_z
.trigger
):
1744 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1747 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1750 class FPADDBasePipe(ControlBase
):
1751 def __init__(self
, width
, id_wid
):
1752 ControlBase
.__init
__(self
)
1753 self
.pipe1
= FPAddSpecialCasesDeNorm(width
, id_wid
)
1754 self
.pipe2
= FPAddAlignSingleAdd(width
, id_wid
)
1755 self
.pipe3
= FPNormToPack(width
, id_wid
)
1757 self
._eqs
= self
.connect([self
.pipe1
, self
.pipe2
, self
.pipe3
])
1759 def elaborate(self
, platform
):
1761 m
.submodules
.scnorm
= self
.pipe1
1762 m
.submodules
.addalign
= self
.pipe2
1763 m
.submodules
.normpack
= self
.pipe3
1764 m
.d
.comb
+= self
._eqs
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe wrapped around a PassThroughStage whose
        data spec is FPADDBaseData(width, id_wid).
    """

    def __init__(self, width, id_wid, num_rows):
        """
            * width: passed to FPADDBaseData
            * id_wid: passed to FPADDBaseData
            * num_rows: stored on the instance and used as p_len
        """
        self.num_rows = num_rows

        def make_data():
            # a new FPADDBaseData each time the stage asks for its spec
            return FPADDBaseData(width, id_wid)

        passthru = PassThroughStage(make_data)
        PriorityCombMuxInPipe.__init__(self, passthru,
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe wrapped around a PassThroughStage whose data
        spec is FPPackData(width, id_wid).
    """

    def __init__(self, width, id_wid, num_rows):
        """
            * width: passed to FPPackData
            * id_wid: passed to FPPackData
            * num_rows: stored on the instance and used as n_len
        """
        self.num_rows = num_rows

        def make_data():
            # a new FPPackData each time the stage asks for its spec
            return FPPackData(width, id_wid)

        passthru = PassThroughStage(make_data)
        CombMuxOutPipe.__init__(self, passthru, n_len=self.num_rows)
1784 class FPADDMuxInOut
:
1785 """ Reservation-Station version of FPADD pipeline.
1787 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
1788 * 3-stage adder pipeline
1789 * fan-out on outputs (an array of FPPackData: z,mid)
1791 Fan-in and Fan-out are combinatorial.
1793 def __init__(self
, width
, id_wid
, num_rows
):
1794 self
.num_rows
= num_rows
1795 self
.inpipe
= FPADDInMuxPipe(width
, id_wid
, num_rows
) # fan-in
1796 self
.fpadd
= FPADDBasePipe(width
, id_wid
) # add stage
1797 self
.outpipe
= FPADDMuxOutPipe(width
, id_wid
, num_rows
) # fan-out
1799 self
.p
= self
.inpipe
.p
# kinda annoying,
1800 self
.n
= self
.outpipe
.n
# use pipe in/out as this class in/out
1801 self
._ports
= self
.inpipe
.ports() + self
.outpipe
.ports()
1803 def elaborate(self
, platform
):
1805 m
.submodules
.inpipe
= self
.inpipe
1806 m
.submodules
.fpadd
= self
.fpadd
1807 m
.submodules
.outpipe
= self
.outpipe
1809 m
.d
.comb
+= self
.inpipe
.n
.connect_to_next(self
.fpadd
.p
)
1810 m
.d
.comb
+= self
.fpadd
.connect_to_next(self
.outpipe
)
1819 """ FPADD: stages as follows:
1825 FPAddBase---> FPAddBaseMod
1827 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1829 FPAddBase is tricky: it is both a stage and *has* stages.
1830 Connection to FPAddBaseMod therefore requires an in stb/ack
1831 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1832 needs to be the thing that raises the incoming stb.
1835 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1838 * width: bit-width of IEEE754. supported: 16, 32, 64
1839 * id_wid: an identifier that is sync-connected to the input
1840 * single_cycle: True indicates each stage to complete in 1 clock
1843 self
.id_wid
= id_wid
1844 self
.single_cycle
= single_cycle
1846 #self.out_z = FPOp(width)
1847 self
.ids
= FPID(id_wid
)
1850 for i
in range(rs_sz
):
1853 in_a
.name
= "in_a_%d" % i
1854 in_b
.name
= "in_b_%d" % i
1855 rs
.append((in_a
, in_b
))
1859 for i
in range(rs_sz
):
1861 out_z
.name
= "out_z_%d" % i
1863 self
.res
= Array(res
)
1867 def add_state(self
, state
):
1868 self
.states
.append(state
)
1871 def get_fragment(self
, platform
=None):
1872 """ creates the HDL code-fragment for FPAdd
1875 m
.submodules
+= self
.rs
1877 in_a
= self
.rs
[0][0]
1878 in_b
= self
.rs
[0][1]
1880 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1885 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1890 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1891 ab
= self
.add_state(ab
)
1892 abd
= ab
.ispec() # create an input spec object for FPADDBase
1893 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1894 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1897 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1900 with m
.FSM() as fsm
:
1902 for state
in self
.states
:
1903 with m
.State(state
.state_from
):
1909 if __name__
== "__main__":
1911 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1912 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1913 alu
.rs
[0][1].ports() + \
1914 alu
.res
[0].ports() + \
1915 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1917 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1918 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1919 alu
.in_t
.ports() + \
1920 alu
.out_z
.ports() + \
1921 [alu
.in_mid
, alu
.out_mid
])
1924 # works... but don't use, just do "python fname.py convert -t v"
1925 #print (verilog.convert(alu, ports=[
1926 # ports=alu.in_a.ports() + \
1927 # alu.in_b.ports() + \
1928 # alu.out_z.ports())