1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 #from fpbase import FPNumShiftMultiRight
20 class FPState(FPBase
):
def __init__(self, state_from):
    """ records the name of this state

        state_from: identifier for the state. subclasses in this file
                    pass strings such as "align" or "add_0", and
                    FPGetOp.setup uses it as the submodule attribute name.
    """
    self.state_from = state_from
24 def set_inputs(self
, inputs
):
26 for k
,v
in inputs
.items():
29 def set_outputs(self
, outputs
):
30 self
.outputs
= outputs
31 for k
,v
in outputs
.items():
def __init__(self, width):
    """ creates the operand input, the captured operand and the decode flag

        width: passed to FPOp and used as the shape of out_op
    """
    self.in_op = FPOp(width)
    self.out_op = Signal(width)
    # elaborate() drives this with (in_op.ack & in_op.stb)
    self.out_decode = Signal(reset_less=True)
41 def elaborate(self
, platform
):
43 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
44 m
.submodules
.get_op_in
= self
.in_op
45 #m.submodules.get_op_out = self.out_op
46 with m
.If(self
.out_decode
):
48 self
.out_op
.eq(self
.in_op
.v
),
53 class FPGetOp(FPState
):
57 def __init__(self
, in_state
, out_state
, in_op
, width
):
58 FPState
.__init
__(self
, in_state
)
59 self
.out_state
= out_state
60 self
.mod
= FPGetOpMod(width
)
62 self
.out_op
= Signal(width
)
63 self
.out_decode
= Signal(reset_less
=True)
65 def setup(self
, m
, in_op
):
66 """ links module to inputs and outputs
68 setattr(m
.submodules
, self
.state_from
, self
.mod
)
69 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
70 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
73 with m
.If(self
.out_decode
):
74 m
.next
= self
.out_state
77 self
.out_op
.eq(self
.mod
.out_op
)
80 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ two FPNumBase operands (a and b) plus an id signal (mid)

        width:   passed to FPNumBase for both operands
        id_wid:  shape of the mid signal
        m_extra: forwarded unchanged as the second FPNumBase argument
                 (NOTE(review): presumably enables extra mantissa
                 bits - confirm against fpbase.FPNumBase)
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    self.mid = Signal(id_wid, reset_less=True)
91 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
94 return [self
.a
, self
.b
, self
.mid
]
99 def __init__(self
, width
, id_wid
):
102 self
.a
= Signal(width
)
103 self
.b
= Signal(width
)
104 self
.mid
= Signal(id_wid
, reset_less
=True)
107 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
110 return [self
.a
, self
.b
, self
.mid
]
113 class FPGet2OpMod(Trigger
):
114 def __init__(self
, width
, id_wid
):
115 Trigger
.__init
__(self
)
118 self
.i
= self
.ispec()
119 self
.o
= self
.ospec()
122 return FPADDBaseData(self
.width
, self
.id_wid
)
125 return FPADDBaseData(self
.width
, self
.id_wid
)
127 def process(self
, i
):
130 def elaborate(self
, platform
):
131 m
= Trigger
.elaborate(self
, platform
)
132 with m
.If(self
.trigger
):
139 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ creates the operand-fetch state and its FPGet2OpMod module

        in_state:  name of this state (handed to FPState.__init__)
        out_state: value assigned to m.next once out_decode is set
        width:     passed to FPGet2OpMod
        id_wid:    passed to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # output record, same spec as the module's output
    self.o = self.mod.ospec()
    self.in_stb = Signal(reset_less=True)
    # setup() drives out_ack from mod.ack and out_decode from mod.trigger
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
152 def setup(self
, m
, i
, in_stb
, in_ack
):
153 """ links module to inputs and outputs
155 m
.submodules
.get_ops
= self
.mod
156 m
.d
.comb
+= self
.mod
.i
.eq(i
)
157 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
158 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
159 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
160 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
163 with m
.If(self
.out_decode
):
164 m
.next
= self
.out_state
167 self
.o
.eq(self
.mod
.o
),
170 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid):
    """ operands a and b, result z, early-out value oz and its flag

        width:  passed to FPNumBase/FPNumOut and used as the shape of oz
        id_wid: shape of the mid signal
    """
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    self.z = FPNumOut(width, False)
    # NOTE(review): oz/out_do_z look like a pre-computed result and the
    # flag that selects it (special cases set out_do_z to 1) - confirm
    self.oz = Signal(width, reset_less=True)
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
184 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
185 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
188 class FPAddSpecialCasesMod
:
189 """ special cases: NaNs, infs, zeros, denormalised
190 NOTE: some of these are unique to add. see "Special Operations"
191 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
194 def __init__(self
, width
, id_wid
):
197 self
.i
= self
.ispec()
198 self
.o
= self
.ospec()
201 return FPADDBaseData(self
.width
, self
.id_wid
)
204 return FPSCData(self
.width
, self
.id_wid
)
206 def setup(self
, m
, i
):
207 """ links module to inputs and outputs
209 m
.submodules
.specialcases
= self
210 m
.d
.comb
+= self
.i
.eq(i
)
212 def process(self
, i
):
215 def elaborate(self
, platform
):
218 m
.submodules
.sc_out_z
= self
.o
.z
220 # decode: XXX really should move to separate stage
221 a1
= FPNumIn(None, self
.width
)
222 b1
= FPNumIn(None, self
.width
)
223 m
.submodules
.sc_decode_a
= a1
224 m
.submodules
.sc_decode_b
= b1
225 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
230 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
233 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
235 # if a is NaN or b is NaN return NaN
236 with m
.If(a1
.is_nan | b1
.is_nan
):
237 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
238 m
.d
.comb
+= self
.o
.z
.nan(0)
240 # XXX WEIRDNESS for FP16 non-canonical NaN handling
243 ## if a is zero and b is NaN return -b
244 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
245 # m.d.comb += self.o.out_do_z.eq(1)
246 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
248 ## if b is zero and a is NaN return -a
249 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
250 # m.d.comb += self.o.out_do_z.eq(1)
251 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
253 ## if a is -zero and b is NaN return -b
254 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
255 # m.d.comb += self.o.out_do_z.eq(1)
256 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
258 ## if b is -zero and a is NaN return -a
259 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
260 # m.d.comb += self.o.out_do_z.eq(1)
261 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
263 # if a is inf return inf (or NaN)
264 with m
.Elif(a1
.is_inf
):
265 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
266 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
267 # if a is inf and signs don't match return NaN
268 with m
.If(b1
.exp_128
& s_nomatch
):
269 m
.d
.comb
+= self
.o
.z
.nan(0)
271 # if b is inf return inf
272 with m
.Elif(b1
.is_inf
):
273 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
274 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
276 # if a is zero and b zero return signed-a/b
277 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
278 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
279 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
281 # if a is zero return b
282 with m
.Elif(a1
.is_zero
):
283 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
284 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
286 # if b is zero return a
287 with m
.Elif(b1
.is_zero
):
288 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
289 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
291 # if a equal to -b return zero (+ve zero)
292 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
293 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
294 m
.d
.comb
+= self
.o
.z
.zero(0)
296 # Denormalised Number checks next, so pass a/b data through
298 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
299 m
.d
.comb
+= self
.o
.a
.eq(a1
)
300 m
.d
.comb
+= self
.o
.b
.eq(b1
)
302 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
303 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
309 def __init__(self
, id_wid
):
312 self
.in_mid
= Signal(id_wid
, reset_less
=True)
313 self
.out_mid
= Signal(id_wid
, reset_less
=True)
319 if self
.id_wid
is not None:
320 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
323 class FPAddSpecialCases(FPState
):
324 """ special cases: NaNs, infs, zeros, denormalised
325 NOTE: some of these are unique to add. see "Special Operations"
326 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state and its combinatorial module

        width:  passed to FPAddSpecialCasesMod
        id_wid: passed to FPAddSpecialCasesMod (shape of the mid signal
                in the module's input and output records)
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod requires both width and id_wid: constructing
    # it with width alone raises TypeError.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
335 def setup(self
, m
, i
):
336 """ links module to inputs and outputs
338 self
.mod
.setup(m
, i
, self
.out_do_z
)
339 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
340 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
344 with m
.If(self
.out_do_z
):
347 m
.next
= "denormalise"
350 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
351 """ special cases: NaNs, infs, zeros, denormalised
352 NOTE: some of these are unique to add. see "Special Operations"
353 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
356 def __init__(self
, width
, id_wid
):
357 FPState
.__init
__(self
, "special_cases")
360 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
361 self
.out
= self
.ospec()
364 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
367 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
369 def setup(self
, m
, i
):
370 """ links module to inputs and outputs
372 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
373 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
375 chain
= StageChain([smod
, dmod
])
378 # only needed for break-out (early-out)
379 # self.out_do_z = smod.o.out_do_z
383 def process(self
, i
):
387 # for break-out (early-out)
388 #with m.If(self.out_do_z):
391 m
.d
.sync
+= self
.out
.eq(self
.process(None))
395 class FPAddDeNormMod(FPState
):
397 def __init__(self
, width
, id_wid
):
400 self
.i
= self
.ispec()
401 self
.o
= self
.ospec()
404 return FPSCData(self
.width
, self
.id_wid
)
407 return FPSCData(self
.width
, self
.id_wid
)
409 def process(self
, i
):
412 def setup(self
, m
, i
):
413 """ links module to inputs and outputs
415 m
.submodules
.denormalise
= self
416 m
.d
.comb
+= self
.i
.eq(i
)
418 def elaborate(self
, platform
):
420 m
.submodules
.denorm_in_a
= self
.i
.a
421 m
.submodules
.denorm_in_b
= self
.i
.b
422 m
.submodules
.denorm_out_a
= self
.o
.a
423 m
.submodules
.denorm_out_b
= self
.o
.b
425 with m
.If(~self
.i
.out_do_z
):
426 # XXX hmmm, don't like repeating identical code
427 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
428 with m
.If(self
.i
.a
.exp_n127
):
429 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
431 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
433 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
434 with m
.If(self
.i
.b
.exp_n127
):
435 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
437 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
439 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
440 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
441 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
442 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
447 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ creates the "denormalise" state and its combinatorial module

        width:  passed to FPAddDeNormMod and to both FPNumBase outputs
        id_wid: passed to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod requires both width and id_wid: constructing it
    # with width alone raises TypeError.
    self.mod = FPAddDeNormMod(width, id_wid)
    # registered copies of the denormalised operands
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
455 def setup(self
, m
, i
):
456 """ links module to inputs and outputs
460 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
461 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
464 # Denormalised Number checks
468 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the operand inputs/outputs and the exponents-equal flag

        width: passed to FPNumBase (inputs) and FPNumIn (outputs)
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs are FPNumIn: elaborate() calls shift_down on them
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # elaborate() defaults this to 0 and also assigns 1
    # (commented there as "exponents equal: move to next stage")
    self.exp_eq = Signal(reset_less=True)
477 def elaborate(self
, platform
):
478 # This one however (single-cycle) will do the shift
483 m
.submodules
.align_in_a
= self
.in_a
484 m
.submodules
.align_in_b
= self
.in_b
485 m
.submodules
.align_out_a
= self
.out_a
486 m
.submodules
.align_out_b
= self
.out_b
488 # NOTE: this does *not* do single-cycle multi-shifting,
489 # it *STAYS* in the align state until exponents match
491 # exponent of a greater than b: shift b down
492 m
.d
.comb
+= self
.exp_eq
.eq(0)
493 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
494 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
495 agtb
= Signal(reset_less
=True)
496 altb
= Signal(reset_less
=True)
497 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
498 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
500 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
501 # exponent of b greater than a: shift a down
503 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
504 # exponents equal: move to next stage.
506 m
.d
.comb
+= self
.exp_eq
.eq(1)
510 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state and its FPAddAlignMultiMod module

        width:  passed to FPAddAlignMultiMod and to both FPNumIn outputs
        id_wid: accepted but not used in this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # setup() registers mod.out_a / mod.out_b into these on sync
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # setup() drives this combinatorially from mod.exp_eq
    self.exp_eq = Signal(reset_less=True)
519 def setup(self
, m
, in_a
, in_b
):
520 """ links module to inputs and outputs
522 m
.submodules
.align
= self
.mod
523 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
524 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
525 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
526 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
527 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
530 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ FPNumIn operands a and b, result z, early-out value oz and flag

        width:  passed to FPNumIn/FPNumOut and used as the shape of oz
        id_wid: shape of the mid signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
545 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
546 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
549 class FPAddAlignSingleMod
:
551 def __init__(self
, width
, id_wid
):
554 self
.i
= self
.ispec()
555 self
.o
= self
.ospec()
558 return FPSCData(self
.width
, self
.id_wid
)
561 return FPNumIn2Ops(self
.width
, self
.id_wid
)
563 def process(self
, i
):
566 def setup(self
, m
, i
):
567 """ links module to inputs and outputs
569 m
.submodules
.align
= self
570 m
.d
.comb
+= self
.i
.eq(i
)
572 def elaborate(self
, platform
):
573 """ Aligns A against B or B against A, depending on which has the
574 greater exponent. This is done in a *single* cycle using
575 variable-width bit-shift
577 the shifter used here is quite expensive in terms of gates.
578 Mux A or B in (and out) into temporaries, as only one of them
579 needs to be aligned against the other
583 m
.submodules
.align_in_a
= self
.i
.a
584 m
.submodules
.align_in_b
= self
.i
.b
585 m
.submodules
.align_out_a
= self
.o
.a
586 m
.submodules
.align_out_b
= self
.o
.b
588 # temporary (muxed) input and output to be shifted
589 t_inp
= FPNumBase(self
.width
)
590 t_out
= FPNumIn(None, self
.width
)
591 espec
= (len(self
.i
.a
.e
), True)
592 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
593 m
.submodules
.align_t_in
= t_inp
594 m
.submodules
.align_t_out
= t_out
595 m
.submodules
.multishift_r
= msr
597 ediff
= Signal(espec
, reset_less
=True)
598 ediffr
= Signal(espec
, reset_less
=True)
599 tdiff
= Signal(espec
, reset_less
=True)
600 elz
= Signal(reset_less
=True)
601 egz
= Signal(reset_less
=True)
603 # connect multi-shifter to t_inp/out mantissa (and tdiff)
604 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
605 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
606 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
607 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
608 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
610 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
611 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
612 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
613 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
615 # default: A-exp == B-exp, A and B untouched (fall through)
616 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
617 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
618 # only one shifter (muxed)
619 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
620 # exponent of a greater than b: shift b down
621 with m
.If(~self
.i
.out_do_z
):
623 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
626 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
628 # exponent of b greater than a: shift a down
630 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
633 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
636 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
637 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
638 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
639 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
644 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state and its FPAddAlignSingleMod module

        width:  passed to FPAddAlignSingleMod and to both FPNumIn outputs
        id_wid: passed to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # registered (sync) copies of the aligned operands, assigned in setup()
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
652 def setup(self
, m
, i
):
653 """ links module to inputs and outputs
657 # NOTE: could be done as comb
658 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
659 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
665 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
667 def __init__(self
, width
, id_wid
):
668 FPState
.__init
__(self
, "align")
671 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
672 self
.a1o
= self
.ospec()
675 return FPSCData(self
.width
, self
.id_wid
)
678 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
680 def setup(self
, m
, i
):
681 """ links module to inputs and outputs
684 # chain AddAlignSingle, AddStage0 and AddStage1
685 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
686 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
687 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
689 chain
= StageChain([mod
, a0mod
, a1mod
])
694 def process(self
, i
):
698 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
699 m
.next
= "normalise_1"
702 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ output record of the first add stage

        width:  passed to FPNumBase and used as the shape of oz
        id_wid: shape of the mid signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # mantissa sum/difference: 4 bits wider than z's mantissa width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
712 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
713 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
716 class FPAddStage0Mod
:
718 def __init__(self
, width
, id_wid
):
721 self
.i
= self
.ispec()
722 self
.o
= self
.ospec()
725 return FPSCData(self
.width
, self
.id_wid
)
728 return FPAddStage0Data(self
.width
, self
.id_wid
)
730 def process(self
, i
):
733 def setup(self
, m
, i
):
734 """ links module to inputs and outputs
736 m
.submodules
.add0
= self
737 m
.d
.comb
+= self
.i
.eq(i
)
739 def elaborate(self
, platform
):
741 m
.submodules
.add0_in_a
= self
.i
.a
742 m
.submodules
.add0_in_b
= self
.i
.b
743 m
.submodules
.add0_out_z
= self
.o
.z
745 # store intermediate tests (and zero-extended mantissas)
746 seq
= Signal(reset_less
=True)
747 mge
= Signal(reset_less
=True)
748 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
749 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
750 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
751 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
752 am0
.eq(Cat(self
.i
.a
.m
, 0)),
753 bm0
.eq(Cat(self
.i
.b
.m
, 0))
755 # same-sign (both negative or both positive) add mantissas
756 with m
.If(~self
.i
.out_do_z
):
757 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
760 self
.o
.tot
.eq(am0
+ bm0
),
761 self
.o
.z
.s
.eq(self
.i
.a
.s
)
763 # a mantissa greater than b, use a
766 self
.o
.tot
.eq(am0
- bm0
),
767 self
.o
.z
.s
.eq(self
.i
.a
.s
)
769 # b mantissa greater than a, use b
772 self
.o
.tot
.eq(bm0
- am0
),
773 self
.o
.z
.s
.eq(self
.i
.b
.s
)
776 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
777 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
778 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
782 class FPAddStage0(FPState
):
783 """ First stage of add. covers same-sign (add) and subtract
784 special-casing when mantissas are greater or equal, to
785 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state and its combinatorial module

        width:  passed to FPAddStage0Mod
        id_wid: passed to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod requires both width and id_wid: constructing it
    # with width alone raises TypeError.
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record, same spec as the module's output
    self.o = self.mod.ospec()
793 def setup(self
, m
, i
):
794 """ links module to inputs and outputs
798 # NOTE: these could be done as combinatorial (merge add0+add1)
799 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
805 class FPAddStage1Data
:
807 def __init__(self
, width
, id_wid
):
808 self
.z
= FPNumBase(width
, False)
809 self
.out_do_z
= Signal(reset_less
=True)
810 self
.oz
= Signal(width
, reset_less
=True)
812 self
.mid
= Signal(id_wid
, reset_less
=True)
815 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
816 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
820 class FPAddStage1Mod(FPState
):
821 """ Second stage of add: preparation for normalisation.
822 detects when tot sum is too big (tot[27] is kinda a carry bit)
825 def __init__(self
, width
, id_wid
):
828 self
.i
= self
.ispec()
829 self
.o
= self
.ospec()
832 return FPAddStage0Data(self
.width
, self
.id_wid
)
835 return FPAddStage1Data(self
.width
, self
.id_wid
)
837 def process(self
, i
):
840 def setup(self
, m
, i
):
841 """ links module to inputs and outputs
843 m
.submodules
.add1
= self
844 m
.submodules
.add1_out_overflow
= self
.o
.of
846 m
.d
.comb
+= self
.i
.eq(i
)
848 def elaborate(self
, platform
):
850 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
851 # tot[-1] (MSB) gets set when the sum overflows. shift result down
852 with m
.If(~self
.i
.out_do_z
):
853 with m
.If(self
.i
.tot
[-1]):
855 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
856 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
857 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
858 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
859 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
860 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
862 # tot[-1] (MSB) zero case
865 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
866 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
867 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
868 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
869 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
872 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
873 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
874 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
879 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state and its combinatorial module

        width:  passed to FPAddStage1Mod and to the out_z FPNumBase
        id_wid: passed to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod requires both width and id_wid: constructing it
    # with width alone raises TypeError.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe for the normalise stage: setup() clears it on sync
    self.norm_stb = Signal()
888 def setup(self
, m
, i
):
889 """ links module to inputs and outputs
893 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
895 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
896 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
897 m
.d
.sync
+= self
.norm_stb
.eq(1)
900 m
.next
= "normalise_1"
903 class FPNormaliseModSingle
:
905 def __init__(self
, width
):
907 self
.in_z
= self
.ispec()
908 self
.out_z
= self
.ospec()
911 return FPNumBase(self
.width
, False)
914 return FPNumBase(self
.width
, False)
916 def setup(self
, m
, i
):
917 """ links module to inputs and outputs
919 m
.submodules
.normalise
= self
920 m
.d
.comb
+= self
.i
.eq(i
)
922 def elaborate(self
, platform
):
925 mwid
= self
.out_z
.m_width
+2
926 pe
= PriorityEncoder(mwid
)
927 m
.submodules
.norm_pe
= pe
929 m
.submodules
.norm1_out_z
= self
.out_z
930 m
.submodules
.norm1_in_z
= self
.in_z
932 in_z
= FPNumBase(self
.width
, False)
934 m
.submodules
.norm1_insel_z
= in_z
935 m
.submodules
.norm1_insel_overflow
= in_of
937 espec
= (len(in_z
.e
), True)
938 ediff_n126
= Signal(espec
, reset_less
=True)
939 msr
= MultiShiftRMerge(mwid
, espec
)
940 m
.submodules
.multishift_r
= msr
942 m
.d
.comb
+= in_z
.eq(self
.in_z
)
943 m
.d
.comb
+= in_of
.eq(self
.in_of
)
944 # initialise out from in (overridden below)
945 m
.d
.comb
+= self
.out_z
.eq(in_z
)
946 m
.d
.comb
+= self
.out_of
.eq(in_of
)
947 # normalisation decrease condition
948 decrease
= Signal(reset_less
=True)
949 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
952 # *sigh* not entirely obvious: count leading zeros (clz)
953 # with a PriorityEncoder: to find from the MSB
954 # we reverse the order of the bits.
955 temp_m
= Signal(mwid
, reset_less
=True)
956 temp_s
= Signal(mwid
+1, reset_less
=True)
957 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
959 # cat round and guard bits back into the mantissa
960 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
961 pe
.i
.eq(temp_m
[::-1]), # inverted
962 clz
.eq(pe
.o
), # count zeros from MSB down
963 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
964 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
965 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ rounding flag, result z, early-out value oz and its flag, and id

        width:  passed to FPNumBase and used as the shape of oz
        id_wid: shape of the mid signal
    """
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
981 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
982 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
985 class FPNorm1ModSingle
:
987 def __init__(self
, width
, id_wid
):
990 self
.i
= self
.ispec()
991 self
.o
= self
.ospec()
994 return FPAddStage1Data(self
.width
, self
.id_wid
)
997 return FPNorm1Data(self
.width
, self
.id_wid
)
999 def setup(self
, m
, i
):
1000 """ links module to inputs and outputs
1002 m
.submodules
.normalise_1
= self
1003 m
.d
.comb
+= self
.i
.eq(i
)
1005 def process(self
, i
):
1008 def elaborate(self
, platform
):
1011 mwid
= self
.o
.z
.m_width
+2
1012 pe
= PriorityEncoder(mwid
)
1013 m
.submodules
.norm_pe
= pe
1016 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1018 m
.submodules
.norm1_out_z
= self
.o
.z
1019 m
.submodules
.norm1_out_overflow
= of
1020 m
.submodules
.norm1_in_z
= self
.i
.z
1021 m
.submodules
.norm1_in_overflow
= self
.i
.of
1024 m
.submodules
.norm1_insel_z
= i
.z
1025 m
.submodules
.norm1_insel_overflow
= i
.of
1027 espec
= (len(i
.z
.e
), True)
1028 ediff_n126
= Signal(espec
, reset_less
=True)
1029 msr
= MultiShiftRMerge(mwid
, espec
)
1030 m
.submodules
.multishift_r
= msr
1032 m
.d
.comb
+= i
.eq(self
.i
)
1033 # initialise out from in (overridden below)
1034 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1035 m
.d
.comb
+= of
.eq(i
.of
)
1036 # normalisation increase/decrease conditions
1037 decrease
= Signal(reset_less
=True)
1038 increase
= Signal(reset_less
=True)
1039 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1040 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1042 with m
.If(~self
.i
.out_do_z
):
1043 with m
.If(decrease
):
1044 # *sigh* not entirely obvious: count leading zeros (clz)
1045 # with a PriorityEncoder: to find from the MSB
1046 # we reverse the order of the bits.
1047 temp_m
= Signal(mwid
, reset_less
=True)
1048 temp_s
= Signal(mwid
+1, reset_less
=True)
1049 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1050 # make sure that the amount to decrease by does NOT
1051 # go below the minimum non-INF/NaN exponent
1052 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1055 # cat round and guard bits back into the mantissa
1056 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1057 pe
.i
.eq(temp_m
[::-1]), # inverted
1058 clz
.eq(limclz
), # count zeros from MSB down
1059 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1060 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1061 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1062 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1063 # overflow in bits 0..1: got shifted too (leave sticky)
1064 of
.guard
.eq(temp_s
[1]), # guard
1065 of
.round_bit
.eq(temp_s
[0]), # round
1068 with m
.Elif(increase
):
1069 temp_m
= Signal(mwid
+1, reset_less
=True)
1071 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1073 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1074 # connect multi-shifter to inp/out mantissa (and ediff)
1076 msr
.diff
.eq(ediff_n126
),
1077 self
.o
.z
.m
.eq(msr
.m
[3:]),
1078 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1079 # overflow in bits 0..1: got shifted too (leave sticky)
1080 of
.guard
.eq(temp_s
[2]), # guard
1081 of
.round_bit
.eq(temp_s
[1]), # round
1082 of
.sticky
.eq(temp_s
[0]), # sticky
1083 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1086 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1087 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1088 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
1093 class FPNorm1ModMulti
:
1095 def __init__(self
, width
, single_cycle
=True):
1097 self
.in_select
= Signal(reset_less
=True)
1098 self
.in_z
= FPNumBase(width
, False)
1099 self
.in_of
= Overflow()
1100 self
.temp_z
= FPNumBase(width
, False)
1101 self
.temp_of
= Overflow()
1102 self
.out_z
= FPNumBase(width
, False)
1103 self
.out_of
= Overflow()
1105 def elaborate(self
, platform
):
1108 m
.submodules
.norm1_out_z
= self
.out_z
1109 m
.submodules
.norm1_out_overflow
= self
.out_of
1110 m
.submodules
.norm1_temp_z
= self
.temp_z
1111 m
.submodules
.norm1_temp_of
= self
.temp_of
1112 m
.submodules
.norm1_in_z
= self
.in_z
1113 m
.submodules
.norm1_in_overflow
= self
.in_of
1115 in_z
= FPNumBase(self
.width
, False)
1117 m
.submodules
.norm1_insel_z
= in_z
1118 m
.submodules
.norm1_insel_overflow
= in_of
1120 # select which of temp or in z/of to use
1121 with m
.If(self
.in_select
):
1122 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1123 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1125 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1126 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1127 # initialise out from in (overridden below)
1128 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1129 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1130 # normalisation increase/decrease conditions
1131 decrease
= Signal(reset_less
=True)
1132 increase
= Signal(reset_less
=True)
1133 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1134 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1135 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1137 with m
.If(decrease
):
1139 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1140 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1141 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1142 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1143 self
.out_of
.round_bit
.eq(0), # reset round bit
1144 self
.out_of
.m0
.eq(in_of
.guard
),
1147 with m
.Elif(increase
):
1149 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1150 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1151 self
.out_of
.guard
.eq(in_z
.m
[0]),
1152 self
.out_of
.m0
.eq(in_z
.m
[1]),
1153 self
.out_of
.round_bit
.eq(in_of
.guard
),
1154 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1160 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ creates the "normalise_1" state and its single-cycle module

        width:        passed to FPNorm1ModSingle and to the out_z FPNumBase
        id_wid:       passed to FPNorm1ModSingle
        single_cycle: accepted but not used in this constructor
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle requires both width and id_wid: constructing it
    # with width alone raises TypeError.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # ospec() needs self.mod, so this must come after the line above
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1170 return self
.mod
.ispec()
1173 return self
.mod
.ospec()
1175 def setup(self
, m
, i
):
1176 """ links module to inputs and outputs
1178 self
.mod
.setup(m
, i
)
1180 def action(self
, m
):
1184 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ creates the multi-cycle "normalise_1" state and its module

        width:  passed to FPNorm1ModMulti and to the FPNumBase temporaries
        id_wid: accepted but not used in this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)
    # setup() drives stb from norm_stb and clears ack on sync
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    # action() sets this to (~ack & stb)
    self.in_accept = Signal(reset_less=True)
    # action() registers mod.out_of / out_z into these on sync
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1198 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1199 """ links module to inputs and outputs
1201 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1202 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1203 self
.out_z
, self
.out_norm
)
1205 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1206 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1208 def action(self
, m
):
1209 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1210 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1211 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1212 with m
.If(self
.out_norm
):
1213 with m
.If(self
.in_accept
):
1218 m
.d
.sync
+= self
.ack
.eq(0)
1220 # normalisation not required (or done).
1222 m
.d
.sync
+= self
.ack
.eq(1)
1223 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Combined normalise / round / corrections / pack stage.

        Acts both as an FSM state (registered as "normalise_1") and as an
        UnbufferedPipeline whose stage is itself.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        # ispec/ospec read self.width, so it must exist before the
        # pipeline base class is initialised
        self.width = width
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # NOTE(review): the line after the chain is built is absent from
        # the dump; chain.setup(m, <input>) is how StageChain is hooked up
        # in get_compact_fragment - confirm against VCS.
        chain.setup(m, i)
        self.out_z = pmod.ospec()

        # NOTE(review): the statement publishing the last module's output
        # is absent from the dump; FPPackMod keeps its output in `.o`.
        self.o = pmod.o

    def process(self, i):
        # NOTE(review): body absent from the dump. action() copies the
        # result of process(None) into out_z, so it must hand back the
        # packed output captured in setup().
        return self.o

    def action(self, m):
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
# NOTE(review): the class header is absent from the dump; the name is taken
# from the FPRoundData(width, id_wid) calls in the ispec/ospec methods that
# follow, whose users read exactly these fields (z, out_do_z, oz, mid).
class FPRoundData:
    """ Record passed between the round, corrections and pack stages.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        # the dump truncates the list after the oz term (trailing comma);
        # mid is the only remaining field declared in __init__
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.mid.eq(i.mid)]
# NOTE(review): the class header is absent from the dump; the name is taken
# from the FPRoundMod(width, id_wid) call in FPNormToPack.setup and from the
# "roundz" submodule name used below.
class FPRoundMod:
    """ Combinatorial rounding stage.

        Copies the input record to out_z, then, unless out_do_z is set,
        increments the mantissa when roundz is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body absent from the dump; out_z is the only
        # output this module owns - confirm against VCS.
        return self.out_z

    def setup(self, m, i):
        """ registers this module as submodule "roundz" and connects i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            with m.If(self.i.roundz):
                m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa up
                # NOTE(review): indentation is lost in the dump; nesting
                # the exponent bump under roundz is assumed - confirm.
                with m.If(self.i.z.m == self.i.z.m1s): # all 1s
                    m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
        return m
class FPRound(FPState):
    """ FSM state "round": registers the output of an FPRoundMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod.__init__ takes (width, id_wid) with no default for
        # id_wid; the original FPRoundMod(width) call could not succeed.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod publishes its result as `out_z` (it has no `o`
        # attribute), so the id is read from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial corrections stage.

        Copies the input record to out_z, then, unless out_do_z is set,
        replaces the exponent with z.N127 when z.is_denormalised is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body absent from the dump; out_z is the only
        # output this module owns - confirm against VCS.
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            with m.If(self.i.z.is_denormalised):
                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of FPCorrectionsMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ takes (width, id_wid) with no default
        # for id_wid; the original one-argument call could not succeed.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod publishes its result as `out_z` (it has no `o`
        # attribute), so the id is read from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # NOTE(review): the body of this method is absent from the dump.
        # "pack" is the state name FPPack registers, and FPPack is the
        # state set up directly after corrections - confirm against VCS.
        m.next = "pack"
# NOTE(review): the class header is absent from the dump; the name is taken
# from the FPPackData(width, id_wid) calls whose users read `.z` and `.mid`.
class FPPackData:
    """ Packed result record: the result bits (z) plus the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]

    # NOTE(review): this method's header is absent from the dump; "ports"
    # is the accessor name used on other objects in this file
    # (e.g. inpipe.ports()) - confirm against VCS.
    def ports(self):
        """ returns the signals of this record as a list
        """
        return [self.z, self.mid]
# NOTE(review): the class header is absent from the dump; the name is taken
# from the FPPackMod(width, id_wid) call in FPNormToPack.setup and from the
# "pack" submodule name used below.
class FPPackMod:
    """ Combinatorial pack stage.

        Builds the packed result from the input record: infinity on
        overflow, otherwise sign/exponent/mantissa via FPNumOut.create,
        or the pre-computed oz when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body absent from the dump; `o` is the only
        # output this module owns - confirm against VCS.
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = self.i.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            with m.If(self.i.z.is_overflowed):
                m.d.comb += z.inf(self.i.z.s)
            with m.Else():
                m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        with m.Else():
            # out_do_z set: use the value carried in oz
            m.d.comb += z.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
class FPPack(FPState):
    """ FSM state "pack": registers the output of an FPPackMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ takes (width, id_wid) with no default for
        # id_wid; the original FPPackMod(width) call could not succeed.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod publishes its result as `o` (an FPPackData with fields
        # z and mid); the original read a non-existent `mod.out_z.v` and
        # wrote a non-existent `out_z.v`.
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and waits for stb & ack.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name this state registers under
            * in_z / out_z: source value and destination record
            * in_mid / out_mid: id source and destination (in_mid may be None)
            * to_state: state to move to once acknowledged ("get_ops" if None)
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads self.in_z / self.out_z, so both are kept here
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): the statement wrapping this assignment is absent
        # from the dump; the sync domain is assumed from the neighbouring
        # statements - confirm against VCS.
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on the out_zs entry selected by
        in_mid and waits for that entry's stb & ack.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name this state registers under
            * in_z: source value (its .v is copied)
            * out_zs: indexable collection of outputs, indexed by in_mid
            * to_state: state to move to once acknowledged ("get_ops" if None)
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads self.in_z, so it is kept here
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # NOTE(review): the statement wrapping this assignment is absent
        # from the dump; the sync domain is assumed from the neighbouring
        # statements - confirm against VCS.
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
# NOTE(review): the class header is absent from the dump; the name is taken
# from the FPOpData(width, id_wid) call in the ospec that follows, whose
# users treat `.z` as an FPOp (stb/ack/v).
class FPOpData:
    """ Output record: an FPOp (z) plus the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]

    # NOTE(review): this method's header is absent from the dump; "ports"
    # is the accessor name used on other objects in this file
    # (e.g. inpipe.ports()) - confirm against VCS.
    def ports(self):
        """ returns the members of this record as a list
        """
        return [self.z, self.mid]
1538 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1541 * width: bit-width of IEEE754. supported: 16, 32, 64
1542 * id_wid: an identifier that is sync-connected to the input
1543 * single_cycle: True indicates each stage to complete in 1 clock
1544 * compact: True indicates a reduced number of stages
1547 self
.id_wid
= id_wid
1548 self
.single_cycle
= single_cycle
1549 self
.compact
= compact
1551 self
.in_t
= Trigger()
1552 self
.i
= self
.ispec()
1553 self
.o
= self
.ospec()
1558 return FPADDBaseData(self
.width
, self
.id_wid
)
1561 return FPOpData(self
.width
, self
.id_wid
)
1563 def add_state(self
, state
):
1564 self
.states
.append(state
)
1567 def get_fragment(self
, platform
=None):
1568 """ creates the HDL code-fragment for FPAdd
1571 m
.submodules
.out_z
= self
.o
.z
1572 m
.submodules
.in_t
= self
.in_t
1574 self
.get_compact_fragment(m
, platform
)
1576 self
.get_longer_fragment(m
, platform
)
1578 with m
.FSM() as fsm
:
1580 for state
in self
.states
:
1581 with m
.State(state
.state_from
):
1586 def get_longer_fragment(self
, m
, platform
=None):
1588 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1590 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1594 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1595 sc
.setup(m
, a
, b
, self
.in_mid
)
1597 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1598 dn
.setup(m
, a
, b
, sc
.in_mid
)
1600 if self
.single_cycle
:
1601 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1602 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1604 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1605 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1607 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1608 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1610 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1611 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1613 if self
.single_cycle
:
1614 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1615 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1617 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1618 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1620 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1621 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1623 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1624 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1626 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1627 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1629 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1630 pa
.in_mid
, self
.out_mid
))
1632 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1633 pa
.in_mid
, self
.out_mid
))
1635 def get_compact_fragment(self
, m
, platform
=None):
1637 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1638 self
.width
, self
.id_wid
))
1639 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1641 sc
= FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
)
1642 alm
= FPAddAlignSingleAdd(self
.width
, self
.id_wid
)
1643 n1
= FPNormToPack(self
.width
, self
.id_wid
)
1645 chainlist
= [sc
, alm
, n1
]
1646 chain
= StageChain(chainlist
, specallocate
=True)
1647 chain
.setup(m
, get
.o
)
1649 for mod
in chainlist
:
1650 sc
= self
.add_state(mod
)
1652 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1653 n1
.out_z
.mid
, self
.o
.mid
))
1655 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1656 # sc.o.mid, self.o.mid))
1659 class FPADDBase(FPState
):
1661 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1664 * width: bit-width of IEEE754. supported: 16, 32, 64
1665 * id_wid: an identifier that is sync-connected to the input
1666 * single_cycle: True indicates each stage to complete in 1 clock
1668 FPState
.__init
__(self
, "fpadd")
1670 self
.single_cycle
= single_cycle
1671 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1672 self
.o
= self
.ospec()
1674 self
.in_t
= Trigger()
1675 self
.i
= self
.ispec()
1677 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1678 self
.in_accept
= Signal(reset_less
=True)
1679 self
.add_stb
= Signal(reset_less
=True)
1680 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1683 return self
.mod
.ispec()
1686 return self
.mod
.ospec()
1688 def setup(self
, m
, i
, add_stb
, in_mid
):
1689 m
.d
.comb
+= [self
.i
.eq(i
),
1690 self
.mod
.i
.eq(self
.i
),
1691 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1692 #self.add_stb.eq(add_stb),
1693 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1694 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1695 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1696 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1697 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1698 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1701 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1702 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1703 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1704 #m.d.sync += self.in_t.stb.eq(0)
1706 m
.submodules
.fpadd
= self
.mod
1708 def action(self
, m
):
1710 # in_accept is set on incoming strobe HIGH and ack LOW.
1711 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1713 #with m.If(self.in_t.ack):
1714 # m.d.sync += self.in_t.stb.eq(0)
1715 with m
.If(~self
.z_done
):
1716 # not done: test for accepting an incoming operand pair
1717 with m
.If(self
.in_accept
):
1719 self
.add_ack
.eq(1), # acknowledge receipt...
1720 self
.in_t
.stb
.eq(1), # initiate add
1723 m
.d
.sync
+= [self
.add_ack
.eq(0),
1724 self
.in_t
.stb
.eq(0),
1728 # done: acknowledge, and write out id and value
1729 m
.d
.sync
+= [self
.add_ack
.eq(1),
1736 if self
.in_mid
is not None:
1737 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1740 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1742 # move to output state on detecting z ack
1743 with m
.If(self
.out_z
.trigger
):
1744 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1747 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: special-cases/denorm, align/add,
        normalise-to-pack, connected in that order.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connect() returns the assignments; they are applied in elaborate
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        # NOTE(review): the line creating `m` and the final return are
        # absent from the dump; a fresh Module is assumed - confirm that
        # the original did not obtain `m` from ControlBase instead.
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in stage: num_rows inputs of FPADDBaseData through a
        pass-through stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # same record type on both sides of the pass-through stage
            return FPADDBaseData(width, id_wid)

        passthru = PassThroughStage(iospec)
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out stage: num_rows outputs of FPPackData through a
        pass-through stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # same record type on both sides of the pass-through stage
            return FPPackData(width, id_wid)

        passthru = PassThroughStage(iospec)
        CombMuxOutPipe.__init__(self, passthru, n_len=self.num_rows)
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        self.p = self.inpipe.p  # kinda annoying,
        self.n = self.outpipe.n # use pipe in/out as this class in/out
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # inpipe -> fpadd -> outpipe
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the combined fan-in and fan-out ports gathered in
            __init__ (nothing else in this class reads _ports)
        """
        return self._ports
1819 """ FPADD: stages as follows:
1825 FPAddBase---> FPAddBaseMod
1827 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1829 FPAddBase is tricky: it is both a stage and *has* stages.
1830 Connection to FPAddBaseMod therefore requires an in stb/ack
1831 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1832 needs to be the thing that raises the incoming stb.
1835 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1838 * width: bit-width of IEEE754. supported: 16, 32, 64
1839 * id_wid: an identifier that is sync-connected to the input
1840 * single_cycle: True indicates each stage to complete in 1 clock
1843 self
.id_wid
= id_wid
1844 self
.single_cycle
= single_cycle
1846 #self.out_z = FPOp(width)
1847 self
.ids
= FPID(id_wid
)
1850 for i
in range(rs_sz
):
1853 in_a
.name
= "in_a_%d" % i
1854 in_b
.name
= "in_b_%d" % i
1855 rs
.append((in_a
, in_b
))
1859 for i
in range(rs_sz
):
1861 out_z
.name
= "out_z_%d" % i
1863 self
.res
= Array(res
)
1867 def add_state(self
, state
):
1868 self
.states
.append(state
)
1871 def get_fragment(self
, platform
=None):
1872 """ creates the HDL code-fragment for FPAdd
1875 m
.submodules
+= self
.rs
1877 in_a
= self
.rs
[0][0]
1878 in_b
= self
.rs
[0][1]
1880 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1885 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1890 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1891 ab
= self
.add_state(ab
)
1892 abd
= ab
.ispec() # create an input spec object for FPADDBase
1893 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1894 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1897 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1900 with m
.FSM() as fsm
:
1902 for state
in self
.states
:
1903 with m
.State(state
.state_from
):
1909 if __name__
== "__main__":
1911 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1912 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1913 alu
.rs
[0][1].ports() + \
1914 alu
.res
[0].ports() + \
1915 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1917 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1918 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1919 alu
.in_t
.ports() + \
1920 alu
.out_z
.ports() + \
1921 [alu
.in_mid
, alu
.out_mid
])
1924 # works... but don't use, just do "python fname.py convert -t v"
1925 #print (verilog.convert(alu, ports=[
1926 # ports=alu.in_a.ports() + \
1927 # alu.in_b.ports() + \
1928 # alu.out_z.ports())