8b5f2a64cc49a10cd58b72ab6ccf0d844052266a
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 #from fpbase import FPNumShiftMultiRight
20 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember which state this object was created for.

    state_from -- kept verbatim on the instance as ``state_from``.
    """
    self.state_from = state_from
24 def set_inputs(self
, inputs
):
26 for k
,v
in inputs
.items():
29 def set_outputs(self
, outputs
):
30 self
.outputs
= outputs
31 for k
,v
in outputs
.items():
def __init__(self, width):
    """Create the operand input, the value output and the decode flag.

    width -- forwarded to FPOp and used as the bit width of ``out_op``.
    """
    # operand input; the elaborate() that follows reads its ack, stb and v
    self.in_op = FPOp(width)
    # width-bit output; assigned from in_op.v while out_decode is set
    self.out_op = Signal(width)
    # single-bit flag, driven with (in_op.ack & in_op.stb)
    self.out_decode = Signal(reset_less=True)
41 def elaborate(self
, platform
):
43 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
44 m
.submodules
.get_op_in
= self
.in_op
45 #m.submodules.get_op_out = self.out_op
46 with m
.If(self
.out_decode
):
48 self
.out_op
.eq(self
.in_op
.v
),
53 class FPGetOp(FPState
):
57 def __init__(self
, in_state
, out_state
, in_op
, width
):
58 FPState
.__init
__(self
, in_state
)
59 self
.out_state
= out_state
60 self
.mod
= FPGetOpMod(width
)
62 self
.out_op
= Signal(width
)
63 self
.out_decode
= Signal(reset_less
=True)
65 def setup(self
, m
, in_op
):
66 """ links module to inputs and outputs
68 setattr(m
.submodules
, self
.state_from
, self
.mod
)
69 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
70 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
73 with m
.If(self
.out_decode
):
74 m
.next
= self
.out_state
77 self
.out_op
.eq(self
.mod
.out_op
)
80 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Bundle two FPNumBase operands with a ``mid`` signal.

    width   -- forwarded to both FPNumBase constructors.
    id_wid  -- bit width of ``mid``.
    m_extra -- forwarded to both FPNumBase constructors (default True).
    """
    # the two operands, built with identical parameters
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # id_wid-bit signal carried alongside the operands
    self.mid = Signal(id_wid, reset_less=True)
91 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
94 return [self
.a
, self
.b
, self
.mid
]
99 def __init__(self
, width
, id_wid
):
102 self
.a
= Signal(width
)
103 self
.b
= Signal(width
)
104 self
.mid
= Signal(id_wid
, reset_less
=True)
107 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
110 return [self
.a
, self
.b
, self
.mid
]
113 class FPGet2OpMod(Trigger
):
114 def __init__(self
, width
, id_wid
):
115 Trigger
.__init
__(self
)
118 self
.i
= self
.ispec()
119 self
.o
= self
.ospec()
122 return FPADDBaseData(self
.width
, self
.id_wid
)
125 return FPADDBaseData(self
.width
, self
.id_wid
)
127 def process(self
, i
):
130 def elaborate(self
, platform
):
131 m
= Trigger
.elaborate(self
, platform
)
132 with m
.If(self
.trigger
):
139 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Set up the two-operand fetch state.

    in_state  -- handed to FPState.__init__.
    out_state -- stored; the visible code assigns it to ``m.next`` when
                 ``out_decode`` is set.
    width, id_wid -- forwarded to FPGet2OpMod.
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # worker module, plus an output record of the module's own ospec() type
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # single-bit handshake / decode signals
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
152 def setup(self
, m
, i
, in_stb
, in_ack
):
153 """ links module to inputs and outputs
155 m
.submodules
.get_ops
= self
.mod
156 m
.d
.comb
+= self
.mod
.i
.eq(i
)
157 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
158 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
159 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
160 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
163 with m
.If(self
.out_decode
):
164 m
.next
= self
.out_state
167 self
.o
.eq(self
.mod
.o
),
170 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid):
    """Create the a/b operands, the z result and the bypass signals.

    width  -- forwarded to FPNumBase / FPNumOut and used as the width
              of ``oz``.
    id_wid -- bit width of ``mid``.
    """
    # operands (second FPNumBase argument is True for both)
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    # result number (second FPNumOut argument is False)
    self.z = FPNumOut(width, False)
    # width-bit value and its single-bit companion flag
    self.oz = Signal(width, reset_less=True)
    self.out_do_z = Signal(reset_less=True)
    # id_wid-bit signal carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
184 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
185 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
188 class FPAddSpecialCasesMod
:
189 """ special cases: NaNs, infs, zeros, denormalised
190 NOTE: some of these are unique to add. see "Special Operations"
191 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
194 def __init__(self
, width
, id_wid
):
197 self
.i
= self
.ispec()
198 self
.o
= self
.ospec()
201 return FPADDBaseData(self
.width
, self
.id_wid
)
204 return FPSCData(self
.width
, self
.id_wid
)
206 def setup(self
, m
, i
):
207 """ links module to inputs and outputs
209 m
.submodules
.specialcases
= self
210 m
.d
.comb
+= self
.i
.eq(i
)
212 def process(self
, i
):
215 def elaborate(self
, platform
):
218 m
.submodules
.sc_out_z
= self
.o
.z
220 # decode: XXX really should move to separate stage
221 a1
= FPNumIn(None, self
.width
)
222 b1
= FPNumIn(None, self
.width
)
223 m
.submodules
.sc_decode_a
= a1
224 m
.submodules
.sc_decode_b
= b1
225 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
230 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
233 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
235 # if a is NaN or b is NaN return NaN
236 with m
.If(a1
.is_nan | b1
.is_nan
):
237 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
238 m
.d
.comb
+= self
.o
.z
.nan(0)
240 # XXX WEIRDNESS for FP16 non-canonical NaN handling
243 ## if a is zero and b is NaN return -b
244 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
245 # m.d.comb += self.o.out_do_z.eq(1)
246 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
248 ## if b is zero and a is NaN return -a
249 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
250 # m.d.comb += self.o.out_do_z.eq(1)
251 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
253 ## if a is -zero and b is NaN return -b
254 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
255 # m.d.comb += self.o.out_do_z.eq(1)
256 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
258 ## if b is -zero and a is NaN return -a
259 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
260 # m.d.comb += self.o.out_do_z.eq(1)
261 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
263 # if a is inf return inf (or NaN)
264 with m
.Elif(a1
.is_inf
):
265 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
266 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
267 # if a is inf and signs don't match return NaN
268 with m
.If(b1
.exp_128
& s_nomatch
):
269 m
.d
.comb
+= self
.o
.z
.nan(0)
271 # if b is inf return inf
272 with m
.Elif(b1
.is_inf
):
273 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
274 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
276 # if a is zero and b zero return signed-a/b
277 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
278 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
279 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
281 # if a is zero return b
282 with m
.Elif(a1
.is_zero
):
283 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
284 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
286 # if b is zero return a
287 with m
.Elif(b1
.is_zero
):
288 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
289 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
291 # if a equal to -b return zero (+ve zero)
292 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
293 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
294 m
.d
.comb
+= self
.o
.z
.zero(0)
296 # Denormalised Number checks next, so pass a/b data through
298 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
299 m
.d
.comb
+= self
.o
.a
.eq(a1
)
300 m
.d
.comb
+= self
.o
.b
.eq(b1
)
302 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
303 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
309 def __init__(self
, id_wid
):
312 self
.in_mid
= Signal(id_wid
, reset_less
=True)
313 self
.out_mid
= Signal(id_wid
, reset_less
=True)
319 if self
.id_wid
is not None:
320 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
323 class FPAddSpecialCases(FPState
):
324 """ special cases: NaNs, infs, zeros, denormalised
325 NOTE: some of these are unique to add. see "Special Operations"
326 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Set up the "special_cases" state.

    width, id_wid -- forwarded to FPAddSpecialCasesMod.
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid); passing only
    # width raises TypeError, so id_wid is forwarded as well.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output record of the module's own ospec() type
    self.out_z = self.mod.ospec()
    # single-bit flag
    self.out_do_z = Signal(reset_less=True)
335 def setup(self
, m
, i
):
336 """ links module to inputs and outputs
338 self
.mod
.setup(m
, i
, self
.out_do_z
)
339 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
340 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
344 with m
.If(self
.out_do_z
):
347 m
.next
= "denormalise"
350 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
351 """ special cases: NaNs, infs, zeros, denormalised
352 NOTE: some of these are unique to add. see "Special Operations"
353 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the combined special-cases + denormalise pipeline stage.

    width, id_wid -- forwarded to both FPAddSpecialCasesMod and
                     FPAddDeNormMod.
    """
    FPState.__init__(self, "special_cases")

    # the two modules that setup() links together
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.dmod = FPAddDeNormMod(width, id_wid)

    UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
    self.o = self.ospec()
364 return self
.smod
.ispec()
367 return self
.dmod
.ospec()
369 def setup(self
, m
, i
):
370 """ links module to inputs and outputs
372 # these only needed for break-out (early-out)
373 # out_z = self.smod.ospec()
374 # out_do_z = Signal(reset_less=True)
375 self
.smod
.setup(m
, i
)
376 self
.dmod
.setup(m
, self
.smod
.o
)
377 #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
379 # out_do_z=True, only needed for early-out (split pipeline)
380 #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
381 #m.d.sync += out_z.mid.eq(self.smod.o.mid) # (and mid)
384 m
.d
.comb
+= self
.o
.eq(self
.dmod
.o
)
386 def process(self
, i
):
390 #with m.If(self.out_do_z):
396 class FPAddDeNormMod(FPState
):
398 def __init__(self
, width
, id_wid
):
401 self
.i
= self
.ispec()
402 self
.o
= self
.ospec()
405 return FPSCData(self
.width
, self
.id_wid
)
408 return FPSCData(self
.width
, self
.id_wid
)
410 def setup(self
, m
, i
):
411 """ links module to inputs and outputs
413 m
.submodules
.denormalise
= self
414 m
.d
.comb
+= self
.i
.eq(i
)
416 def elaborate(self
, platform
):
418 m
.submodules
.denorm_in_a
= self
.i
.a
419 m
.submodules
.denorm_in_b
= self
.i
.b
420 m
.submodules
.denorm_out_a
= self
.o
.a
421 m
.submodules
.denorm_out_b
= self
.o
.b
423 with m
.If(~self
.i
.out_do_z
):
424 # XXX hmmm, don't like repeating identical code
425 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
426 with m
.If(self
.i
.a
.exp_n127
):
427 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
429 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
431 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
432 with m
.If(self
.i
.b
.exp_n127
):
433 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
435 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
437 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
438 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
439 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
440 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
445 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Set up the "denormalise" state.

    width  -- forwarded to FPAddDeNormMod and to both FPNumBase outputs.
    id_wid -- forwarded to FPAddDeNormMod.
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid); passing only width
    # raises TypeError, so id_wid is forwarded as well.
    self.mod = FPAddDeNormMod(width, id_wid)
    # output operands of this state
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
453 def setup(self
, m
, i
):
454 """ links module to inputs and outputs
458 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
459 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
462 # Denormalised Number checks
466 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the align module's operand ports and its exp_eq flag.

    width -- forwarded to every FPNumBase / FPNumIn constructed here.
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs (FPNumIn is constructed with None as its first argument)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # single-bit flag; elaborate() defaults it to 0 and also sets it to 1
    self.exp_eq = Signal(reset_less=True)
475 def elaborate(self
, platform
):
476 # This one however (single-cycle) will do the shift
481 m
.submodules
.align_in_a
= self
.in_a
482 m
.submodules
.align_in_b
= self
.in_b
483 m
.submodules
.align_out_a
= self
.out_a
484 m
.submodules
.align_out_b
= self
.out_b
486 # NOTE: this does *not* do single-cycle multi-shifting,
487 # it *STAYS* in the align state until exponents match
489 # exponent of a greater than b: shift b down
490 m
.d
.comb
+= self
.exp_eq
.eq(0)
491 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
492 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
493 agtb
= Signal(reset_less
=True)
494 altb
= Signal(reset_less
=True)
495 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
496 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
498 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
499 # exponent of b greater than a: shift a down
501 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
502 # exponents equal: move to next stage.
504 m
.d
.comb
+= self
.exp_eq
.eq(1)
508 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Set up the multi-cycle "align" state.

    width  -- forwarded to FPAddAlignMultiMod and to both FPNumIn outputs.
    id_wid -- accepted but not used in this constructor.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # registered copies of the module outputs (setup() assigns them in sync)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors self.mod.exp_eq (setup() connects it combinatorially)
    self.exp_eq = Signal(reset_less=True)
517 def setup(self
, m
, in_a
, in_b
):
518 """ links module to inputs and outputs
520 m
.submodules
.align
= self
.mod
521 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
522 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
523 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
524 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
525 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
528 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Create the a/b FPNumIn operands, the z result and bypass signals.

    width  -- forwarded to FPNumIn / FPNumOut and used as the width
              of ``oz``.
    id_wid -- bit width of ``mid``.
    """
    # operands (FPNumIn is constructed with None as its first argument)
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # result number (second FPNumOut argument is False)
    self.z = FPNumOut(width, False)
    # single-bit flag and the width-bit value that accompanies it
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # id_wid-bit signal carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
543 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
544 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
547 class FPAddAlignSingleMod
:
549 def __init__(self
, width
, id_wid
):
552 self
.i
= self
.ispec()
553 self
.o
= self
.ospec()
556 return FPSCData(self
.width
, self
.id_wid
)
559 return FPNumIn2Ops(self
.width
, self
.id_wid
)
561 def process(self
, i
):
564 def setup(self
, m
, i
):
565 """ links module to inputs and outputs
567 m
.submodules
.align
= self
568 m
.d
.comb
+= self
.i
.eq(i
)
570 def elaborate(self
, platform
):
571 """ Aligns A against B or B against A, depending on which has the
572 greater exponent. This is done in a *single* cycle using
573 variable-width bit-shift
575 the shifter used here is quite expensive in terms of gates.
576 Mux A or B in (and out) into temporaries, as only one of them
577 needs to be aligned against the other
581 m
.submodules
.align_in_a
= self
.i
.a
582 m
.submodules
.align_in_b
= self
.i
.b
583 m
.submodules
.align_out_a
= self
.o
.a
584 m
.submodules
.align_out_b
= self
.o
.b
586 # temporary (muxed) input and output to be shifted
587 t_inp
= FPNumBase(self
.width
)
588 t_out
= FPNumIn(None, self
.width
)
589 espec
= (len(self
.i
.a
.e
), True)
590 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
591 m
.submodules
.align_t_in
= t_inp
592 m
.submodules
.align_t_out
= t_out
593 m
.submodules
.multishift_r
= msr
595 ediff
= Signal(espec
, reset_less
=True)
596 ediffr
= Signal(espec
, reset_less
=True)
597 tdiff
= Signal(espec
, reset_less
=True)
598 elz
= Signal(reset_less
=True)
599 egz
= Signal(reset_less
=True)
601 # connect multi-shifter to t_inp/out mantissa (and tdiff)
602 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
603 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
604 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
605 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
606 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
608 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
609 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
610 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
611 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
613 # default: A-exp == B-exp, A and B untouched (fall through)
614 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
615 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
616 # only one shifter (muxed)
617 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
618 # exponent of a greater than b: shift b down
619 with m
.If(~self
.i
.out_do_z
):
621 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
624 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
626 # exponent of b greater than a: shift a down
628 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
631 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
634 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
635 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
636 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
637 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
642 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Set up the single-cycle "align" state.

    width  -- forwarded to FPAddAlignSingleMod and to both FPNumIn outputs.
    id_wid -- forwarded to FPAddAlignSingleMod.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # output operands of this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
650 def setup(self
, m
, i
):
651 """ links module to inputs and outputs
655 # NOTE: could be done as comb
656 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
657 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
663 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
665 def __init__(self
, width
, id_wid
):
666 FPState
.__init
__(self
, "align")
669 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
670 self
.a1o
= self
.ospec()
673 return FPSCData(self
.width
, self
.id_wid
)
676 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
678 def setup(self
, m
, i
):
679 """ links module to inputs and outputs
682 # chain AddAlignSingle, AddStage0 and AddStage1
683 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
684 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
685 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
687 chain
= StageChain([mod
, a0mod
, a1mod
])
690 m
.d
.comb
+= self
.a1o
.eq(a1mod
.o
)
692 def process(self
, i
):
696 m
.next
= "normalise_1"
699 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the z number, the running total and the bypass signals.

    width  -- forwarded to FPNumBase and used as the width of ``oz``.
    id_wid -- bit width of ``mid``.
    """
    # result number (second FPNumBase argument is False)
    self.z = FPNumBase(width, False)
    # single-bit flag and the width-bit value that accompanies it
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # total is four bits wider than z's m_width, so z must be created first
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    # id_wid-bit signal carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
709 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
710 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
713 class FPAddStage0Mod
:
715 def __init__(self
, width
, id_wid
):
718 self
.i
= self
.ispec()
719 self
.o
= self
.ospec()
722 return FPSCData(self
.width
, self
.id_wid
)
725 return FPAddStage0Data(self
.width
, self
.id_wid
)
727 def process(self
, i
):
730 def setup(self
, m
, i
):
731 """ links module to inputs and outputs
733 m
.submodules
.add0
= self
734 m
.d
.comb
+= self
.i
.eq(i
)
736 def elaborate(self
, platform
):
738 m
.submodules
.add0_in_a
= self
.i
.a
739 m
.submodules
.add0_in_b
= self
.i
.b
740 m
.submodules
.add0_out_z
= self
.o
.z
742 # store intermediate tests (and zero-extended mantissas)
743 seq
= Signal(reset_less
=True)
744 mge
= Signal(reset_less
=True)
745 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
746 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
747 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
748 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
749 am0
.eq(Cat(self
.i
.a
.m
, 0)),
750 bm0
.eq(Cat(self
.i
.b
.m
, 0))
752 # same-sign (both negative or both positive) add mantissas
753 with m
.If(~self
.i
.out_do_z
):
754 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
757 self
.o
.tot
.eq(am0
+ bm0
),
758 self
.o
.z
.s
.eq(self
.i
.a
.s
)
760 # a mantissa greater than b, use a
763 self
.o
.tot
.eq(am0
- bm0
),
764 self
.o
.z
.s
.eq(self
.i
.a
.s
)
766 # b mantissa greater than a, use b
769 self
.o
.tot
.eq(bm0
- am0
),
770 self
.o
.z
.s
.eq(self
.i
.b
.s
)
773 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
774 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
775 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
779 class FPAddStage0(FPState
):
780 """ First stage of add. covers same-sign (add) and subtract
781 special-casing when mantissas are greater or equal, to
782 give greatest accuracy.
def __init__(self, width, id_wid):
    """Set up the "add_0" state.

    width, id_wid -- forwarded to FPAddStage0Mod.
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid); passing only width
    # raises TypeError, so id_wid is forwarded as well.
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record of the module's own ospec() type
    self.o = self.mod.ospec()
790 def setup(self
, m
, i
):
791 """ links module to inputs and outputs
795 # NOTE: these could be done as combinatorial (merge add0+add1)
796 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
802 class FPAddStage1Data
:
804 def __init__(self
, width
, id_wid
):
805 self
.z
= FPNumBase(width
, False)
806 self
.out_do_z
= Signal(reset_less
=True)
807 self
.oz
= Signal(width
, reset_less
=True)
809 self
.mid
= Signal(id_wid
, reset_less
=True)
812 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
813 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
817 class FPAddStage1Mod(FPState
):
818 """ Second stage of add: preparation for normalisation.
819 detects when tot sum is too big (tot[27] is kinda a carry bit)
822 def __init__(self
, width
, id_wid
):
825 self
.i
= self
.ispec()
826 self
.o
= self
.ospec()
829 return FPAddStage0Data(self
.width
, self
.id_wid
)
832 return FPAddStage1Data(self
.width
, self
.id_wid
)
834 def process(self
, i
):
837 def setup(self
, m
, i
):
838 """ links module to inputs and outputs
840 m
.submodules
.add1
= self
841 m
.submodules
.add1_out_overflow
= self
.o
.of
843 m
.d
.comb
+= self
.i
.eq(i
)
845 def elaborate(self
, platform
):
847 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
848 # tot[-1] (MSB) gets set when the sum overflows. shift result down
849 with m
.If(~self
.i
.out_do_z
):
850 with m
.If(self
.i
.tot
[-1]):
852 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
853 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
854 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
855 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
856 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
857 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
859 # tot[-1] (MSB) zero case
862 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
863 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
864 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
865 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
866 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
869 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
870 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
871 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
876 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Set up the "add_1" state.

    width  -- forwarded to FPAddStage1Mod and to the ``out_z`` FPNumBase.
    id_wid -- forwarded to FPAddStage1Mod.
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid); passing only width
    # raises TypeError, so id_wid is forwarded as well.
    self.mod = FPAddStage1Mod(width, id_wid)
    # result number (second FPNumBase argument is False) and its Overflow
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe; the visible setup()/action() code assigns it 0 and 1 in sync
    self.norm_stb = Signal()
885 def setup(self
, m
, i
):
886 """ links module to inputs and outputs
890 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
892 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
893 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
894 m
.d
.sync
+= self
.norm_stb
.eq(1)
897 m
.next
= "normalise_1"
900 class FPNormaliseModSingle
:
902 def __init__(self
, width
):
904 self
.in_z
= self
.ispec()
905 self
.out_z
= self
.ospec()
908 return FPNumBase(self
.width
, False)
911 return FPNumBase(self
.width
, False)
913 def setup(self
, m
, i
):
914 """ links module to inputs and outputs
916 m
.submodules
.normalise
= self
917 m
.d
.comb
+= self
.i
.eq(i
)
919 def elaborate(self
, platform
):
922 mwid
= self
.out_z
.m_width
+2
923 pe
= PriorityEncoder(mwid
)
924 m
.submodules
.norm_pe
= pe
926 m
.submodules
.norm1_out_z
= self
.out_z
927 m
.submodules
.norm1_in_z
= self
.in_z
929 in_z
= FPNumBase(self
.width
, False)
931 m
.submodules
.norm1_insel_z
= in_z
932 m
.submodules
.norm1_insel_overflow
= in_of
934 espec
= (len(in_z
.e
), True)
935 ediff_n126
= Signal(espec
, reset_less
=True)
936 msr
= MultiShiftRMerge(mwid
, espec
)
937 m
.submodules
.multishift_r
= msr
939 m
.d
.comb
+= in_z
.eq(self
.in_z
)
940 m
.d
.comb
+= in_of
.eq(self
.in_of
)
941 # initialise out from in (overridden below)
942 m
.d
.comb
+= self
.out_z
.eq(in_z
)
943 m
.d
.comb
+= self
.out_of
.eq(in_of
)
944 # normalisation decrease condition
945 decrease
= Signal(reset_less
=True)
946 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
949 # *sigh* not entirely obvious: count leading zeros (clz)
950 # with a PriorityEncoder: to find from the MSB
951 # we reverse the order of the bits.
952 temp_m
= Signal(mwid
, reset_less
=True)
953 temp_s
= Signal(mwid
+1, reset_less
=True)
954 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
956 # cat round and guard bits back into the mantissa
957 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
958 pe
.i
.eq(temp_m
[::-1]), # inverted
959 clz
.eq(pe
.o
), # count zeros from MSB down
960 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
961 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
962 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Create the roundz flag, the z number and the bypass signals.

    width  -- forwarded to FPNumBase and used as the width of ``oz``.
    id_wid -- bit width of ``mid``.
    """
    # single-bit flag
    self.roundz = Signal(reset_less=True)
    # result number (second FPNumBase argument is False)
    self.z = FPNumBase(width, False)
    # single-bit flag and the width-bit value that accompanies it
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # id_wid-bit signal carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
978 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
979 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
982 class FPNorm1ModSingle
:
984 def __init__(self
, width
, id_wid
):
987 self
.i
= self
.ispec()
988 self
.o
= self
.ospec()
991 return FPAddStage1Data(self
.width
, self
.id_wid
)
994 return FPNorm1Data(self
.width
, self
.id_wid
)
996 def setup(self
, m
, i
):
997 """ links module to inputs and outputs
999 m
.submodules
.normalise_1
= self
1000 m
.d
.comb
+= self
.i
.eq(i
)
1002 def process(self
, i
):
1005 def elaborate(self
, platform
):
1008 mwid
= self
.o
.z
.m_width
+2
1009 pe
= PriorityEncoder(mwid
)
1010 m
.submodules
.norm_pe
= pe
1013 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1015 m
.submodules
.norm1_out_z
= self
.o
.z
1016 m
.submodules
.norm1_out_overflow
= of
1017 m
.submodules
.norm1_in_z
= self
.i
.z
1018 m
.submodules
.norm1_in_overflow
= self
.i
.of
1021 m
.submodules
.norm1_insel_z
= i
.z
1022 m
.submodules
.norm1_insel_overflow
= i
.of
1024 espec
= (len(i
.z
.e
), True)
1025 ediff_n126
= Signal(espec
, reset_less
=True)
1026 msr
= MultiShiftRMerge(mwid
, espec
)
1027 m
.submodules
.multishift_r
= msr
1029 m
.d
.comb
+= i
.eq(self
.i
)
1030 # initialise out from in (overridden below)
1031 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1032 m
.d
.comb
+= of
.eq(i
.of
)
1033 # normalisation increase/decrease conditions
1034 decrease
= Signal(reset_less
=True)
1035 increase
= Signal(reset_less
=True)
1036 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1037 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1039 with m
.If(~self
.i
.out_do_z
):
1040 with m
.If(decrease
):
1041 # *sigh* not entirely obvious: count leading zeros (clz)
1042 # with a PriorityEncoder: to find from the MSB
1043 # we reverse the order of the bits.
1044 temp_m
= Signal(mwid
, reset_less
=True)
1045 temp_s
= Signal(mwid
+1, reset_less
=True)
1046 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1047 # make sure that the amount to decrease by does NOT
1048 # go below the minimum non-INF/NaN exponent
1049 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1052 # cat round and guard bits back into the mantissa
1053 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1054 pe
.i
.eq(temp_m
[::-1]), # inverted
1055 clz
.eq(limclz
), # count zeros from MSB down
1056 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1057 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1058 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1059 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1060 # overflow in bits 0..1: got shifted too (leave sticky)
1061 of
.guard
.eq(temp_s
[1]), # guard
1062 of
.round_bit
.eq(temp_s
[0]), # round
1065 with m
.Elif(increase
):
1066 temp_m
= Signal(mwid
+1, reset_less
=True)
1068 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1070 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1071 # connect multi-shifter to inp/out mantissa (and ediff)
1073 msr
.diff
.eq(ediff_n126
),
1074 self
.o
.z
.m
.eq(msr
.m
[3:]),
1075 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1076 # overflow in bits 0..1: got shifted too (leave sticky)
1077 of
.guard
.eq(temp_s
[2]), # guard
1078 of
.round_bit
.eq(temp_s
[1]), # round
1079 of
.sticky
.eq(temp_s
[0]), # sticky
1080 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1083 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1084 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1085 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
1090 class FPNorm1ModMulti
:
1092 def __init__(self
, width
, single_cycle
=True):
1094 self
.in_select
= Signal(reset_less
=True)
1095 self
.in_z
= FPNumBase(width
, False)
1096 self
.in_of
= Overflow()
1097 self
.temp_z
= FPNumBase(width
, False)
1098 self
.temp_of
= Overflow()
1099 self
.out_z
= FPNumBase(width
, False)
1100 self
.out_of
= Overflow()
1102 def elaborate(self
, platform
):
1105 m
.submodules
.norm1_out_z
= self
.out_z
1106 m
.submodules
.norm1_out_overflow
= self
.out_of
1107 m
.submodules
.norm1_temp_z
= self
.temp_z
1108 m
.submodules
.norm1_temp_of
= self
.temp_of
1109 m
.submodules
.norm1_in_z
= self
.in_z
1110 m
.submodules
.norm1_in_overflow
= self
.in_of
1112 in_z
= FPNumBase(self
.width
, False)
1114 m
.submodules
.norm1_insel_z
= in_z
1115 m
.submodules
.norm1_insel_overflow
= in_of
1117 # select which of temp or in z/of to use
1118 with m
.If(self
.in_select
):
1119 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1120 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1122 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1123 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1124 # initialise out from in (overridden below)
1125 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1126 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1127 # normalisation increase/decrease conditions
1128 decrease
= Signal(reset_less
=True)
1129 increase
= Signal(reset_less
=True)
1130 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1131 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1132 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1134 with m
.If(decrease
):
1136 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1137 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1138 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1139 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1140 self
.out_of
.round_bit
.eq(0), # reset round bit
1141 self
.out_of
.m0
.eq(in_of
.guard
),
1144 with m
.Elif(increase
):
1146 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1147 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1148 self
.out_of
.guard
.eq(in_z
.m
[0]),
1149 self
.out_of
.m0
.eq(in_z
.m
[1]),
1150 self
.out_of
.round_bit
.eq(in_of
.guard
),
1151 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1157 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """Set up the single-cycle "normalise_1" state.

    width        -- forwarded to FPNorm1ModSingle and to ``out_z``.
    id_wid       -- forwarded to FPNorm1ModSingle.
    single_cycle -- accepted but not used in this constructor.
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ takes (width, id_wid); passing only width
    # raises TypeError, so id_wid is forwarded as well.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # NOTE(review): ospec() appears to delegate to self.mod, so this must
    # stay after the assignment above -- confirm against the full file.
    self.o = self.ospec()
    # result number (second FPNumBase argument is False)
    self.out_z = FPNumBase(width, False)
    # single-bit flag
    self.out_roundz = Signal(reset_less=True)
1167 return self
.mod
.ispec()
1170 return self
.mod
.ospec()
1172 def setup(self
, m
, i
):
1173 """ links module to inputs and outputs
1175 self
.mod
.setup(m
, i
)
1177 def action(self
, m
):
1181 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """Set up the multi-cycle "normalise_1" state.

    width  -- forwarded to FPNorm1ModMulti and to the FPNumBase members.
    id_wid -- accepted but not used in this constructor.
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # handshake signals; ack is created with reset=0
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)

    # single-bit control flags used by setup() / action()
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # temporaries that action() updates in the sync domain
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # outputs of this state
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1195 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1196 """ links module to inputs and outputs
1198 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1199 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1200 self
.out_z
, self
.out_norm
)
1202 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1203 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1205 def action(self
, m
):
1206 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1207 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1208 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1209 with m
.If(self
.out_norm
):
1210 with m
.If(self
.in_accept
):
1215 m
.d
.sync
+= self
.ack
.eq(0)
1217 # normalisation not required (or done).
1219 m
.d
.sync
+= self
.ack
.eq(1)
1220 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState, UnbufferedPipeline):
    """Normalise, round, correct and pack, chained into one stage.

    The object is both an FSM state ("normalise_1") and an
    UnbufferedPipeline that uses itself as its stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        # NOTE(review): this assignment was absent from the reviewed
        # text; ispec()/ospec()/setup() all read self.width.
        self.width = width
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    # NOTE(review): the "def" lines of ispec/ospec were absent from the
    # reviewed text; the names follow the trailing comments on the
    # return statements.
    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)  # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid)  # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # NOTE(review): this call was absent from the reviewed text;
        # without it neither the chain nor the input i is ever used.
        # Presumably StageChain.setup(m, i) wires the stages -- confirm.
        chain.setup(m, i)

        self.out_z = pmod.ospec()

        m.d.comb += self.out_z.mid.eq(pmod.o.mid)
        m.d.comb += self.out_z.z.eq(pmod.o.z)  # outputs packed result

    def process(self, i):
        # NOTE(review): body absent from the reviewed text; returning the
        # object driven in setup() is an assumption -- confirm.
        return self.out_z

    def action(self, m):
        m.next = "pack_put_z"
# NOTE(review): the class header was absent from the reviewed text; the
# name is taken from the FPRoundData(width, id_wid) calls in FPRoundMod
# and FPCorrectionsMod, whose arguments match this constructor.
class FPRoundData:
    """Record passed between the round/corrections/pack stages."""

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    # NOTE(review): the "def" line and the last element of the list were
    # absent from the reviewed text.  Users of this record describe
    # out_z.eq(i) as copying "mid, z, out_do_z", hence mid is included.
    def eq(self, i):
        """Return the assignments that copy every field from *i*."""
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.mid.eq(i.mid)]
# NOTE(review): the class header was absent from the reviewed text; the
# name is taken from the FPRoundMod(...) constructions in FPNormToPack
# and FPRound.
class FPRoundMod:
    """Rounding stage: increments the mantissa when ``roundz`` is set."""

    def __init__(self, width, id_wid):
        # NOTE(review): assignment absent from the reviewed text but
        # required by ispec()/ospec() below.
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    # NOTE(review): the "def" lines of ispec/ospec were absent from the
    # reviewed text; their order follows the __init__ call order.
    def ispec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body absent from the reviewed text; returning the
        # stage output is an assumption -- confirm.
        return self.out_z

    def setup(self, m, i):
        """Register this stage as submodule ``roundz`` and drive its input."""
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            with m.If(self.i.roundz):
                m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1)  # mantissa up
                # NOTE(review): nesting is inferred (indentation was
                # lost): the exponent bump is taken to apply only while
                # rounding up -- confirm.
                with m.If(self.i.z.m == self.i.z.m1s):  # all 1s
                    m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1)  # exponent up
        return m
class FPRound(FPState):
    """FSM state "round": registers the output of an ``FPRoundMod``."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod.__init__ takes (width, id_wid); id_wid was not being
        # passed, which fails at construction time.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    # NOTE(review): the "def" lines of ispec/ospec were absent from the
    # reviewed text.
    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o"
        # attribute), so mid is read from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """Corrections stage: forces the exponent of a denormalised result.

    Input and output are both ``FPRoundData`` records.
    """

    def __init__(self, width, id_wid):
        # NOTE(review): assignment absent from the reviewed text but
        # required by ispec()/ospec() below.
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    # NOTE(review): the "def" lines of ispec/ospec were absent from the
    # reviewed text.
    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body absent from the reviewed text; returning the
        # stage output is an assumption -- confirm.
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            with m.If(self.i.z.is_denormalised):
                # overrides the exponent copied above
                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """FSM state "corrections": registers an ``FPCorrectionsMod`` output."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ takes (width, id_wid); id_wid was not
        # being passed, which fails at construction time.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    # NOTE(review): the "def" lines of ispec/ospec were absent from the
    # reviewed text.
    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o"
        # attribute), so mid is read from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # NOTE(review): body absent from the reviewed text; "pack" is the
        # state name FPPack registers and FPPack is wired directly after
        # this stage -- confirm.
        m.next = "pack"
# NOTE(review): the class header was absent from the reviewed text; the
# name is taken from the FPPackData(width, id_wid) calls in FPPackMod and
# FPNormToPack, whose arguments match this constructor.
class FPPackData:
    """Record holding a packed result ``z`` and its identifier ``mid``."""

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    # NOTE(review): the "def" lines of eq/ports were absent from the
    # reviewed text; the names are inferred from the returned values.
    def eq(self, i):
        """Return the assignments that copy both fields from *i*."""
        return [self.z.eq(i.z), self.mid.eq(i.mid)]

    def ports(self):
        """Return the record's signals as a list."""
        return [self.z, self.mid]
# NOTE(review): the class header was absent from the reviewed text; the
# name is taken from the FPPackMod(...) constructions in FPNormToPack and
# FPPack.
class FPPackMod:
    """Pack stage: turns an ``FPRoundData`` record into ``FPPackData``."""

    def __init__(self, width, id_wid):
        # NOTE(review): assignment absent from the reviewed text but
        # required by ispec()/ospec()/elaborate() below.
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    # NOTE(review): the "def" lines of ispec/ospec were absent from the
    # reviewed text.
    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body absent from the reviewed text; returning the
        # stage output is an assumption -- confirm.
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = self.i.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        # NOTE(review): the two Else branches were absent from the
        # reviewed text; they are inferred from the three mutually
        # exclusive drivers of z.
        with m.If(~self.i.out_do_z):
            with m.If(self.i.z.is_overflowed):
                m.d.comb += z.inf(self.i.z.s)
            with m.Else():
                m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        with m.Else():
            # out_do_z set: pass the pre-computed value oz straight through
            m.d.comb += z.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
class FPPack(FPState):
    """FSM state "pack": registers the output of an ``FPPackMod``."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ takes (width, id_wid); id_wid was not being
        # passed, which fails at construction time.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    # NOTE(review): the "def" lines of ispec/ospec were absent from the
    # reviewed text.
    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o", an FPPackData whose fields
        # are z and mid; the former self.mod.out_z.v / self.out_z.v
        # attributes do not exist on those objects.
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """FSM state that presents a result on ``out_z.z`` with stb/ack.

    The state raises ``out_z.z.stb`` and, once stb and ack are both seen
    high, drops stb and moves to ``to_state``.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """
        * state: name of this FSM state
        * in_z: value copied into out_z.z.v
        * out_z: object whose ``z`` member carries v/stb/ack
        * in_mid / out_mid: identifier copied alongside (skipped when
          in_mid is None)
        * to_state: state entered after the handshake ("get_ops" if None)
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # NOTE(review): these two assignments were absent from the
        # reviewed text; action() reads both attributes.
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        # NOTE(review): the Else header was absent from the reviewed
        # text; it is inferred from the two opposing stb assignments.
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """FSM state that presents a result on ``out_zs[in_mid]``.

    Same stb/ack handshake as FPPutZ, but the destination is selected
    from ``out_zs`` by the identifier ``in_mid``.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """
        * state: name of this FSM state
        * in_z: object whose ``v`` is copied to the selected output
        * out_zs: indexable collection of outputs carrying v/stb/ack
        * in_mid: index into out_zs
        * to_state: state entered after the handshake ("get_ops" if None)
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # NOTE(review): this assignment was absent from the reviewed
        # text; action() reads self.in_z.
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # one lookup of the selected output, reused below
        dest = self.out_zs[self.in_mid]
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(dest.stb),
                     outz_ack.eq(dest.ack),
                    ]
        m.d.sync += [
            dest.v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        # NOTE(review): the Else header was absent from the reviewed
        # text; it is inferred from the two opposing stb assignments.
        with m.Else():
            m.d.sync += dest.stb.eq(1)
# NOTE(review): the class header was absent from the reviewed text; the
# name is taken from FPADDBaseMod.ospec(), which builds
# FPOpData(width, id_wid).
class FPOpData:
    """Record holding an ``FPOp`` result ``z`` and its identifier ``mid``."""

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    # NOTE(review): the "def" lines of eq/ports were absent from the
    # reviewed text; the names are inferred from the returned values.
    def eq(self, i):
        """Return the assignments that copy both fields from *i*."""
        return [self.z.eq(i.z), self.mid.eq(i.mid)]

    def ports(self):
        """Return the record's members as a list."""
        return [self.z, self.mid]
# NOTE(review): the class header was absent from the reviewed text; the
# name is taken from FPADDBase, which builds
# FPADDBaseMod(width, id_wid, single_cycle).
class FPADDBaseMod:
    """FSM-based FP adder: collects states and emits them as one FSM."""

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        # NOTE(review): self.width and self.states were absent from the
        # reviewed text; ispec()/ospec() read the former and add_state()
        # appends to the latter.
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    # NOTE(review): the "def" lines of ispec/ospec were absent from the
    # reviewed text; their order follows the __init__ call order.
    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """Record *state* and hand it back so callers can keep using it."""
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # NOTE(review): the if/else selector was absent from the reviewed
        # text; choosing on self.compact is inferred from the constructor
        # documentation.
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """Register one FSM state per adder stage (non-compact layout).

        NOTE(review): this method looks stale.  It reads self.in_mid,
        self.out_z and self.out_mid, none of which __init__ sets, and
        several setup() calls pass more arguments than the setup()
        methods visible elsewhere in this file accept.  Lines marked
        "reconstructed" were absent from the reviewed text.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))  # reconstructed args
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1  # reconstructed -- confirm attribute name
        b = get.out_op2  # reconstructed -- confirm attribute name

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """Register the reduced set of FSM states (compact layout)."""
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        # NOTE(review): reconstructed; the call linking this stage to the
        # previous one was absent from the reviewed text -- confirm "get.o".
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        # NOTE(review): reconstructed, as above -- confirm "sc.o".
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
class FPADDBase(FPState):
    """FSM state "fpadd" that wraps an ``FPADDBaseMod`` submodule.

    The state starts the wrapped adder through ``in_t`` and waits for
    ``z_done`` (the adder's output trigger) before moving on.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        # NOTE(review): one assignment was absent from the reviewed text
        # at this position; storing width (as the sibling classes do) is
        # an assumption.
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    # NOTE(review): the "def" lines of ispec/ospec were absent from the
    # reviewed text.
    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """Wire the wrapped adder to this state's ports.

        in_mid is accepted but not used here.
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """Emit the per-state logic for "fpadd".

        NOTE(review): list brackets, Else headers, the o.z.ack and
        second in_t.stb assignments, both m.next lines and the early
        return were absent from the reviewed text and are reconstructed.
        The code after the return reads self.in_mid / self.out_mid /
        self.out_z, which this class never sets, so it can only be
        unreachable -- confirm against the repository copy.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"

            return

            if self.in_mid is not None:
                m.d.sync += self.out_mid.eq(self.mod.out_mid)

            m.d.sync += [
                self.out_z.v.eq(self.mod.out_z.v)
            ]
            # move to output state on detecting z ack
            with m.If(self.out_z.trigger):
                m.d.sync += self.out_z.stb.eq(0)
                m.next = "put_z"
            with m.Else():
                m.d.sync += self.out_z.stb.eq(1)
class FPADDBasePipe(ControlBase):
    """Three pipeline stages connected in sequence.

    pipe1: FPAddSpecialCasesDeNorm, pipe2: FPAddAlignSingleAdd,
    pipe3: FPNormToPack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # keep what connect() returns; it is added to the comb domain in
        # elaborate()
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        # NOTE(review): "m = Module()" and "return m" were absent from
        # the reviewed text; both are required for the statements below.
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """PriorityCombMuxInPipe with ``num_rows`` inputs of FPADDBaseData."""

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # the stage builds a fresh FPADDBaseData each time its spec
        # factory is called
        stage = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """CombMuxOutPipe with ``num_rows`` outputs of FPPackData."""

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # the stage builds a fresh FPPackData each time its spec factory
        # is called
        stage = PassThroughStage(lambda: FPPackData(width, id_wid))
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        self.p = self.inpipe.p  # kinda annoying,
        self.n = self.outpipe.n # use pipe in/out as this class in/out
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        # NOTE(review): "m = Module()" and "return m" were absent from
        # the reviewed text; both are required for the statements below.
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder -> fan-out
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m
1813 """ FPADD: stages as follows:
1819 FPAddBase---> FPAddBaseMod
1821 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1823 FPAddBase is tricky: it is both a stage and *has* stages.
1824 Connection to FPAddBaseMod therefore requires an in stb/ack
1825 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1826 needs to be the thing that raises the incoming stb.
def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * rs_sz: number of (in_a, in_b) operand pairs and of results
    """
    # NOTE(review): self.width, the list initialisers, the FPOp
    # constructions, "self.rs = ...", "res.append(...)" and
    # "self.states = []" were absent from the reviewed text.  They are
    # reconstructed from how rs/res/states/width are used in this class
    # and in the __main__ block -- confirm.
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle

    #self.out_z = FPOp(width)
    self.ids = FPID(id_wid)

    rs = []
    for i in range(rs_sz):
        in_a = FPOp(width)
        in_b = FPOp(width)
        in_a.name = "in_a_%d" % i
        in_b.name = "in_b_%d" % i
        rs.append((in_a, in_b))
    self.rs = Array(rs)

    res = []
    for i in range(rs_sz):
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)

    self.states = []
def add_state(self, state):
    """Record *state* and hand it back so callers can keep using it."""
    self.states.append(state)
    # NOTE(review): the return was absent from the reviewed text;
    # get_fragment() uses the value of every add_state() call.
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    # NOTE(review): "m = Module()", the continuation lines of the two
    # FPGetOp(...) calls, their setup()/out_op lines, "o = ab.o", the
    # tail of the FPPutZIdx(...) call, "state.action(m)" and "return m"
    # were absent from the reviewed text.  They are reconstructed from
    # FPGetOp's signature (in_state, out_state, in_op, width), its
    # setup(m, in_op) and its out_op attribute -- confirm.
    m = Module()
    m.submodules += self.rs

    in_a = self.rs[0][0]
    in_b = self.rs[0][1]

    geta = self.add_state(FPGetOp("get_a", "get_b",
                                  in_a, self.width))
    geta.setup(m, in_a)
    a = geta.out_op

    getb = self.add_state(FPGetOp("get_b", "fpadd",
                                  in_b, self.width))
    getb.setup(m, in_b)
    b = getb.out_op

    ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
    ab = self.add_state(ab)
    abd = ab.ispec()  # create an input spec object for FPADDBase
    m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
    ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
    o = ab.o

    # "get_a" rather than FPPutZIdx's default "get_ops": this FSM only
    # has the states get_a, get_b, fpadd and put_z.
    pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                  o.mid, "get_a"))

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
if __name__ == "__main__":
    # NOTE(review): the selector between the two variants was absent from
    # the reviewed text; "if True:/else:" is reconstructed.  The second
    # variant reads in_a/in_b/out_z/in_mid/out_mid, which FPADDBase does
    # not define in this file, so it looks stale -- confirm.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports() +
                         alu.rs[0][1].ports() +
                         alu.res[0].ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))
1918 # works... but don't use, just do "python fname.py convert -t v"
1919 #print (verilog.convert(alu, ports=[
1920 # ports=alu.in_a.ports() + \
1921 # alu.in_b.ports() + \
1922 # alu.out_z.ports())