1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 #from fpbase import FPNumShiftMultiRight
20 class FPState(FPBase
):
def __init__(self, state_from):
    """Record the identifier of the state this object stands for.

    :param state_from: state identifier kept on the instance as
                       ``state_from`` (subclasses in this file pass
                       strings such as ``"align"``).
    """
    self.state_from = state_from
24 def set_inputs(self
, inputs
):
26 for k
,v
in inputs
.items():
29 def set_outputs(self
, outputs
):
30 self
.outputs
= outputs
31 for k
,v
in outputs
.items():
def __init__(self, width):
    """Create the operand input, the operand output and the decode flag.

    :param width: value handed both to ``FPOp`` and to the ``out_op``
                  Signal as its shape.
    """
    # incoming operand object (FPOp comes from fpbase)
    self.in_op = FPOp(width)

    # outgoing operand value
    self.out_op = Signal(width)

    # one-bit flag, declared reset-less
    self.out_decode = Signal(reset_less=True)
41 def elaborate(self
, platform
):
43 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
44 m
.submodules
.get_op_in
= self
.in_op
45 #m.submodules.get_op_out = self.out_op
46 with m
.If(self
.out_decode
):
48 self
.out_op
.eq(self
.in_op
.v
),
53 class FPGetOp(FPState
):
57 def __init__(self
, in_state
, out_state
, in_op
, width
):
58 FPState
.__init
__(self
, in_state
)
59 self
.out_state
= out_state
60 self
.mod
= FPGetOpMod(width
)
62 self
.out_op
= Signal(width
)
63 self
.out_decode
= Signal(reset_less
=True)
65 def setup(self
, m
, in_op
):
66 """ links module to inputs and outputs
68 setattr(m
.submodules
, self
.state_from
, self
.mod
)
69 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
70 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
73 with m
.If(self
.out_decode
):
74 m
.next
= self
.out_state
77 self
.out_op
.eq(self
.mod
.out_op
)
80 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Build a two-operand record: ``a``, ``b`` and a mux-id ``mid``.

    :param width:   passed to ``FPNumBase`` for both operands.
    :param id_wid:  shape of the ``mid`` Signal.
    :param m_extra: second ``FPNumBase`` argument, forwarded unchanged
                    for both operands (defaults to True).
    """
    # the two operands share exactly the same construction arguments
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)

    # identifier travelling alongside the operands (reset-less)
    self.mid = Signal(id_wid, reset_less=True)
91 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
94 return [self
.a
, self
.b
, self
.mid
]
99 def __init__(self
, width
, id_wid
):
102 self
.a
= Signal(width
)
103 self
.b
= Signal(width
)
104 self
.mid
= Signal(id_wid
, reset_less
=True)
107 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
110 return [self
.a
, self
.b
, self
.mid
]
113 class FPGet2OpMod(Trigger
):
114 def __init__(self
, width
, id_wid
):
115 Trigger
.__init
__(self
)
118 self
.i
= self
.ispec()
119 self
.o
= self
.ospec()
122 return FPADDBaseData(self
.width
, self
.id_wid
)
125 return FPADDBaseData(self
.width
, self
.id_wid
)
127 def process(self
, i
):
130 def elaborate(self
, platform
):
131 m
= Trigger
.elaborate(self
, platform
)
132 with m
.If(self
.trigger
):
139 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Set up the two-operand fetch state.

    :param in_state:  identifier handed to ``FPState.__init__``.
    :param out_state: stored as ``out_state``.
    :param width:     forwarded to ``FPGet2OpMod``.
    :param id_wid:    forwarded to ``FPGet2OpMod``.
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # the worker module, plus an output record of the shape it declares
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # single-bit, reset-less handshake / decode signals
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
152 def setup(self
, m
, i
, in_stb
, in_ack
):
153 """ links module to inputs and outputs
155 m
.submodules
.get_ops
= self
.mod
156 m
.d
.comb
+= self
.mod
.i
.eq(i
)
157 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
158 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
159 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
160 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
163 with m
.If(self
.out_decode
):
164 m
.next
= self
.out_state
167 self
.o
.eq(self
.mod
.o
),
170 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid):
    """Build the record holding operands, result and bypass fields.

    :param width:  passed to ``FPNumBase`` / ``FPNumOut`` and used as
                   the shape of ``oz``.
    :param id_wid: shape of the ``mid`` Signal.
    """
    # operands (second FPNumBase argument is True for both)
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)

    # result number (second FPNumOut argument is False)
    self.z = FPNumOut(width, False)

    # `width`-shaped value and the one-bit out_do_z flag; elsewhere in
    # this file out_do_z gates whether later stages touch the operands
    self.oz = Signal(width, reset_less=True)
    self.out_do_z = Signal(reset_less=True)

    # identifier carried with the data
    self.mid = Signal(id_wid, reset_less=True)
184 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
185 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
188 class FPAddSpecialCasesMod
:
189 """ special cases: NaNs, infs, zeros, denormalised
190 NOTE: some of these are unique to add. see "Special Operations"
191 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
194 def __init__(self
, width
, id_wid
):
197 self
.i
= self
.ispec()
198 self
.o
= self
.ospec()
201 return FPADDBaseData(self
.width
, self
.id_wid
)
204 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """Attach this module to ``m`` and feed its input from ``i``.

    :param m: module under construction; this object is registered as
              its ``specialcases`` submodule.
    :param i: source copied combinatorially into ``self.i``.
    """
    m.submodules.specialcases = self

    # drive our input record from the caller-supplied one
    link_input = self.i.eq(i)
    m.d.comb += link_input
212 def process(self
, i
):
215 def elaborate(self
, platform
):
218 m
.submodules
.sc_out_z
= self
.o
.z
220 # decode: XXX really should move to separate stage
221 a1
= FPNumIn(None, self
.width
)
222 b1
= FPNumIn(None, self
.width
)
223 m
.submodules
.sc_decode_a
= a1
224 m
.submodules
.sc_decode_b
= b1
225 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
230 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
233 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
235 # if a is NaN or b is NaN return NaN
236 with m
.If(a1
.is_nan | b1
.is_nan
):
237 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
238 m
.d
.comb
+= self
.o
.z
.nan(0)
240 # XXX WEIRDNESS for FP16 non-canonical NaN handling
243 ## if a is zero and b is NaN return -b
244 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
245 # m.d.comb += self.o.out_do_z.eq(1)
246 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
248 ## if b is zero and a is NaN return -a
249 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
250 # m.d.comb += self.o.out_do_z.eq(1)
251 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
253 ## if a is -zero and b is NaN return -b
254 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
255 # m.d.comb += self.o.out_do_z.eq(1)
256 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
258 ## if b is -zero and a is NaN return -a
259 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
260 # m.d.comb += self.o.out_do_z.eq(1)
261 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
263 # if a is inf return inf (or NaN)
264 with m
.Elif(a1
.is_inf
):
265 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
266 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
267 # if a is inf and signs don't match return NaN
268 with m
.If(b1
.exp_128
& s_nomatch
):
269 m
.d
.comb
+= self
.o
.z
.nan(0)
271 # if b is inf return inf
272 with m
.Elif(b1
.is_inf
):
273 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
274 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
276 # if a is zero and b zero return signed-a/b
277 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
278 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
279 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
281 # if a is zero return b
282 with m
.Elif(a1
.is_zero
):
283 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
284 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
286 # if b is zero return a
287 with m
.Elif(b1
.is_zero
):
288 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
289 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
291 # if a equal to -b return zero (+ve zero)
292 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
293 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
294 m
.d
.comb
+= self
.o
.z
.zero(0)
296 # Denormalised Number checks next, so pass a/b data through
298 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
299 m
.d
.comb
+= self
.o
.a
.eq(a1
)
300 m
.d
.comb
+= self
.o
.b
.eq(b1
)
302 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
303 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
309 def __init__(self
, id_wid
):
312 self
.in_mid
= Signal(id_wid
, reset_less
=True)
313 self
.out_mid
= Signal(id_wid
, reset_less
=True)
319 if self
.id_wid
is not None:
320 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
323 class FPAddSpecialCases(FPState
):
324 """ special cases: NaNs, infs, zeros, denormalised
325 NOTE: some of these are unique to add. see "Special Operations"
326 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Set up the "special_cases" state.

    :param width:  forwarded to ``FPAddSpecialCasesMod``.
    :param id_wid: forwarded to ``FPAddSpecialCasesMod``.
    """
    FPState.__init__(self, "special_cases")

    # FPAddSpecialCasesMod.__init__ takes (width, id_wid) with no
    # default for id_wid; passing width alone raised TypeError.
    self.mod = FPAddSpecialCasesMod(width, id_wid)

    # output record of the shape declared by the module
    self.out_z = self.mod.ospec()

    # one-bit, reset-less flag
    self.out_do_z = Signal(reset_less=True)
335 def setup(self
, m
, i
):
336 """ links module to inputs and outputs
338 self
.mod
.setup(m
, i
, self
.out_do_z
)
339 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
340 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
344 with m
.If(self
.out_do_z
):
347 m
.next
= "denormalise"
350 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
351 """ special cases: NaNs, infs, zeros, denormalised
352 NOTE: some of these are unique to add. see "Special Operations"
353 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Set up the combined special-cases + denormalise stage.

    :param width:  forwarded to both inner modules.
    :param id_wid: forwarded to both inner modules.
    """
    FPState.__init__(self, "special_cases")

    # the two inner modules are built before the pipeline base class is
    # initialised
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.dmod = FPAddDeNormMod(width, id_wid)

    UnbufferedPipeline.__init__(self, self) # pipe is its own stage

    # output record obtained from this object's own ospec()
    self.o = self.ospec()
364 return self
.smod
.ispec()
367 return self
.dmod
.ospec()
369 def setup(self
, m
, i
):
370 """ links module to inputs and outputs
372 # these only needed for break-out (early-out)
373 # out_z = self.smod.ospec()
374 # out_do_z = Signal(reset_less=True)
375 self
.smod
.setup(m
, i
)
376 self
.dmod
.setup(m
, self
.smod
.o
)
377 #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
379 # out_do_z=True, only needed for early-out (split pipeline)
380 #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
381 #m.d.sync += out_z.mid.eq(self.smod.o.mid) # (and mid)
384 # XXX TODO: sync for state-based
385 m
.d
.comb
+= self
.o
.eq(self
.dmod
.o
)
387 def process(self
, i
):
391 #with m.If(self.out_do_z):
397 class FPAddDeNormMod(FPState
):
399 def __init__(self
, width
, id_wid
):
402 self
.i
= self
.ispec()
403 self
.o
= self
.ospec()
406 return FPSCData(self
.width
, self
.id_wid
)
409 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """Attach this module to ``m`` and feed its input from ``i``.

    :param m: module under construction; this object is registered as
              its ``denormalise`` submodule.
    :param i: source copied combinatorially into ``self.i``.
    """
    m.submodules.denormalise = self

    # drive our input record from the caller-supplied one
    link_input = self.i.eq(i)
    m.d.comb += link_input
417 def elaborate(self
, platform
):
419 m
.submodules
.denorm_in_a
= self
.i
.a
420 m
.submodules
.denorm_in_b
= self
.i
.b
421 m
.submodules
.denorm_out_a
= self
.o
.a
422 m
.submodules
.denorm_out_b
= self
.o
.b
424 with m
.If(~self
.i
.out_do_z
):
425 # XXX hmmm, don't like repeating identical code
426 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
427 with m
.If(self
.i
.a
.exp_n127
):
428 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
430 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
432 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
433 with m
.If(self
.i
.b
.exp_n127
):
434 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
436 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
438 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
439 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
440 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
441 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
446 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Set up the "denormalise" state.

    :param width:  forwarded to ``FPAddDeNormMod`` and used for the two
                   output numbers.
    :param id_wid: forwarded to ``FPAddDeNormMod``.
    """
    FPState.__init__(self, "denormalise")

    # FPAddDeNormMod.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raised TypeError.
    self.mod = FPAddDeNormMod(width, id_wid)

    # holders for the two outputs
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
454 def setup(self
, m
, i
):
455 """ links module to inputs and outputs
459 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
460 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
463 # Denormalised Number checks
467 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the in/out operand pairs and the exponents-equal flag.

    :param width: forwarded to every ``FPNumBase`` / ``FPNumIn`` built
                  here.
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # outputs (FPNumIn is constructed with None as its first argument)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # one-bit, reset-less flag
    self.exp_eq = Signal(reset_less=True)
476 def elaborate(self
, platform
):
477 # This one however (single-cycle) will do the shift
482 m
.submodules
.align_in_a
= self
.in_a
483 m
.submodules
.align_in_b
= self
.in_b
484 m
.submodules
.align_out_a
= self
.out_a
485 m
.submodules
.align_out_b
= self
.out_b
487 # NOTE: this does *not* do single-cycle multi-shifting,
488 # it *STAYS* in the align state until exponents match
490 # exponent of a greater than b: shift b down
491 m
.d
.comb
+= self
.exp_eq
.eq(0)
492 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
493 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
494 agtb
= Signal(reset_less
=True)
495 altb
= Signal(reset_less
=True)
496 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
497 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
499 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
500 # exponent of b greater than a: shift a down
502 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
503 # exponents equal: move to next stage.
505 m
.d
.comb
+= self
.exp_eq
.eq(1)
509 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Set up the multi-cycle "align" state.

    :param width:  forwarded to ``FPAddAlignMultiMod`` and to the
                   output numbers.
    :param id_wid: accepted but not used by this constructor.
    """
    FPState.__init__(self, "align")

    self.mod = FPAddAlignMultiMod(width)

    # output holders (FPNumIn is constructed with None first)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # one-bit, reset-less flag
    self.exp_eq = Signal(reset_less=True)
518 def setup(self
, m
, in_a
, in_b
):
519 """ links module to inputs and outputs
521 m
.submodules
.align
= self
.mod
522 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
523 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
524 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
525 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
526 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
529 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Build the record holding ``FPNumIn`` operands and bypass fields.

    :param width:  forwarded to ``FPNumIn`` / ``FPNumOut`` and used as
                   the shape of ``oz``.
    :param id_wid: shape of the ``mid`` Signal.
    """
    # operands (FPNumIn is constructed with None first)
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)

    # result number (second FPNumOut argument is False)
    self.z = FPNumOut(width, False)

    # one-bit out_do_z flag and the `width`-shaped oz value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # identifier carried with the data
    self.mid = Signal(id_wid, reset_less=True)
544 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
545 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
548 class FPAddAlignSingleMod
:
550 def __init__(self
, width
, id_wid
):
553 self
.i
= self
.ispec()
554 self
.o
= self
.ospec()
557 return FPSCData(self
.width
, self
.id_wid
)
560 return FPNumIn2Ops(self
.width
, self
.id_wid
)
562 def process(self
, i
):
def setup(self, m, i):
    """Attach this module to ``m`` and feed its input from ``i``.

    :param m: module under construction; this object is registered as
              its ``align`` submodule.
    :param i: source copied combinatorially into ``self.i``.
    """
    m.submodules.align = self

    # drive our input record from the caller-supplied one
    link_input = self.i.eq(i)
    m.d.comb += link_input
571 def elaborate(self
, platform
):
572 """ Aligns A against B or B against A, depending on which has the
573 greater exponent. This is done in a *single* cycle using
574 variable-width bit-shift
576 the shifter used here is quite expensive in terms of gates.
577 Mux A or B in (and out) into temporaries, as only one of them
578 needs to be aligned against the other
582 m
.submodules
.align_in_a
= self
.i
.a
583 m
.submodules
.align_in_b
= self
.i
.b
584 m
.submodules
.align_out_a
= self
.o
.a
585 m
.submodules
.align_out_b
= self
.o
.b
587 # temporary (muxed) input and output to be shifted
588 t_inp
= FPNumBase(self
.width
)
589 t_out
= FPNumIn(None, self
.width
)
590 espec
= (len(self
.i
.a
.e
), True)
591 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
592 m
.submodules
.align_t_in
= t_inp
593 m
.submodules
.align_t_out
= t_out
594 m
.submodules
.multishift_r
= msr
596 ediff
= Signal(espec
, reset_less
=True)
597 ediffr
= Signal(espec
, reset_less
=True)
598 tdiff
= Signal(espec
, reset_less
=True)
599 elz
= Signal(reset_less
=True)
600 egz
= Signal(reset_less
=True)
602 # connect multi-shifter to t_inp/out mantissa (and tdiff)
603 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
604 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
605 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
606 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
607 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
609 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
610 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
611 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
612 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
614 # default: A-exp == B-exp, A and B untouched (fall through)
615 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
616 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
617 # only one shifter (muxed)
618 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
619 # exponent of a greater than b: shift b down
620 with m
.If(~self
.i
.out_do_z
):
622 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
625 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
627 # exponent of b greater than a: shift a down
629 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
632 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
635 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
636 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
637 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
638 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
643 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Set up the single-cycle "align" state.

    :param width:  forwarded to ``FPAddAlignSingleMod`` and to the
                   output numbers.
    :param id_wid: forwarded to ``FPAddAlignSingleMod``.
    """
    FPState.__init__(self, "align")

    self.mod = FPAddAlignSingleMod(width, id_wid)

    # output holders (FPNumIn is constructed with None first)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
651 def setup(self
, m
, i
):
652 """ links module to inputs and outputs
656 # NOTE: could be done as comb
657 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
658 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
664 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
666 def __init__(self
, width
, id_wid
):
667 FPState
.__init
__(self
, "align")
670 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
671 self
.a1o
= self
.ospec()
674 return FPSCData(self
.width
, self
.id_wid
)
677 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
679 def setup(self
, m
, i
):
680 """ links module to inputs and outputs
683 # chain AddAlignSingle, AddStage0 and AddStage1
684 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
685 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
686 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
688 chain
= StageChain([mod
, a0mod
, a1mod
])
693 def process(self
, i
):
697 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
698 m
.next
= "normalise_1"
701 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Build the add-stage-0 record.

    :param width:  forwarded to ``FPNumBase`` and used as the shape of
                   ``oz``.
    :param id_wid: shape of the ``mid`` Signal.
    """
    # result number (second FPNumBase argument is False)
    self.z = FPNumBase(width, False)

    # one-bit out_do_z flag and the `width`-shaped oz value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # total: four bits wider than z's m_width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)

    # identifier carried with the data
    self.mid = Signal(id_wid, reset_less=True)
711 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
712 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
715 class FPAddStage0Mod
:
717 def __init__(self
, width
, id_wid
):
720 self
.i
= self
.ispec()
721 self
.o
= self
.ospec()
724 return FPSCData(self
.width
, self
.id_wid
)
727 return FPAddStage0Data(self
.width
, self
.id_wid
)
729 def process(self
, i
):
def setup(self, m, i):
    """Attach this module to ``m`` and feed its input from ``i``.

    :param m: module under construction; this object is registered as
              its ``add0`` submodule.
    :param i: source copied combinatorially into ``self.i``.
    """
    m.submodules.add0 = self

    # drive our input record from the caller-supplied one
    link_input = self.i.eq(i)
    m.d.comb += link_input
738 def elaborate(self
, platform
):
740 m
.submodules
.add0_in_a
= self
.i
.a
741 m
.submodules
.add0_in_b
= self
.i
.b
742 m
.submodules
.add0_out_z
= self
.o
.z
744 # store intermediate tests (and zero-extended mantissas)
745 seq
= Signal(reset_less
=True)
746 mge
= Signal(reset_less
=True)
747 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
748 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
749 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
750 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
751 am0
.eq(Cat(self
.i
.a
.m
, 0)),
752 bm0
.eq(Cat(self
.i
.b
.m
, 0))
754 # same-sign (both negative or both positive) add mantissas
755 with m
.If(~self
.i
.out_do_z
):
756 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
759 self
.o
.tot
.eq(am0
+ bm0
),
760 self
.o
.z
.s
.eq(self
.i
.a
.s
)
762 # a mantissa greater than b, use a
765 self
.o
.tot
.eq(am0
- bm0
),
766 self
.o
.z
.s
.eq(self
.i
.a
.s
)
768 # b mantissa greater than a, use b
771 self
.o
.tot
.eq(bm0
- am0
),
772 self
.o
.z
.s
.eq(self
.i
.b
.s
)
775 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
776 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
777 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
781 class FPAddStage0(FPState
):
782 """ First stage of add. covers same-sign (add) and subtract
783 special-casing when mantissas are greater or equal, to
784 give greatest accuracy.
def __init__(self, width, id_wid):
    """Set up the "add_0" state.

    :param width:  forwarded to ``FPAddStage0Mod``.
    :param id_wid: forwarded to ``FPAddStage0Mod``.
    """
    FPState.__init__(self, "add_0")

    # FPAddStage0Mod.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raised TypeError.
    self.mod = FPAddStage0Mod(width, id_wid)

    # output record of the shape declared by the module
    self.o = self.mod.ospec()
792 def setup(self
, m
, i
):
793 """ links module to inputs and outputs
797 # NOTE: these could be done as combinatorial (merge add0+add1)
798 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
804 class FPAddStage1Data
:
806 def __init__(self
, width
, id_wid
):
807 self
.z
= FPNumBase(width
, False)
808 self
.out_do_z
= Signal(reset_less
=True)
809 self
.oz
= Signal(width
, reset_less
=True)
811 self
.mid
= Signal(id_wid
, reset_less
=True)
814 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
815 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
819 class FPAddStage1Mod(FPState
):
820 """ Second stage of add: preparation for normalisation.
821 detects when tot sum is too big (tot[27] is kinda a carry bit)
824 def __init__(self
, width
, id_wid
):
827 self
.i
= self
.ispec()
828 self
.o
= self
.ospec()
831 return FPAddStage0Data(self
.width
, self
.id_wid
)
834 return FPAddStage1Data(self
.width
, self
.id_wid
)
836 def process(self
, i
):
839 def setup(self
, m
, i
):
840 """ links module to inputs and outputs
842 m
.submodules
.add1
= self
843 m
.submodules
.add1_out_overflow
= self
.o
.of
845 m
.d
.comb
+= self
.i
.eq(i
)
847 def elaborate(self
, platform
):
849 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
850 # tot[-1] (MSB) gets set when the sum overflows. shift result down
851 with m
.If(~self
.i
.out_do_z
):
852 with m
.If(self
.i
.tot
[-1]):
854 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
855 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
856 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
857 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
858 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
859 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
861 # tot[-1] (MSB) zero case
864 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
865 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
866 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
867 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
868 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
871 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
872 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
873 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
878 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Set up the "add_1" state.

    :param width:  forwarded to ``FPAddStage1Mod`` and to ``out_z``.
    :param id_wid: forwarded to ``FPAddStage1Mod``.
    """
    FPState.__init__(self, "add_1")

    # FPAddStage1Mod.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raised TypeError.
    self.mod = FPAddStage1Mod(width, id_wid)

    # holders for the result number and its overflow record
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()

    # strobe signal (default Signal: one bit, with reset)
    self.norm_stb = Signal()
887 def setup(self
, m
, i
):
888 """ links module to inputs and outputs
892 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
894 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
895 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
896 m
.d
.sync
+= self
.norm_stb
.eq(1)
899 m
.next
= "normalise_1"
902 class FPNormaliseModSingle
:
904 def __init__(self
, width
):
906 self
.in_z
= self
.ispec()
907 self
.out_z
= self
.ospec()
910 return FPNumBase(self
.width
, False)
913 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """Attach this module to ``m`` and feed its input from ``i``.

    :param m: module under construction; this object is registered as
              its ``normalise`` submodule.
    :param i: source copied combinatorially into ``self.i``.
    """
    m.submodules.normalise = self

    # NOTE(review): the visible parts of this class's constructor
    # assign in_z/out_z, not ``i`` - confirm ``self.i`` exists.
    link_input = self.i.eq(i)
    m.d.comb += link_input
921 def elaborate(self
, platform
):
924 mwid
= self
.out_z
.m_width
+2
925 pe
= PriorityEncoder(mwid
)
926 m
.submodules
.norm_pe
= pe
928 m
.submodules
.norm1_out_z
= self
.out_z
929 m
.submodules
.norm1_in_z
= self
.in_z
931 in_z
= FPNumBase(self
.width
, False)
933 m
.submodules
.norm1_insel_z
= in_z
934 m
.submodules
.norm1_insel_overflow
= in_of
936 espec
= (len(in_z
.e
), True)
937 ediff_n126
= Signal(espec
, reset_less
=True)
938 msr
= MultiShiftRMerge(mwid
, espec
)
939 m
.submodules
.multishift_r
= msr
941 m
.d
.comb
+= in_z
.eq(self
.in_z
)
942 m
.d
.comb
+= in_of
.eq(self
.in_of
)
943 # initialise out from in (overridden below)
944 m
.d
.comb
+= self
.out_z
.eq(in_z
)
945 m
.d
.comb
+= self
.out_of
.eq(in_of
)
946 # normalisation decrease condition
947 decrease
= Signal(reset_less
=True)
948 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
951 # *sigh* not entirely obvious: count leading zeros (clz)
952 # with a PriorityEncoder: to find from the MSB
953 # we reverse the order of the bits.
954 temp_m
= Signal(mwid
, reset_less
=True)
955 temp_s
= Signal(mwid
+1, reset_less
=True)
956 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
958 # cat round and guard bits back into the mantissa
959 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
960 pe
.i
.eq(temp_m
[::-1]), # inverted
961 clz
.eq(pe
.o
), # count zeros from MSB down
962 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
963 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
964 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Build the post-normalisation record.

    :param width:  forwarded to ``FPNumBase`` and used as the shape of
                   ``oz``.
    :param id_wid: shape of the ``mid`` Signal.
    """
    # one-bit, reset-less rounding flag
    self.roundz = Signal(reset_less=True)

    # result number (second FPNumBase argument is False)
    self.z = FPNumBase(width, False)

    # one-bit out_do_z flag and the `width`-shaped oz value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # identifier carried with the data
    self.mid = Signal(id_wid, reset_less=True)
980 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
981 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
984 class FPNorm1ModSingle
:
986 def __init__(self
, width
, id_wid
):
989 self
.i
= self
.ispec()
990 self
.o
= self
.ospec()
993 return FPAddStage1Data(self
.width
, self
.id_wid
)
996 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """Attach this module to ``m`` and feed its input from ``i``.

    :param m: module under construction; this object is registered as
              its ``normalise_1`` submodule.
    :param i: source copied combinatorially into ``self.i``.
    """
    m.submodules.normalise_1 = self

    # drive our input record from the caller-supplied one
    link_input = self.i.eq(i)
    m.d.comb += link_input
1004 def process(self
, i
):
1007 def elaborate(self
, platform
):
1010 mwid
= self
.o
.z
.m_width
+2
1011 pe
= PriorityEncoder(mwid
)
1012 m
.submodules
.norm_pe
= pe
1015 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1017 m
.submodules
.norm1_out_z
= self
.o
.z
1018 m
.submodules
.norm1_out_overflow
= of
1019 m
.submodules
.norm1_in_z
= self
.i
.z
1020 m
.submodules
.norm1_in_overflow
= self
.i
.of
1023 m
.submodules
.norm1_insel_z
= i
.z
1024 m
.submodules
.norm1_insel_overflow
= i
.of
1026 espec
= (len(i
.z
.e
), True)
1027 ediff_n126
= Signal(espec
, reset_less
=True)
1028 msr
= MultiShiftRMerge(mwid
, espec
)
1029 m
.submodules
.multishift_r
= msr
1031 m
.d
.comb
+= i
.eq(self
.i
)
1032 # initialise out from in (overridden below)
1033 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1034 m
.d
.comb
+= of
.eq(i
.of
)
1035 # normalisation increase/decrease conditions
1036 decrease
= Signal(reset_less
=True)
1037 increase
= Signal(reset_less
=True)
1038 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1039 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1041 with m
.If(~self
.i
.out_do_z
):
1042 with m
.If(decrease
):
1043 # *sigh* not entirely obvious: count leading zeros (clz)
1044 # with a PriorityEncoder: to find from the MSB
1045 # we reverse the order of the bits.
1046 temp_m
= Signal(mwid
, reset_less
=True)
1047 temp_s
= Signal(mwid
+1, reset_less
=True)
1048 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1049 # make sure that the amount to decrease by does NOT
1050 # go below the minimum non-INF/NaN exponent
1051 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1054 # cat round and guard bits back into the mantissa
1055 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1056 pe
.i
.eq(temp_m
[::-1]), # inverted
1057 clz
.eq(limclz
), # count zeros from MSB down
1058 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1059 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1060 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1061 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1062 # overflow in bits 0..1: got shifted too (leave sticky)
1063 of
.guard
.eq(temp_s
[1]), # guard
1064 of
.round_bit
.eq(temp_s
[0]), # round
1067 with m
.Elif(increase
):
1068 temp_m
= Signal(mwid
+1, reset_less
=True)
1070 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1072 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1073 # connect multi-shifter to inp/out mantissa (and ediff)
1075 msr
.diff
.eq(ediff_n126
),
1076 self
.o
.z
.m
.eq(msr
.m
[3:]),
1077 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1078 # overflow in bits 0..1: got shifted too (leave sticky)
1079 of
.guard
.eq(temp_s
[2]), # guard
1080 of
.round_bit
.eq(temp_s
[1]), # round
1081 of
.sticky
.eq(temp_s
[0]), # sticky
1082 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1085 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1086 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1087 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
1092 class FPNorm1ModMulti
:
1094 def __init__(self
, width
, single_cycle
=True):
1096 self
.in_select
= Signal(reset_less
=True)
1097 self
.in_z
= FPNumBase(width
, False)
1098 self
.in_of
= Overflow()
1099 self
.temp_z
= FPNumBase(width
, False)
1100 self
.temp_of
= Overflow()
1101 self
.out_z
= FPNumBase(width
, False)
1102 self
.out_of
= Overflow()
1104 def elaborate(self
, platform
):
1107 m
.submodules
.norm1_out_z
= self
.out_z
1108 m
.submodules
.norm1_out_overflow
= self
.out_of
1109 m
.submodules
.norm1_temp_z
= self
.temp_z
1110 m
.submodules
.norm1_temp_of
= self
.temp_of
1111 m
.submodules
.norm1_in_z
= self
.in_z
1112 m
.submodules
.norm1_in_overflow
= self
.in_of
1114 in_z
= FPNumBase(self
.width
, False)
1116 m
.submodules
.norm1_insel_z
= in_z
1117 m
.submodules
.norm1_insel_overflow
= in_of
1119 # select which of temp or in z/of to use
1120 with m
.If(self
.in_select
):
1121 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1122 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1124 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1125 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1126 # initialise out from in (overridden below)
1127 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1128 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1129 # normalisation increase/decrease conditions
1130 decrease
= Signal(reset_less
=True)
1131 increase
= Signal(reset_less
=True)
1132 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1133 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1134 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1136 with m
.If(decrease
):
1138 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1139 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1140 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1141 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1142 self
.out_of
.round_bit
.eq(0), # reset round bit
1143 self
.out_of
.m0
.eq(in_of
.guard
),
1146 with m
.Elif(increase
):
1148 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1149 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1150 self
.out_of
.guard
.eq(in_z
.m
[0]),
1151 self
.out_of
.m0
.eq(in_z
.m
[1]),
1152 self
.out_of
.round_bit
.eq(in_of
.guard
),
1153 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1159 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """Set up the single-cycle "normalise_1" state.

    :param width:        forwarded to ``FPNorm1ModSingle`` and to
                         ``out_z``.
    :param id_wid:       forwarded to ``FPNorm1ModSingle``.
    :param single_cycle: accepted but not used by this constructor.
    """
    FPState.__init__(self, "normalise_1")

    # FPNorm1ModSingle.__init__ takes (width, id_wid) with no default
    # for id_wid; passing width alone raised TypeError.
    self.mod = FPNorm1ModSingle(width, id_wid)

    # output record obtained from this object's own ospec()
    self.o = self.ospec()

    # result number (second FPNumBase argument is False) and the
    # one-bit, reset-less rounding flag
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1169 return self
.mod
.ispec()
1172 return self
.mod
.ospec()
def setup(self, m, i):
    """Delegate the wiring to the inner module.

    :param m: module under construction, handed on to ``self.mod``.
    :param i: input source, handed on to ``self.mod``.
    """
    inner = self.mod
    inner.setup(m, i)
1179 def action(self
, m
):
1183 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """Set up the multi-cycle "normalise_1" state.

    :param width:  forwarded to ``FPNorm1ModMulti`` and to the number
                   holders created here.
    :param id_wid: accepted but not used by this constructor.
    """
    FPState.__init__(self, "normalise_1")

    self.mod = FPNorm1ModMulti(width)

    # handshake: strobe and acknowledge (ack has an explicit reset
    # value of 0 and is also flagged reset-less)
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)

    # one-bit, reset-less control flags
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # temporary number and its overflow record
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # result number and the one-bit, reset-less rounding flag
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1197 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1198 """ links module to inputs and outputs
1200 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1201 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1202 self
.out_z
, self
.out_norm
)
1204 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1205 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1207 def action(self
, m
):
1208 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1209 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1210 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1211 with m
.If(self
.out_norm
):
1212 with m
.If(self
.in_accept
):
1217 m
.d
.sync
+= self
.ack
.eq(0)
1219 # normalisation not required (or done).
1221 m
.d
.sync
+= self
.ack
.eq(1)
1222 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1225 class FPNormToPack(FPState
, UnbufferedPipeline
):
1227 def __init__(self
, width
, id_wid
):
1228 FPState
.__init
__(self
, "normalise_1")
1229 self
.id_wid
= id_wid
1231 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
1234 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1237 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1239 def setup(self
, m
, i
):
1240 """ links module to inputs and outputs
1243 # Normalisation, Rounding Corrections, Pack - in a chain
1244 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1245 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1246 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1247 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1248 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1250 self
.out_z
= pmod
.ospec()
1252 # XXX TODO: sync for state-based
1253 m
.d
.comb
+= self
.out_z
.mid
.eq(pmod
.o
.mid
)
1254 m
.d
.comb
+= self
.out_z
.z
.eq(pmod
.o
.z
) # outputs packed result
1256 def process(self
, i
):
1259 def action(self
, m
):
1260 m
.next
= "pack_put_z"
1265 def __init__(self
, width
, id_wid
):
1266 self
.z
= FPNumBase(width
, False)
1267 self
.out_do_z
= Signal(reset_less
=True)
1268 self
.oz
= Signal(width
, reset_less
=True)
1269 self
.mid
= Signal(id_wid
, reset_less
=True)
1272 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1278 def __init__(self
, width
, id_wid
):
1280 self
.id_wid
= id_wid
1281 self
.i
= self
.ispec()
1282 self
.out_z
= self
.ospec()
1285 return FPNorm1Data(self
.width
, self
.id_wid
)
1288 return FPRoundData(self
.width
, self
.id_wid
)
1290 def process(self
, i
):
1293 def setup(self
, m
, i
):
1294 m
.submodules
.roundz
= self
1295 m
.d
.comb
+= self
.i
.eq(i
)
1297 def elaborate(self
, platform
):
1299 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1300 with m
.If(~self
.i
.out_do_z
):
1301 with m
.If(self
.i
.roundz
):
1302 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
1303 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1304 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1309 class FPRound(FPState
):
1311 def __init__(self
, width
, id_wid
):
1312 FPState
.__init
__(self
, "round")
1313 self
.mod
= FPRoundMod(width
)
1314 self
.out_z
= self
.ospec()
1317 return self
.mod
.ispec()
1320 return self
.mod
.ospec()
1322 def setup(self
, m
, i
):
1323 """ links module to inputs and outputs
1325 self
.mod
.setup(m
, i
)
1328 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1329 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1331 def action(self
, m
):
1332 m
.next
= "corrections"
1335 class FPCorrectionsMod
:
1337 def __init__(self
, width
, id_wid
):
1339 self
.id_wid
= id_wid
1340 self
.i
= self
.ispec()
1341 self
.out_z
= self
.ospec()
1344 return FPRoundData(self
.width
, self
.id_wid
)
1347 return FPRoundData(self
.width
, self
.id_wid
)
1349 def process(self
, i
):
1352 def setup(self
, m
, i
):
1353 """ links module to inputs and outputs
1355 m
.submodules
.corrections
= self
1356 m
.d
.comb
+= self
.i
.eq(i
)
1358 def elaborate(self
, platform
):
1360 m
.submodules
.corr_in_z
= self
.i
.z
1361 m
.submodules
.corr_out_z
= self
.out_z
.z
1362 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1363 with m
.If(~self
.i
.out_do_z
):
1364 with m
.If(self
.i
.z
.is_denormalised
):
1365 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1369 class FPCorrections(FPState
):
1371 def __init__(self
, width
, id_wid
):
1372 FPState
.__init
__(self
, "corrections")
1373 self
.mod
= FPCorrectionsMod(width
)
1374 self
.out_z
= self
.ospec()
1377 return self
.mod
.ispec()
1380 return self
.mod
.ospec()
1382 def setup(self
, m
, in_z
):
1383 """ links module to inputs and outputs
1385 self
.mod
.setup(m
, in_z
)
1387 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1388 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1390 def action(self
, m
):
1396 def __init__(self
, width
, id_wid
):
1397 self
.z
= Signal(width
, reset_less
=True)
1398 self
.mid
= Signal(id_wid
, reset_less
=True)
1401 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1404 return [self
.z
, self
.mid
]
1409 def __init__(self
, width
, id_wid
):
1411 self
.id_wid
= id_wid
1412 self
.i
= self
.ispec()
1413 self
.o
= self
.ospec()
1416 return FPRoundData(self
.width
, self
.id_wid
)
1419 return FPPackData(self
.width
, self
.id_wid
)
1421 def process(self
, i
):
1424 def setup(self
, m
, in_z
):
1425 """ links module to inputs and outputs
1427 m
.submodules
.pack
= self
1428 m
.d
.comb
+= self
.i
.eq(in_z
)
1430 def elaborate(self
, platform
):
1432 z
= FPNumOut(self
.width
, False)
1433 m
.submodules
.pack_in_z
= self
.i
.z
1434 m
.submodules
.pack_out_z
= z
1435 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1436 with m
.If(~self
.i
.out_do_z
):
1437 with m
.If(self
.i
.z
.is_overflowed
):
1438 m
.d
.comb
+= z
.inf(self
.i
.z
.s
)
1440 m
.d
.comb
+= z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1442 m
.d
.comb
+= z
.v
.eq(self
.i
.oz
)
1443 m
.d
.comb
+= self
.o
.z
.eq(z
.v
)
1447 class FPPack(FPState
):
1449 def __init__(self
, width
, id_wid
):
1450 FPState
.__init
__(self
, "pack")
1451 self
.mod
= FPPackMod(width
)
1452 self
.out_z
= self
.ospec()
1455 return self
.mod
.ispec()
1458 return self
.mod
.ospec()
1460 def setup(self
, m
, in_z
):
1461 """ links module to inputs and outputs
1463 self
.mod
.setup(m
, in_z
)
1465 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1466 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1468 def action(self
, m
):
1469 m
.next
= "pack_put_z"
1472 class FPPutZ(FPState
):
1474 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1475 FPState
.__init
__(self
, state
)
1476 if to_state
is None:
1477 to_state
= "get_ops"
1478 self
.to_state
= to_state
1481 self
.in_mid
= in_mid
1482 self
.out_mid
= out_mid
1484 def action(self
, m
):
1485 if self
.in_mid
is not None:
1486 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1488 self
.out_z
.z
.v
.eq(self
.in_z
)
1490 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1491 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1492 m
.next
= self
.to_state
1494 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1497 class FPPutZIdx(FPState
):
1499 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1500 FPState
.__init
__(self
, state
)
1501 if to_state
is None:
1502 to_state
= "get_ops"
1503 self
.to_state
= to_state
1505 self
.out_zs
= out_zs
1506 self
.in_mid
= in_mid
1508 def action(self
, m
):
1509 outz_stb
= Signal(reset_less
=True)
1510 outz_ack
= Signal(reset_less
=True)
1511 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1512 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1515 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1517 with m
.If(outz_stb
& outz_ack
):
1518 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1519 m
.next
= self
.to_state
1521 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1524 def __init__(self
, width
, id_wid
):
1525 self
.z
= FPOp(width
)
1526 self
.mid
= Signal(id_wid
, reset_less
=True)
1529 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1532 return [self
.z
, self
.mid
]
1537 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1540 * width: bit-width of IEEE754. supported: 16, 32, 64
1541 * id_wid: an identifier that is sync-connected to the input
1542 * single_cycle: True indicates each stage to complete in 1 clock
1543 * compact: True indicates a reduced number of stages
1546 self
.id_wid
= id_wid
1547 self
.single_cycle
= single_cycle
1548 self
.compact
= compact
1550 self
.in_t
= Trigger()
1551 self
.i
= self
.ispec()
1552 self
.o
= self
.ospec()
1557 return FPADDBaseData(self
.width
, self
.id_wid
)
1560 return FPOpData(self
.width
, self
.id_wid
)
1562 def add_state(self
, state
):
1563 self
.states
.append(state
)
1566 def get_fragment(self
, platform
=None):
1567 """ creates the HDL code-fragment for FPAdd
1570 m
.submodules
.out_z
= self
.o
.z
1571 m
.submodules
.in_t
= self
.in_t
1573 self
.get_compact_fragment(m
, platform
)
1575 self
.get_longer_fragment(m
, platform
)
1577 with m
.FSM() as fsm
:
1579 for state
in self
.states
:
1580 with m
.State(state
.state_from
):
1585 def get_longer_fragment(self
, m
, platform
=None):
1587 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1589 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1593 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1594 sc
.setup(m
, a
, b
, self
.in_mid
)
1596 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1597 dn
.setup(m
, a
, b
, sc
.in_mid
)
1599 if self
.single_cycle
:
1600 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1601 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1603 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1604 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1606 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1607 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1609 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1610 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1612 if self
.single_cycle
:
1613 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1614 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1616 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1617 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1619 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1620 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1622 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1623 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1625 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1626 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1628 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1629 pa
.in_mid
, self
.out_mid
))
1631 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1632 pa
.in_mid
, self
.out_mid
))
1634 def get_compact_fragment(self
, m
, platform
=None):
1636 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1637 self
.width
, self
.id_wid
))
1638 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1640 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1643 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1646 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1647 n1
.setup(m
, alm
.a1o
)
1649 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1650 n1
.out_z
.mid
, self
.o
.mid
))
1652 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1653 # sc.o.mid, self.o.mid))
1656 class FPADDBase(FPState
):
1658 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1661 * width: bit-width of IEEE754. supported: 16, 32, 64
1662 * id_wid: an identifier that is sync-connected to the input
1663 * single_cycle: True indicates each stage to complete in 1 clock
1665 FPState
.__init
__(self
, "fpadd")
1667 self
.single_cycle
= single_cycle
1668 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1669 self
.o
= self
.ospec()
1671 self
.in_t
= Trigger()
1672 self
.i
= self
.ispec()
1674 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1675 self
.in_accept
= Signal(reset_less
=True)
1676 self
.add_stb
= Signal(reset_less
=True)
1677 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1680 return self
.mod
.ispec()
1683 return self
.mod
.ospec()
1685 def setup(self
, m
, i
, add_stb
, in_mid
):
1686 m
.d
.comb
+= [self
.i
.eq(i
),
1687 self
.mod
.i
.eq(self
.i
),
1688 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1689 #self.add_stb.eq(add_stb),
1690 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1691 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1692 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1693 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1694 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1695 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1698 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1699 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1700 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1701 #m.d.sync += self.in_t.stb.eq(0)
1703 m
.submodules
.fpadd
= self
.mod
1705 def action(self
, m
):
1707 # in_accept is set on incoming strobe HIGH and ack LOW.
1708 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1710 #with m.If(self.in_t.ack):
1711 # m.d.sync += self.in_t.stb.eq(0)
1712 with m
.If(~self
.z_done
):
1713 # not done: test for accepting an incoming operand pair
1714 with m
.If(self
.in_accept
):
1716 self
.add_ack
.eq(1), # acknowledge receipt...
1717 self
.in_t
.stb
.eq(1), # initiate add
1720 m
.d
.sync
+= [self
.add_ack
.eq(0),
1721 self
.in_t
.stb
.eq(0),
1725 # done: acknowledge, and write out id and value
1726 m
.d
.sync
+= [self
.add_ack
.eq(1),
1733 if self
.in_mid
is not None:
1734 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1737 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1739 # move to output state on detecting z ack
1740 with m
.If(self
.out_z
.trigger
):
1741 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1744 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1747 class FPADDBasePipe(ControlBase
):
1748 def __init__(self
, width
, id_wid
):
1749 ControlBase
.__init
__(self
)
1750 self
.pipe1
= FPAddSpecialCasesDeNorm(width
, id_wid
)
1751 self
.pipe2
= FPAddAlignSingleAdd(width
, id_wid
)
1752 self
.pipe3
= FPNormToPack(width
, id_wid
)
1754 self
._eqs
= self
.connect([self
.pipe1
, self
.pipe2
, self
.pipe3
])
1756 def elaborate(self
, platform
):
1758 m
.submodules
.scnorm
= self
.pipe1
1759 m
.submodules
.addalign
= self
.pipe2
1760 m
.submodules
.normpack
= self
.pipe3
1761 m
.d
.comb
+= self
._eqs
1765 class FPADDInMuxPipe(PriorityCombMuxInPipe
):
1766 def __init__(self
, width
, id_wid
, num_rows
):
1767 self
.num_rows
= num_rows
1768 def iospec(): return FPADDBaseData(width
, id_wid
)
1769 stage
= PassThroughStage(iospec
)
1770 PriorityCombMuxInPipe
.__init
__(self
, stage
, p_len
=self
.num_rows
)
1773 class FPADDMuxOutPipe(CombMuxOutPipe
):
1774 def __init__(self
, width
, id_wid
, num_rows
):
1775 self
.num_rows
= num_rows
1776 def iospec(): return FPPackData(width
, id_wid
)
1777 stage
= PassThroughStage(iospec
)
1778 CombMuxOutPipe
.__init
__(self
, stage
, n_len
=self
.num_rows
)
1781 class FPADDMuxInOut
:
1782 """ Reservation-Station version of FPADD pipeline.
1784 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
1785 * 3-stage adder pipeline
1786 * fan-out on outputs (an array of FPPackData: z,mid)
1788 Fan-in and Fan-out are combinatorial.
1790 def __init__(self
, width
, id_wid
, num_rows
):
1791 self
.num_rows
= num_rows
1792 self
.inpipe
= FPADDInMuxPipe(width
, id_wid
, num_rows
) # fan-in
1793 self
.fpadd
= FPADDBasePipe(width
, id_wid
) # add stage
1794 self
.outpipe
= FPADDMuxOutPipe(width
, id_wid
, num_rows
) # fan-out
1796 self
.p
= self
.inpipe
.p
# kinda annoying,
1797 self
.n
= self
.outpipe
.n
# use pipe in/out as this class in/out
1798 self
._ports
= self
.inpipe
.ports() + self
.outpipe
.ports()
1800 def elaborate(self
, platform
):
1802 m
.submodules
.inpipe
= self
.inpipe
1803 m
.submodules
.fpadd
= self
.fpadd
1804 m
.submodules
.outpipe
= self
.outpipe
1806 m
.d
.comb
+= self
.inpipe
.n
.connect_to_next(self
.fpadd
.p
)
1807 m
.d
.comb
+= self
.fpadd
.connect_to_next(self
.outpipe
)
1816 """ FPADD: stages as follows:
1822 FPAddBase---> FPAddBaseMod
1824 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1826 FPAddBase is tricky: it is both a stage and *has* stages.
1827 Connection to FPAddBaseMod therefore requires an in stb/ack
1828 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1829 needs to be the thing that raises the incoming stb.
1832 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1835 * width: bit-width of IEEE754. supported: 16, 32, 64
1836 * id_wid: an identifier that is sync-connected to the input
1837 * single_cycle: True indicates each stage to complete in 1 clock
1840 self
.id_wid
= id_wid
1841 self
.single_cycle
= single_cycle
1843 #self.out_z = FPOp(width)
1844 self
.ids
= FPID(id_wid
)
1847 for i
in range(rs_sz
):
1850 in_a
.name
= "in_a_%d" % i
1851 in_b
.name
= "in_b_%d" % i
1852 rs
.append((in_a
, in_b
))
1856 for i
in range(rs_sz
):
1858 out_z
.name
= "out_z_%d" % i
1860 self
.res
= Array(res
)
1864 def add_state(self
, state
):
1865 self
.states
.append(state
)
1868 def get_fragment(self
, platform
=None):
1869 """ creates the HDL code-fragment for FPAdd
1872 m
.submodules
+= self
.rs
1874 in_a
= self
.rs
[0][0]
1875 in_b
= self
.rs
[0][1]
1877 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1882 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1887 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1888 ab
= self
.add_state(ab
)
1889 abd
= ab
.ispec() # create an input spec object for FPADDBase
1890 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1891 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1894 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1897 with m
.FSM() as fsm
:
1899 for state
in self
.states
:
1900 with m
.State(state
.state_from
):
1906 if __name__
== "__main__":
1908 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1909 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1910 alu
.rs
[0][1].ports() + \
1911 alu
.res
[0].ports() + \
1912 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1914 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1915 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1916 alu
.in_t
.ports() + \
1917 alu
.out_z
.ports() + \
1918 [alu
.in_mid
, alu
.out_mid
])
1921 # works... but don't use, just do "python fname.py convert -t v"
1922 #print (verilog.convert(alu, ports=[
1923 # ports=alu.in_a.ports() + \
1924 # alu.in_b.ports() + \
1925 # alu.out_z.ports())