1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
, FPGet2OpMod
, FPGet2Op
)
def __init__(self, width, id_wid):
    """ creates the fields of this data record

        width:  handed to the FPNumBase / FPNumOut constructors and used
                as the bit-width of the "oz" signal
        id_wid: bit-width of the "mid" signal
    """
    # two FPNumBase numbers and one FPNumOut number
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    self.z = FPNumOut(width, False)
    # plain signals.  NOTE: keep these as direct attribute assignments,
    # nmigen derives the signal name from the assignment target.
    self.oz = Signal(width, reset_less=True)
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
32 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
33 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
36 class FPAddSpecialCasesMod
:
37 """ special cases: NaNs, infs, zeros, denormalised
38 NOTE: some of these are unique to add. see "Special Operations"
39 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
42 def __init__(self
, width
, id_wid
):
49 return FPADDBaseData(self
.width
, self
.id_wid
)
52 return FPSCData(self
.width
, self
.id_wid
)
54 def setup(self
, m
, i
):
55 """ links module to inputs and outputs
57 m
.submodules
.specialcases
= self
58 m
.d
.comb
+= self
.i
.eq(i
)
63 def elaborate(self
, platform
):
66 m
.submodules
.sc_out_z
= self
.o
.z
68 # decode: XXX really should move to separate stage
69 a1
= FPNumIn(None, self
.width
)
70 b1
= FPNumIn(None, self
.width
)
71 m
.submodules
.sc_decode_a
= a1
72 m
.submodules
.sc_decode_b
= b1
73 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
78 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
81 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
83 # if a is NaN or b is NaN return NaN
84 with m
.If(a1
.is_nan | b1
.is_nan
):
85 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
86 m
.d
.comb
+= self
.o
.z
.nan(0)
88 # XXX WEIRDNESS for FP16 non-canonical NaN handling
91 ## if a is zero and b is NaN return -b
92 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
93 # m.d.comb += self.o.out_do_z.eq(1)
94 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
96 ## if b is zero and a is NaN return -a
97 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
98 # m.d.comb += self.o.out_do_z.eq(1)
99 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
101 ## if a is -zero and b is NaN return -b
102 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
103 # m.d.comb += self.o.out_do_z.eq(1)
104 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
106 ## if b is -zero and a is NaN return -a
107 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
108 # m.d.comb += self.o.out_do_z.eq(1)
109 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
111 # if a is inf return inf (or NaN)
112 with m
.Elif(a1
.is_inf
):
113 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
114 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
115 # if a is inf and signs don't match return NaN
116 with m
.If(b1
.exp_128
& s_nomatch
):
117 m
.d
.comb
+= self
.o
.z
.nan(0)
119 # if b is inf return inf
120 with m
.Elif(b1
.is_inf
):
121 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
122 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
124 # if a is zero and b zero return signed-a/b
125 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
126 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
127 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
129 # if a is zero return b
130 with m
.Elif(a1
.is_zero
):
131 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
132 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
134 # if b is zero return a
135 with m
.Elif(b1
.is_zero
):
136 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
137 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
139 # if a equal to -b return zero (+ve zero)
140 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
141 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
142 m
.d
.comb
+= self
.o
.z
.zero(0)
144 # Denormalised Number checks next, so pass a/b data through
146 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
147 m
.d
.comb
+= self
.o
.a
.eq(a1
)
148 m
.d
.comb
+= self
.o
.b
.eq(b1
)
150 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
151 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
157 def __init__(self
, id_wid
):
160 self
.in_mid
= Signal(id_wid
, reset_less
=True)
161 self
.out_mid
= Signal(id_wid
, reset_less
=True)
167 if self
.id_wid
is not None:
168 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
171 class FPAddSpecialCases(FPState
):
172 """ special cases: NaNs, infs, zeros, denormalised
173 NOTE: some of these are unique to add. see "Special Operations"
174 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ sets up the "special_cases" state

        width:  passed through to FPAddSpecialCasesMod
        id_wid: passed through to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid): passing width
    # on its own raised TypeError as soon as this state was constructed.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
183 def setup(self
, m
, i
):
184 """ links module to inputs and outputs
186 self
.mod
.setup(m
, i
, self
.out_do_z
)
187 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
188 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
192 with m
.If(self
.out_do_z
):
195 m
.next
= "denormalise"
198 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
199 """ special cases: NaNs, infs, zeros, denormalised
200 NOTE: some of these are unique to add. see "Special Operations"
201 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
204 def __init__(self
, width
, id_wid
):
205 FPState
.__init
__(self
, "special_cases")
208 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
209 self
.out
= self
.ospec()
212 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
215 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
217 def setup(self
, m
, i
):
218 """ links module to inputs and outputs
220 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
221 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
223 chain
= StageChain([smod
, dmod
])
226 # only needed for break-out (early-out)
227 # self.out_do_z = smod.o.out_do_z
231 def process(self
, i
):
235 # for break-out (early-out)
236 #with m.If(self.out_do_z):
239 m
.d
.sync
+= self
.out
.eq(self
.process(None))
243 class FPAddDeNormMod(FPState
):
245 def __init__(self
, width
, id_wid
):
248 self
.i
= self
.ispec()
249 self
.o
= self
.ospec()
252 return FPSCData(self
.width
, self
.id_wid
)
255 return FPSCData(self
.width
, self
.id_wid
)
257 def process(self
, i
):
260 def setup(self
, m
, i
):
261 """ links module to inputs and outputs
263 m
.submodules
.denormalise
= self
264 m
.d
.comb
+= self
.i
.eq(i
)
266 def elaborate(self
, platform
):
268 m
.submodules
.denorm_in_a
= self
.i
.a
269 m
.submodules
.denorm_in_b
= self
.i
.b
270 m
.submodules
.denorm_out_a
= self
.o
.a
271 m
.submodules
.denorm_out_b
= self
.o
.b
273 with m
.If(~self
.i
.out_do_z
):
274 # XXX hmmm, don't like repeating identical code
275 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
276 with m
.If(self
.i
.a
.exp_n127
):
277 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
279 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
281 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
282 with m
.If(self
.i
.b
.exp_n127
):
283 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
285 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
287 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
288 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
289 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
290 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
295 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "denormalise" state

        width:  passed through to FPAddDeNormMod and to the two
                FPNumBase outputs
        id_wid: passed through to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid): passing width on
    # its own raised TypeError as soon as this state was constructed.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
303 def setup(self
, m
, i
):
304 """ links module to inputs and outputs
308 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
309 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
312 # Denormalised Number checks
316 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the input/output numbers and the exp_eq flag

        width: handed to the FPNumBase / FPNumIn constructors
    """
    # inputs are FPNumBase, outputs are FPNumIn
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # single-bit flag (driven in elaborate)
    self.exp_eq = Signal(reset_less=True)
325 def elaborate(self
, platform
):
326 # This one however (single-cycle) will do the shift
331 m
.submodules
.align_in_a
= self
.in_a
332 m
.submodules
.align_in_b
= self
.in_b
333 m
.submodules
.align_out_a
= self
.out_a
334 m
.submodules
.align_out_b
= self
.out_b
336 # NOTE: this does *not* do single-cycle multi-shifting,
337 # it *STAYS* in the align state until exponents match
339 # exponent of a greater than b: shift b down
340 m
.d
.comb
+= self
.exp_eq
.eq(0)
341 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
342 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
343 agtb
= Signal(reset_less
=True)
344 altb
= Signal(reset_less
=True)
345 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
346 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
348 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
349 # exponent of b greater than a: shift a down
351 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
352 # exponents equal: move to next stage.
354 m
.d
.comb
+= self
.exp_eq
.eq(1)
358 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "align" state (multi-cycle variant)

        width:  passed through to FPAddAlignMultiMod and to the two
                FPNumIn outputs
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    # FPAddAlignMultiMod only takes a width
    self.mod = FPAddAlignMultiMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
367 def setup(self
, m
, in_a
, in_b
):
368 """ links module to inputs and outputs
370 m
.submodules
.align
= self
.mod
371 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
372 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
373 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
374 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
375 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
378 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ creates the fields of this data record

        width:  handed to the FPNumIn / FPNumOut constructors and used
                as the bit-width of the "oz" signal
        id_wid: bit-width of the "mid" signal
    """
    # two FPNumIn numbers and one FPNumOut number
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.z = FPNumOut(width, False)
    # plain signals.  NOTE: keep these as direct attribute assignments,
    # nmigen derives the signal name from the assignment target.
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
393 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
394 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
397 class FPAddAlignSingleMod
:
399 def __init__(self
, width
, id_wid
):
402 self
.i
= self
.ispec()
403 self
.o
= self
.ospec()
406 return FPSCData(self
.width
, self
.id_wid
)
409 return FPNumIn2Ops(self
.width
, self
.id_wid
)
411 def process(self
, i
):
414 def setup(self
, m
, i
):
415 """ links module to inputs and outputs
417 m
.submodules
.align
= self
418 m
.d
.comb
+= self
.i
.eq(i
)
420 def elaborate(self
, platform
):
421 """ Aligns A against B or B against A, depending on which has the
422 greater exponent. This is done in a *single* cycle using
423 variable-width bit-shift
425 the shifter used here is quite expensive in terms of gates.
426 Mux A or B in (and out) into temporaries, as only one of them
427 needs to be aligned against the other
431 m
.submodules
.align_in_a
= self
.i
.a
432 m
.submodules
.align_in_b
= self
.i
.b
433 m
.submodules
.align_out_a
= self
.o
.a
434 m
.submodules
.align_out_b
= self
.o
.b
436 # temporary (muxed) input and output to be shifted
437 t_inp
= FPNumBase(self
.width
)
438 t_out
= FPNumIn(None, self
.width
)
439 espec
= (len(self
.i
.a
.e
), True)
440 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
441 m
.submodules
.align_t_in
= t_inp
442 m
.submodules
.align_t_out
= t_out
443 m
.submodules
.multishift_r
= msr
445 ediff
= Signal(espec
, reset_less
=True)
446 ediffr
= Signal(espec
, reset_less
=True)
447 tdiff
= Signal(espec
, reset_less
=True)
448 elz
= Signal(reset_less
=True)
449 egz
= Signal(reset_less
=True)
451 # connect multi-shifter to t_inp/out mantissa (and tdiff)
452 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
453 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
454 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
455 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
456 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
458 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
459 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
460 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
461 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
463 # default: A-exp == B-exp, A and B untouched (fall through)
464 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
465 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
466 # only one shifter (muxed)
467 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
468 # exponent of a greater than b: shift b down
469 with m
.If(~self
.i
.out_do_z
):
471 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
474 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
476 # exponent of b greater than a: shift a down
478 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
481 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
484 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
485 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
486 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
487 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
492 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "align" state (single-cycle variant)

        width:  passed through to FPAddAlignSingleMod and to the two
                FPNumIn outputs
        id_wid: passed through to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # outputs of this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
500 def setup(self
, m
, i
):
501 """ links module to inputs and outputs
505 # NOTE: could be done as comb
506 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
507 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
513 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
515 def __init__(self
, width
, id_wid
):
516 FPState
.__init
__(self
, "align")
519 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
520 self
.a1o
= self
.ospec()
523 return FPSCData(self
.width
, self
.id_wid
)
526 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
528 def setup(self
, m
, i
):
529 """ links module to inputs and outputs
532 # chain AddAlignSingle, AddStage0 and AddStage1
533 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
534 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
535 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
537 chain
= StageChain([mod
, a0mod
, a1mod
])
542 def process(self
, i
):
546 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
547 m
.next
= "normalise_1"
550 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ creates the fields of the add-stage-0 data record

        width:  handed to FPNumBase and used as the bit-width of "oz"
        id_wid: bit-width of the "mid" signal
    """
    self.z = FPNumBase(width, False)
    # plain signals.  NOTE: keep these as direct attribute assignments,
    # nmigen derives the signal name from the assignment target.
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # tot is four bits wider than z's m_width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
560 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
561 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
564 class FPAddStage0Mod
:
566 def __init__(self
, width
, id_wid
):
569 self
.i
= self
.ispec()
570 self
.o
= self
.ospec()
573 return FPSCData(self
.width
, self
.id_wid
)
576 return FPAddStage0Data(self
.width
, self
.id_wid
)
578 def process(self
, i
):
581 def setup(self
, m
, i
):
582 """ links module to inputs and outputs
584 m
.submodules
.add0
= self
585 m
.d
.comb
+= self
.i
.eq(i
)
587 def elaborate(self
, platform
):
589 m
.submodules
.add0_in_a
= self
.i
.a
590 m
.submodules
.add0_in_b
= self
.i
.b
591 m
.submodules
.add0_out_z
= self
.o
.z
593 # store intermediate tests (and zero-extended mantissas)
594 seq
= Signal(reset_less
=True)
595 mge
= Signal(reset_less
=True)
596 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
597 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
598 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
599 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
600 am0
.eq(Cat(self
.i
.a
.m
, 0)),
601 bm0
.eq(Cat(self
.i
.b
.m
, 0))
603 # same-sign (both negative or both positive) add mantissas
604 with m
.If(~self
.i
.out_do_z
):
605 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
608 self
.o
.tot
.eq(am0
+ bm0
),
609 self
.o
.z
.s
.eq(self
.i
.a
.s
)
611 # a mantissa greater than b, use a
614 self
.o
.tot
.eq(am0
- bm0
),
615 self
.o
.z
.s
.eq(self
.i
.a
.s
)
617 # b mantissa greater than a, use b
620 self
.o
.tot
.eq(bm0
- am0
),
621 self
.o
.z
.s
.eq(self
.i
.b
.s
)
624 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
625 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
626 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
630 class FPAddStage0(FPState
):
631 """ First stage of add. covers same-sign (add) and subtract
632 special-casing when mantissas are greater or equal, to
633 give greatest accuracy.
def __init__(self, width, id_wid):
    """ sets up the "add_0" state

        width:  passed through to FPAddStage0Mod
        id_wid: passed through to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid): passing width on
    # its own raised TypeError as soon as this state was constructed.
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record has the same layout as the module's output
    self.o = self.mod.ospec()
641 def setup(self
, m
, i
):
642 """ links module to inputs and outputs
646 # NOTE: these could be done as combinatorial (merge add0+add1)
647 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
653 class FPAddStage1Data
:
655 def __init__(self
, width
, id_wid
):
656 self
.z
= FPNumBase(width
, False)
657 self
.out_do_z
= Signal(reset_less
=True)
658 self
.oz
= Signal(width
, reset_less
=True)
660 self
.mid
= Signal(id_wid
, reset_less
=True)
663 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
664 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
668 class FPAddStage1Mod(FPState
):
669 """ Second stage of add: preparation for normalisation.
670 detects when tot sum is too big (tot[27] is kinda a carry bit)
673 def __init__(self
, width
, id_wid
):
676 self
.i
= self
.ispec()
677 self
.o
= self
.ospec()
680 return FPAddStage0Data(self
.width
, self
.id_wid
)
683 return FPAddStage1Data(self
.width
, self
.id_wid
)
685 def process(self
, i
):
688 def setup(self
, m
, i
):
689 """ links module to inputs and outputs
691 m
.submodules
.add1
= self
692 m
.submodules
.add1_out_overflow
= self
.o
.of
694 m
.d
.comb
+= self
.i
.eq(i
)
696 def elaborate(self
, platform
):
698 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
699 # tot[-1] (MSB) gets set when the sum overflows. shift result down
700 with m
.If(~self
.i
.out_do_z
):
701 with m
.If(self
.i
.tot
[-1]):
703 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
704 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
705 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
706 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
707 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
708 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
710 # tot[-1] (MSB) zero case
713 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
714 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
715 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
716 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
717 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
720 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
721 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
722 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
727 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "add_1" state

        width:  passed through to FPAddStage1Mod and to the FPNumBase
                output
        id_wid: passed through to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid): passing width on
    # its own raised TypeError as soon as this state was constructed.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
736 def setup(self
, m
, i
):
737 """ links module to inputs and outputs
741 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
743 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
744 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
745 m
.d
.sync
+= self
.norm_stb
.eq(1)
748 m
.next
= "normalise_1"
751 class FPNormaliseModSingle
:
753 def __init__(self
, width
):
755 self
.in_z
= self
.ispec()
756 self
.out_z
= self
.ospec()
759 return FPNumBase(self
.width
, False)
762 return FPNumBase(self
.width
, False)
764 def setup(self
, m
, i
):
765 """ links module to inputs and outputs
767 m
.submodules
.normalise
= self
768 m
.d
.comb
+= self
.i
.eq(i
)
770 def elaborate(self
, platform
):
773 mwid
= self
.out_z
.m_width
+2
774 pe
= PriorityEncoder(mwid
)
775 m
.submodules
.norm_pe
= pe
777 m
.submodules
.norm1_out_z
= self
.out_z
778 m
.submodules
.norm1_in_z
= self
.in_z
780 in_z
= FPNumBase(self
.width
, False)
782 m
.submodules
.norm1_insel_z
= in_z
783 m
.submodules
.norm1_insel_overflow
= in_of
785 espec
= (len(in_z
.e
), True)
786 ediff_n126
= Signal(espec
, reset_less
=True)
787 msr
= MultiShiftRMerge(mwid
, espec
)
788 m
.submodules
.multishift_r
= msr
790 m
.d
.comb
+= in_z
.eq(self
.in_z
)
791 m
.d
.comb
+= in_of
.eq(self
.in_of
)
792 # initialise out from in (overridden below)
793 m
.d
.comb
+= self
.out_z
.eq(in_z
)
794 m
.d
.comb
+= self
.out_of
.eq(in_of
)
795 # normalisation decrease condition
796 decrease
= Signal(reset_less
=True)
797 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
800 # *sigh* not entirely obvious: count leading zeros (clz)
801 # with a PriorityEncoder: to find from the MSB
802 # we reverse the order of the bits.
803 temp_m
= Signal(mwid
, reset_less
=True)
804 temp_s
= Signal(mwid
+1, reset_less
=True)
805 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
807 # cat round and guard bits back into the mantissa
808 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
809 pe
.i
.eq(temp_m
[::-1]), # inverted
810 clz
.eq(pe
.o
), # count zeros from MSB down
811 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
812 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
813 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ creates the fields of this data record

        width:  handed to FPNumBase and used as the bit-width of "oz"
        id_wid: bit-width of the "mid" signal
    """
    # NOTE: keep the signals as direct attribute assignments, nmigen
    # derives the signal name from the assignment target.
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
829 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
830 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
833 class FPNorm1ModSingle
:
835 def __init__(self
, width
, id_wid
):
838 self
.i
= self
.ispec()
839 self
.o
= self
.ospec()
842 return FPAddStage1Data(self
.width
, self
.id_wid
)
845 return FPNorm1Data(self
.width
, self
.id_wid
)
847 def setup(self
, m
, i
):
848 """ links module to inputs and outputs
850 m
.submodules
.normalise_1
= self
851 m
.d
.comb
+= self
.i
.eq(i
)
853 def process(self
, i
):
856 def elaborate(self
, platform
):
859 mwid
= self
.o
.z
.m_width
+2
860 pe
= PriorityEncoder(mwid
)
861 m
.submodules
.norm_pe
= pe
864 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
866 m
.submodules
.norm1_out_z
= self
.o
.z
867 m
.submodules
.norm1_out_overflow
= of
868 m
.submodules
.norm1_in_z
= self
.i
.z
869 m
.submodules
.norm1_in_overflow
= self
.i
.of
872 m
.submodules
.norm1_insel_z
= i
.z
873 m
.submodules
.norm1_insel_overflow
= i
.of
875 espec
= (len(i
.z
.e
), True)
876 ediff_n126
= Signal(espec
, reset_less
=True)
877 msr
= MultiShiftRMerge(mwid
, espec
)
878 m
.submodules
.multishift_r
= msr
880 m
.d
.comb
+= i
.eq(self
.i
)
881 # initialise out from in (overridden below)
882 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
883 m
.d
.comb
+= of
.eq(i
.of
)
884 # normalisation increase/decrease conditions
885 decrease
= Signal(reset_less
=True)
886 increase
= Signal(reset_less
=True)
887 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
888 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
890 with m
.If(~self
.i
.out_do_z
):
892 # *sigh* not entirely obvious: count leading zeros (clz)
893 # with a PriorityEncoder: to find from the MSB
894 # we reverse the order of the bits.
895 temp_m
= Signal(mwid
, reset_less
=True)
896 temp_s
= Signal(mwid
+1, reset_less
=True)
897 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
898 # make sure that the amount to decrease by does NOT
899 # go below the minimum non-INF/NaN exponent
900 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
903 # cat round and guard bits back into the mantissa
904 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
905 pe
.i
.eq(temp_m
[::-1]), # inverted
906 clz
.eq(limclz
), # count zeros from MSB down
907 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
908 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
909 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
910 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
911 # overflow in bits 0..1: got shifted too (leave sticky)
912 of
.guard
.eq(temp_s
[1]), # guard
913 of
.round_bit
.eq(temp_s
[0]), # round
916 with m
.Elif(increase
):
917 temp_m
= Signal(mwid
+1, reset_less
=True)
919 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
921 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
922 # connect multi-shifter to inp/out mantissa (and ediff)
924 msr
.diff
.eq(ediff_n126
),
925 self
.o
.z
.m
.eq(msr
.m
[3:]),
926 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
927 # overflow in bits 0..1: got shifted too (leave sticky)
928 of
.guard
.eq(temp_s
[2]), # guard
929 of
.round_bit
.eq(temp_s
[1]), # round
930 of
.sticky
.eq(temp_s
[0]), # sticky
931 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
934 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
935 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
936 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
941 class FPNorm1ModMulti
:
943 def __init__(self
, width
, single_cycle
=True):
945 self
.in_select
= Signal(reset_less
=True)
946 self
.in_z
= FPNumBase(width
, False)
947 self
.in_of
= Overflow()
948 self
.temp_z
= FPNumBase(width
, False)
949 self
.temp_of
= Overflow()
950 self
.out_z
= FPNumBase(width
, False)
951 self
.out_of
= Overflow()
953 def elaborate(self
, platform
):
956 m
.submodules
.norm1_out_z
= self
.out_z
957 m
.submodules
.norm1_out_overflow
= self
.out_of
958 m
.submodules
.norm1_temp_z
= self
.temp_z
959 m
.submodules
.norm1_temp_of
= self
.temp_of
960 m
.submodules
.norm1_in_z
= self
.in_z
961 m
.submodules
.norm1_in_overflow
= self
.in_of
963 in_z
= FPNumBase(self
.width
, False)
965 m
.submodules
.norm1_insel_z
= in_z
966 m
.submodules
.norm1_insel_overflow
= in_of
968 # select which of temp or in z/of to use
969 with m
.If(self
.in_select
):
970 m
.d
.comb
+= in_z
.eq(self
.in_z
)
971 m
.d
.comb
+= in_of
.eq(self
.in_of
)
973 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
974 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
975 # initialise out from in (overridden below)
976 m
.d
.comb
+= self
.out_z
.eq(in_z
)
977 m
.d
.comb
+= self
.out_of
.eq(in_of
)
978 # normalisation increase/decrease conditions
979 decrease
= Signal(reset_less
=True)
980 increase
= Signal(reset_less
=True)
981 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
982 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
983 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
987 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
988 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
989 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
990 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
991 self
.out_of
.round_bit
.eq(0), # reset round bit
992 self
.out_of
.m0
.eq(in_of
.guard
),
995 with m
.Elif(increase
):
997 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
998 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
999 self
.out_of
.guard
.eq(in_z
.m
[0]),
1000 self
.out_of
.m0
.eq(in_z
.m
[1]),
1001 self
.out_of
.round_bit
.eq(in_of
.guard
),
1002 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1008 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ sets up the "normalise_1" state (single-cycle variant)

        width:        passed through to FPNorm1ModSingle and to the
                      FPNumBase output
        id_wid:       passed through to FPNorm1ModSingle
        single_cycle: accepted but not used by this constructor
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ takes (width, id_wid): passing width on
    # its own raised TypeError as soon as this state was constructed.
    self.mod = FPNorm1ModSingle(width, id_wid)
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1018 return self
.mod
.ispec()
1021 return self
.mod
.ospec()
1023 def setup(self
, m
, i
):
1024 """ links module to inputs and outputs
1026 self
.mod
.setup(m
, i
)
1028 def action(self
, m
):
1032 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "normalise_1" state (multi-cycle variant)

        width:  passed through to FPNorm1ModMulti and to the FPNumBase
                temporaries / output
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # single-bit control signals.  NOTE: keep these as direct attribute
    # assignments, nmigen derives the signal name from the target.
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # temporaries
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # outputs
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1046 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1047 """ links module to inputs and outputs
1049 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1050 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1051 self
.out_z
, self
.out_norm
)
1053 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1054 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1056 def action(self
, m
):
1057 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1058 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1059 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1060 with m
.If(self
.out_norm
):
1061 with m
.If(self
.in_accept
):
1066 m
.d
.sync
+= self
.ack
.eq(0)
1068 # normalisation not required (or done).
1070 m
.d
.sync
+= self
.ack
.eq(1)
1071 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1074 class FPNormToPack(FPState
, UnbufferedPipeline
):
1076 def __init__(self
, width
, id_wid
):
1077 FPState
.__init
__(self
, "normalise_1")
1078 self
.id_wid
= id_wid
1080 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
1083 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1086 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1088 def setup(self
, m
, i
):
1089 """ links module to inputs and outputs
1092 # Normalisation, Rounding Corrections, Pack - in a chain
1093 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1094 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1095 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1096 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1097 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1099 self
.out_z
= pmod
.ospec()
1103 def process(self
, i
):
def action(self, m):
    """ latches the processed result and selects the next state

        m: module being built; receives one sync-domain statement and
           has its "next" attribute set to "pack_put_z"
    """
    latch_result = self.out_z.eq(self.process(None))
    m.d.sync += latch_result
    m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ creates the fields of this data record

        width:  handed to FPNumBase and used as the bit-width of "oz"
        id_wid: bit-width of the "mid" signal
    """
    self.z = FPNumBase(width, False)
    # plain signals.  NOTE: keep these as direct attribute assignments,
    # nmigen derives the signal name from the assignment target.
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1120 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1126 def __init__(self
, width
, id_wid
):
1128 self
.id_wid
= id_wid
1129 self
.i
= self
.ispec()
1130 self
.out_z
= self
.ospec()
1133 return FPNorm1Data(self
.width
, self
.id_wid
)
1136 return FPRoundData(self
.width
, self
.id_wid
)
1138 def process(self
, i
):
1141 def setup(self
, m
, i
):
1142 m
.submodules
.roundz
= self
1143 m
.d
.comb
+= self
.i
.eq(i
)
1145 def elaborate(self
, platform
):
1147 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1148 with m
.If(~self
.i
.out_do_z
):
1149 with m
.If(self
.i
.roundz
):
1150 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
1151 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1152 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1157 class FPRound(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "round" state

        width:  passed through to FPRoundMod
        id_wid: passed through to FPRoundMod
    """
    FPState.__init__(self, "round")
    # FPRoundMod is constructed with (width, id_wid) elsewhere in this
    # file (see FPNormToPack.setup); passing width on its own raised
    # TypeError as soon as this state was constructed.
    self.mod = FPRoundMod(width, id_wid)
    self.out_z = self.ospec()
1165 return self
.mod
.ispec()
1168 return self
.mod
.ospec()
1170 def setup(self
, m
, i
):
1171 """ links module to inputs and outputs
1173 self
.mod
.setup(m
, i
)
1176 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1177 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """ selects "corrections" as the next state

        m: module being built; only its "next" attribute is written
    """
    m.next = "corrections"
1183 class FPCorrectionsMod
:
def __init__(self, width, id_wid):
    """ allocates the input and output data of the corrections module

        * width: bit-width, forwarded to the objects built by ispec/ospec
        * id_wid: identifier width, forwarded likewise
    """
    # ispec() and ospec() read self.width and self.id_wid, so both have
    # to be stored before the spec objects are created.
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1192 return FPRoundData(self
.width
, self
.id_wid
)
1195 return FPRoundData(self
.width
, self
.id_wid
)
1197 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this object as submodule "corrections" of m and drives
        the module input combinatorially from i.
    """
    m.submodules.corrections = self
    copy_input = self.i.eq(i)
    m.d.comb += copy_input
def elaborate(self, platform):
    """ builds the corrections logic and returns it as a Module

        The output is a copy of the input.  When out_do_z is clear and
        the input z reports is_denormalised, the output exponent is
        replaced by the input's N127 value.
    """
    # the module has to be created here and handed back to the caller
    m = Module()
    m.submodules.corr_in_z = self.i.z
    m.submodules.corr_out_z = self.out_z.z
    m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
    with m.If(~self.i.out_do_z):
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
    return m
1217 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """ creates the "corrections" FSM state around FPCorrectionsMod

        * width: bit-width handed to the wrapped module
        * id_wid: identifier width handed to the wrapped module
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod takes both width and id_wid
    self.mod = FPCorrectionsMod(width, id_wid)
    self.out_z = self.ospec()
1225 return self
.mod
.ispec()
1228 return self
.mod
.ospec()
def setup(self, m, in_z):
    """ links module to inputs and outputs

        the wrapped module is set up with in_z and its result is
        sync-registered into this state's out_z.
    """
    self.mod.setup(m, in_z)

    m.d.sync += self.out_z.eq(self.mod.out_z)
    # the module publishes its result as out_z (it has no "o")
    m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)
1238 def action(self
, m
):
def __init__(self, width, id_wid):
    """ creates the two signals of this record

        * width: width of the z value signal
        * id_wid: width of the mid (identifier) signal
    """
    self.z = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1249 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1252 return [self
.z
, self
.mid
]
def __init__(self, width, id_wid):
    """ allocates the input and output data of the pack module

        * width: bit-width, forwarded to the objects built by ispec/ospec
        * id_wid: identifier width, forwarded likewise
    """
    # ispec(), ospec() and elaborate() read self.width, so it has to be
    # stored before the spec objects are created.
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1264 return FPRoundData(self
.width
, self
.id_wid
)
1267 return FPPackData(self
.width
, self
.id_wid
)
1269 def process(self
, i
):
def setup(self, m, in_z):
    """ links module to inputs and outputs

        registers this object as submodule "pack" of m and drives the
        module input combinatorially from in_z.
    """
    m.submodules.pack = self
    copy_input = self.i.eq(in_z)
    m.d.comb += copy_input
def elaborate(self, platform):
    """ builds the packing logic and returns it as a Module

        mid is passed straight through.  The value written to o.z is
        chosen as follows:

        * out_do_z clear, z overflowed: z.inf() with the input sign
        * out_do_z clear, otherwise: z.create() from input s, e and m
        * out_do_z set: the input oz value
    """
    # the module has to be created here and handed back to the caller
    m = Module()
    z = FPNumOut(self.width, False)
    m.submodules.pack_in_z = self.i.z
    m.submodules.pack_out_z = z
    m.d.comb += self.o.mid.eq(self.i.mid)
    # The three cases must be mutually exclusive: without the Else
    # branches the later assignments would always override the earlier
    # ones and the output would be oz in every case.
    with m.If(~self.i.out_do_z):
        with m.If(self.i.z.is_overflowed):
            m.d.comb += z.inf(self.i.z.s)
        with m.Else():
            m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    with m.Else():
        m.d.comb += z.v.eq(self.i.oz)
    m.d.comb += self.o.z.eq(z.v)
    return m
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its output

        The next state is always "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod takes both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # Both out_z and mod.o are built by FPPackMod.ospec(), i.e. they
        # carry a plain "z" signal and "mid"; the module's result lives
        # in mod.o (there is no mod.out_z and no ".v" field).
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z using stb/ack

        * state: name of this state
        * in_z: value to output
        * out_z: destination; its "z" member supplies v, stb and ack
        * in_mid / out_mid: optional identifier source and destination
          (skipped when in_mid is None)
        * to_state: state entered once the value is acknowledged;
          defaults to "get_ops"
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these, so they have to be kept
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # the assignment only takes effect once added to a domain
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        # raise the strobe only while not yet acknowledged; an
        # unconditional eq(1) here would override the eq(0) above
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on out_zs[in_mid] using stb/ack

        * state: name of this state
        * in_z: source; its "v" member is the value to output
        * out_zs: indexable collection of destinations (v, stb, ack)
        * in_mid: index selecting the destination
        * to_state: state entered once the value is acknowledged;
          defaults to "get_ops"
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads in_z, so it has to be kept
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # the assignment only takes effect once added to a domain
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        # raise the strobe only while not yet acknowledged; an
        # unconditional eq(1) here would override the eq(0) above
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
def __init__(self, width, id_wid):
    """ creates the two members of this record

        * width: passed to FPOp for the z operand
        * id_wid: width of the mid (identifier) signal
    """
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)
1378 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1381 return [self
.z
, self
.mid
]
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ stores the configuration and allocates trigger, input and output

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    # ispec()/ospec() read self.width, so store it before they run
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    # add_state() appends to this list
    self.states = []
1406 return FPADDBaseData(self
.width
, self
.id_wid
)
1409 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ appends state to self.states and returns it

        Callers bind the result (``get = self.add_state(...)``) and then
        call methods on it, so the state object is handed back.
    """
    self.states.append(state)
    return state
1415 def get_fragment(self
, platform
=None):
1416 """ creates the HDL code-fragment for FPAdd
1419 m
.submodules
.out_z
= self
.o
.z
1420 m
.submodules
.in_t
= self
.in_t
1422 self
.get_compact_fragment(m
, platform
)
1424 self
.get_longer_fragment(m
, platform
)
1426 with m
.FSM() as fsm
:
1428 for state
in self
.states
:
1429 with m
.State(state
.state_from
):
1434 def get_longer_fragment(self
, m
, platform
=None):
1436 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1438 get
.setup(m
, self
.i
)
1441 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
1443 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1444 sc
.setup(m
, a
, b
, self
.in_mid
)
1446 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1447 dn
.setup(m
, a
, b
, sc
.in_mid
)
1449 if self
.single_cycle
:
1450 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1451 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1453 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1454 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1456 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1457 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1459 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1460 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1462 if self
.single_cycle
:
1463 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1464 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1466 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1467 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1469 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1470 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1472 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1473 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1475 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1476 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1478 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1479 pa
.in_mid
, self
.out_mid
))
1481 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1482 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """ builds the reduced-stage variant of the adder

        Four stage objects are created, chained with StageChain (fed from
        self.i) and registered as states, followed by a "pack_put_z"
        FPPutZ state that outputs the last stage's result on self.o.
    """
    width, id_wid = self.width, self.id_wid

    fetch = FPGet2Op("get_ops", "special_cases", width, id_wid)
    specials = FPAddSpecialCasesDeNorm(width, id_wid)
    align_add = FPAddAlignSingleAdd(width, id_wid)
    norm_pack = FPNormToPack(width, id_wid)

    fetch.trigger_setup(m, self.in_t.stb, self.in_t.ack)

    stages = [fetch, specials, align_add, norm_pack]
    StageChain(stages, specallocate=True).setup(m, self.i)

    for stage in stages:
        self.add_state(stage)

    self.add_state(FPPutZ("pack_put_z", norm_pack.out_z.z, self.o,
                          norm_pack.out_z.mid, self.o.mid))
1504 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1505 # sc.o.mid, self.o.mid))
1508 class FPADDBase(FPState
):
def __init__(self, width, id_wid=None, single_cycle=False):
    """ creates the "fpadd" state around an FPADDBaseMod instance

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPState.__init__(self, "fpadd")
    self.single_cycle = single_cycle
    self.mod = FPADDBaseMod(width, id_wid, single_cycle)
    self.o = self.ospec()

    self.in_t = Trigger()
    self.i = self.ispec()

    self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
    self.in_accept = Signal(reset_less=True)
    self.add_stb = Signal(reset_less=True)
    self.add_ack = Signal(reset=0, reset_less=True)
1532 return self
.mod
.ispec()
1535 return self
.mod
.ospec()
def setup(self, m, i, add_stb, in_mid):
    """ wires this state to its embedded adder module

        Combinatorially: i feeds self.i which feeds the module input,
        z_done follows the module's output trigger, the input trigger
        stb/ack pair and the output mid/v/stb/ack are connected through.
        Synchronously: add_stb is registered, add_ack and o.z.ack are
        cleared.  The module is added as submodule "fpadd".  in_mid is
        accepted but not used here.
    """
    inner = self.mod

    m.d.comb += [
        self.i.eq(i),
        inner.i.eq(self.i),
        self.z_done.eq(inner.o.z.trigger),
        inner.in_t.stb.eq(self.in_t.stb),
        self.in_t.ack.eq(inner.in_t.ack),
        self.o.mid.eq(inner.o.mid),
        self.o.z.v.eq(inner.o.z.v),
        self.o.z.stb.eq(inner.o.z.stb),
        inner.o.z.ack.eq(self.o.z.ack),
    ]

    m.d.sync += [
        self.add_stb.eq(add_stb),
        self.add_ack.eq(0),  # sets to zero when not in active state
        self.o.z.ack.eq(0),  # likewise
    ]

    m.submodules.fpadd = inner
1557 def action(self
, m
):
1559 # in_accept is set on incoming strobe HIGH and ack LOW.
1560 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1562 #with m.If(self.in_t.ack):
1563 # m.d.sync += self.in_t.stb.eq(0)
1564 with m
.If(~self
.z_done
):
1565 # not done: test for accepting an incoming operand pair
1566 with m
.If(self
.in_accept
):
1568 self
.add_ack
.eq(1), # acknowledge receipt...
1569 self
.in_t
.stb
.eq(1), # initiate add
1572 m
.d
.sync
+= [self
.add_ack
.eq(0),
1573 self
.in_t
.stb
.eq(0),
1577 # done: acknowledge, and write out id and value
1578 m
.d
.sync
+= [self
.add_ack
.eq(1),
1585 if self
.in_mid
is not None:
1586 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1589 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1591 # move to output state on detecting z ack
1592 with m
.If(self
.out_z
.trigger
):
1593 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1596 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
class FPADDBasePipe(ControlBase):
    """ three connected stages: special-cases/denorm, align/add, norm/pack

        The connection equations returned by connect() are kept in _eqs
        and applied combinatorially in elaborate().
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        # the module has to be created here and handed back to the caller
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe over a pass-through stage of FPADDBaseData

        * width, id_wid: passed to every FPADDBaseData that is created
        * num_rows: stored, and passed to the base class as p_len
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_data():
            # factory handed to PassThroughStage
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(make_data),
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe over a pass-through stage of FPPackData

        * width, id_wid: passed to every FPPackData that is created
        * num_rows: stored, and passed to the base class as n_len
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_data():
            # factory handed to PassThroughStage
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(make_data),
                                n_len=self.num_rows)
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        self.p = self.inpipe.p  # kinda annoying,
        self.n = self.outpipe.n  # use pipe in/out as this class in/out
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        # the module has to be created here and handed back to the caller
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder -> fan-out
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)
        return m
1668 """ FPADD: stages as follows:
1674 FPAddBase---> FPAddBaseMod
1676 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1678 FPAddBase is tricky: it is both a stage and *has* stages.
1679 Connection to FPAddBaseMod therefore requires an in stb/ack
1680 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1681 needs to be the thing that raises the incoming stb.
1684 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1687 * width: bit-width of IEEE754. supported: 16, 32, 64
1688 * id_wid: an identifier that is sync-connected to the input
1689 * single_cycle: True indicates each stage to complete in 1 clock
1692 self
.id_wid
= id_wid
1693 self
.single_cycle
= single_cycle
1695 #self.out_z = FPOp(width)
1696 self
.ids
= FPID(id_wid
)
1699 for i
in range(rs_sz
):
1702 in_a
.name
= "in_a_%d" % i
1703 in_b
.name
= "in_b_%d" % i
1704 rs
.append((in_a
, in_b
))
1708 for i
in range(rs_sz
):
1710 out_z
.name
= "out_z_%d" % i
1712 self
.res
= Array(res
)
def add_state(self, state):
    """ appends state to self.states and returns it

        Callers rebind the result (``ab = self.add_state(ab)``) and then
        call methods on it, so the state object is handed back.
    """
    self.states.append(state)
    return state
1720 def get_fragment(self
, platform
=None):
1721 """ creates the HDL code-fragment for FPAdd
1724 m
.submodules
+= self
.rs
1726 in_a
= self
.rs
[0][0]
1727 in_b
= self
.rs
[0][1]
1729 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1734 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1739 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1740 ab
= self
.add_state(ab
)
1741 abd
= ab
.ispec() # create an input spec object for FPADDBase
1742 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1743 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1746 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1749 with m
.FSM() as fsm
:
1751 for state
in self
.states
:
1752 with m
.State(state
.state_from
):
1758 if __name__
== "__main__":
1760 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1761 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1762 alu
.rs
[0][1].ports() + \
1763 alu
.res
[0].ports() + \
1764 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1766 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1767 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1768 alu
.in_t
.ports() + \
1769 alu
.out_z
.ports() + \
1770 [alu
.in_mid
, alu
.out_mid
])
1773 # works... but don't use, just do "python fname.py convert -t v"
1774 #print (verilog.convert(alu, ports=[
1775 # ports=alu.in_a.ports() + \
1776 # alu.in_b.ports() + \
1777 # alu.out_z.ports())