1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
, FPID
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
, FPGet2OpMod
, FPGet2Op
)
19 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
20 from fpcommon
.postcalc
import FPAddStage1Data
21 from fpcommon
.postnormalise
import (FPNorm1Data
, FPNorm1ModSingle
,
22 FPNorm1ModMulti
, FPNorm1Single
, FPNorm1Multi
)
23 from fpcommon
.roundz
import (FPRoundData
, FPRoundMod
, FPRound
)
24 from fpcommon
.corrections
import (FPCorrectionsMod
, FPCorrections
)
25 from fpcommon
.pack
import (FPPackData
, FPPackMod
, FPPack
)
26 from fpcommon
.normtopack
import FPNormToPack
27 from fpcommon
.putz
import (FPPutZ
, FPPutZIdx
)
30 class FPAddSpecialCasesMod
:
31 """ special cases: NaNs, infs, zeros, denormalised
32 NOTE: some of these are unique to add. see "Special Operations"
33 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
36 def __init__(self
, width
, id_wid
):
43 return FPADDBaseData(self
.width
, self
.id_wid
)
46 return FPSCData(self
.width
, self
.id_wid
)
48 def setup(self
, m
, i
):
49 """ links module to inputs and outputs
51 m
.submodules
.specialcases
= self
52 m
.d
.comb
+= self
.i
.eq(i
)
57 def elaborate(self
, platform
):
60 m
.submodules
.sc_out_z
= self
.o
.z
62 # decode: XXX really should move to separate stage
63 a1
= FPNumIn(None, self
.width
)
64 b1
= FPNumIn(None, self
.width
)
65 m
.submodules
.sc_decode_a
= a1
66 m
.submodules
.sc_decode_b
= b1
67 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
72 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
75 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
77 # if a is NaN or b is NaN return NaN
78 with m
.If(a1
.is_nan | b1
.is_nan
):
79 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
80 m
.d
.comb
+= self
.o
.z
.nan(0)
82 # XXX WEIRDNESS for FP16 non-canonical NaN handling
85 ## if a is zero and b is NaN return -b
86 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
87 # m.d.comb += self.o.out_do_z.eq(1)
88 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
90 ## if b is zero and a is NaN return -a
91 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
92 # m.d.comb += self.o.out_do_z.eq(1)
93 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
95 ## if a is -zero and b is NaN return -b
96 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
97 # m.d.comb += self.o.out_do_z.eq(1)
98 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
100 ## if b is -zero and a is NaN return -a
101 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
102 # m.d.comb += self.o.out_do_z.eq(1)
103 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
105 # if a is inf return inf (or NaN)
106 with m
.Elif(a1
.is_inf
):
107 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
108 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
109 # if a is inf and signs don't match return NaN
110 with m
.If(b1
.exp_128
& s_nomatch
):
111 m
.d
.comb
+= self
.o
.z
.nan(0)
113 # if b is inf return inf
114 with m
.Elif(b1
.is_inf
):
115 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
116 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
118 # if a is zero and b zero return signed-a/b
119 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
120 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
121 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
123 # if a is zero return b
124 with m
.Elif(a1
.is_zero
):
125 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
126 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
128 # if b is zero return a
129 with m
.Elif(b1
.is_zero
):
130 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
131 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
133 # if a equal to -b return zero (+ve zero)
134 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
135 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
136 m
.d
.comb
+= self
.o
.z
.zero(0)
138 # Denormalised Number checks next, so pass a/b data through
140 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
141 m
.d
.comb
+= self
.o
.a
.eq(a1
)
142 m
.d
.comb
+= self
.o
.b
.eq(b1
)
144 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
145 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
150 class FPAddSpecialCases(FPState
):
151 """ special cases: NaNs, infs, zeros, denormalised
152 NOTE: some of these are unique to add. see "Special Operations"
153 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state and the module it wraps

        * width: forwarded to FPAddSpecialCasesMod
        * id_wid: forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ is declared as (self, width, id_wid)
    # with no default for id_wid: leaving it out raises TypeError.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # same data layout as the module's output
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
162 def setup(self
, m
, i
):
163 """ links module to inputs and outputs
165 self
.mod
.setup(m
, i
, self
.out_do_z
)
166 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
167 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
171 with m
.If(self
.out_do_z
):
174 m
.next
= "denormalise"
177 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
178 """ special cases: NaNs, infs, zeros, denormalised
179 NOTE: some of these are unique to add. see "Special Operations"
180 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
183 def __init__(self
, width
, id_wid
):
184 FPState
.__init
__(self
, "special_cases")
187 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
188 self
.out
= self
.ospec()
191 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
194 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
196 def setup(self
, m
, i
):
197 """ links module to inputs and outputs
199 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
200 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
202 chain
= StageChain([smod
, dmod
])
205 # only needed for break-out (early-out)
206 # self.out_do_z = smod.o.out_do_z
210 def process(self
, i
):
214 # for break-out (early-out)
215 #with m.If(self.out_do_z):
218 m
.d
.sync
+= self
.out
.eq(self
.process(None))
222 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ allocates the input numbers, output numbers and exp_eq flag

        * width: passed unchanged to FPNumBase and FPNumIn
    """
    # inputs: read by elaborate() (exponents compared, values copied out)
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs: default to a copy of the inputs in elaborate()
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # defaults to 0 in elaborate(); set to 1 under the
    # "exponents equal" comment there
    self.exp_eq = Signal(reset_less=True)
231 def elaborate(self
, platform
):
232 # This one however (single-cycle) will do the shift
237 m
.submodules
.align_in_a
= self
.in_a
238 m
.submodules
.align_in_b
= self
.in_b
239 m
.submodules
.align_out_a
= self
.out_a
240 m
.submodules
.align_out_b
= self
.out_b
242 # NOTE: this does *not* do single-cycle multi-shifting,
243 # it *STAYS* in the align state until exponents match
245 # exponent of a greater than b: shift b down
246 m
.d
.comb
+= self
.exp_eq
.eq(0)
247 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
248 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
249 agtb
= Signal(reset_less
=True)
250 altb
= Signal(reset_less
=True)
251 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
252 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
254 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
255 # exponent of b greater than a: shift a down
257 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
258 # exponents equal: move to next stage.
260 m
.d
.comb
+= self
.exp_eq
.eq(1)
264 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state around an FPAddAlignMultiMod

        * width: passed to FPAddAlignMultiMod and FPNumIn
        * id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # setup() copies self.mod.out_a / out_b into these in the sync domain
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # setup() drives this from self.mod.exp_eq in the comb domain
    self.exp_eq = Signal(reset_less=True)
273 def setup(self
, m
, in_a
, in_b
):
274 """ links module to inputs and outputs
276 m
.submodules
.align
= self
.mod
277 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
278 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
279 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
280 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
281 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
284 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ allocates the fields of this data bundle

        * width: passed to FPNumIn and FPNumOut; also the shape of oz
        * id_wid: shape of the mid signal

        NOTE(review): the enclosing class header is not visible in this
        view - presumably FPNumIn2Ops, which is referenced elsewhere in
        the file; confirm.
    """
    # the two operands
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # result number, plus the out_do_z / oz pair that travels with it
    self.z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # identifier carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
299 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
300 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
303 class FPAddAlignSingleMod
:
305 def __init__(self
, width
, id_wid
):
308 self
.i
= self
.ispec()
309 self
.o
= self
.ospec()
312 return FPSCData(self
.width
, self
.id_wid
)
315 return FPNumIn2Ops(self
.width
, self
.id_wid
)
317 def process(self
, i
):
320 def setup(self
, m
, i
):
321 """ links module to inputs and outputs
323 m
.submodules
.align
= self
324 m
.d
.comb
+= self
.i
.eq(i
)
326 def elaborate(self
, platform
):
327 """ Aligns A against B or B against A, depending on which has the
328 greater exponent. This is done in a *single* cycle using
329 variable-width bit-shift
331 the shifter used here is quite expensive in terms of gates.
332 Mux A or B in (and out) into temporaries, as only one of them
333 needs to be aligned against the other
337 m
.submodules
.align_in_a
= self
.i
.a
338 m
.submodules
.align_in_b
= self
.i
.b
339 m
.submodules
.align_out_a
= self
.o
.a
340 m
.submodules
.align_out_b
= self
.o
.b
342 # temporary (muxed) input and output to be shifted
343 t_inp
= FPNumBase(self
.width
)
344 t_out
= FPNumIn(None, self
.width
)
345 espec
= (len(self
.i
.a
.e
), True)
346 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
347 m
.submodules
.align_t_in
= t_inp
348 m
.submodules
.align_t_out
= t_out
349 m
.submodules
.multishift_r
= msr
351 ediff
= Signal(espec
, reset_less
=True)
352 ediffr
= Signal(espec
, reset_less
=True)
353 tdiff
= Signal(espec
, reset_less
=True)
354 elz
= Signal(reset_less
=True)
355 egz
= Signal(reset_less
=True)
357 # connect multi-shifter to t_inp/out mantissa (and tdiff)
358 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
359 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
360 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
361 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
362 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
364 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
365 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
366 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
367 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
369 # default: A-exp == B-exp, A and B untouched (fall through)
370 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
371 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
372 # only one shifter (muxed)
373 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
374 # exponent of a greater than b: shift b down
375 with m
.If(~self
.i
.out_do_z
):
377 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
380 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
382 # exponent of b greater than a: shift a down
384 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
387 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
390 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
391 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
392 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
393 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
398 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state around an FPAddAlignSingleMod

        * width: passed to FPAddAlignSingleMod and FPNumIn
        * id_wid: passed to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # setup() assigns these from self.mod.out_a / out_b in the sync domain
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
406 def setup(self
, m
, i
):
407 """ links module to inputs and outputs
411 # NOTE: could be done as comb
412 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
413 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
419 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
421 def __init__(self
, width
, id_wid
):
422 FPState
.__init
__(self
, "align")
425 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
426 self
.a1o
= self
.ospec()
429 return FPSCData(self
.width
, self
.id_wid
)
432 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
434 def setup(self
, m
, i
):
435 """ links module to inputs and outputs
438 # chain AddAlignSingle, AddStage0 and AddStage1
439 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
440 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
441 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
443 chain
= StageChain([mod
, a0mod
, a1mod
])
448 def process(self
, i
):
452 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
453 m
.next
= "normalise_1"
456 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ allocates the fields of the add-stage-0 data bundle

        * width: passed to FPNumBase; also the shape of oz
        * id_wid: shape of the mid signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # tot is sized 4 bits wider than z's m_width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
466 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
467 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
470 class FPAddStage0Mod
:
472 def __init__(self
, width
, id_wid
):
475 self
.i
= self
.ispec()
476 self
.o
= self
.ospec()
479 return FPSCData(self
.width
, self
.id_wid
)
482 return FPAddStage0Data(self
.width
, self
.id_wid
)
484 def process(self
, i
):
487 def setup(self
, m
, i
):
488 """ links module to inputs and outputs
490 m
.submodules
.add0
= self
491 m
.d
.comb
+= self
.i
.eq(i
)
493 def elaborate(self
, platform
):
495 m
.submodules
.add0_in_a
= self
.i
.a
496 m
.submodules
.add0_in_b
= self
.i
.b
497 m
.submodules
.add0_out_z
= self
.o
.z
499 # store intermediate tests (and zero-extended mantissas)
500 seq
= Signal(reset_less
=True)
501 mge
= Signal(reset_less
=True)
502 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
503 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
504 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
505 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
506 am0
.eq(Cat(self
.i
.a
.m
, 0)),
507 bm0
.eq(Cat(self
.i
.b
.m
, 0))
509 # same-sign (both negative or both positive) add mantissas
510 with m
.If(~self
.i
.out_do_z
):
511 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
514 self
.o
.tot
.eq(am0
+ bm0
),
515 self
.o
.z
.s
.eq(self
.i
.a
.s
)
517 # a mantissa greater than b, use a
520 self
.o
.tot
.eq(am0
- bm0
),
521 self
.o
.z
.s
.eq(self
.i
.a
.s
)
523 # b mantissa greater than a, use b
526 self
.o
.tot
.eq(bm0
- am0
),
527 self
.o
.z
.s
.eq(self
.i
.b
.s
)
530 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
531 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
532 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
536 class FPAddStage0(FPState
):
537 """ First stage of add. covers same-sign (add) and subtract
538 special-casing when mantissas are greater or equal, to
539 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state and the module it wraps

        * width: forwarded to FPAddStage0Mod
        * id_wid: forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ is declared as (self, width, id_wid) with
    # no default for id_wid: leaving it out raises TypeError.
    self.mod = FPAddStage0Mod(width, id_wid)
    # same data layout as the module's output; setup() copies
    # self.mod.o into it in the sync domain
    self.o = self.mod.ospec()
547 def setup(self
, m
, i
):
548 """ links module to inputs and outputs
552 # NOTE: these could be done as combinatorial (merge add0+add1)
553 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
559 class FPAddStage1Mod(FPState
):
560 """ Second stage of add: preparation for normalisation.
561 detects when tot sum is too big (tot[27] is kinda a carry bit)
564 def __init__(self
, width
, id_wid
):
567 self
.i
= self
.ispec()
568 self
.o
= self
.ospec()
571 return FPAddStage0Data(self
.width
, self
.id_wid
)
574 return FPAddStage1Data(self
.width
, self
.id_wid
)
576 def process(self
, i
):
579 def setup(self
, m
, i
):
580 """ links module to inputs and outputs
582 m
.submodules
.add1
= self
583 m
.submodules
.add1_out_overflow
= self
.o
.of
585 m
.d
.comb
+= self
.i
.eq(i
)
587 def elaborate(self
, platform
):
589 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
590 # tot[-1] (MSB) gets set when the sum overflows. shift result down
591 with m
.If(~self
.i
.out_do_z
):
592 with m
.If(self
.i
.tot
[-1]):
594 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
595 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
596 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
597 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
598 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
599 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
601 # tot[-1] (MSB) zero case
604 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
605 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
606 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
607 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
608 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
611 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
612 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
613 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
618 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state and the module it wraps

        * width: forwarded to FPAddStage1Mod and FPNumBase
        * id_wid: forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ is declared as (self, width, id_wid) with
    # no default for id_wid: leaving it out raises TypeError.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # setup() clears this by default and sets it to 1 together with the
    # out_of / out_z updates
    self.norm_stb = Signal()
627 def setup(self
, m
, i
):
628 """ links module to inputs and outputs
632 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
634 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
635 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
636 m
.d
.sync
+= self
.norm_stb
.eq(1)
639 m
.next
= "normalise_1"
645 def __init__(self
, width
, id_wid
):
647 self
.mid
= Signal(id_wid
, reset_less
=True)
650 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
653 return [self
.z
, self
.mid
]
658 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
661 * width: bit-width of IEEE754. supported: 16, 32, 64
662 * id_wid: an identifier that is sync-connected to the input
663 * single_cycle: True indicates each stage to complete in 1 clock
664 * compact: True indicates a reduced number of stages
668 self
.single_cycle
= single_cycle
669 self
.compact
= compact
671 self
.in_t
= Trigger()
672 self
.i
= self
.ispec()
673 self
.o
= self
.ospec()
678 return FPADDBaseData(self
.width
, self
.id_wid
)
681 return FPOpData(self
.width
, self
.id_wid
)
683 def add_state(self
, state
):
684 self
.states
.append(state
)
687 def get_fragment(self
, platform
=None):
688 """ creates the HDL code-fragment for FPAdd
691 m
.submodules
.out_z
= self
.o
.z
692 m
.submodules
.in_t
= self
.in_t
694 self
.get_compact_fragment(m
, platform
)
696 self
.get_longer_fragment(m
, platform
)
700 for state
in self
.states
:
701 with m
.State(state
.state_from
):
706 def get_longer_fragment(self
, m
, platform
=None):
708 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
713 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
715 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
716 sc
.setup(m
, a
, b
, self
.in_mid
)
718 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
719 dn
.setup(m
, a
, b
, sc
.in_mid
)
721 if self
.single_cycle
:
722 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
723 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
725 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
726 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
728 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
729 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
731 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
732 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
734 if self
.single_cycle
:
735 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
736 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
738 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
739 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
741 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
742 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
744 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
745 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
747 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
748 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
750 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
751 pa
.in_mid
, self
.out_mid
))
753 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
754 pa
.in_mid
, self
.out_mid
))
756 def get_compact_fragment(self
, m
, platform
=None):
759 get
= FPGet2Op("get_ops", "special_cases", self
.width
, self
.id_wid
)
760 sc
= FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
)
761 alm
= FPAddAlignSingleAdd(self
.width
, self
.id_wid
)
762 n1
= FPNormToPack(self
.width
, self
.id_wid
)
764 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
766 chainlist
= [get
, sc
, alm
, n1
]
767 chain
= StageChain(chainlist
, specallocate
=True)
768 chain
.setup(m
, self
.i
)
770 for mod
in chainlist
:
771 sc
= self
.add_state(mod
)
773 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
774 n1
.out_z
.mid
, self
.o
.mid
))
776 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
777 # sc.o.mid, self.o.mid))
780 class FPADDBase(FPState
):
782 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
785 * width: bit-width of IEEE754. supported: 16, 32, 64
786 * id_wid: an identifier that is sync-connected to the input
787 * single_cycle: True indicates each stage to complete in 1 clock
789 FPState
.__init
__(self
, "fpadd")
791 self
.single_cycle
= single_cycle
792 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
793 self
.o
= self
.ospec()
795 self
.in_t
= Trigger()
796 self
.i
= self
.ispec()
798 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
799 self
.in_accept
= Signal(reset_less
=True)
800 self
.add_stb
= Signal(reset_less
=True)
801 self
.add_ack
= Signal(reset
=0, reset_less
=True)
804 return self
.mod
.ispec()
807 return self
.mod
.ospec()
809 def setup(self
, m
, i
, add_stb
, in_mid
):
810 m
.d
.comb
+= [self
.i
.eq(i
),
811 self
.mod
.i
.eq(self
.i
),
812 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
813 #self.add_stb.eq(add_stb),
814 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
815 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
816 self
.o
.mid
.eq(self
.mod
.o
.mid
),
817 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
818 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
819 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
822 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
823 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
824 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
825 #m.d.sync += self.in_t.stb.eq(0)
827 m
.submodules
.fpadd
= self
.mod
831 # in_accept is set on incoming strobe HIGH and ack LOW.
832 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
834 #with m.If(self.in_t.ack):
835 # m.d.sync += self.in_t.stb.eq(0)
836 with m
.If(~self
.z_done
):
837 # not done: test for accepting an incoming operand pair
838 with m
.If(self
.in_accept
):
840 self
.add_ack
.eq(1), # acknowledge receipt...
841 self
.in_t
.stb
.eq(1), # initiate add
844 m
.d
.sync
+= [self
.add_ack
.eq(0),
849 # done: acknowledge, and write out id and value
850 m
.d
.sync
+= [self
.add_ack
.eq(1),
857 if self
.in_mid
is not None:
858 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
861 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
863 # move to output state on detecting z ack
864 with m
.If(self
.out_z
.trigger
):
865 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
868 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
871 class FPADDBasePipe(ControlBase
):
872 def __init__(self
, width
, id_wid
):
873 ControlBase
.__init
__(self
)
874 self
.pipe1
= FPAddSpecialCasesDeNorm(width
, id_wid
)
875 self
.pipe2
= FPAddAlignSingleAdd(width
, id_wid
)
876 self
.pipe3
= FPNormToPack(width
, id_wid
)
878 self
._eqs
= self
.connect([self
.pipe1
, self
.pipe2
, self
.pipe3
])
880 def elaborate(self
, platform
):
882 m
.submodules
.scnorm
= self
.pipe1
883 m
.submodules
.addalign
= self
.pipe2
884 m
.submodules
.normpack
= self
.pipe3
885 m
.d
.comb
+= self
._eqs
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe over a PassThroughStage of FPADDBaseData

        * width, id_wid: passed to FPADDBaseData each time the spec
          callable is invoked
        * num_rows: stored on the instance and passed on as p_len
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # one zero-argument callable serves as the stage's data spec
        passthru = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe over a PassThroughStage of FPPackData

        * width, id_wid: passed to FPPackData each time the spec
          callable is invoked
        * num_rows: stored on the instance and passed on as n_len
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # one zero-argument callable serves as the stage's data spec
        passthru = PassThroughStage(lambda: FPPackData(width, id_wid))
        CombMuxOutPipe.__init__(self, passthru, n_len=self.num_rows)
906 """ Reservation-Station version of FPADD pipeline.
908 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
909 * 3-stage adder pipeline
910 * fan-out on outputs (an array of FPPackData: z,mid)
912 Fan-in and Fan-out are combinatorial.
914 def __init__(self
, width
, id_wid
, num_rows
):
915 self
.num_rows
= num_rows
916 self
.inpipe
= FPADDInMuxPipe(width
, id_wid
, num_rows
) # fan-in
917 self
.fpadd
= FPADDBasePipe(width
, id_wid
) # add stage
918 self
.outpipe
= FPADDMuxOutPipe(width
, id_wid
, num_rows
) # fan-out
920 self
.p
= self
.inpipe
.p
# kinda annoying,
921 self
.n
= self
.outpipe
.n
# use pipe in/out as this class in/out
922 self
._ports
= self
.inpipe
.ports() + self
.outpipe
.ports()
924 def elaborate(self
, platform
):
926 m
.submodules
.inpipe
= self
.inpipe
927 m
.submodules
.fpadd
= self
.fpadd
928 m
.submodules
.outpipe
= self
.outpipe
930 m
.d
.comb
+= self
.inpipe
.n
.connect_to_next(self
.fpadd
.p
)
931 m
.d
.comb
+= self
.fpadd
.connect_to_next(self
.outpipe
)
940 """ FPADD: stages as follows:
946 FPAddBase---> FPAddBaseMod
948 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
950 FPAddBase is tricky: it is both a stage and *has* stages.
951 Connection to FPAddBaseMod therefore requires an in stb/ack
952 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
953 needs to be the thing that raises the incoming stb.
956 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
959 * width: bit-width of IEEE754. supported: 16, 32, 64
960 * id_wid: an identifier that is sync-connected to the input
961 * single_cycle: True indicates each stage to complete in 1 clock
965 self
.single_cycle
= single_cycle
967 #self.out_z = FPOp(width)
968 self
.ids
= FPID(id_wid
)
971 for i
in range(rs_sz
):
974 in_a
.name
= "in_a_%d" % i
975 in_b
.name
= "in_b_%d" % i
976 rs
.append((in_a
, in_b
))
980 for i
in range(rs_sz
):
982 out_z
.name
= "out_z_%d" % i
984 self
.res
= Array(res
)
988 def add_state(self
, state
):
989 self
.states
.append(state
)
992 def get_fragment(self
, platform
=None):
993 """ creates the HDL code-fragment for FPAdd
996 m
.submodules
+= self
.rs
1001 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1006 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1011 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1012 ab
= self
.add_state(ab
)
1013 abd
= ab
.ispec() # create an input spec object for FPADDBase
1014 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1015 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1018 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1021 with m
.FSM() as fsm
:
1023 for state
in self
.states
:
1024 with m
.State(state
.state_from
):
1030 if __name__
== "__main__":
1032 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1033 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1034 alu
.rs
[0][1].ports() + \
1035 alu
.res
[0].ports() + \
1036 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1038 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1039 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1040 alu
.in_t
.ports() + \
1041 alu
.out_z
.ports() + \
1042 [alu
.in_mid
, alu
.out_mid
])
1045 # works... but don't use, just do "python fname.py convert -t v"
1046 #print (verilog.convert(alu, ports=[
1047 # ports=alu.in_a.ports() + \
1048 # alu.in_b.ports() + \
1049 # alu.out_z.ports())