1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
, FPID
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
, FPGet2OpMod
, FPGet2Op
)
19 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
20 from fpcommon
.postcalc
import FPAddStage1Data
21 from fpcommon
.postnormalise
import (FPNorm1Data
, FPNorm1ModSingle
,
22 FPNorm1ModMulti
, FPNorm1Single
, FPNorm1Multi
)
23 from fpcommon
.roundz
import (FPRoundData
, FPRoundMod
, FPRound
)
24 from fpcommon
.corrections
import (FPCorrectionsMod
, FPCorrections
)
25 from fpcommon
.pack
import (FPPackData
, FPPackMod
, FPPack
)
28 class FPAddSpecialCasesMod
:
29 """ special cases: NaNs, infs, zeros, denormalised
30 NOTE: some of these are unique to add. see "Special Operations"
31 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
34 def __init__(self
, width
, id_wid
):
41 return FPADDBaseData(self
.width
, self
.id_wid
)
44 return FPSCData(self
.width
, self
.id_wid
)
46 def setup(self
, m
, i
):
47 """ links module to inputs and outputs
49 m
.submodules
.specialcases
= self
50 m
.d
.comb
+= self
.i
.eq(i
)
55 def elaborate(self
, platform
):
58 m
.submodules
.sc_out_z
= self
.o
.z
60 # decode: XXX really should move to separate stage
61 a1
= FPNumIn(None, self
.width
)
62 b1
= FPNumIn(None, self
.width
)
63 m
.submodules
.sc_decode_a
= a1
64 m
.submodules
.sc_decode_b
= b1
65 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
70 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
73 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
75 # if a is NaN or b is NaN return NaN
76 with m
.If(a1
.is_nan | b1
.is_nan
):
77 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
78 m
.d
.comb
+= self
.o
.z
.nan(0)
80 # XXX WEIRDNESS for FP16 non-canonical NaN handling
83 ## if a is zero and b is NaN return -b
84 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
85 # m.d.comb += self.o.out_do_z.eq(1)
86 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
88 ## if b is zero and a is NaN return -a
89 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
90 # m.d.comb += self.o.out_do_z.eq(1)
91 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
93 ## if a is -zero and b is NaN return -b
94 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
95 # m.d.comb += self.o.out_do_z.eq(1)
96 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
98 ## if b is -zero and a is NaN return -a
99 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
100 # m.d.comb += self.o.out_do_z.eq(1)
101 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
103 # if a is inf return inf (or NaN)
104 with m
.Elif(a1
.is_inf
):
105 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
106 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
107 # if a is inf and signs don't match return NaN
108 with m
.If(b1
.exp_128
& s_nomatch
):
109 m
.d
.comb
+= self
.o
.z
.nan(0)
111 # if b is inf return inf
112 with m
.Elif(b1
.is_inf
):
113 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
114 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
116 # if a is zero and b zero return signed-a/b
117 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
118 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
119 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
121 # if a is zero return b
122 with m
.Elif(a1
.is_zero
):
123 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
124 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
126 # if b is zero return a
127 with m
.Elif(b1
.is_zero
):
128 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
129 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
131 # if a equal to -b return zero (+ve zero)
132 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
133 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
134 m
.d
.comb
+= self
.o
.z
.zero(0)
136 # Denormalised Number checks next, so pass a/b data through
138 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
139 m
.d
.comb
+= self
.o
.a
.eq(a1
)
140 m
.d
.comb
+= self
.o
.b
.eq(b1
)
142 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
143 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
148 class FPAddSpecialCases(FPState
):
149 """ special cases: NaNs, infs, zeros, denormalised
150 NOTE: some of these are unique to add. see "Special Operations"
151 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state and its combinatorial module

        * width: bit-width handed to FPAddSpecialCasesMod
        * id_wid: id width handed to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid): the id width
    # was previously omitted, which raises TypeError on construction.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # registered copy of the module's output record
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
160 def setup(self
, m
, i
):
161 """ links module to inputs and outputs
163 self
.mod
.setup(m
, i
, self
.out_do_z
)
164 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
165 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
169 with m
.If(self
.out_do_z
):
172 m
.next
= "denormalise"
175 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
176 """ special cases: NaNs, infs, zeros, denormalised
177 NOTE: some of these are unique to add. see "Special Operations"
178 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
181 def __init__(self
, width
, id_wid
):
182 FPState
.__init
__(self
, "special_cases")
185 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
186 self
.out
= self
.ospec()
189 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
192 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
194 def setup(self
, m
, i
):
195 """ links module to inputs and outputs
197 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
198 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
200 chain
= StageChain([smod
, dmod
])
203 # only needed for break-out (early-out)
204 # self.out_do_z = smod.o.out_do_z
208 def process(self
, i
):
212 # for break-out (early-out)
213 #with m.If(self.out_do_z):
216 m
.d
.sync
+= self
.out
.eq(self
.process(None))
220 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ allocates the align-stage operands

        * width: passed unchanged to every FPNumBase / FPNumIn created here
    """
    # incoming operands
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # outgoing operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # single-bit flag, driven by elaborate()
    self.exp_eq = Signal(reset_less=True)
229 def elaborate(self
, platform
):
230 # This one however (single-cycle) will do the shift
235 m
.submodules
.align_in_a
= self
.in_a
236 m
.submodules
.align_in_b
= self
.in_b
237 m
.submodules
.align_out_a
= self
.out_a
238 m
.submodules
.align_out_b
= self
.out_b
240 # NOTE: this does *not* do single-cycle multi-shifting,
241 # it *STAYS* in the align state until exponents match
243 # exponent of a greater than b: shift b down
244 m
.d
.comb
+= self
.exp_eq
.eq(0)
245 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
246 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
247 agtb
= Signal(reset_less
=True)
248 altb
= Signal(reset_less
=True)
249 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
250 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
252 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
253 # exponent of b greater than a: shift a down
255 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
256 # exponents equal: move to next stage.
258 m
.d
.comb
+= self
.exp_eq
.eq(1)
262 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state wrapping an FPAddAlignMultiMod

        * width: passed to the module and to the output operands
        * id_wid: accepted for signature parity; not used in this method
    """
    FPState.__init__(self, "align")

    # combinatorial module doing the actual work
    self.mod = FPAddAlignMultiMod(width)

    # state-level copies of the module outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
271 def setup(self
, m
, in_a
, in_b
):
272 """ links module to inputs and outputs
274 m
.submodules
.align
= self
.mod
275 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
276 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
277 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
278 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
279 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
282 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ allocates the fields of this data record

        * width: passed to the a/b/z numbers; also the size of oz
        * id_wid: size of the mid signal
    """
    # operands and result number
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.z = FPNumOut(width, False)

    # early-out flag and its accompanying value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # identifier carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
297 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
298 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
301 class FPAddAlignSingleMod
:
303 def __init__(self
, width
, id_wid
):
306 self
.i
= self
.ispec()
307 self
.o
= self
.ospec()
310 return FPSCData(self
.width
, self
.id_wid
)
313 return FPNumIn2Ops(self
.width
, self
.id_wid
)
315 def process(self
, i
):
318 def setup(self
, m
, i
):
319 """ links module to inputs and outputs
321 m
.submodules
.align
= self
322 m
.d
.comb
+= self
.i
.eq(i
)
324 def elaborate(self
, platform
):
325 """ Aligns A against B or B against A, depending on which has the
326 greater exponent. This is done in a *single* cycle using
327 variable-width bit-shift
329 the shifter used here is quite expensive in terms of gates.
330 Mux A or B in (and out) into temporaries, as only one of them
331 needs to be aligned against the other
335 m
.submodules
.align_in_a
= self
.i
.a
336 m
.submodules
.align_in_b
= self
.i
.b
337 m
.submodules
.align_out_a
= self
.o
.a
338 m
.submodules
.align_out_b
= self
.o
.b
340 # temporary (muxed) input and output to be shifted
341 t_inp
= FPNumBase(self
.width
)
342 t_out
= FPNumIn(None, self
.width
)
343 espec
= (len(self
.i
.a
.e
), True)
344 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
345 m
.submodules
.align_t_in
= t_inp
346 m
.submodules
.align_t_out
= t_out
347 m
.submodules
.multishift_r
= msr
349 ediff
= Signal(espec
, reset_less
=True)
350 ediffr
= Signal(espec
, reset_less
=True)
351 tdiff
= Signal(espec
, reset_less
=True)
352 elz
= Signal(reset_less
=True)
353 egz
= Signal(reset_less
=True)
355 # connect multi-shifter to t_inp/out mantissa (and tdiff)
356 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
357 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
358 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
359 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
360 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
362 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
363 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
364 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
365 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
367 # default: A-exp == B-exp, A and B untouched (fall through)
368 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
369 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
370 # only one shifter (muxed)
371 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
372 # exponent of a greater than b: shift b down
373 with m
.If(~self
.i
.out_do_z
):
375 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
378 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
380 # exponent of b greater than a: shift a down
382 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
385 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
388 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
389 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
390 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
391 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
396 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state wrapping an FPAddAlignSingleMod

        * width: passed to the module and to the output operands
        * id_wid: passed to the module
    """
    FPState.__init__(self, "align")

    # combinatorial module doing the actual alignment
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # state-level copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
404 def setup(self
, m
, i
):
405 """ links module to inputs and outputs
409 # NOTE: could be done as comb
410 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
411 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
417 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
419 def __init__(self
, width
, id_wid
):
420 FPState
.__init
__(self
, "align")
423 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
424 self
.a1o
= self
.ospec()
427 return FPSCData(self
.width
, self
.id_wid
)
430 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
432 def setup(self
, m
, i
):
433 """ links module to inputs and outputs
436 # chain AddAlignSingle, AddStage0 and AddStage1
437 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
438 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
439 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
441 chain
= StageChain([mod
, a0mod
, a1mod
])
446 def process(self
, i
):
450 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
451 m
.next
= "normalise_1"
454 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ allocates the fields of the add-stage-0 output record

        * width: passed to the z number; also the size of oz
        * id_wid: size of the mid signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # tot is four bits wider than z's m_width
    tot_width = self.z.m_width + 4
    self.tot = Signal(tot_width, reset_less=True)

    self.mid = Signal(id_wid, reset_less=True)
464 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
465 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
468 class FPAddStage0Mod
:
470 def __init__(self
, width
, id_wid
):
473 self
.i
= self
.ispec()
474 self
.o
= self
.ospec()
477 return FPSCData(self
.width
, self
.id_wid
)
480 return FPAddStage0Data(self
.width
, self
.id_wid
)
482 def process(self
, i
):
485 def setup(self
, m
, i
):
486 """ links module to inputs and outputs
488 m
.submodules
.add0
= self
489 m
.d
.comb
+= self
.i
.eq(i
)
491 def elaborate(self
, platform
):
493 m
.submodules
.add0_in_a
= self
.i
.a
494 m
.submodules
.add0_in_b
= self
.i
.b
495 m
.submodules
.add0_out_z
= self
.o
.z
497 # store intermediate tests (and zero-extended mantissas)
498 seq
= Signal(reset_less
=True)
499 mge
= Signal(reset_less
=True)
500 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
501 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
502 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
503 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
504 am0
.eq(Cat(self
.i
.a
.m
, 0)),
505 bm0
.eq(Cat(self
.i
.b
.m
, 0))
507 # same-sign (both negative or both positive) add mantissas
508 with m
.If(~self
.i
.out_do_z
):
509 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
512 self
.o
.tot
.eq(am0
+ bm0
),
513 self
.o
.z
.s
.eq(self
.i
.a
.s
)
515 # a mantissa greater than b, use a
518 self
.o
.tot
.eq(am0
- bm0
),
519 self
.o
.z
.s
.eq(self
.i
.a
.s
)
521 # b mantissa greater than a, use b
524 self
.o
.tot
.eq(bm0
- am0
),
525 self
.o
.z
.s
.eq(self
.i
.b
.s
)
528 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
529 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
530 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
534 class FPAddStage0(FPState
):
535 """ First stage of add. covers same-sign (add) and subtract
536 special-casing when mantissas are greater or equal, to
537 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state and its combinatorial module

        * width: bit-width handed to FPAddStage0Mod
        * id_wid: id width handed to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid): the id width was
    # previously omitted, which raises TypeError on construction.
    self.mod = FPAddStage0Mod(width, id_wid)
    # registered copy of the module's output record
    self.o = self.mod.ospec()
545 def setup(self
, m
, i
):
546 """ links module to inputs and outputs
550 # NOTE: these could be done as combinatorial (merge add0+add1)
551 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
557 class FPAddStage1Mod(FPState
):
558 """ Second stage of add: preparation for normalisation.
559 detects when tot sum is too big (tot[27] is kinda a carry bit)
562 def __init__(self
, width
, id_wid
):
565 self
.i
= self
.ispec()
566 self
.o
= self
.ospec()
569 return FPAddStage0Data(self
.width
, self
.id_wid
)
572 return FPAddStage1Data(self
.width
, self
.id_wid
)
574 def process(self
, i
):
577 def setup(self
, m
, i
):
578 """ links module to inputs and outputs
580 m
.submodules
.add1
= self
581 m
.submodules
.add1_out_overflow
= self
.o
.of
583 m
.d
.comb
+= self
.i
.eq(i
)
585 def elaborate(self
, platform
):
587 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
588 # tot[-1] (MSB) gets set when the sum overflows. shift result down
589 with m
.If(~self
.i
.out_do_z
):
590 with m
.If(self
.i
.tot
[-1]):
592 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
593 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
594 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
595 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
596 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
597 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
599 # tot[-1] (MSB) zero case
602 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
603 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
604 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
605 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
606 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
609 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
610 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
611 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
616 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state and its combinatorial module

        * width: bit-width handed to FPAddStage1Mod and to out_z
        * id_wid: id width handed to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid): the id width was
    # previously omitted, which raises TypeError on construction.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
625 def setup(self
, m
, i
):
626 """ links module to inputs and outputs
630 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
632 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
633 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
634 m
.d
.sync
+= self
.norm_stb
.eq(1)
637 m
.next
= "normalise_1"
640 class FPNormToPack(FPState
, UnbufferedPipeline
):
642 def __init__(self
, width
, id_wid
):
643 FPState
.__init
__(self
, "normalise_1")
646 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
649 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
652 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
654 def setup(self
, m
, i
):
655 """ links module to inputs and outputs
658 # Normalisation, Rounding Corrections, Pack - in a chain
659 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
660 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
661 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
662 pmod
= FPPackMod(self
.width
, self
.id_wid
)
663 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
665 self
.out_z
= pmod
.ospec()
669 def process(self
, i
):
673 m
.d
.sync
+= self
.out_z
.eq(self
.process(None))
674 m
.next
= "pack_put_z"
678 class FPPutZ(FPState
):
680 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
681 FPState
.__init
__(self
, state
)
684 self
.to_state
= to_state
688 self
.out_mid
= out_mid
691 if self
.in_mid
is not None:
692 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
694 self
.out_z
.z
.v
.eq(self
.in_z
)
696 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
697 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
698 m
.next
= self
.to_state
700 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
703 class FPPutZIdx(FPState
):
705 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
706 FPState
.__init
__(self
, state
)
709 self
.to_state
= to_state
715 outz_stb
= Signal(reset_less
=True)
716 outz_ack
= Signal(reset_less
=True)
717 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
718 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
721 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
723 with m
.If(outz_stb
& outz_ack
):
724 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
725 m
.next
= self
.to_state
727 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
731 def __init__(self
, width
, id_wid
):
733 self
.mid
= Signal(id_wid
, reset_less
=True)
736 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
739 return [self
.z
, self
.mid
]
744 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
747 * width: bit-width of IEEE754. supported: 16, 32, 64
748 * id_wid: an identifier that is sync-connected to the input
749 * single_cycle: True indicates each stage to complete in 1 clock
750 * compact: True indicates a reduced number of stages
754 self
.single_cycle
= single_cycle
755 self
.compact
= compact
757 self
.in_t
= Trigger()
758 self
.i
= self
.ispec()
759 self
.o
= self
.ospec()
764 return FPADDBaseData(self
.width
, self
.id_wid
)
767 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ registers a state object and hands it back

        * state: the object to append to self.states

        Returns the same object: callers write
        "x = self.add_state(...)" and then call methods on x.
    """
    self.states.append(state)
    return state
773 def get_fragment(self
, platform
=None):
774 """ creates the HDL code-fragment for FPAdd
777 m
.submodules
.out_z
= self
.o
.z
778 m
.submodules
.in_t
= self
.in_t
780 self
.get_compact_fragment(m
, platform
)
782 self
.get_longer_fragment(m
, platform
)
786 for state
in self
.states
:
787 with m
.State(state
.state_from
):
792 def get_longer_fragment(self
, m
, platform
=None):
794 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
799 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
801 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
802 sc
.setup(m
, a
, b
, self
.in_mid
)
804 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
805 dn
.setup(m
, a
, b
, sc
.in_mid
)
807 if self
.single_cycle
:
808 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
809 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
811 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
812 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
814 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
815 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
817 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
818 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
820 if self
.single_cycle
:
821 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
822 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
824 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
825 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
827 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
828 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
830 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
831 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
833 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
834 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
836 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
837 pa
.in_mid
, self
.out_mid
))
839 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
840 pa
.in_mid
, self
.out_mid
))
842 def get_compact_fragment(self
, m
, platform
=None):
845 get
= FPGet2Op("get_ops", "special_cases", self
.width
, self
.id_wid
)
846 sc
= FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
)
847 alm
= FPAddAlignSingleAdd(self
.width
, self
.id_wid
)
848 n1
= FPNormToPack(self
.width
, self
.id_wid
)
850 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
852 chainlist
= [get
, sc
, alm
, n1
]
853 chain
= StageChain(chainlist
, specallocate
=True)
854 chain
.setup(m
, self
.i
)
856 for mod
in chainlist
:
857 sc
= self
.add_state(mod
)
859 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
860 n1
.out_z
.mid
, self
.o
.mid
))
862 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
863 # sc.o.mid, self.o.mid))
866 class FPADDBase(FPState
):
868 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
871 * width: bit-width of IEEE754. supported: 16, 32, 64
872 * id_wid: an identifier that is sync-connected to the input
873 * single_cycle: True indicates each stage to complete in 1 clock
875 FPState
.__init
__(self
, "fpadd")
877 self
.single_cycle
= single_cycle
878 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
879 self
.o
= self
.ospec()
881 self
.in_t
= Trigger()
882 self
.i
= self
.ispec()
884 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
885 self
.in_accept
= Signal(reset_less
=True)
886 self
.add_stb
= Signal(reset_less
=True)
887 self
.add_ack
= Signal(reset
=0, reset_less
=True)
890 return self
.mod
.ispec()
893 return self
.mod
.ospec()
895 def setup(self
, m
, i
, add_stb
, in_mid
):
896 m
.d
.comb
+= [self
.i
.eq(i
),
897 self
.mod
.i
.eq(self
.i
),
898 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
899 #self.add_stb.eq(add_stb),
900 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
901 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
902 self
.o
.mid
.eq(self
.mod
.o
.mid
),
903 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
904 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
905 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
908 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
909 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
910 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
911 #m.d.sync += self.in_t.stb.eq(0)
913 m
.submodules
.fpadd
= self
.mod
917 # in_accept is set on incoming strobe HIGH and ack LOW.
918 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
920 #with m.If(self.in_t.ack):
921 # m.d.sync += self.in_t.stb.eq(0)
922 with m
.If(~self
.z_done
):
923 # not done: test for accepting an incoming operand pair
924 with m
.If(self
.in_accept
):
926 self
.add_ack
.eq(1), # acknowledge receipt...
927 self
.in_t
.stb
.eq(1), # initiate add
930 m
.d
.sync
+= [self
.add_ack
.eq(0),
935 # done: acknowledge, and write out id and value
936 m
.d
.sync
+= [self
.add_ack
.eq(1),
943 if self
.in_mid
is not None:
944 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
947 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
949 # move to output state on detecting z ack
950 with m
.If(self
.out_z
.trigger
):
951 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
954 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
957 class FPADDBasePipe(ControlBase
):
def __init__(self, width, id_wid):
    """ builds the three pipeline stages and records their connections

        * width: passed to each stage
        * id_wid: passed to each stage
    """
    ControlBase.__init__(self)

    # the stages, in the order they are connected
    self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
    self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
    self.pipe3 = FPNormToPack(width, id_wid)

    # connect() result is kept for elaborate() to add to m.d.comb
    stages = [self.pipe1, self.pipe2, self.pipe3]
    self._eqs = self.connect(stages)
966 def elaborate(self
, platform
):
968 m
.submodules
.scnorm
= self
.pipe1
969 m
.submodules
.addalign
= self
.pipe2
970 m
.submodules
.normpack
= self
.pipe3
971 m
.d
.comb
+= self
._eqs
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe over a pass-through stage of FPADDBaseData
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to each FPADDBaseData that is created
            * num_rows: stored, and used as p_len
        """
        self.num_rows = num_rows

        def iospec():
            # spec factory handed to the pass-through stage
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe over a pass-through stage of FPPackData
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to each FPPackData that is created
            * num_rows: stored, and used as n_len
        """
        self.num_rows = num_rows

        def iospec():
            # spec factory handed to the pass-through stage
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
992 """ Reservation-Station version of FPADD pipeline.
994 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
995 * 3-stage adder pipeline
996 * fan-out on outputs (an array of FPPackData: z,mid)
998 Fan-in and Fan-out are combinatorial.
def __init__(self, width, id_wid, num_rows):
    """ builds fan-in, adder pipeline and fan-out

        * width, id_wid: passed to all three sub-pipelines
        * num_rows: stored, and passed to the fan-in and fan-out pipes
    """
    self.num_rows = num_rows

    self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
    self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
    self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

    # this object's own p / n are those of the in and out pipes
    self.p = self.inpipe.p
    self.n = self.outpipe.n

    # port list: fan-in ports followed by fan-out ports
    in_ports = self.inpipe.ports()
    out_ports = self.outpipe.ports()
    self._ports = in_ports + out_ports
1010 def elaborate(self
, platform
):
1012 m
.submodules
.inpipe
= self
.inpipe
1013 m
.submodules
.fpadd
= self
.fpadd
1014 m
.submodules
.outpipe
= self
.outpipe
1016 m
.d
.comb
+= self
.inpipe
.n
.connect_to_next(self
.fpadd
.p
)
1017 m
.d
.comb
+= self
.fpadd
.connect_to_next(self
.outpipe
)
1026 """ FPADD: stages as follows:
1032 FPAddBase---> FPAddBaseMod
1034 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1036 FPAddBase is tricky: it is both a stage and *has* stages.
1037 Connection to FPAddBaseMod therefore requires an in stb/ack
1038 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1039 needs to be the thing that raises the incoming stb.
1042 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1045 * width: bit-width of IEEE754. supported: 16, 32, 64
1046 * id_wid: an identifier that is sync-connected to the input
1047 * single_cycle: True indicates each stage to complete in 1 clock
1050 self
.id_wid
= id_wid
1051 self
.single_cycle
= single_cycle
1053 #self.out_z = FPOp(width)
1054 self
.ids
= FPID(id_wid
)
1057 for i
in range(rs_sz
):
1060 in_a
.name
= "in_a_%d" % i
1061 in_b
.name
= "in_b_%d" % i
1062 rs
.append((in_a
, in_b
))
1066 for i
in range(rs_sz
):
1068 out_z
.name
= "out_z_%d" % i
1070 self
.res
= Array(res
)
def add_state(self, state):
    """ registers a state object and hands it back

        * state: the object to append to self.states

        Returns the same object: get_fragment() writes
        "ab = self.add_state(ab)" and then calls ab.ispec().
    """
    self.states.append(state)
    return state
1078 def get_fragment(self
, platform
=None):
1079 """ creates the HDL code-fragment for FPAdd
1082 m
.submodules
+= self
.rs
1084 in_a
= self
.rs
[0][0]
1085 in_b
= self
.rs
[0][1]
1087 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1092 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1097 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1098 ab
= self
.add_state(ab
)
1099 abd
= ab
.ispec() # create an input spec object for FPADDBase
1100 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1101 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1104 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1107 with m
.FSM() as fsm
:
1109 for state
in self
.states
:
1110 with m
.State(state
.state_from
):
1116 if __name__
== "__main__":
1118 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1119 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1120 alu
.rs
[0][1].ports() + \
1121 alu
.res
[0].ports() + \
1122 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1124 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1125 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1126 alu
.in_t
.ports() + \
1127 alu
.out_z
.ports() + \
1128 [alu
.in_mid
, alu
.out_mid
])
1131 # works... but don't use, just do "python fname.py convert -t v"
1132 #print (verilog.convert(alu, ports=[
1133 # ports=alu.in_a.ports() + \
1134 # alu.in_b.ports() + \
1135 # alu.out_z.ports())