1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
, FPID
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
, FPGet2OpMod
, FPGet2Op
)
19 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
20 from fpcommon
.postcalc
import FPAddStage1Data
21 from fpcommon
.postnormalise
import (FPNorm1Data
, FPNorm1ModSingle
,
22 FPNorm1ModMulti
, FPNorm1Single
, FPNorm1Multi
)
23 from fpcommon
.roundz
import (FPRoundData
, FPRoundMod
, FPRound
)
24 from fpcommon
.corrections
import (FPCorrectionsMod
, FPCorrections
)
27 class FPAddSpecialCasesMod
:
28 """ special cases: NaNs, infs, zeros, denormalised
29 NOTE: some of these are unique to add. see "Special Operations"
30 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
33 def __init__(self
, width
, id_wid
):
40 return FPADDBaseData(self
.width
, self
.id_wid
)
43 return FPSCData(self
.width
, self
.id_wid
)
45 def setup(self
, m
, i
):
46 """ links module to inputs and outputs
48 m
.submodules
.specialcases
= self
49 m
.d
.comb
+= self
.i
.eq(i
)
54 def elaborate(self
, platform
):
57 m
.submodules
.sc_out_z
= self
.o
.z
59 # decode: XXX really should move to separate stage
60 a1
= FPNumIn(None, self
.width
)
61 b1
= FPNumIn(None, self
.width
)
62 m
.submodules
.sc_decode_a
= a1
63 m
.submodules
.sc_decode_b
= b1
64 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
69 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
72 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
74 # if a is NaN or b is NaN return NaN
75 with m
.If(a1
.is_nan | b1
.is_nan
):
76 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
77 m
.d
.comb
+= self
.o
.z
.nan(0)
79 # XXX WEIRDNESS for FP16 non-canonical NaN handling
82 ## if a is zero and b is NaN return -b
83 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
84 # m.d.comb += self.o.out_do_z.eq(1)
85 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
87 ## if b is zero and a is NaN return -a
88 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
89 # m.d.comb += self.o.out_do_z.eq(1)
90 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
92 ## if a is -zero and b is NaN return -b
93 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
94 # m.d.comb += self.o.out_do_z.eq(1)
95 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
97 ## if b is -zero and a is NaN return -a
98 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
99 # m.d.comb += self.o.out_do_z.eq(1)
100 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
102 # if a is inf return inf (or NaN)
103 with m
.Elif(a1
.is_inf
):
104 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
105 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
106 # if a is inf and signs don't match return NaN
107 with m
.If(b1
.exp_128
& s_nomatch
):
108 m
.d
.comb
+= self
.o
.z
.nan(0)
110 # if b is inf return inf
111 with m
.Elif(b1
.is_inf
):
112 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
113 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
115 # if a is zero and b zero return signed-a/b
116 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
117 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
118 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
120 # if a is zero return b
121 with m
.Elif(a1
.is_zero
):
122 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
123 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
125 # if b is zero return a
126 with m
.Elif(b1
.is_zero
):
127 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
128 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
130 # if a equal to -b return zero (+ve zero)
131 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
132 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
133 m
.d
.comb
+= self
.o
.z
.zero(0)
135 # Denormalised Number checks next, so pass a/b data through
137 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
138 m
.d
.comb
+= self
.o
.a
.eq(a1
)
139 m
.d
.comb
+= self
.o
.b
.eq(b1
)
141 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
142 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
147 class FPAddSpecialCases(FPState
):
148 """ special cases: NaNs, infs, zeros, denormalised
149 NOTE: some of these are unique to add. see "Special Operations"
150 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
153 def __init__(self
, width
, id_wid
):
154 FPState
.__init
__(self
, "special_cases")
155 self
.mod
= FPAddSpecialCasesMod(width
)
156 self
.out_z
= self
.mod
.ospec()
157 self
.out_do_z
= Signal(reset_less
=True)
159 def setup(self
, m
, i
):
160 """ links module to inputs and outputs
162 self
.mod
.setup(m
, i
, self
.out_do_z
)
163 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
164 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
168 with m
.If(self
.out_do_z
):
171 m
.next
= "denormalise"
174 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
175 """ special cases: NaNs, infs, zeros, denormalised
176 NOTE: some of these are unique to add. see "Special Operations"
177 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
180 def __init__(self
, width
, id_wid
):
181 FPState
.__init
__(self
, "special_cases")
184 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
185 self
.out
= self
.ospec()
188 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
191 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
193 def setup(self
, m
, i
):
194 """ links module to inputs and outputs
196 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
197 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
199 chain
= StageChain([smod
, dmod
])
202 # only needed for break-out (early-out)
203 # self.out_do_z = smod.o.out_do_z
207 def process(self
, i
):
211 # for break-out (early-out)
212 #with m.If(self.out_do_z):
215 m
.d
.sync
+= self
.out
.eq(self
.process(None))
219 class FPAddAlignMultiMod(FPState
):
221 def __init__(self
, width
):
222 self
.in_a
= FPNumBase(width
)
223 self
.in_b
= FPNumBase(width
)
224 self
.out_a
= FPNumIn(None, width
)
225 self
.out_b
= FPNumIn(None, width
)
226 self
.exp_eq
= Signal(reset_less
=True)
228 def elaborate(self
, platform
):
229 # This one however (single-cycle) will do the shift
234 m
.submodules
.align_in_a
= self
.in_a
235 m
.submodules
.align_in_b
= self
.in_b
236 m
.submodules
.align_out_a
= self
.out_a
237 m
.submodules
.align_out_b
= self
.out_b
239 # NOTE: this does *not* do single-cycle multi-shifting,
240 # it *STAYS* in the align state until exponents match
242 # exponent of a greater than b: shift b down
243 m
.d
.comb
+= self
.exp_eq
.eq(0)
244 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
245 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
246 agtb
= Signal(reset_less
=True)
247 altb
= Signal(reset_less
=True)
248 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
249 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
251 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
252 # exponent of b greater than a: shift a down
254 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
255 # exponents equal: move to next stage.
257 m
.d
.comb
+= self
.exp_eq
.eq(1)
261 class FPAddAlignMulti(FPState
):
263 def __init__(self
, width
, id_wid
):
264 FPState
.__init
__(self
, "align")
265 self
.mod
= FPAddAlignMultiMod(width
)
266 self
.out_a
= FPNumIn(None, width
)
267 self
.out_b
= FPNumIn(None, width
)
268 self
.exp_eq
= Signal(reset_less
=True)
270 def setup(self
, m
, in_a
, in_b
):
271 """ links module to inputs and outputs
273 m
.submodules
.align
= self
.mod
274 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
275 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
276 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
277 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
278 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
281 with m
.If(self
.exp_eq
):
287 def __init__(self
, width
, id_wid
):
288 self
.a
= FPNumIn(None, width
)
289 self
.b
= FPNumIn(None, width
)
290 self
.z
= FPNumOut(width
, False)
291 self
.out_do_z
= Signal(reset_less
=True)
292 self
.oz
= Signal(width
, reset_less
=True)
293 self
.mid
= Signal(id_wid
, reset_less
=True)
296 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
297 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
300 class FPAddAlignSingleMod
:
302 def __init__(self
, width
, id_wid
):
305 self
.i
= self
.ispec()
306 self
.o
= self
.ospec()
309 return FPSCData(self
.width
, self
.id_wid
)
312 return FPNumIn2Ops(self
.width
, self
.id_wid
)
314 def process(self
, i
):
317 def setup(self
, m
, i
):
318 """ links module to inputs and outputs
320 m
.submodules
.align
= self
321 m
.d
.comb
+= self
.i
.eq(i
)
323 def elaborate(self
, platform
):
324 """ Aligns A against B or B against A, depending on which has the
325 greater exponent. This is done in a *single* cycle using
326 variable-width bit-shift
328 the shifter used here is quite expensive in terms of gates.
329 Mux A or B in (and out) into temporaries, as only one of them
330 needs to be aligned against the other
334 m
.submodules
.align_in_a
= self
.i
.a
335 m
.submodules
.align_in_b
= self
.i
.b
336 m
.submodules
.align_out_a
= self
.o
.a
337 m
.submodules
.align_out_b
= self
.o
.b
339 # temporary (muxed) input and output to be shifted
340 t_inp
= FPNumBase(self
.width
)
341 t_out
= FPNumIn(None, self
.width
)
342 espec
= (len(self
.i
.a
.e
), True)
343 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
344 m
.submodules
.align_t_in
= t_inp
345 m
.submodules
.align_t_out
= t_out
346 m
.submodules
.multishift_r
= msr
348 ediff
= Signal(espec
, reset_less
=True)
349 ediffr
= Signal(espec
, reset_less
=True)
350 tdiff
= Signal(espec
, reset_less
=True)
351 elz
= Signal(reset_less
=True)
352 egz
= Signal(reset_less
=True)
354 # connect multi-shifter to t_inp/out mantissa (and tdiff)
355 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
356 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
357 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
358 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
359 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
361 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
362 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
363 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
364 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
366 # default: A-exp == B-exp, A and B untouched (fall through)
367 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
368 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
369 # only one shifter (muxed)
370 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
371 # exponent of a greater than b: shift b down
372 with m
.If(~self
.i
.out_do_z
):
374 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
377 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
379 # exponent of b greater than a: shift a down
381 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
384 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
387 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
388 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
389 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
390 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
395 class FPAddAlignSingle(FPState
):
397 def __init__(self
, width
, id_wid
):
398 FPState
.__init
__(self
, "align")
399 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
400 self
.out_a
= FPNumIn(None, width
)
401 self
.out_b
= FPNumIn(None, width
)
403 def setup(self
, m
, i
):
404 """ links module to inputs and outputs
408 # NOTE: could be done as comb
409 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
410 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
416 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
418 def __init__(self
, width
, id_wid
):
419 FPState
.__init
__(self
, "align")
422 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
423 self
.a1o
= self
.ospec()
426 return FPSCData(self
.width
, self
.id_wid
)
429 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
431 def setup(self
, m
, i
):
432 """ links module to inputs and outputs
435 # chain AddAlignSingle, AddStage0 and AddStage1
436 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
437 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
438 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
440 chain
= StageChain([mod
, a0mod
, a1mod
])
445 def process(self
, i
):
449 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
450 m
.next
= "normalise_1"
453 class FPAddStage0Data
:
455 def __init__(self
, width
, id_wid
):
456 self
.z
= FPNumBase(width
, False)
457 self
.out_do_z
= Signal(reset_less
=True)
458 self
.oz
= Signal(width
, reset_less
=True)
459 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
460 self
.mid
= Signal(id_wid
, reset_less
=True)
463 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
464 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
467 class FPAddStage0Mod
:
469 def __init__(self
, width
, id_wid
):
472 self
.i
= self
.ispec()
473 self
.o
= self
.ospec()
476 return FPSCData(self
.width
, self
.id_wid
)
479 return FPAddStage0Data(self
.width
, self
.id_wid
)
481 def process(self
, i
):
484 def setup(self
, m
, i
):
485 """ links module to inputs and outputs
487 m
.submodules
.add0
= self
488 m
.d
.comb
+= self
.i
.eq(i
)
490 def elaborate(self
, platform
):
492 m
.submodules
.add0_in_a
= self
.i
.a
493 m
.submodules
.add0_in_b
= self
.i
.b
494 m
.submodules
.add0_out_z
= self
.o
.z
496 # store intermediate tests (and zero-extended mantissas)
497 seq
= Signal(reset_less
=True)
498 mge
= Signal(reset_less
=True)
499 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
500 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
501 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
502 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
503 am0
.eq(Cat(self
.i
.a
.m
, 0)),
504 bm0
.eq(Cat(self
.i
.b
.m
, 0))
506 # same-sign (both negative or both positive) add mantissas
507 with m
.If(~self
.i
.out_do_z
):
508 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
511 self
.o
.tot
.eq(am0
+ bm0
),
512 self
.o
.z
.s
.eq(self
.i
.a
.s
)
514 # a mantissa greater than b, use a
517 self
.o
.tot
.eq(am0
- bm0
),
518 self
.o
.z
.s
.eq(self
.i
.a
.s
)
520 # b mantissa greater than a, use b
523 self
.o
.tot
.eq(bm0
- am0
),
524 self
.o
.z
.s
.eq(self
.i
.b
.s
)
527 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
528 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
529 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
533 class FPAddStage0(FPState
):
534 """ First stage of add. covers same-sign (add) and subtract
535 special-casing when mantissas are greater or equal, to
536 give greatest accuracy.
539 def __init__(self
, width
, id_wid
):
540 FPState
.__init
__(self
, "add_0")
541 self
.mod
= FPAddStage0Mod(width
)
542 self
.o
= self
.mod
.ospec()
544 def setup(self
, m
, i
):
545 """ links module to inputs and outputs
549 # NOTE: these could be done as combinatorial (merge add0+add1)
550 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
556 class FPAddStage1Mod(FPState
):
557 """ Second stage of add: preparation for normalisation.
558 detects when tot sum is too big (tot[27] is kinda a carry bit)
561 def __init__(self
, width
, id_wid
):
564 self
.i
= self
.ispec()
565 self
.o
= self
.ospec()
568 return FPAddStage0Data(self
.width
, self
.id_wid
)
571 return FPAddStage1Data(self
.width
, self
.id_wid
)
573 def process(self
, i
):
576 def setup(self
, m
, i
):
577 """ links module to inputs and outputs
579 m
.submodules
.add1
= self
580 m
.submodules
.add1_out_overflow
= self
.o
.of
582 m
.d
.comb
+= self
.i
.eq(i
)
584 def elaborate(self
, platform
):
586 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
587 # tot[-1] (MSB) gets set when the sum overflows. shift result down
588 with m
.If(~self
.i
.out_do_z
):
589 with m
.If(self
.i
.tot
[-1]):
591 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
592 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
593 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
594 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
595 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
596 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
598 # tot[-1] (MSB) zero case
601 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
602 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
603 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
604 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
605 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
608 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
609 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
610 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
615 class FPAddStage1(FPState
):
617 def __init__(self
, width
, id_wid
):
618 FPState
.__init
__(self
, "add_1")
619 self
.mod
= FPAddStage1Mod(width
)
620 self
.out_z
= FPNumBase(width
, False)
621 self
.out_of
= Overflow()
622 self
.norm_stb
= Signal()
624 def setup(self
, m
, i
):
625 """ links module to inputs and outputs
629 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
631 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
632 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
633 m
.d
.sync
+= self
.norm_stb
.eq(1)
636 m
.next
= "normalise_1"
639 class FPNormToPack(FPState
, UnbufferedPipeline
):
641 def __init__(self
, width
, id_wid
):
642 FPState
.__init
__(self
, "normalise_1")
645 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
648 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
651 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
653 def setup(self
, m
, i
):
654 """ links module to inputs and outputs
657 # Normalisation, Rounding Corrections, Pack - in a chain
658 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
659 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
660 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
661 pmod
= FPPackMod(self
.width
, self
.id_wid
)
662 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
664 self
.out_z
= pmod
.ospec()
668 def process(self
, i
):
672 m
.d
.sync
+= self
.out_z
.eq(self
.process(None))
673 m
.next
= "pack_put_z"
678 def __init__(self
, width
, id_wid
):
679 self
.z
= Signal(width
, reset_less
=True)
680 self
.mid
= Signal(id_wid
, reset_less
=True)
683 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
686 return [self
.z
, self
.mid
]
691 def __init__(self
, width
, id_wid
):
694 self
.i
= self
.ispec()
695 self
.o
= self
.ospec()
698 return FPRoundData(self
.width
, self
.id_wid
)
701 return FPPackData(self
.width
, self
.id_wid
)
703 def process(self
, i
):
706 def setup(self
, m
, in_z
):
707 """ links module to inputs and outputs
709 m
.submodules
.pack
= self
710 m
.d
.comb
+= self
.i
.eq(in_z
)
712 def elaborate(self
, platform
):
714 z
= FPNumOut(self
.width
, False)
715 m
.submodules
.pack_in_z
= self
.i
.z
716 m
.submodules
.pack_out_z
= z
717 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
718 with m
.If(~self
.i
.out_do_z
):
719 with m
.If(self
.i
.z
.is_overflowed
):
720 m
.d
.comb
+= z
.inf(self
.i
.z
.s
)
722 m
.d
.comb
+= z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
724 m
.d
.comb
+= z
.v
.eq(self
.i
.oz
)
725 m
.d
.comb
+= self
.o
.z
.eq(z
.v
)
729 class FPPack(FPState
):
731 def __init__(self
, width
, id_wid
):
732 FPState
.__init
__(self
, "pack")
733 self
.mod
= FPPackMod(width
)
734 self
.out_z
= self
.ospec()
737 return self
.mod
.ispec()
740 return self
.mod
.ospec()
742 def setup(self
, m
, in_z
):
743 """ links module to inputs and outputs
745 self
.mod
.setup(m
, in_z
)
747 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
748 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
751 m
.next
= "pack_put_z"
754 class FPPutZ(FPState
):
756 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
757 FPState
.__init
__(self
, state
)
760 self
.to_state
= to_state
764 self
.out_mid
= out_mid
767 if self
.in_mid
is not None:
768 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
770 self
.out_z
.z
.v
.eq(self
.in_z
)
772 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
773 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
774 m
.next
= self
.to_state
776 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
779 class FPPutZIdx(FPState
):
781 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
782 FPState
.__init
__(self
, state
)
785 self
.to_state
= to_state
791 outz_stb
= Signal(reset_less
=True)
792 outz_ack
= Signal(reset_less
=True)
793 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
794 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
797 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
799 with m
.If(outz_stb
& outz_ack
):
800 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
801 m
.next
= self
.to_state
803 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
807 def __init__(self
, width
, id_wid
):
809 self
.mid
= Signal(id_wid
, reset_less
=True)
812 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
815 return [self
.z
, self
.mid
]
820 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
823 * width: bit-width of IEEE754. supported: 16, 32, 64
824 * id_wid: an identifier that is sync-connected to the input
825 * single_cycle: True indicates each stage to complete in 1 clock
826 * compact: True indicates a reduced number of stages
830 self
.single_cycle
= single_cycle
831 self
.compact
= compact
833 self
.in_t
= Trigger()
834 self
.i
= self
.ispec()
835 self
.o
= self
.ospec()
840 return FPADDBaseData(self
.width
, self
.id_wid
)
843 return FPOpData(self
.width
, self
.id_wid
)
845 def add_state(self
, state
):
846 self
.states
.append(state
)
849 def get_fragment(self
, platform
=None):
850 """ creates the HDL code-fragment for FPAdd
853 m
.submodules
.out_z
= self
.o
.z
854 m
.submodules
.in_t
= self
.in_t
856 self
.get_compact_fragment(m
, platform
)
858 self
.get_longer_fragment(m
, platform
)
862 for state
in self
.states
:
863 with m
.State(state
.state_from
):
868 def get_longer_fragment(self
, m
, platform
=None):
870 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
875 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
877 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
878 sc
.setup(m
, a
, b
, self
.in_mid
)
880 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
881 dn
.setup(m
, a
, b
, sc
.in_mid
)
883 if self
.single_cycle
:
884 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
885 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
887 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
888 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
890 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
891 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
893 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
894 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
896 if self
.single_cycle
:
897 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
898 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
900 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
901 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
903 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
904 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
906 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
907 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
909 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
910 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
912 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
913 pa
.in_mid
, self
.out_mid
))
915 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
916 pa
.in_mid
, self
.out_mid
))
918 def get_compact_fragment(self
, m
, platform
=None):
921 get
= FPGet2Op("get_ops", "special_cases", self
.width
, self
.id_wid
)
922 sc
= FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
)
923 alm
= FPAddAlignSingleAdd(self
.width
, self
.id_wid
)
924 n1
= FPNormToPack(self
.width
, self
.id_wid
)
926 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
928 chainlist
= [get
, sc
, alm
, n1
]
929 chain
= StageChain(chainlist
, specallocate
=True)
930 chain
.setup(m
, self
.i
)
932 for mod
in chainlist
:
933 sc
= self
.add_state(mod
)
935 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
936 n1
.out_z
.mid
, self
.o
.mid
))
938 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
939 # sc.o.mid, self.o.mid))
942 class FPADDBase(FPState
):
944 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
947 * width: bit-width of IEEE754. supported: 16, 32, 64
948 * id_wid: an identifier that is sync-connected to the input
949 * single_cycle: True indicates each stage to complete in 1 clock
951 FPState
.__init
__(self
, "fpadd")
953 self
.single_cycle
= single_cycle
954 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
955 self
.o
= self
.ospec()
957 self
.in_t
= Trigger()
958 self
.i
= self
.ispec()
960 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
961 self
.in_accept
= Signal(reset_less
=True)
962 self
.add_stb
= Signal(reset_less
=True)
963 self
.add_ack
= Signal(reset
=0, reset_less
=True)
966 return self
.mod
.ispec()
969 return self
.mod
.ospec()
971 def setup(self
, m
, i
, add_stb
, in_mid
):
972 m
.d
.comb
+= [self
.i
.eq(i
),
973 self
.mod
.i
.eq(self
.i
),
974 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
975 #self.add_stb.eq(add_stb),
976 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
977 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
978 self
.o
.mid
.eq(self
.mod
.o
.mid
),
979 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
980 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
981 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
984 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
985 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
986 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
987 #m.d.sync += self.in_t.stb.eq(0)
989 m
.submodules
.fpadd
= self
.mod
993 # in_accept is set on incoming strobe HIGH and ack LOW.
994 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
996 #with m.If(self.in_t.ack):
997 # m.d.sync += self.in_t.stb.eq(0)
998 with m
.If(~self
.z_done
):
999 # not done: test for accepting an incoming operand pair
1000 with m
.If(self
.in_accept
):
1002 self
.add_ack
.eq(1), # acknowledge receipt...
1003 self
.in_t
.stb
.eq(1), # initiate add
1006 m
.d
.sync
+= [self
.add_ack
.eq(0),
1007 self
.in_t
.stb
.eq(0),
1011 # done: acknowledge, and write out id and value
1012 m
.d
.sync
+= [self
.add_ack
.eq(1),
1019 if self
.in_mid
is not None:
1020 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1023 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1025 # move to output state on detecting z ack
1026 with m
.If(self
.out_z
.trigger
):
1027 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1030 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1033 class FPADDBasePipe(ControlBase
):
1034 def __init__(self
, width
, id_wid
):
1035 ControlBase
.__init
__(self
)
1036 self
.pipe1
= FPAddSpecialCasesDeNorm(width
, id_wid
)
1037 self
.pipe2
= FPAddAlignSingleAdd(width
, id_wid
)
1038 self
.pipe3
= FPNormToPack(width
, id_wid
)
1040 self
._eqs
= self
.connect([self
.pipe1
, self
.pipe2
, self
.pipe3
])
1042 def elaborate(self
, platform
):
1044 m
.submodules
.scnorm
= self
.pipe1
1045 m
.submodules
.addalign
= self
.pipe2
1046 m
.submodules
.normpack
= self
.pipe3
1047 m
.d
.comb
+= self
._eqs
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in side of the FPADD pipeline.

        A PriorityCombMuxInPipe wrapped around a pass-through stage whose
        input/output spec is an FPADDBaseData of the given width / id_wid.

        * width: passed straight through to FPADDBaseData
        * id_wid: passed straight through to FPADDBaseData
        * num_rows: stored on the instance and handed to the base class
                    as p_len
    """
    def __init__(self, width, id_wid, num_rows):
        # recorded before the base-class constructor runs, as in the
        # original ordering
        self.num_rows = num_rows

        def make_spec():
            # a fresh data record each time the stage asks for its spec
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(make_spec),
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out side of the FPADD pipeline.

        A CombMuxOutPipe wrapped around a pass-through stage whose
        input/output spec is an FPPackData of the given width / id_wid.

        * width: passed straight through to FPPackData
        * id_wid: passed straight through to FPPackData
        * num_rows: stored on the instance and handed to the base class
                    as n_len
    """
    def __init__(self, width, id_wid, num_rows):
        # recorded before the base-class constructor runs, as in the
        # original ordering
        self.num_rows = num_rows

        def make_spec():
            # a fresh data record each time the stage asks for its spec
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(make_spec),
                                n_len=self.num_rows)
1067 class FPADDMuxInOut
:
1068 """ Reservation-Station version of FPADD pipeline.
1070 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
1071 * 3-stage adder pipeline
1072 * fan-out on outputs (an array of FPPackData: z,mid)
1074 Fan-in and Fan-out are combinatorial.
1076 def __init__(self
, width
, id_wid
, num_rows
):
1077 self
.num_rows
= num_rows
1078 self
.inpipe
= FPADDInMuxPipe(width
, id_wid
, num_rows
) # fan-in
1079 self
.fpadd
= FPADDBasePipe(width
, id_wid
) # add stage
1080 self
.outpipe
= FPADDMuxOutPipe(width
, id_wid
, num_rows
) # fan-out
1082 self
.p
= self
.inpipe
.p
# kinda annoying,
1083 self
.n
= self
.outpipe
.n
# use pipe in/out as this class in/out
1084 self
._ports
= self
.inpipe
.ports() + self
.outpipe
.ports()
1086 def elaborate(self
, platform
):
1088 m
.submodules
.inpipe
= self
.inpipe
1089 m
.submodules
.fpadd
= self
.fpadd
1090 m
.submodules
.outpipe
= self
.outpipe
1092 m
.d
.comb
+= self
.inpipe
.n
.connect_to_next(self
.fpadd
.p
)
1093 m
.d
.comb
+= self
.fpadd
.connect_to_next(self
.outpipe
)
1102 """ FPADD: stages as follows:
1108 FPAddBase---> FPAddBaseMod
1110 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1112 FPAddBase is tricky: it is both a stage and *has* stages.
1113 Connection to FPAddBaseMod therefore requires an in stb/ack
1114 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1115 needs to be the thing that raises the incoming stb.
1118 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1121 * width: bit-width of IEEE754. supported: 16, 32, 64
1122 * id_wid: an identifier that is sync-connected to the input
1123 * single_cycle: True indicates each stage to complete in 1 clock
1126 self
.id_wid
= id_wid
1127 self
.single_cycle
= single_cycle
1129 #self.out_z = FPOp(width)
1130 self
.ids
= FPID(id_wid
)
1133 for i
in range(rs_sz
):
1136 in_a
.name
= "in_a_%d" % i
1137 in_b
.name
= "in_b_%d" % i
1138 rs
.append((in_a
, in_b
))
1142 for i
in range(rs_sz
):
1144 out_z
.name
= "out_z_%d" % i
1146 self
.res
= Array(res
)
1150 def add_state(self
, state
):
1151 self
.states
.append(state
)
1154 def get_fragment(self
, platform
=None):
1155 """ creates the HDL code-fragment for FPAdd
1158 m
.submodules
+= self
.rs
1160 in_a
= self
.rs
[0][0]
1161 in_b
= self
.rs
[0][1]
1163 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1168 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1173 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1174 ab
= self
.add_state(ab
)
1175 abd
= ab
.ispec() # create an input spec object for FPADDBase
1176 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1177 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1180 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1183 with m
.FSM() as fsm
:
1185 for state
in self
.states
:
1186 with m
.State(state
.state_from
):
1192 if __name__
== "__main__":
1194 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1195 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1196 alu
.rs
[0][1].ports() + \
1197 alu
.res
[0].ports() + \
1198 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1200 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1201 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1202 alu
.in_t
.ports() + \
1203 alu
.out_z
.ports() + \
1204 [alu
.in_mid
, alu
.out_mid
])
1207 # works... but don't use, just do "python fname.py convert -t v"
1208 #print (verilog.convert(alu, ports=[
1209 # ports=alu.in_a.ports() + \
1210 # alu.in_b.ports() + \
1211 # alu.out_z.ports())