1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
, FPID
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
, FPGet2OpMod
, FPGet2Op
)
19 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
20 from fpcommon
.postcalc
import FPAddStage1Data
21 from fpcommon
.postnormalise
import (FPNorm1Data
, FPNorm1ModSingle
,
22 FPNorm1ModMulti
, FPNorm1Single
, FPNorm1Multi
)
23 from fpcommon
.roundz
import (FPRoundData
, FPRoundMod
, FPRound
)
26 class FPAddSpecialCasesMod
:
27 """ special cases: NaNs, infs, zeros, denormalised
28 NOTE: some of these are unique to add. see "Special Operations"
29 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
32 def __init__(self
, width
, id_wid
):
39 return FPADDBaseData(self
.width
, self
.id_wid
)
42 return FPSCData(self
.width
, self
.id_wid
)
44 def setup(self
, m
, i
):
45 """ links module to inputs and outputs
47 m
.submodules
.specialcases
= self
48 m
.d
.comb
+= self
.i
.eq(i
)
53 def elaborate(self
, platform
):
56 m
.submodules
.sc_out_z
= self
.o
.z
58 # decode: XXX really should move to separate stage
59 a1
= FPNumIn(None, self
.width
)
60 b1
= FPNumIn(None, self
.width
)
61 m
.submodules
.sc_decode_a
= a1
62 m
.submodules
.sc_decode_b
= b1
63 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
68 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
71 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
73 # if a is NaN or b is NaN return NaN
74 with m
.If(a1
.is_nan | b1
.is_nan
):
75 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
76 m
.d
.comb
+= self
.o
.z
.nan(0)
78 # XXX WEIRDNESS for FP16 non-canonical NaN handling
81 ## if a is zero and b is NaN return -b
82 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
83 # m.d.comb += self.o.out_do_z.eq(1)
84 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
86 ## if b is zero and a is NaN return -a
87 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
88 # m.d.comb += self.o.out_do_z.eq(1)
89 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
91 ## if a is -zero and b is NaN return -b
92 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
93 # m.d.comb += self.o.out_do_z.eq(1)
94 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
96 ## if b is -zero and a is NaN return -a
97 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
98 # m.d.comb += self.o.out_do_z.eq(1)
99 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
101 # if a is inf return inf (or NaN)
102 with m
.Elif(a1
.is_inf
):
103 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
104 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
105 # if a is inf and signs don't match return NaN
106 with m
.If(b1
.exp_128
& s_nomatch
):
107 m
.d
.comb
+= self
.o
.z
.nan(0)
109 # if b is inf return inf
110 with m
.Elif(b1
.is_inf
):
111 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
112 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
114 # if a is zero and b zero return signed-a/b
115 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
116 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
117 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
119 # if a is zero return b
120 with m
.Elif(a1
.is_zero
):
121 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
122 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
124 # if b is zero return a
125 with m
.Elif(b1
.is_zero
):
126 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
127 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
129 # if a equal to -b return zero (+ve zero)
130 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
131 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
132 m
.d
.comb
+= self
.o
.z
.zero(0)
134 # Denormalised Number checks next, so pass a/b data through
136 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
137 m
.d
.comb
+= self
.o
.a
.eq(a1
)
138 m
.d
.comb
+= self
.o
.b
.eq(b1
)
140 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
141 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
146 class FPAddSpecialCases(FPState
):
147 """ special cases: NaNs, infs, zeros, denormalised
148 NOTE: some of these are unique to add. see "Special Operations"
149 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" FSM state

        * width: bit-width of the operands, forwarded to the module
        * id_wid: width of the id (mid) field, forwarded to the module
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid): passing only
    # width raised TypeError as soon as this state was constructed.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    # tested in the state's action to choose the next FSM state
    self.out_do_z = Signal(reset_less=True)
158 def setup(self
, m
, i
):
159 """ links module to inputs and outputs
161 self
.mod
.setup(m
, i
, self
.out_do_z
)
162 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
163 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
167 with m
.If(self
.out_do_z
):
170 m
.next
= "denormalise"
173 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
174 """ special cases: NaNs, infs, zeros, denormalised
175 NOTE: some of these are unique to add. see "Special Operations"
176 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
179 def __init__(self
, width
, id_wid
):
180 FPState
.__init
__(self
, "special_cases")
183 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
184 self
.out
= self
.ospec()
187 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
190 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
192 def setup(self
, m
, i
):
193 """ links module to inputs and outputs
195 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
196 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
198 chain
= StageChain([smod
, dmod
])
201 # only needed for break-out (early-out)
202 # self.out_do_z = smod.o.out_do_z
206 def process(self
, i
):
210 # for break-out (early-out)
211 #with m.If(self.out_do_z):
214 m
.d
.sync
+= self
.out
.eq(self
.process(None))
218 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the operand ports of the multi-cycle aligner

        * width: bit-width handed to the FPNumBase / FPNumIn operands
    """
    # incoming operands (copied to the outputs by default in elaborate)
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # outgoing operands: one of them is shifted down while exponents differ
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # set to 1 by elaborate() once the exponents are equal
    self.exp_eq = Signal(reset_less=True)
227 def elaborate(self
, platform
):
228 # This one however (single-cycle) will do the shift
233 m
.submodules
.align_in_a
= self
.in_a
234 m
.submodules
.align_in_b
= self
.in_b
235 m
.submodules
.align_out_a
= self
.out_a
236 m
.submodules
.align_out_b
= self
.out_b
238 # NOTE: this does *not* do single-cycle multi-shifting,
239 # it *STAYS* in the align state until exponents match
241 # exponent of a greater than b: shift b down
242 m
.d
.comb
+= self
.exp_eq
.eq(0)
243 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
244 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
245 agtb
= Signal(reset_less
=True)
246 altb
= Signal(reset_less
=True)
247 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
248 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
250 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
251 # exponent of b greater than a: shift a down
253 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
254 # exponents equal: move to next stage.
256 m
.d
.comb
+= self
.exp_eq
.eq(1)
260 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ creates the multi-cycle "align" FSM state

        * width: bit-width of the operands
        * id_wid: accepted for a uniform state signature; not used here
    """
    FPState.__init__(self, "align")

    # combinatorial aligner; it only takes the operand width
    self.mod = FPAddAlignMultiMod(width)

    # registered copies of the aligner outputs (written in setup)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # mirrors the module's exp_eq flag
    self.exp_eq = Signal(reset_less=True)
269 def setup(self
, m
, in_a
, in_b
):
270 """ links module to inputs and outputs
272 m
.submodules
.align
= self
.mod
273 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
274 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
275 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
276 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
277 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
280 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ creates a two-operand data record

        * width: bit-width of the operands and of the bypass value oz
        * id_wid: bit-width of the id (mid) field

        NOTE(review): the enclosing class header is outside this view;
        presumably this is the record returned by
        FPAddAlignSingleMod.ospec() - confirm.
    """
    # the two operands
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)

    # result number
    self.z = FPNumOut(width, False)

    # out_do_z is tested as ~out_do_z by the later stages before they do
    # any work; when it is set, FPPackMod emits oz as the result
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # identifier carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
295 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
296 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
299 class FPAddAlignSingleMod
:
301 def __init__(self
, width
, id_wid
):
304 self
.i
= self
.ispec()
305 self
.o
= self
.ospec()
308 return FPSCData(self
.width
, self
.id_wid
)
311 return FPNumIn2Ops(self
.width
, self
.id_wid
)
313 def process(self
, i
):
316 def setup(self
, m
, i
):
317 """ links module to inputs and outputs
319 m
.submodules
.align
= self
320 m
.d
.comb
+= self
.i
.eq(i
)
322 def elaborate(self
, platform
):
323 """ Aligns A against B or B against A, depending on which has the
324 greater exponent. This is done in a *single* cycle using
325 variable-width bit-shift
327 the shifter used here is quite expensive in terms of gates.
328 Mux A or B in (and out) into temporaries, as only one of them
329 needs to be aligned against the other
333 m
.submodules
.align_in_a
= self
.i
.a
334 m
.submodules
.align_in_b
= self
.i
.b
335 m
.submodules
.align_out_a
= self
.o
.a
336 m
.submodules
.align_out_b
= self
.o
.b
338 # temporary (muxed) input and output to be shifted
339 t_inp
= FPNumBase(self
.width
)
340 t_out
= FPNumIn(None, self
.width
)
341 espec
= (len(self
.i
.a
.e
), True)
342 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
343 m
.submodules
.align_t_in
= t_inp
344 m
.submodules
.align_t_out
= t_out
345 m
.submodules
.multishift_r
= msr
347 ediff
= Signal(espec
, reset_less
=True)
348 ediffr
= Signal(espec
, reset_less
=True)
349 tdiff
= Signal(espec
, reset_less
=True)
350 elz
= Signal(reset_less
=True)
351 egz
= Signal(reset_less
=True)
353 # connect multi-shifter to t_inp/out mantissa (and tdiff)
354 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
355 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
356 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
357 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
358 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
360 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
361 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
362 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
363 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
365 # default: A-exp == B-exp, A and B untouched (fall through)
366 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
367 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
368 # only one shifter (muxed)
369 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
370 # exponent of a greater than b: shift b down
371 with m
.If(~self
.i
.out_do_z
):
373 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
376 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
378 # exponent of b greater than a: shift a down
380 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
383 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
386 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
387 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
388 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
389 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
394 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ creates the single-cycle "align" FSM state

        * width: bit-width of the operands
        * id_wid: bit-width of the id (mid) field
    """
    FPState.__init__(self, "align")

    # single-cycle aligner (variable-width shift)
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # registered copies of the aligned operands (written in setup)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
402 def setup(self
, m
, i
):
403 """ links module to inputs and outputs
407 # NOTE: could be done as comb
408 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
409 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
415 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
417 def __init__(self
, width
, id_wid
):
418 FPState
.__init
__(self
, "align")
421 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
422 self
.a1o
= self
.ospec()
425 return FPSCData(self
.width
, self
.id_wid
)
428 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
430 def setup(self
, m
, i
):
431 """ links module to inputs and outputs
434 # chain AddAlignSingle, AddStage0 and AddStage1
435 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
436 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
437 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
439 chain
= StageChain([mod
, a0mod
, a1mod
])
444 def process(self
, i
):
448 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
449 m
.next
= "normalise_1"
class FPAddStage0Data:
    """ data record produced by FPAddStage0Mod and consumed by
        FPAddStage1Mod: result number, mantissa total and bypass fields.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number z and of the bypass value oz
            * id_wid: bit-width of the id (mid) field
        """
        self.z = FPNumBase(width, False)
        # bypass flag and bypass value, passed through unchanged
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # mantissa sum/difference: 4 bits wider than z's mantissa
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field of
            another FPAddStage0Data-shaped record (i) into this one.

            The list was previously left without an enclosing method;
            callers use it as "self.o.eq(self.mod.o)".
        """
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
                self.tot.eq(i.tot), self.mid.eq(i.mid)]
466 class FPAddStage0Mod
:
468 def __init__(self
, width
, id_wid
):
471 self
.i
= self
.ispec()
472 self
.o
= self
.ospec()
475 return FPSCData(self
.width
, self
.id_wid
)
478 return FPAddStage0Data(self
.width
, self
.id_wid
)
480 def process(self
, i
):
483 def setup(self
, m
, i
):
484 """ links module to inputs and outputs
486 m
.submodules
.add0
= self
487 m
.d
.comb
+= self
.i
.eq(i
)
489 def elaborate(self
, platform
):
491 m
.submodules
.add0_in_a
= self
.i
.a
492 m
.submodules
.add0_in_b
= self
.i
.b
493 m
.submodules
.add0_out_z
= self
.o
.z
495 # store intermediate tests (and zero-extended mantissas)
496 seq
= Signal(reset_less
=True)
497 mge
= Signal(reset_less
=True)
498 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
499 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
500 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
501 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
502 am0
.eq(Cat(self
.i
.a
.m
, 0)),
503 bm0
.eq(Cat(self
.i
.b
.m
, 0))
505 # same-sign (both negative or both positive) add mantissas
506 with m
.If(~self
.i
.out_do_z
):
507 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
510 self
.o
.tot
.eq(am0
+ bm0
),
511 self
.o
.z
.s
.eq(self
.i
.a
.s
)
513 # a mantissa greater than b, use a
516 self
.o
.tot
.eq(am0
- bm0
),
517 self
.o
.z
.s
.eq(self
.i
.a
.s
)
519 # b mantissa greater than a, use b
522 self
.o
.tot
.eq(bm0
- am0
),
523 self
.o
.z
.s
.eq(self
.i
.b
.s
)
526 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
527 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
528 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
532 class FPAddStage0(FPState
):
533 """ First stage of add. covers same-sign (add) and subtract
534 special-casing when mantissas are greater or equal, to
535 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" FSM state

        * width: bit-width of the operands, forwarded to the module
        * id_wid: width of the id (mid) field, forwarded to the module
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid): passing only width
    # raised TypeError as soon as this state was constructed.
    self.mod = FPAddStage0Mod(width, id_wid)
    # registered copy of the module output (written in setup)
    self.o = self.mod.ospec()
543 def setup(self
, m
, i
):
544 """ links module to inputs and outputs
548 # NOTE: these could be done as combinatorial (merge add0+add1)
549 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
555 class FPAddStage1Mod(FPState
):
556 """ Second stage of add: preparation for normalisation.
557 detects when tot sum is too big (tot[27] is kinda a carry bit)
560 def __init__(self
, width
, id_wid
):
563 self
.i
= self
.ispec()
564 self
.o
= self
.ospec()
567 return FPAddStage0Data(self
.width
, self
.id_wid
)
570 return FPAddStage1Data(self
.width
, self
.id_wid
)
572 def process(self
, i
):
575 def setup(self
, m
, i
):
576 """ links module to inputs and outputs
578 m
.submodules
.add1
= self
579 m
.submodules
.add1_out_overflow
= self
.o
.of
581 m
.d
.comb
+= self
.i
.eq(i
)
583 def elaborate(self
, platform
):
585 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
586 # tot[-1] (MSB) gets set when the sum overflows. shift result down
587 with m
.If(~self
.i
.out_do_z
):
588 with m
.If(self
.i
.tot
[-1]):
590 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
591 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
592 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
593 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
594 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
595 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
597 # tot[-1] (MSB) zero case
600 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
601 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
602 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
603 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
604 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
607 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
608 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
609 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
614 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ creates the "add_1" FSM state

        * width: bit-width of the operands, forwarded to the module
        * id_wid: width of the id (mid) field, forwarded to the module
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid): passing only width
    # raised TypeError as soon as this state was constructed.
    self.mod = FPAddStage1Mod(width, id_wid)
    # registered result number and overflow (guard/round/sticky) bits
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe: zeroed in setup, set to 1 alongside out_z/out_of
    self.norm_stb = Signal()
623 def setup(self
, m
, i
):
624 """ links module to inputs and outputs
628 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
630 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
631 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
632 m
.d
.sync
+= self
.norm_stb
.eq(1)
635 m
.next
= "normalise_1"
638 class FPNormToPack(FPState
, UnbufferedPipeline
):
640 def __init__(self
, width
, id_wid
):
641 FPState
.__init
__(self
, "normalise_1")
644 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
647 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
650 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
652 def setup(self
, m
, i
):
653 """ links module to inputs and outputs
656 # Normalisation, Rounding Corrections, Pack - in a chain
657 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
658 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
659 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
660 pmod
= FPPackMod(self
.width
, self
.id_wid
)
661 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
663 self
.out_z
= pmod
.ospec()
667 def process(self
, i
):
671 m
.d
.sync
+= self
.out_z
.eq(self
.process(None))
672 m
.next
= "pack_put_z"
675 class FPCorrectionsMod
:
677 def __init__(self
, width
, id_wid
):
680 self
.i
= self
.ispec()
681 self
.out_z
= self
.ospec()
684 return FPRoundData(self
.width
, self
.id_wid
)
687 return FPRoundData(self
.width
, self
.id_wid
)
689 def process(self
, i
):
692 def setup(self
, m
, i
):
693 """ links module to inputs and outputs
695 m
.submodules
.corrections
= self
696 m
.d
.comb
+= self
.i
.eq(i
)
698 def elaborate(self
, platform
):
700 m
.submodules
.corr_in_z
= self
.i
.z
701 m
.submodules
.corr_out_z
= self
.out_z
.z
702 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
703 with m
.If(~self
.i
.out_do_z
):
704 with m
.If(self
.i
.z
.is_denormalised
):
705 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
709 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """ creates the "corrections" FSM state

        * width: bit-width of the operands, forwarded to the module
        * id_wid: width of the id (mid) field, forwarded to the module
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod.__init__ takes (width, id_wid): passing only width
    # raised TypeError as soon as this state was constructed.
    self.mod = FPCorrectionsMod(width, id_wid)
    # registered copy of the module output (written in setup)
    self.out_z = self.ospec()
717 return self
.mod
.ispec()
720 return self
.mod
.ospec()
722 def setup(self
, m
, in_z
):
723 """ links module to inputs and outputs
725 self
.mod
.setup(m
, in_z
)
727 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
728 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def __init__(self, width, id_wid):
    """ creates a packed-result record

        * width: bit-width of the result value z
        * id_wid: bit-width of the id (mid) field

        NOTE(review): the enclosing class header is outside this view;
        presumably this is FPPackData (z, mid) - confirm.
    """
    # result value
    self.z = Signal(width, reset_less=True)
    # identifier carried alongside the result
    self.mid = Signal(id_wid, reset_less=True)
741 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
744 return [self
.z
, self
.mid
]
749 def __init__(self
, width
, id_wid
):
752 self
.i
= self
.ispec()
753 self
.o
= self
.ospec()
756 return FPRoundData(self
.width
, self
.id_wid
)
759 return FPPackData(self
.width
, self
.id_wid
)
761 def process(self
, i
):
764 def setup(self
, m
, in_z
):
765 """ links module to inputs and outputs
767 m
.submodules
.pack
= self
768 m
.d
.comb
+= self
.i
.eq(in_z
)
770 def elaborate(self
, platform
):
772 z
= FPNumOut(self
.width
, False)
773 m
.submodules
.pack_in_z
= self
.i
.z
774 m
.submodules
.pack_out_z
= z
775 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
776 with m
.If(~self
.i
.out_do_z
):
777 with m
.If(self
.i
.z
.is_overflowed
):
778 m
.d
.comb
+= z
.inf(self
.i
.z
.s
)
780 m
.d
.comb
+= z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
782 m
.d
.comb
+= z
.v
.eq(self
.i
.oz
)
783 m
.d
.comb
+= self
.o
.z
.eq(z
.v
)
787 class FPPack(FPState
):
def __init__(self, width, id_wid):
    """ creates the "pack" FSM state

        * width: bit-width of the operands, forwarded to the module
        * id_wid: width of the id (mid) field, forwarded to the module
    """
    FPState.__init__(self, "pack")
    # FPPackMod is built with (width, id_wid) everywhere else in this
    # file; passing only width raised TypeError at construction.
    self.mod = FPPackMod(width, id_wid)
    # registered copy of the module output (written in setup)
    self.out_z = self.ospec()
795 return self
.mod
.ispec()
798 return self
.mod
.ospec()
800 def setup(self
, m
, in_z
):
801 """ links module to inputs and outputs
803 self
.mod
.setup(m
, in_z
)
805 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
806 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
809 m
.next
= "pack_put_z"
812 class FPPutZ(FPState
):
814 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
815 FPState
.__init
__(self
, state
)
818 self
.to_state
= to_state
822 self
.out_mid
= out_mid
825 if self
.in_mid
is not None:
826 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
828 self
.out_z
.z
.v
.eq(self
.in_z
)
830 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
831 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
832 m
.next
= self
.to_state
834 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
837 class FPPutZIdx(FPState
):
839 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
840 FPState
.__init
__(self
, state
)
843 self
.to_state
= to_state
849 outz_stb
= Signal(reset_less
=True)
850 outz_ack
= Signal(reset_less
=True)
851 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
852 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
855 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
857 with m
.If(outz_stb
& outz_ack
):
858 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
859 m
.next
= self
.to_state
861 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
865 def __init__(self
, width
, id_wid
):
867 self
.mid
= Signal(id_wid
, reset_less
=True)
870 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
873 return [self
.z
, self
.mid
]
878 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
881 * width: bit-width of IEEE754. supported: 16, 32, 64
882 * id_wid: an identifier that is sync-connected to the input
883 * single_cycle: True indicates each stage to complete in 1 clock
884 * compact: True indicates a reduced number of stages
888 self
.single_cycle
= single_cycle
889 self
.compact
= compact
891 self
.in_t
= Trigger()
892 self
.i
= self
.ispec()
893 self
.o
= self
.ospec()
898 return FPADDBaseData(self
.width
, self
.id_wid
)
901 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ registers an FSM state and hands it back to the caller

        * state: the state object to append to self.states

        Returns the same state object, so that callers can write
        "get = self.add_state(...)" and keep using the result.
    """
    self.states.append(state)
    # callers call methods on the returned value (get.trigger_setup,
    # sc.setup, ...), so returning None broke every one of them
    return state
907 def get_fragment(self
, platform
=None):
908 """ creates the HDL code-fragment for FPAdd
911 m
.submodules
.out_z
= self
.o
.z
912 m
.submodules
.in_t
= self
.in_t
914 self
.get_compact_fragment(m
, platform
)
916 self
.get_longer_fragment(m
, platform
)
920 for state
in self
.states
:
921 with m
.State(state
.state_from
):
926 def get_longer_fragment(self
, m
, platform
=None):
928 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
933 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
935 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
936 sc
.setup(m
, a
, b
, self
.in_mid
)
938 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
939 dn
.setup(m
, a
, b
, sc
.in_mid
)
941 if self
.single_cycle
:
942 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
943 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
945 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
946 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
948 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
949 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
951 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
952 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
954 if self
.single_cycle
:
955 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
956 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
958 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
959 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
961 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
962 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
964 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
965 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
967 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
968 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
970 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
971 pa
.in_mid
, self
.out_mid
))
973 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
974 pa
.in_mid
, self
.out_mid
))
976 def get_compact_fragment(self
, m
, platform
=None):
979 get
= FPGet2Op("get_ops", "special_cases", self
.width
, self
.id_wid
)
980 sc
= FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
)
981 alm
= FPAddAlignSingleAdd(self
.width
, self
.id_wid
)
982 n1
= FPNormToPack(self
.width
, self
.id_wid
)
984 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
986 chainlist
= [get
, sc
, alm
, n1
]
987 chain
= StageChain(chainlist
, specallocate
=True)
988 chain
.setup(m
, self
.i
)
990 for mod
in chainlist
:
991 sc
= self
.add_state(mod
)
993 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
994 n1
.out_z
.mid
, self
.o
.mid
))
996 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
997 # sc.o.mid, self.o.mid))
1000 class FPADDBase(FPState
):
1002 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1005 * width: bit-width of IEEE754. supported: 16, 32, 64
1006 * id_wid: an identifier that is sync-connected to the input
1007 * single_cycle: True indicates each stage to complete in 1 clock
1009 FPState
.__init
__(self
, "fpadd")
1011 self
.single_cycle
= single_cycle
1012 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1013 self
.o
= self
.ospec()
1015 self
.in_t
= Trigger()
1016 self
.i
= self
.ispec()
1018 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1019 self
.in_accept
= Signal(reset_less
=True)
1020 self
.add_stb
= Signal(reset_less
=True)
1021 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1024 return self
.mod
.ispec()
1027 return self
.mod
.ospec()
1029 def setup(self
, m
, i
, add_stb
, in_mid
):
1030 m
.d
.comb
+= [self
.i
.eq(i
),
1031 self
.mod
.i
.eq(self
.i
),
1032 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1033 #self.add_stb.eq(add_stb),
1034 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1035 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1036 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1037 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1038 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1039 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1042 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1043 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1044 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1045 #m.d.sync += self.in_t.stb.eq(0)
1047 m
.submodules
.fpadd
= self
.mod
1049 def action(self
, m
):
1051 # in_accept is set on incoming strobe HIGH and ack LOW.
1052 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1054 #with m.If(self.in_t.ack):
1055 # m.d.sync += self.in_t.stb.eq(0)
1056 with m
.If(~self
.z_done
):
1057 # not done: test for accepting an incoming operand pair
1058 with m
.If(self
.in_accept
):
1060 self
.add_ack
.eq(1), # acknowledge receipt...
1061 self
.in_t
.stb
.eq(1), # initiate add
1064 m
.d
.sync
+= [self
.add_ack
.eq(0),
1065 self
.in_t
.stb
.eq(0),
1069 # done: acknowledge, and write out id and value
1070 m
.d
.sync
+= [self
.add_ack
.eq(1),
1077 if self
.in_mid
is not None:
1078 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1081 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1083 # move to output state on detecting z ack
1084 with m
.If(self
.out_z
.trigger
):
1085 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1088 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
class FPADDBasePipe(ControlBase):
    """ three pipeline stages of the adder connected in series:
        special-cases/denormalise, align/add, normalise-to-pack.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the operands, forwarded to each stage
            * id_wid: width of the id (mid) field, forwarded to each stage
        """
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # assignments linking the three stages; applied in elaborate()
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        """ registers the three stages as submodules and applies the
            connections made in the constructor.
        """
        # the module has to be created before it is used and handed
        # back afterwards; both steps were absent
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ fan-in side of the adder: a PriorityCombMuxInPipe with num_rows
        inputs, each carrying an FPADDBaseData record.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width: bit-width of the operands in each record
            * id_wid: width of the id (mid) field in each record
            * num_rows: number of inputs to mux
        """
        self.num_rows = num_rows

        def make_spec():
            # a fresh record per call, used as the pass-through i/o spec
            return FPADDBaseData(width, id_wid)

        passthru = PassThroughStage(make_spec)
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ fan-out side of the adder: a CombMuxOutPipe with num_rows
        outputs, each carrying an FPPackData record.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width: bit-width of the result in each record
            * id_wid: width of the id (mid) field in each record
            * num_rows: number of outputs to fan out to
        """
        self.num_rows = num_rows

        def make_spec():
            # a fresh record per call, used as the pass-through i/o spec
            return FPPackData(width, id_wid)

        passthru = PassThroughStage(make_spec)
        CombMuxOutPipe.__init__(self, passthru, n_len=self.num_rows)
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width: bit-width of the operands and result
            * id_wid: width of the id (mid) field
            * num_rows: number of inputs muxed in and outputs fanned out
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        self.p = self.inpipe.p   # kinda annoying,
        self.n = self.outpipe.n  # use pipe in/out as this class in/out
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers fan-in, adder and fan-out as submodules and chains
            them: inpipe -> fpadd -> outpipe.
        """
        # the module has to be created before it is used and handed
        # back afterwards; both steps were absent
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)
        return m
1160 """ FPADD: stages as follows:
1166 FPAddBase---> FPAddBaseMod
1168 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1170 FPAddBase is tricky: it is both a stage and *has* stages.
1171 Connection to FPAddBaseMod therefore requires an in stb/ack
1172 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1173 needs to be the thing that raises the incoming stb.
1176 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1179 * width: bit-width of IEEE754. supported: 16, 32, 64
1180 * id_wid: an identifier that is sync-connected to the input
1181 * single_cycle: True indicates each stage to complete in 1 clock
1184 self
.id_wid
= id_wid
1185 self
.single_cycle
= single_cycle
1187 #self.out_z = FPOp(width)
1188 self
.ids
= FPID(id_wid
)
1191 for i
in range(rs_sz
):
1194 in_a
.name
= "in_a_%d" % i
1195 in_b
.name
= "in_b_%d" % i
1196 rs
.append((in_a
, in_b
))
1200 for i
in range(rs_sz
):
1202 out_z
.name
= "out_z_%d" % i
1204 self
.res
= Array(res
)
def add_state(self, state):
    """ registers an FSM state and hands it back to the caller

        * state: the state object to append to self.states

        Returns the same state object, so that callers can write
        "ab = self.add_state(ab)" and keep using the result.
    """
    self.states.append(state)
    # callers rebind and use the returned value (ab.ispec(), ab.setup,
    # getb.out_decode), so returning None broke every one of them
    return state
1212 def get_fragment(self
, platform
=None):
1213 """ creates the HDL code-fragment for FPAdd
1216 m
.submodules
+= self
.rs
1218 in_a
= self
.rs
[0][0]
1219 in_b
= self
.rs
[0][1]
1221 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1226 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1231 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1232 ab
= self
.add_state(ab
)
1233 abd
= ab
.ispec() # create an input spec object for FPADDBase
1234 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1235 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1238 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1241 with m
.FSM() as fsm
:
1243 for state
in self
.states
:
1244 with m
.State(state
.state_from
):
1250 if __name__
== "__main__":
1252 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1253 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1254 alu
.rs
[0][1].ports() + \
1255 alu
.res
[0].ports() + \
1256 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1258 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1259 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1260 alu
.in_t
.ports() + \
1261 alu
.out_z
.ports() + \
1262 [alu
.in_mid
, alu
.out_mid
])
1265 # works... but don't use, just do "python fname.py convert -t v"
1266 #print (verilog.convert(alu, ports=[
1267 # ports=alu.in_a.ports() + \
1268 # alu.in_b.ports() + \
1269 # alu.out_z.ports())