1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
, FPID
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
, FPGet2OpMod
, FPGet2Op
)
19 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
20 from fpcommon
.postcalc
import FPAddStage1Data
21 from fpcommon
.postnormalise
import (FPNorm1Data
, FPNorm1ModSingle
,
22 FPNorm1ModMulti
, FPNorm1Single
, FPNorm1Multi
)
25 class FPAddSpecialCasesMod
:
26 """ special cases: NaNs, infs, zeros, denormalised
27 NOTE: some of these are unique to add. see "Special Operations"
28 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
31 def __init__(self
, width
, id_wid
):
38 return FPADDBaseData(self
.width
, self
.id_wid
)
41 return FPSCData(self
.width
, self
.id_wid
)
43 def setup(self
, m
, i
):
44 """ links module to inputs and outputs
46 m
.submodules
.specialcases
= self
47 m
.d
.comb
+= self
.i
.eq(i
)
52 def elaborate(self
, platform
):
55 m
.submodules
.sc_out_z
= self
.o
.z
57 # decode: XXX really should move to separate stage
58 a1
= FPNumIn(None, self
.width
)
59 b1
= FPNumIn(None, self
.width
)
60 m
.submodules
.sc_decode_a
= a1
61 m
.submodules
.sc_decode_b
= b1
62 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
67 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
70 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
72 # if a is NaN or b is NaN return NaN
73 with m
.If(a1
.is_nan | b1
.is_nan
):
74 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
75 m
.d
.comb
+= self
.o
.z
.nan(0)
77 # XXX WEIRDNESS for FP16 non-canonical NaN handling
80 ## if a is zero and b is NaN return -b
81 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
82 # m.d.comb += self.o.out_do_z.eq(1)
83 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
85 ## if b is zero and a is NaN return -a
86 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
87 # m.d.comb += self.o.out_do_z.eq(1)
88 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
90 ## if a is -zero and b is NaN return -b
91 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
92 # m.d.comb += self.o.out_do_z.eq(1)
93 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
95 ## if b is -zero and a is NaN return -a
96 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
97 # m.d.comb += self.o.out_do_z.eq(1)
98 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
100 # if a is inf return inf (or NaN)
101 with m
.Elif(a1
.is_inf
):
102 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
103 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
104 # if a is inf and signs don't match return NaN
105 with m
.If(b1
.exp_128
& s_nomatch
):
106 m
.d
.comb
+= self
.o
.z
.nan(0)
108 # if b is inf return inf
109 with m
.Elif(b1
.is_inf
):
110 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
111 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
113 # if a is zero and b zero return signed-a/b
114 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
115 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
116 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
118 # if a is zero return b
119 with m
.Elif(a1
.is_zero
):
120 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
121 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
123 # if b is zero return a
124 with m
.Elif(b1
.is_zero
):
125 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
126 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
128 # if a equal to -b return zero (+ve zero)
129 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
130 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
131 m
.d
.comb
+= self
.o
.z
.zero(0)
133 # Denormalised Number checks next, so pass a/b data through
135 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
136 m
.d
.comb
+= self
.o
.a
.eq(a1
)
137 m
.d
.comb
+= self
.o
.b
.eq(b1
)
139 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
140 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
145 class FPAddSpecialCases(FPState
):
146 """ special cases: NaNs, infs, zeros, denormalised
147 NOTE: some of these are unique to add. see "Special Operations"
148 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
151 def __init__(self
, width
, id_wid
):
152 FPState
.__init
__(self
, "special_cases")
153 self
.mod
= FPAddSpecialCasesMod(width
)
154 self
.out_z
= self
.mod
.ospec()
155 self
.out_do_z
= Signal(reset_less
=True)
157 def setup(self
, m
, i
):
158 """ links module to inputs and outputs
160 self
.mod
.setup(m
, i
, self
.out_do_z
)
161 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
162 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
166 with m
.If(self
.out_do_z
):
169 m
.next
= "denormalise"
172 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
173 """ special cases: NaNs, infs, zeros, denormalised
174 NOTE: some of these are unique to add. see "Special Operations"
175 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
178 def __init__(self
, width
, id_wid
):
179 FPState
.__init
__(self
, "special_cases")
182 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
183 self
.out
= self
.ospec()
186 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
189 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
191 def setup(self
, m
, i
):
192 """ links module to inputs and outputs
194 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
195 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
197 chain
= StageChain([smod
, dmod
])
200 # only needed for break-out (early-out)
201 # self.out_do_z = smod.o.out_do_z
205 def process(self
, i
):
209 # for break-out (early-out)
210 #with m.If(self.out_do_z):
213 m
.d
.sync
+= self
.out
.eq(self
.process(None))
217 class FPAddAlignMultiMod(FPState
):
219 def __init__(self
, width
):
220 self
.in_a
= FPNumBase(width
)
221 self
.in_b
= FPNumBase(width
)
222 self
.out_a
= FPNumIn(None, width
)
223 self
.out_b
= FPNumIn(None, width
)
224 self
.exp_eq
= Signal(reset_less
=True)
226 def elaborate(self
, platform
):
227 # This one however (single-cycle) will do the shift
232 m
.submodules
.align_in_a
= self
.in_a
233 m
.submodules
.align_in_b
= self
.in_b
234 m
.submodules
.align_out_a
= self
.out_a
235 m
.submodules
.align_out_b
= self
.out_b
237 # NOTE: this does *not* do single-cycle multi-shifting,
238 # it *STAYS* in the align state until exponents match
240 # exponent of a greater than b: shift b down
241 m
.d
.comb
+= self
.exp_eq
.eq(0)
242 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
243 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
244 agtb
= Signal(reset_less
=True)
245 altb
= Signal(reset_less
=True)
246 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
247 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
249 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
250 # exponent of b greater than a: shift a down
252 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
253 # exponents equal: move to next stage.
255 m
.d
.comb
+= self
.exp_eq
.eq(1)
259 class FPAddAlignMulti(FPState
):
261 def __init__(self
, width
, id_wid
):
262 FPState
.__init
__(self
, "align")
263 self
.mod
= FPAddAlignMultiMod(width
)
264 self
.out_a
= FPNumIn(None, width
)
265 self
.out_b
= FPNumIn(None, width
)
266 self
.exp_eq
= Signal(reset_less
=True)
268 def setup(self
, m
, in_a
, in_b
):
269 """ links module to inputs and outputs
271 m
.submodules
.align
= self
.mod
272 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
273 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
274 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
275 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
276 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
279 with m
.If(self
.exp_eq
):
285 def __init__(self
, width
, id_wid
):
286 self
.a
= FPNumIn(None, width
)
287 self
.b
= FPNumIn(None, width
)
288 self
.z
= FPNumOut(width
, False)
289 self
.out_do_z
= Signal(reset_less
=True)
290 self
.oz
= Signal(width
, reset_less
=True)
291 self
.mid
= Signal(id_wid
, reset_less
=True)
294 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
295 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
298 class FPAddAlignSingleMod
:
300 def __init__(self
, width
, id_wid
):
303 self
.i
= self
.ispec()
304 self
.o
= self
.ospec()
307 return FPSCData(self
.width
, self
.id_wid
)
310 return FPNumIn2Ops(self
.width
, self
.id_wid
)
312 def process(self
, i
):
315 def setup(self
, m
, i
):
316 """ links module to inputs and outputs
318 m
.submodules
.align
= self
319 m
.d
.comb
+= self
.i
.eq(i
)
321 def elaborate(self
, platform
):
322 """ Aligns A against B or B against A, depending on which has the
323 greater exponent. This is done in a *single* cycle using
324 variable-width bit-shift
326 the shifter used here is quite expensive in terms of gates.
327 Mux A or B in (and out) into temporaries, as only one of them
328 needs to be aligned against the other
332 m
.submodules
.align_in_a
= self
.i
.a
333 m
.submodules
.align_in_b
= self
.i
.b
334 m
.submodules
.align_out_a
= self
.o
.a
335 m
.submodules
.align_out_b
= self
.o
.b
337 # temporary (muxed) input and output to be shifted
338 t_inp
= FPNumBase(self
.width
)
339 t_out
= FPNumIn(None, self
.width
)
340 espec
= (len(self
.i
.a
.e
), True)
341 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
342 m
.submodules
.align_t_in
= t_inp
343 m
.submodules
.align_t_out
= t_out
344 m
.submodules
.multishift_r
= msr
346 ediff
= Signal(espec
, reset_less
=True)
347 ediffr
= Signal(espec
, reset_less
=True)
348 tdiff
= Signal(espec
, reset_less
=True)
349 elz
= Signal(reset_less
=True)
350 egz
= Signal(reset_less
=True)
352 # connect multi-shifter to t_inp/out mantissa (and tdiff)
353 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
354 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
355 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
356 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
357 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
359 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
360 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
361 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
362 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
364 # default: A-exp == B-exp, A and B untouched (fall through)
365 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
366 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
367 # only one shifter (muxed)
368 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
369 # exponent of a greater than b: shift b down
370 with m
.If(~self
.i
.out_do_z
):
372 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
375 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
377 # exponent of b greater than a: shift a down
379 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
382 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
385 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
386 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
387 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
388 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
393 class FPAddAlignSingle(FPState
):
395 def __init__(self
, width
, id_wid
):
396 FPState
.__init
__(self
, "align")
397 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
398 self
.out_a
= FPNumIn(None, width
)
399 self
.out_b
= FPNumIn(None, width
)
401 def setup(self
, m
, i
):
402 """ links module to inputs and outputs
406 # NOTE: could be done as comb
407 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
408 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
414 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
416 def __init__(self
, width
, id_wid
):
417 FPState
.__init
__(self
, "align")
420 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
421 self
.a1o
= self
.ospec()
424 return FPSCData(self
.width
, self
.id_wid
)
427 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
429 def setup(self
, m
, i
):
430 """ links module to inputs and outputs
433 # chain AddAlignSingle, AddStage0 and AddStage1
434 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
435 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
436 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
438 chain
= StageChain([mod
, a0mod
, a1mod
])
443 def process(self
, i
):
447 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
448 m
.next
= "normalise_1"
451 class FPAddStage0Data
:
453 def __init__(self
, width
, id_wid
):
454 self
.z
= FPNumBase(width
, False)
455 self
.out_do_z
= Signal(reset_less
=True)
456 self
.oz
= Signal(width
, reset_less
=True)
457 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
458 self
.mid
= Signal(id_wid
, reset_less
=True)
461 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
462 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
465 class FPAddStage0Mod
:
467 def __init__(self
, width
, id_wid
):
470 self
.i
= self
.ispec()
471 self
.o
= self
.ospec()
474 return FPSCData(self
.width
, self
.id_wid
)
477 return FPAddStage0Data(self
.width
, self
.id_wid
)
479 def process(self
, i
):
482 def setup(self
, m
, i
):
483 """ links module to inputs and outputs
485 m
.submodules
.add0
= self
486 m
.d
.comb
+= self
.i
.eq(i
)
488 def elaborate(self
, platform
):
490 m
.submodules
.add0_in_a
= self
.i
.a
491 m
.submodules
.add0_in_b
= self
.i
.b
492 m
.submodules
.add0_out_z
= self
.o
.z
494 # store intermediate tests (and zero-extended mantissas)
495 seq
= Signal(reset_less
=True)
496 mge
= Signal(reset_less
=True)
497 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
498 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
499 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
500 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
501 am0
.eq(Cat(self
.i
.a
.m
, 0)),
502 bm0
.eq(Cat(self
.i
.b
.m
, 0))
504 # same-sign (both negative or both positive) add mantissas
505 with m
.If(~self
.i
.out_do_z
):
506 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
509 self
.o
.tot
.eq(am0
+ bm0
),
510 self
.o
.z
.s
.eq(self
.i
.a
.s
)
512 # a mantissa greater than b, use a
515 self
.o
.tot
.eq(am0
- bm0
),
516 self
.o
.z
.s
.eq(self
.i
.a
.s
)
518 # b mantissa greater than a, use b
521 self
.o
.tot
.eq(bm0
- am0
),
522 self
.o
.z
.s
.eq(self
.i
.b
.s
)
525 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
526 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
527 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
531 class FPAddStage0(FPState
):
532 """ First stage of add. covers same-sign (add) and subtract
533 special-casing when mantissas are greater or equal, to
534 give greatest accuracy.
537 def __init__(self
, width
, id_wid
):
538 FPState
.__init
__(self
, "add_0")
539 self
.mod
= FPAddStage0Mod(width
)
540 self
.o
= self
.mod
.ospec()
542 def setup(self
, m
, i
):
543 """ links module to inputs and outputs
547 # NOTE: these could be done as combinatorial (merge add0+add1)
548 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
554 class FPAddStage1Mod(FPState
):
555 """ Second stage of add: preparation for normalisation.
556 detects when tot sum is too big (tot[27] is kinda a carry bit)
559 def __init__(self
, width
, id_wid
):
562 self
.i
= self
.ispec()
563 self
.o
= self
.ospec()
566 return FPAddStage0Data(self
.width
, self
.id_wid
)
569 return FPAddStage1Data(self
.width
, self
.id_wid
)
571 def process(self
, i
):
574 def setup(self
, m
, i
):
575 """ links module to inputs and outputs
577 m
.submodules
.add1
= self
578 m
.submodules
.add1_out_overflow
= self
.o
.of
580 m
.d
.comb
+= self
.i
.eq(i
)
582 def elaborate(self
, platform
):
584 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
585 # tot[-1] (MSB) gets set when the sum overflows. shift result down
586 with m
.If(~self
.i
.out_do_z
):
587 with m
.If(self
.i
.tot
[-1]):
589 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
590 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
591 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
592 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
593 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
594 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
596 # tot[-1] (MSB) zero case
599 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
600 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
601 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
602 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
603 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
606 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
607 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
608 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
613 class FPAddStage1(FPState
):
615 def __init__(self
, width
, id_wid
):
616 FPState
.__init
__(self
, "add_1")
617 self
.mod
= FPAddStage1Mod(width
)
618 self
.out_z
= FPNumBase(width
, False)
619 self
.out_of
= Overflow()
620 self
.norm_stb
= Signal()
622 def setup(self
, m
, i
):
623 """ links module to inputs and outputs
627 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
629 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
630 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
631 m
.d
.sync
+= self
.norm_stb
.eq(1)
634 m
.next
= "normalise_1"
637 class FPNormToPack(FPState
, UnbufferedPipeline
):
639 def __init__(self
, width
, id_wid
):
640 FPState
.__init
__(self
, "normalise_1")
643 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
646 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
649 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
651 def setup(self
, m
, i
):
652 """ links module to inputs and outputs
655 # Normalisation, Rounding Corrections, Pack - in a chain
656 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
657 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
658 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
659 pmod
= FPPackMod(self
.width
, self
.id_wid
)
660 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
662 self
.out_z
= pmod
.ospec()
666 def process(self
, i
):
670 m
.d
.sync
+= self
.out_z
.eq(self
.process(None))
671 m
.next
= "pack_put_z"
676 def __init__(self
, width
, id_wid
):
677 self
.z
= FPNumBase(width
, False)
678 self
.out_do_z
= Signal(reset_less
=True)
679 self
.oz
= Signal(width
, reset_less
=True)
680 self
.mid
= Signal(id_wid
, reset_less
=True)
683 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
689 def __init__(self
, width
, id_wid
):
692 self
.i
= self
.ispec()
693 self
.out_z
= self
.ospec()
696 return FPNorm1Data(self
.width
, self
.id_wid
)
699 return FPRoundData(self
.width
, self
.id_wid
)
701 def process(self
, i
):
704 def setup(self
, m
, i
):
705 m
.submodules
.roundz
= self
706 m
.d
.comb
+= self
.i
.eq(i
)
708 def elaborate(self
, platform
):
710 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
711 with m
.If(~self
.i
.out_do_z
):
712 with m
.If(self
.i
.roundz
):
713 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
714 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
715 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
720 class FPRound(FPState
):
722 def __init__(self
, width
, id_wid
):
723 FPState
.__init
__(self
, "round")
724 self
.mod
= FPRoundMod(width
)
725 self
.out_z
= self
.ospec()
728 return self
.mod
.ispec()
731 return self
.mod
.ospec()
733 def setup(self
, m
, i
):
734 """ links module to inputs and outputs
739 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
740 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
743 m
.next
= "corrections"
746 class FPCorrectionsMod
:
748 def __init__(self
, width
, id_wid
):
751 self
.i
= self
.ispec()
752 self
.out_z
= self
.ospec()
755 return FPRoundData(self
.width
, self
.id_wid
)
758 return FPRoundData(self
.width
, self
.id_wid
)
760 def process(self
, i
):
763 def setup(self
, m
, i
):
764 """ links module to inputs and outputs
766 m
.submodules
.corrections
= self
767 m
.d
.comb
+= self
.i
.eq(i
)
769 def elaborate(self
, platform
):
771 m
.submodules
.corr_in_z
= self
.i
.z
772 m
.submodules
.corr_out_z
= self
.out_z
.z
773 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
774 with m
.If(~self
.i
.out_do_z
):
775 with m
.If(self
.i
.z
.is_denormalised
):
776 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
780 class FPCorrections(FPState
):
782 def __init__(self
, width
, id_wid
):
783 FPState
.__init
__(self
, "corrections")
784 self
.mod
= FPCorrectionsMod(width
)
785 self
.out_z
= self
.ospec()
788 return self
.mod
.ispec()
791 return self
.mod
.ospec()
793 def setup(self
, m
, in_z
):
794 """ links module to inputs and outputs
796 self
.mod
.setup(m
, in_z
)
798 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
799 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
807 def __init__(self
, width
, id_wid
):
808 self
.z
= Signal(width
, reset_less
=True)
809 self
.mid
= Signal(id_wid
, reset_less
=True)
812 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
815 return [self
.z
, self
.mid
]
820 def __init__(self
, width
, id_wid
):
823 self
.i
= self
.ispec()
824 self
.o
= self
.ospec()
827 return FPRoundData(self
.width
, self
.id_wid
)
830 return FPPackData(self
.width
, self
.id_wid
)
832 def process(self
, i
):
835 def setup(self
, m
, in_z
):
836 """ links module to inputs and outputs
838 m
.submodules
.pack
= self
839 m
.d
.comb
+= self
.i
.eq(in_z
)
841 def elaborate(self
, platform
):
843 z
= FPNumOut(self
.width
, False)
844 m
.submodules
.pack_in_z
= self
.i
.z
845 m
.submodules
.pack_out_z
= z
846 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
847 with m
.If(~self
.i
.out_do_z
):
848 with m
.If(self
.i
.z
.is_overflowed
):
849 m
.d
.comb
+= z
.inf(self
.i
.z
.s
)
851 m
.d
.comb
+= z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
853 m
.d
.comb
+= z
.v
.eq(self
.i
.oz
)
854 m
.d
.comb
+= self
.o
.z
.eq(z
.v
)
858 class FPPack(FPState
):
860 def __init__(self
, width
, id_wid
):
861 FPState
.__init
__(self
, "pack")
862 self
.mod
= FPPackMod(width
)
863 self
.out_z
= self
.ospec()
866 return self
.mod
.ispec()
869 return self
.mod
.ospec()
871 def setup(self
, m
, in_z
):
872 """ links module to inputs and outputs
874 self
.mod
.setup(m
, in_z
)
876 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
877 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
880 m
.next
= "pack_put_z"
883 class FPPutZ(FPState
):
885 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
886 FPState
.__init
__(self
, state
)
889 self
.to_state
= to_state
893 self
.out_mid
= out_mid
896 if self
.in_mid
is not None:
897 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
899 self
.out_z
.z
.v
.eq(self
.in_z
)
901 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
902 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
903 m
.next
= self
.to_state
905 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
908 class FPPutZIdx(FPState
):
910 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
911 FPState
.__init
__(self
, state
)
914 self
.to_state
= to_state
920 outz_stb
= Signal(reset_less
=True)
921 outz_ack
= Signal(reset_less
=True)
922 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
923 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
926 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
928 with m
.If(outz_stb
& outz_ack
):
929 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
930 m
.next
= self
.to_state
932 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
936 def __init__(self
, width
, id_wid
):
938 self
.mid
= Signal(id_wid
, reset_less
=True)
941 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
944 return [self
.z
, self
.mid
]
949 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
952 * width: bit-width of IEEE754. supported: 16, 32, 64
953 * id_wid: an identifier that is sync-connected to the input
954 * single_cycle: True indicates each stage to complete in 1 clock
955 * compact: True indicates a reduced number of stages
959 self
.single_cycle
= single_cycle
960 self
.compact
= compact
962 self
.in_t
= Trigger()
963 self
.i
= self
.ispec()
964 self
.o
= self
.ospec()
969 return FPADDBaseData(self
.width
, self
.id_wid
)
972 return FPOpData(self
.width
, self
.id_wid
)
974 def add_state(self
, state
):
975 self
.states
.append(state
)
978 def get_fragment(self
, platform
=None):
979 """ creates the HDL code-fragment for FPAdd
982 m
.submodules
.out_z
= self
.o
.z
983 m
.submodules
.in_t
= self
.in_t
985 self
.get_compact_fragment(m
, platform
)
987 self
.get_longer_fragment(m
, platform
)
991 for state
in self
.states
:
992 with m
.State(state
.state_from
):
997 def get_longer_fragment(self
, m
, platform
=None):
999 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1001 get
.setup(m
, self
.i
)
1004 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
1006 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1007 sc
.setup(m
, a
, b
, self
.in_mid
)
1009 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1010 dn
.setup(m
, a
, b
, sc
.in_mid
)
1012 if self
.single_cycle
:
1013 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1014 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1016 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1017 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1019 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1020 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1022 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1023 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1025 if self
.single_cycle
:
1026 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1027 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1029 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1030 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1032 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1033 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1035 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1036 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1038 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1039 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1041 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1042 pa
.in_mid
, self
.out_mid
))
1044 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1045 pa
.in_mid
, self
.out_mid
))
1047 def get_compact_fragment(self
, m
, platform
=None):
1050 get
= FPGet2Op("get_ops", "special_cases", self
.width
, self
.id_wid
)
1051 sc
= FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
)
1052 alm
= FPAddAlignSingleAdd(self
.width
, self
.id_wid
)
1053 n1
= FPNormToPack(self
.width
, self
.id_wid
)
1055 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
1057 chainlist
= [get
, sc
, alm
, n1
]
1058 chain
= StageChain(chainlist
, specallocate
=True)
1059 chain
.setup(m
, self
.i
)
1061 for mod
in chainlist
:
1062 sc
= self
.add_state(mod
)
1064 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1065 n1
.out_z
.mid
, self
.o
.mid
))
1067 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1068 # sc.o.mid, self.o.mid))
1071 class FPADDBase(FPState
):
1073 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1076 * width: bit-width of IEEE754. supported: 16, 32, 64
1077 * id_wid: an identifier that is sync-connected to the input
1078 * single_cycle: True indicates each stage to complete in 1 clock
1080 FPState
.__init
__(self
, "fpadd")
1082 self
.single_cycle
= single_cycle
1083 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1084 self
.o
= self
.ospec()
1086 self
.in_t
= Trigger()
1087 self
.i
= self
.ispec()
1089 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1090 self
.in_accept
= Signal(reset_less
=True)
1091 self
.add_stb
= Signal(reset_less
=True)
1092 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1095 return self
.mod
.ispec()
1098 return self
.mod
.ospec()
1100 def setup(self
, m
, i
, add_stb
, in_mid
):
1101 m
.d
.comb
+= [self
.i
.eq(i
),
1102 self
.mod
.i
.eq(self
.i
),
1103 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1104 #self.add_stb.eq(add_stb),
1105 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1106 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1107 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1108 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1109 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1110 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1113 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1114 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1115 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1116 #m.d.sync += self.in_t.stb.eq(0)
1118 m
.submodules
.fpadd
= self
.mod
1120 def action(self
, m
):
1122 # in_accept is set on incoming strobe HIGH and ack LOW.
1123 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1125 #with m.If(self.in_t.ack):
1126 # m.d.sync += self.in_t.stb.eq(0)
1127 with m
.If(~self
.z_done
):
1128 # not done: test for accepting an incoming operand pair
1129 with m
.If(self
.in_accept
):
1131 self
.add_ack
.eq(1), # acknowledge receipt...
1132 self
.in_t
.stb
.eq(1), # initiate add
1135 m
.d
.sync
+= [self
.add_ack
.eq(0),
1136 self
.in_t
.stb
.eq(0),
1140 # done: acknowledge, and write out id and value
1141 m
.d
.sync
+= [self
.add_ack
.eq(1),
1148 if self
.in_mid
is not None:
1149 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1152 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1154 # move to output state on detecting z ack
1155 with m
.If(self
.out_z
.trigger
):
1156 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1159 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1162 class FPADDBasePipe(ControlBase
):
1163 def __init__(self
, width
, id_wid
):
1164 ControlBase
.__init
__(self
)
1165 self
.pipe1
= FPAddSpecialCasesDeNorm(width
, id_wid
)
1166 self
.pipe2
= FPAddAlignSingleAdd(width
, id_wid
)
1167 self
.pipe3
= FPNormToPack(width
, id_wid
)
1169 self
._eqs
= self
.connect([self
.pipe1
, self
.pipe2
, self
.pipe3
])
1171 def elaborate(self
, platform
):
1173 m
.submodules
.scnorm
= self
.pipe1
1174 m
.submodules
.addalign
= self
.pipe2
1175 m
.submodules
.normpack
= self
.pipe3
1176 m
.d
.comb
+= self
._eqs
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in front end for the FPADD pipeline.

        Wraps a PassThroughStage whose spec factory produces
        FPADDBaseData(width, id_wid) and passes it to
        PriorityCombMuxInPipe with p_len set to num_rows.

        * width:    forwarded to FPADDBaseData
        * id_wid:   forwarded to FPADDBaseData
        * num_rows: stored on the instance and used as p_len
    """

    def __init__(self, width, id_wid, num_rows):
        # stored before the base-class constructor runs, which is handed
        # the value back through p_len
        self.num_rows = num_rows

        def iospec():
            # spec factory: a fresh FPADDBaseData on every call
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out back end for the FPADD pipeline.

        Wraps a PassThroughStage whose spec factory produces
        FPPackData(width, id_wid) and passes it to CombMuxOutPipe
        with n_len set to num_rows.

        * width:    forwarded to FPPackData
        * id_wid:   forwarded to FPPackData
        * num_rows: stored on the instance and used as n_len
    """

    def __init__(self, width, id_wid, num_rows):
        # stored before the base-class constructor runs, which is handed
        # the value back through n_len
        self.num_rows = num_rows

        def iospec():
            # spec factory: a fresh FPPackData on every call
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
1196 class FPADDMuxInOut
:
1197 """ Reservation-Station version of FPADD pipeline.
1199 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
1200 * 3-stage adder pipeline
1201 * fan-out on outputs (an array of FPPackData: z,mid)
1203 Fan-in and Fan-out are combinatorial.
1205 def __init__(self
, width
, id_wid
, num_rows
):
1206 self
.num_rows
= num_rows
1207 self
.inpipe
= FPADDInMuxPipe(width
, id_wid
, num_rows
) # fan-in
1208 self
.fpadd
= FPADDBasePipe(width
, id_wid
) # add stage
1209 self
.outpipe
= FPADDMuxOutPipe(width
, id_wid
, num_rows
) # fan-out
1211 self
.p
= self
.inpipe
.p
# kinda annoying,
1212 self
.n
= self
.outpipe
.n
# use pipe in/out as this class in/out
1213 self
._ports
= self
.inpipe
.ports() + self
.outpipe
.ports()
1215 def elaborate(self
, platform
):
1217 m
.submodules
.inpipe
= self
.inpipe
1218 m
.submodules
.fpadd
= self
.fpadd
1219 m
.submodules
.outpipe
= self
.outpipe
1221 m
.d
.comb
+= self
.inpipe
.n
.connect_to_next(self
.fpadd
.p
)
1222 m
.d
.comb
+= self
.fpadd
.connect_to_next(self
.outpipe
)
1231 """ FPADD: stages as follows:
1237 FPAddBase---> FPAddBaseMod
1239 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1241 FPAddBase is tricky: it is both a stage and *has* stages.
1242 Connection to FPAddBaseMod therefore requires an in stb/ack
1243 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1244 needs to be the thing that raises the incoming stb.
1247 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1250 * width: bit-width of IEEE754. supported: 16, 32, 64
1251 * id_wid: an identifier that is sync-connected to the input
1252 * single_cycle: True indicates each stage to complete in 1 clock
1255 self
.id_wid
= id_wid
1256 self
.single_cycle
= single_cycle
1258 #self.out_z = FPOp(width)
1259 self
.ids
= FPID(id_wid
)
1262 for i
in range(rs_sz
):
1265 in_a
.name
= "in_a_%d" % i
1266 in_b
.name
= "in_b_%d" % i
1267 rs
.append((in_a
, in_b
))
1271 for i
in range(rs_sz
):
1273 out_z
.name
= "out_z_%d" % i
1275 self
.res
= Array(res
)
1279 def add_state(self
, state
):
1280 self
.states
.append(state
)
1283 def get_fragment(self
, platform
=None):
1284 """ creates the HDL code-fragment for FPAdd
1287 m
.submodules
+= self
.rs
1289 in_a
= self
.rs
[0][0]
1290 in_b
= self
.rs
[0][1]
1292 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1297 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1302 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1303 ab
= self
.add_state(ab
)
1304 abd
= ab
.ispec() # create an input spec object for FPADDBase
1305 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1306 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1309 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1312 with m
.FSM() as fsm
:
1314 for state
in self
.states
:
1315 with m
.State(state
.state_from
):
1321 if __name__
== "__main__":
1323 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1324 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1325 alu
.rs
[0][1].ports() + \
1326 alu
.res
[0].ports() + \
1327 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1329 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1330 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1331 alu
.in_t
.ports() + \
1332 alu
.out_z
.ports() + \
1333 [alu
.in_mid
, alu
.out_mid
])
1336 # works... but don't use, just do "python fname.py convert -t v"
1337 #print (verilog.convert(alu, ports=[
1338 # ports=alu.in_a.ports() + \
1339 # alu.in_b.ports() + \
1340 # alu.out_z.ports())