1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
, FPGet2OpMod
, FPGet2Op
)
19 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
22 class FPAddSpecialCasesMod
:
23 """ special cases: NaNs, infs, zeros, denormalised
24 NOTE: some of these are unique to add. see "Special Operations"
25 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
28 def __init__(self
, width
, id_wid
):
35 return FPADDBaseData(self
.width
, self
.id_wid
)
38 return FPSCData(self
.width
, self
.id_wid
)
40 def setup(self
, m
, i
):
41 """ links module to inputs and outputs
43 m
.submodules
.specialcases
= self
44 m
.d
.comb
+= self
.i
.eq(i
)
49 def elaborate(self
, platform
):
52 m
.submodules
.sc_out_z
= self
.o
.z
54 # decode: XXX really should move to separate stage
55 a1
= FPNumIn(None, self
.width
)
56 b1
= FPNumIn(None, self
.width
)
57 m
.submodules
.sc_decode_a
= a1
58 m
.submodules
.sc_decode_b
= b1
59 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
64 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
67 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
69 # if a is NaN or b is NaN return NaN
70 with m
.If(a1
.is_nan | b1
.is_nan
):
71 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
72 m
.d
.comb
+= self
.o
.z
.nan(0)
74 # XXX WEIRDNESS for FP16 non-canonical NaN handling
77 ## if a is zero and b is NaN return -b
78 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
79 # m.d.comb += self.o.out_do_z.eq(1)
80 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
82 ## if b is zero and a is NaN return -a
83 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
84 # m.d.comb += self.o.out_do_z.eq(1)
85 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
87 ## if a is -zero and b is NaN return -b
88 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
89 # m.d.comb += self.o.out_do_z.eq(1)
90 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
92 ## if b is -zero and a is NaN return -a
93 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
94 # m.d.comb += self.o.out_do_z.eq(1)
95 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
97 # if a is inf return inf (or NaN)
98 with m
.Elif(a1
.is_inf
):
99 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
100 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
101 # if a is inf and signs don't match return NaN
102 with m
.If(b1
.exp_128
& s_nomatch
):
103 m
.d
.comb
+= self
.o
.z
.nan(0)
105 # if b is inf return inf
106 with m
.Elif(b1
.is_inf
):
107 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
108 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
110 # if a is zero and b zero return signed-a/b
111 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
112 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
113 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
115 # if a is zero return b
116 with m
.Elif(a1
.is_zero
):
117 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
118 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
120 # if b is zero return a
121 with m
.Elif(b1
.is_zero
):
122 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
123 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
125 # if a equal to -b return zero (+ve zero)
126 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
127 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
128 m
.d
.comb
+= self
.o
.z
.zero(0)
130 # Denormalised Number checks next, so pass a/b data through
132 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
133 m
.d
.comb
+= self
.o
.a
.eq(a1
)
134 m
.d
.comb
+= self
.o
.b
.eq(b1
)
136 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
137 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
143 def __init__(self
, id_wid
):
146 self
.in_mid
= Signal(id_wid
, reset_less
=True)
147 self
.out_mid
= Signal(id_wid
, reset_less
=True)
153 if self
.id_wid
is not None:
154 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
157 class FPAddSpecialCases(FPState
):
158 """ special cases: NaNs, infs, zeros, denormalised
159 NOTE: some of these are unique to add. see "Special Operations"
160 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Create the "special_cases" FSM state and its worker module.

    width  -- forwarded to FPAddSpecialCasesMod
    id_wid -- forwarded to FPAddSpecialCasesMod

    The state's output record (out_z) is obtained from the module's
    ospec(); out_do_z is a separate reset-less Signal owned by this state.
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod's constructor takes (width, id_wid); passing
    # width alone raised TypeError as soon as this state was constructed.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
169 def setup(self
, m
, i
):
170 """ links module to inputs and outputs
172 self
.mod
.setup(m
, i
, self
.out_do_z
)
173 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
174 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
178 with m
.If(self
.out_do_z
):
181 m
.next
= "denormalise"
184 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
185 """ special cases: NaNs, infs, zeros, denormalised
186 NOTE: some of these are unique to add. see "Special Operations"
187 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
190 def __init__(self
, width
, id_wid
):
191 FPState
.__init
__(self
, "special_cases")
194 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
195 self
.out
= self
.ospec()
198 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
201 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
203 def setup(self
, m
, i
):
204 """ links module to inputs and outputs
206 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
207 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
209 chain
= StageChain([smod
, dmod
])
212 # only needed for break-out (early-out)
213 # self.out_do_z = smod.o.out_do_z
217 def process(self
, i
):
221 # for break-out (early-out)
222 #with m.If(self.out_do_z):
225 m
.d
.sync
+= self
.out
.eq(self
.process(None))
229 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Declare the ports of the multi-cycle alignment module.

    width -- passed unchanged to every FPNumBase / FPNumIn created here.

    Ports:
      in_a, in_b   -- operands as FPNumBase(width)
      out_a, out_b -- results as FPNumIn(None, width)
      exp_eq       -- reset-less Signal
    """
    # operands
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # results
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # status flag
    self.exp_eq = Signal(reset_less=True)
238 def elaborate(self
, platform
):
239 # This one however (single-cycle) will do the shift
244 m
.submodules
.align_in_a
= self
.in_a
245 m
.submodules
.align_in_b
= self
.in_b
246 m
.submodules
.align_out_a
= self
.out_a
247 m
.submodules
.align_out_b
= self
.out_b
249 # NOTE: this does *not* do single-cycle multi-shifting,
250 # it *STAYS* in the align state until exponents match
252 # exponent of a greater than b: shift b down
253 m
.d
.comb
+= self
.exp_eq
.eq(0)
254 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
255 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
256 agtb
= Signal(reset_less
=True)
257 altb
= Signal(reset_less
=True)
258 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
259 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
261 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
262 # exponent of b greater than a: shift a down
264 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
265 # exponents equal: move to next stage.
267 m
.d
.comb
+= self
.exp_eq
.eq(1)
271 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Create the "align" FSM state wrapping FPAddAlignMultiMod.

    width  -- forwarded to the module and to the FPNumIn outputs
    id_wid -- accepted for signature parity with the other states;
              not used by this constructor
    """
    FPState.__init__(self, "align")

    # worker module (its constructor takes width only)
    self.mod = FPAddAlignMultiMod(width)

    # outputs held by this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # state-level exp_eq flag
    self.exp_eq = Signal(reset_less=True)
280 def setup(self
, m
, in_a
, in_b
):
281 """ links module to inputs and outputs
283 m
.submodules
.align
= self
.mod
284 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
285 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
286 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
287 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
288 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
291 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Declare the fields of this two-operand data record.

    width  -- passed to FPNumIn / FPNumOut and used as the shape of oz
    id_wid -- shape of the mid Signal

    Fields:
      a, b     -- FPNumIn(None, width)
      z        -- FPNumOut(width, False)
      out_do_z -- reset-less Signal
      oz       -- reset-less Signal(width)
      mid      -- reset-less Signal(id_wid)
    """
    # operands
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)

    # result number
    self.z = FPNumOut(width, False)

    # out_do_z flag and the oz value that travels with it
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # identifier carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
306 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
307 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
310 class FPAddAlignSingleMod
:
312 def __init__(self
, width
, id_wid
):
315 self
.i
= self
.ispec()
316 self
.o
= self
.ospec()
319 return FPSCData(self
.width
, self
.id_wid
)
322 return FPNumIn2Ops(self
.width
, self
.id_wid
)
324 def process(self
, i
):
327 def setup(self
, m
, i
):
328 """ links module to inputs and outputs
330 m
.submodules
.align
= self
331 m
.d
.comb
+= self
.i
.eq(i
)
333 def elaborate(self
, platform
):
334 """ Aligns A against B or B against A, depending on which has the
335 greater exponent. This is done in a *single* cycle using
336 variable-width bit-shift
338 the shifter used here is quite expensive in terms of gates.
339 Mux A or B in (and out) into temporaries, as only one of them
340 needs to be aligned against the other
344 m
.submodules
.align_in_a
= self
.i
.a
345 m
.submodules
.align_in_b
= self
.i
.b
346 m
.submodules
.align_out_a
= self
.o
.a
347 m
.submodules
.align_out_b
= self
.o
.b
349 # temporary (muxed) input and output to be shifted
350 t_inp
= FPNumBase(self
.width
)
351 t_out
= FPNumIn(None, self
.width
)
352 espec
= (len(self
.i
.a
.e
), True)
353 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
354 m
.submodules
.align_t_in
= t_inp
355 m
.submodules
.align_t_out
= t_out
356 m
.submodules
.multishift_r
= msr
358 ediff
= Signal(espec
, reset_less
=True)
359 ediffr
= Signal(espec
, reset_less
=True)
360 tdiff
= Signal(espec
, reset_less
=True)
361 elz
= Signal(reset_less
=True)
362 egz
= Signal(reset_less
=True)
364 # connect multi-shifter to t_inp/out mantissa (and tdiff)
365 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
366 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
367 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
368 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
369 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
371 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
372 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
373 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
374 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
376 # default: A-exp == B-exp, A and B untouched (fall through)
377 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
378 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
379 # only one shifter (muxed)
380 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
381 # exponent of a greater than b: shift b down
382 with m
.If(~self
.i
.out_do_z
):
384 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
387 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
389 # exponent of b greater than a: shift a down
391 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
394 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
397 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
398 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
399 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
400 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
405 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Create the "align" FSM state wrapping FPAddAlignSingleMod.

    width  -- forwarded to the module and to the FPNumIn outputs
    id_wid -- forwarded to the module
    """
    FPState.__init__(self, "align")

    # worker module
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # outputs held by this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
413 def setup(self
, m
, i
):
414 """ links module to inputs and outputs
418 # NOTE: could be done as comb
419 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
420 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
426 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
428 def __init__(self
, width
, id_wid
):
429 FPState
.__init
__(self
, "align")
432 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
433 self
.a1o
= self
.ospec()
436 return FPSCData(self
.width
, self
.id_wid
)
439 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
441 def setup(self
, m
, i
):
442 """ links module to inputs and outputs
445 # chain AddAlignSingle, AddStage0 and AddStage1
446 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
447 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
448 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
450 chain
= StageChain([mod
, a0mod
, a1mod
])
455 def process(self
, i
):
459 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
460 m
.next
= "normalise_1"
463 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Declare the fields of the add-stage-0 data record.

    width  -- passed to FPNumBase and used as the shape of oz
    id_wid -- shape of the mid Signal

    Fields:
      z        -- FPNumBase(width, False)
      out_do_z -- reset-less Signal
      oz       -- reset-less Signal(width)
      tot      -- reset-less Signal, 4 bits wider than z.m_width
      mid      -- reset-less Signal(id_wid)
    """
    self.z = FPNumBase(width, False)

    # out_do_z flag and the oz value that travels with it
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # tot is sized from z, so it must be created after z
    self.tot = Signal(self.z.m_width + 4, reset_less=True)

    # identifier carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
473 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
474 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
477 class FPAddStage0Mod
:
479 def __init__(self
, width
, id_wid
):
482 self
.i
= self
.ispec()
483 self
.o
= self
.ospec()
486 return FPSCData(self
.width
, self
.id_wid
)
489 return FPAddStage0Data(self
.width
, self
.id_wid
)
491 def process(self
, i
):
494 def setup(self
, m
, i
):
495 """ links module to inputs and outputs
497 m
.submodules
.add0
= self
498 m
.d
.comb
+= self
.i
.eq(i
)
500 def elaborate(self
, platform
):
502 m
.submodules
.add0_in_a
= self
.i
.a
503 m
.submodules
.add0_in_b
= self
.i
.b
504 m
.submodules
.add0_out_z
= self
.o
.z
506 # store intermediate tests (and zero-extended mantissas)
507 seq
= Signal(reset_less
=True)
508 mge
= Signal(reset_less
=True)
509 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
510 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
511 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
512 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
513 am0
.eq(Cat(self
.i
.a
.m
, 0)),
514 bm0
.eq(Cat(self
.i
.b
.m
, 0))
516 # same-sign (both negative or both positive) add mantissas
517 with m
.If(~self
.i
.out_do_z
):
518 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
521 self
.o
.tot
.eq(am0
+ bm0
),
522 self
.o
.z
.s
.eq(self
.i
.a
.s
)
524 # a mantissa greater than b, use a
527 self
.o
.tot
.eq(am0
- bm0
),
528 self
.o
.z
.s
.eq(self
.i
.a
.s
)
530 # b mantissa greater than a, use b
533 self
.o
.tot
.eq(bm0
- am0
),
534 self
.o
.z
.s
.eq(self
.i
.b
.s
)
537 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
538 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
539 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
543 class FPAddStage0(FPState
):
544 """ First stage of add. covers same-sign (add) and subtract
545 special-casing when mantissas are greater or equal, to
546 give greatest accuracy.
def __init__(self, width, id_wid):
    """Create the "add_0" FSM state and its worker module.

    width  -- forwarded to FPAddStage0Mod
    id_wid -- forwarded to FPAddStage0Mod

    The state's output record (o) is obtained from the module's ospec().
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod's constructor takes (width, id_wid); the original
    # one-argument call raised TypeError at construction time.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
554 def setup(self
, m
, i
):
555 """ links module to inputs and outputs
559 # NOTE: these could be done as combinatorial (merge add0+add1)
560 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
566 class FPAddStage1Data
:
568 def __init__(self
, width
, id_wid
):
569 self
.z
= FPNumBase(width
, False)
570 self
.out_do_z
= Signal(reset_less
=True)
571 self
.oz
= Signal(width
, reset_less
=True)
573 self
.mid
= Signal(id_wid
, reset_less
=True)
576 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
577 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
581 class FPAddStage1Mod(FPState
):
582 """ Second stage of add: preparation for normalisation.
583 detects when tot sum is too big (tot[27] is kinda a carry bit)
586 def __init__(self
, width
, id_wid
):
589 self
.i
= self
.ispec()
590 self
.o
= self
.ospec()
593 return FPAddStage0Data(self
.width
, self
.id_wid
)
596 return FPAddStage1Data(self
.width
, self
.id_wid
)
598 def process(self
, i
):
601 def setup(self
, m
, i
):
602 """ links module to inputs and outputs
604 m
.submodules
.add1
= self
605 m
.submodules
.add1_out_overflow
= self
.o
.of
607 m
.d
.comb
+= self
.i
.eq(i
)
609 def elaborate(self
, platform
):
611 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
612 # tot[-1] (MSB) gets set when the sum overflows. shift result down
613 with m
.If(~self
.i
.out_do_z
):
614 with m
.If(self
.i
.tot
[-1]):
616 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
617 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
618 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
619 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
620 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
621 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
623 # tot[-1] (MSB) zero case
626 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
627 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
628 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
629 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
630 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
633 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
634 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
635 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
640 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Create the "add_1" FSM state and its worker module.

    width  -- forwarded to FPAddStage1Mod and to the out_z FPNumBase
    id_wid -- forwarded to FPAddStage1Mod

    Attributes created: out_z (FPNumBase(width, False)), out_of
    (Overflow()) and norm_stb (a Signal with default arguments).
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod's constructor takes (width, id_wid); the original
    # one-argument call raised TypeError at construction time.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
649 def setup(self
, m
, i
):
650 """ links module to inputs and outputs
654 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
656 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
657 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
658 m
.d
.sync
+= self
.norm_stb
.eq(1)
661 m
.next
= "normalise_1"
def __init__(self, width, id_wid):
    """Declare the fields of this post-normalisation data record.

    width  -- passed to FPNumBase and used as the shape of oz
    id_wid -- shape of the mid Signal

    Fields:
      roundz   -- reset-less Signal
      z        -- FPNumBase(width, False)
      out_do_z -- reset-less Signal
      oz       -- reset-less Signal(width)
      mid      -- reset-less Signal(id_wid)
    """
    # roundz flag
    self.roundz = Signal(reset_less=True)

    # the number itself
    self.z = FPNumBase(width, False)

    # out_do_z flag and the oz value that travels with it
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # identifier carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
674 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
675 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
678 class FPNorm1ModSingle
:
680 def __init__(self
, width
, id_wid
):
683 self
.i
= self
.ispec()
684 self
.o
= self
.ospec()
687 return FPAddStage1Data(self
.width
, self
.id_wid
)
690 return FPNorm1Data(self
.width
, self
.id_wid
)
692 def setup(self
, m
, i
):
693 """ links module to inputs and outputs
695 m
.submodules
.normalise_1
= self
696 m
.d
.comb
+= self
.i
.eq(i
)
698 def process(self
, i
):
701 def elaborate(self
, platform
):
704 mwid
= self
.o
.z
.m_width
+2
705 pe
= PriorityEncoder(mwid
)
706 m
.submodules
.norm_pe
= pe
709 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
711 m
.submodules
.norm1_out_z
= self
.o
.z
712 m
.submodules
.norm1_out_overflow
= of
713 m
.submodules
.norm1_in_z
= self
.i
.z
714 m
.submodules
.norm1_in_overflow
= self
.i
.of
717 m
.submodules
.norm1_insel_z
= i
.z
718 m
.submodules
.norm1_insel_overflow
= i
.of
720 espec
= (len(i
.z
.e
), True)
721 ediff_n126
= Signal(espec
, reset_less
=True)
722 msr
= MultiShiftRMerge(mwid
, espec
)
723 m
.submodules
.multishift_r
= msr
725 m
.d
.comb
+= i
.eq(self
.i
)
726 # initialise out from in (overridden below)
727 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
728 m
.d
.comb
+= of
.eq(i
.of
)
729 # normalisation increase/decrease conditions
730 decrease
= Signal(reset_less
=True)
731 increase
= Signal(reset_less
=True)
732 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
733 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
735 with m
.If(~self
.i
.out_do_z
):
737 # *sigh* not entirely obvious: count leading zeros (clz)
738 # with a PriorityEncoder: to find from the MSB
739 # we reverse the order of the bits.
740 temp_m
= Signal(mwid
, reset_less
=True)
741 temp_s
= Signal(mwid
+1, reset_less
=True)
742 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
743 # make sure that the amount to decrease by does NOT
744 # go below the minimum non-INF/NaN exponent
745 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
748 # cat round and guard bits back into the mantissa
749 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
750 pe
.i
.eq(temp_m
[::-1]), # inverted
751 clz
.eq(limclz
), # count zeros from MSB down
752 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
753 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
754 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
755 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
756 # overflow in bits 0..1: got shifted too (leave sticky)
757 of
.guard
.eq(temp_s
[1]), # guard
758 of
.round_bit
.eq(temp_s
[0]), # round
761 with m
.Elif(increase
):
762 temp_m
= Signal(mwid
+1, reset_less
=True)
764 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
766 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
767 # connect multi-shifter to inp/out mantissa (and ediff)
769 msr
.diff
.eq(ediff_n126
),
770 self
.o
.z
.m
.eq(msr
.m
[3:]),
771 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
772 # overflow in bits 0..1: got shifted too (leave sticky)
773 of
.guard
.eq(temp_s
[2]), # guard
774 of
.round_bit
.eq(temp_s
[1]), # round
775 of
.sticky
.eq(temp_s
[0]), # sticky
776 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
779 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
780 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
781 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
786 class FPNorm1ModMulti
:
788 def __init__(self
, width
, single_cycle
=True):
790 self
.in_select
= Signal(reset_less
=True)
791 self
.in_z
= FPNumBase(width
, False)
792 self
.in_of
= Overflow()
793 self
.temp_z
= FPNumBase(width
, False)
794 self
.temp_of
= Overflow()
795 self
.out_z
= FPNumBase(width
, False)
796 self
.out_of
= Overflow()
798 def elaborate(self
, platform
):
801 m
.submodules
.norm1_out_z
= self
.out_z
802 m
.submodules
.norm1_out_overflow
= self
.out_of
803 m
.submodules
.norm1_temp_z
= self
.temp_z
804 m
.submodules
.norm1_temp_of
= self
.temp_of
805 m
.submodules
.norm1_in_z
= self
.in_z
806 m
.submodules
.norm1_in_overflow
= self
.in_of
808 in_z
= FPNumBase(self
.width
, False)
810 m
.submodules
.norm1_insel_z
= in_z
811 m
.submodules
.norm1_insel_overflow
= in_of
813 # select which of temp or in z/of to use
814 with m
.If(self
.in_select
):
815 m
.d
.comb
+= in_z
.eq(self
.in_z
)
816 m
.d
.comb
+= in_of
.eq(self
.in_of
)
818 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
819 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
820 # initialise out from in (overridden below)
821 m
.d
.comb
+= self
.out_z
.eq(in_z
)
822 m
.d
.comb
+= self
.out_of
.eq(in_of
)
823 # normalisation increase/decrease conditions
824 decrease
= Signal(reset_less
=True)
825 increase
= Signal(reset_less
=True)
826 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
827 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
828 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
832 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
833 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
834 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
835 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
836 self
.out_of
.round_bit
.eq(0), # reset round bit
837 self
.out_of
.m0
.eq(in_of
.guard
),
840 with m
.Elif(increase
):
842 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
843 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
844 self
.out_of
.guard
.eq(in_z
.m
[0]),
845 self
.out_of
.m0
.eq(in_z
.m
[1]),
846 self
.out_of
.round_bit
.eq(in_of
.guard
),
847 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
853 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """Create the "normalise_1" FSM state (single-cycle variant).

    width        -- forwarded to FPNorm1ModSingle and to out_z
    id_wid       -- forwarded to FPNorm1ModSingle
    single_cycle -- accepted but not used by this constructor

    Attributes created: o (from this state's ospec()), out_z
    (FPNumBase(width, False)) and out_roundz (reset-less Signal).
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle's constructor takes (width, id_wid); the original
    # one-argument call raised TypeError at construction time.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # self.mod must exist before ospec() is called on this state
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
863 return self
.mod
.ispec()
866 return self
.mod
.ospec()
868 def setup(self
, m
, i
):
869 """ links module to inputs and outputs
877 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """Create the "normalise_1" FSM state (multi-cycle variant).

    width  -- forwarded to FPNorm1ModMulti and to the FPNumBase holders
    id_wid -- accepted for signature parity; not used by this constructor
    """
    FPState.__init__(self, "normalise_1")

    # worker module
    self.mod = FPNorm1ModMulti(width)

    # strobe / acknowledge pair (ack has an explicit reset value of 0)
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)

    # status flags
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # temp_* holders
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # outputs
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
891 def setup(self
, m
, in_z
, in_of
, norm_stb
):
892 """ links module to inputs and outputs
894 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
895 self
.in_accept
, self
.temp_z
, self
.temp_of
,
896 self
.out_z
, self
.out_norm
)
898 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
899 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
902 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
903 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
904 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
905 with m
.If(self
.out_norm
):
906 with m
.If(self
.in_accept
):
911 m
.d
.sync
+= self
.ack
.eq(0)
913 # normalisation not required (or done).
915 m
.d
.sync
+= self
.ack
.eq(1)
916 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
919 class FPNormToPack(FPState
, UnbufferedPipeline
):
921 def __init__(self
, width
, id_wid
):
922 FPState
.__init
__(self
, "normalise_1")
925 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
928 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
931 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
933 def setup(self
, m
, i
):
934 """ links module to inputs and outputs
937 # Normalisation, Rounding Corrections, Pack - in a chain
938 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
939 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
940 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
941 pmod
= FPPackMod(self
.width
, self
.id_wid
)
942 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
944 self
.out_z
= pmod
.ospec()
948 def process(self
, i
):
952 m
.d
.sync
+= self
.out_z
.eq(self
.process(None))
953 m
.next
= "pack_put_z"
def __init__(self, width, id_wid):
    """Declare the fields of this rounding-stage data record.

    width  -- passed to FPNumBase and used as the shape of oz
    id_wid -- shape of the mid Signal

    Fields:
      z        -- FPNumBase(width, False)
      out_do_z -- reset-less Signal
      oz       -- reset-less Signal(width)
      mid      -- reset-less Signal(id_wid)
    """
    # the number itself
    self.z = FPNumBase(width, False)

    # out_do_z flag and the oz value that travels with it
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # identifier carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
965 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
971 def __init__(self
, width
, id_wid
):
974 self
.i
= self
.ispec()
975 self
.out_z
= self
.ospec()
978 return FPNorm1Data(self
.width
, self
.id_wid
)
981 return FPRoundData(self
.width
, self
.id_wid
)
983 def process(self
, i
):
986 def setup(self
, m
, i
):
987 m
.submodules
.roundz
= self
988 m
.d
.comb
+= self
.i
.eq(i
)
990 def elaborate(self
, platform
):
992 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
993 with m
.If(~self
.i
.out_do_z
):
994 with m
.If(self
.i
.roundz
):
995 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
996 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
997 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1002 class FPRound(FPState
):
def __init__(self, width, id_wid):
    """Create the "round" FSM state and its worker module.

    width  -- forwarded to FPRoundMod
    id_wid -- forwarded to FPRoundMod

    The state's output record (out_z) comes from this state's ospec().
    """
    FPState.__init__(self, "round")
    # FPRoundMod is built with (width, id_wid) elsewhere in this file;
    # the original call dropped id_wid, so it is forwarded here too.
    self.mod = FPRoundMod(width, id_wid)
    # self.mod must exist before ospec() is called on this state
    self.out_z = self.ospec()
1010 return self
.mod
.ispec()
1013 return self
.mod
.ospec()
1015 def setup(self
, m
, i
):
1016 """ links module to inputs and outputs
1018 self
.mod
.setup(m
, i
)
1021 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1022 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """Set the FSM's next state to "corrections".

    m -- the object whose `next` attribute is assigned.
    """
    following_state = "corrections"
    m.next = following_state
1028 class FPCorrectionsMod
:
1030 def __init__(self
, width
, id_wid
):
1032 self
.id_wid
= id_wid
1033 self
.i
= self
.ispec()
1034 self
.out_z
= self
.ospec()
1037 return FPRoundData(self
.width
, self
.id_wid
)
1040 return FPRoundData(self
.width
, self
.id_wid
)
1042 def process(self
, i
):
1045 def setup(self
, m
, i
):
1046 """ links module to inputs and outputs
1048 m
.submodules
.corrections
= self
1049 m
.d
.comb
+= self
.i
.eq(i
)
1051 def elaborate(self
, platform
):
1053 m
.submodules
.corr_in_z
= self
.i
.z
1054 m
.submodules
.corr_out_z
= self
.out_z
.z
1055 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1056 with m
.If(~self
.i
.out_do_z
):
1057 with m
.If(self
.i
.z
.is_denormalised
):
1058 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1062 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """Create the "corrections" FSM state and its worker module.

    width  -- forwarded to FPCorrectionsMod
    id_wid -- forwarded to FPCorrectionsMod

    The state's output record (out_z) comes from this state's ospec().
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod's constructor takes (width, id_wid); the original
    # one-argument call raised TypeError at construction time.
    self.mod = FPCorrectionsMod(width, id_wid)
    # self.mod must exist before ospec() is called on this state
    self.out_z = self.ospec()
1070 return self
.mod
.ispec()
1073 return self
.mod
.ospec()
1075 def setup(self
, m
, in_z
):
1076 """ links module to inputs and outputs
1078 self
.mod
.setup(m
, in_z
)
1080 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1081 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1083 def action(self
, m
):
def __init__(self, width, id_wid):
    """Declare the fields of this packed-result data record.

    width  -- shape of the z Signal
    id_wid -- shape of the mid Signal

    Both fields are reset-less Signals.
    """
    # packed value
    self.z = Signal(width, reset_less=True)

    # identifier carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
1094 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1097 return [self
.z
, self
.mid
]
1102 def __init__(self
, width
, id_wid
):
1104 self
.id_wid
= id_wid
1105 self
.i
= self
.ispec()
1106 self
.o
= self
.ospec()
1109 return FPRoundData(self
.width
, self
.id_wid
)
1112 return FPPackData(self
.width
, self
.id_wid
)
1114 def process(self
, i
):
1117 def setup(self
, m
, in_z
):
1118 """ links module to inputs and outputs
1120 m
.submodules
.pack
= self
1121 m
.d
.comb
+= self
.i
.eq(in_z
)
1123 def elaborate(self
, platform
):
1125 z
= FPNumOut(self
.width
, False)
1126 m
.submodules
.pack_in_z
= self
.i
.z
1127 m
.submodules
.pack_out_z
= z
1128 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1129 with m
.If(~self
.i
.out_do_z
):
1130 with m
.If(self
.i
.z
.is_overflowed
):
1131 m
.d
.comb
+= z
.inf(self
.i
.z
.s
)
1133 m
.d
.comb
+= z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1135 m
.d
.comb
+= z
.v
.eq(self
.i
.oz
)
1136 m
.d
.comb
+= self
.o
.z
.eq(z
.v
)
1140 class FPPack(FPState
):
def __init__(self, width, id_wid):
    """Create the "pack" FSM state and its worker module.

    width  -- forwarded to FPPackMod
    id_wid -- forwarded to FPPackMod

    The state's output record (out_z) comes from this state's ospec().
    """
    FPState.__init__(self, "pack")
    # FPPackMod is built with (width, id_wid) elsewhere in this file;
    # the original call dropped id_wid, so it is forwarded here too.
    self.mod = FPPackMod(width, id_wid)
    # self.mod must exist before ospec() is called on this state
    self.out_z = self.ospec()
1148 return self
.mod
.ispec()
1151 return self
.mod
.ospec()
1153 def setup(self
, m
, in_z
):
1154 """ links module to inputs and outputs
1156 self
.mod
.setup(m
, in_z
)
1158 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1159 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """Set the FSM's next state to "pack_put_z".

    m -- the object whose `next` attribute is assigned.
    """
    following_state = "pack_put_z"
    m.next = following_state
1165 class FPPutZ(FPState
):
1167 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1168 FPState
.__init
__(self
, state
)
1169 if to_state
is None:
1170 to_state
= "get_ops"
1171 self
.to_state
= to_state
1174 self
.in_mid
= in_mid
1175 self
.out_mid
= out_mid
1177 def action(self
, m
):
1178 if self
.in_mid
is not None:
1179 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1181 self
.out_z
.z
.v
.eq(self
.in_z
)
1183 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1184 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1185 m
.next
= self
.to_state
1187 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
class FPPutZIdx(FPState):
    """ output state for an indexed set of outputs: the result is written
        to out_zs[in_mid], that entry's strobe is raised, and the state
        waits for its strobe+ack before moving on.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """
            * state: name of this state, passed to FPState
            * in_z: result; its .v is written to the selected output
            * out_zs: indexable collection of outputs, each with v/stb/ack
            * in_mid: index selecting the entry of out_zs to drive
            * to_state: state entered once the output has been
                        acknowledged. defaults to "get_ops"
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads self.in_z, so it must be kept on the instance
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ adds this state's logic to module m (called inside m.State)
        """
        # combinatorial copies of the selected entry's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # an .eq() only takes effect once added to a domain: register the
        # value in sync, like the strobe
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # not yet acknowledged: keep (or start) strobing.  without the
            # Else this assignment would override the clear above.
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
def __init__(self, width, id_wid):
    """ result record: an FPOp of the given width plus an id signal

        * width: passed to FPOp
        * id_wid: width of the id signal (mid)
    """
    # NOTE(review): enclosing class header is not visible in this chunk;
    # presumably FPOpData - confirm.
    self.mid = Signal(id_wid, reset_less=True)
    self.z = FPOp(width)
1223 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1226 return [self
.z
, self
.mid
]
1231 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1234 * width: bit-width of IEEE754. supported: 16, 32, 64
1235 * id_wid: an identifier that is sync-connected to the input
1236 * single_cycle: True indicates each stage to complete in 1 clock
1237 * compact: True indicates a reduced number of stages
1240 self
.id_wid
= id_wid
1241 self
.single_cycle
= single_cycle
1242 self
.compact
= compact
1244 self
.in_t
= Trigger()
1245 self
.i
= self
.ispec()
1246 self
.o
= self
.ospec()
1251 return FPADDBaseData(self
.width
, self
.id_wid
)
1254 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ appends state to self.states and returns it, so that callers can
        write "x = self.add_state(X(...))" and keep using x afterwards
    """
    self.states.append(state)
    return state
1260 def get_fragment(self
, platform
=None):
1261 """ creates the HDL code-fragment for FPAdd
1264 m
.submodules
.out_z
= self
.o
.z
1265 m
.submodules
.in_t
= self
.in_t
1267 self
.get_compact_fragment(m
, platform
)
1269 self
.get_longer_fragment(m
, platform
)
1271 with m
.FSM() as fsm
:
1273 for state
in self
.states
:
1274 with m
.State(state
.state_from
):
1279 def get_longer_fragment(self
, m
, platform
=None):
1281 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1283 get
.setup(m
, self
.i
)
1286 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
1288 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1289 sc
.setup(m
, a
, b
, self
.in_mid
)
1291 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1292 dn
.setup(m
, a
, b
, sc
.in_mid
)
1294 if self
.single_cycle
:
1295 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1296 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1298 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1299 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1301 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1302 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1304 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1305 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1307 if self
.single_cycle
:
1308 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1309 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1311 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1312 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1314 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1315 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1317 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1318 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1320 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1321 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1323 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1324 pa
.in_mid
, self
.out_mid
))
1326 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1327 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """ builds the reduced-stage variant: get-ops, special-cases/denorm,
        align/add and norm-to-pack are chained with StageChain, each is
        registered as a state, and a final FPPutZ state ("pack_put_z")
        writes the packed result to self.o.

        platform is accepted but not used here.
    """
    width, id_wid = self.width, self.id_wid

    get_ops = FPGet2Op("get_ops", "special_cases", width, id_wid)
    specials = FPAddSpecialCasesDeNorm(width, id_wid)
    align_add = FPAddAlignSingleAdd(width, id_wid)
    norm_pack = FPNormToPack(width, id_wid)

    # the incoming trigger's stb/ack drive the first stage
    get_ops.trigger_setup(m, self.in_t.stb, self.in_t.ack)

    stages = [get_ops, specials, align_add, norm_pack]
    StageChain(stages, specallocate=True).setup(m, self.i)

    for stage in stages:
        self.add_state(stage)

    self.add_state(FPPutZ("pack_put_z", norm_pack.out_z.z, self.o,
                          norm_pack.out_z.mid, self.o.mid))
1349 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1350 # sc.o.mid, self.o.mid))
1353 class FPADDBase(FPState
):
1355 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1358 * width: bit-width of IEEE754. supported: 16, 32, 64
1359 * id_wid: an identifier that is sync-connected to the input
1360 * single_cycle: True indicates each stage to complete in 1 clock
1362 FPState
.__init
__(self
, "fpadd")
1364 self
.single_cycle
= single_cycle
1365 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1366 self
.o
= self
.ospec()
1368 self
.in_t
= Trigger()
1369 self
.i
= self
.ispec()
1371 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1372 self
.in_accept
= Signal(reset_less
=True)
1373 self
.add_stb
= Signal(reset_less
=True)
1374 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1377 return self
.mod
.ispec()
1380 return self
.mod
.ospec()
1382 def setup(self
, m
, i
, add_stb
, in_mid
):
1383 m
.d
.comb
+= [self
.i
.eq(i
),
1384 self
.mod
.i
.eq(self
.i
),
1385 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1386 #self.add_stb.eq(add_stb),
1387 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1388 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1389 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1390 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1391 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1392 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1395 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1396 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1397 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1398 #m.d.sync += self.in_t.stb.eq(0)
1400 m
.submodules
.fpadd
= self
.mod
1402 def action(self
, m
):
1404 # in_accept is set on incoming strobe HIGH and ack LOW.
1405 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1407 #with m.If(self.in_t.ack):
1408 # m.d.sync += self.in_t.stb.eq(0)
1409 with m
.If(~self
.z_done
):
1410 # not done: test for accepting an incoming operand pair
1411 with m
.If(self
.in_accept
):
1413 self
.add_ack
.eq(1), # acknowledge receipt...
1414 self
.in_t
.stb
.eq(1), # initiate add
1417 m
.d
.sync
+= [self
.add_ack
.eq(0),
1418 self
.in_t
.stb
.eq(0),
1422 # done: acknowledge, and write out id and value
1423 m
.d
.sync
+= [self
.add_ack
.eq(1),
1430 if self
.in_mid
is not None:
1431 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1434 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1436 # move to output state on detecting z ack
1437 with m
.If(self
.out_z
.trigger
):
1438 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1441 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
class FPADDBasePipe(ControlBase):
    """ three chained stages: special-cases/denorm, align/add, and
        norm-to-pack, connected together with ControlBase.connect
    """

    def __init__(self, width, id_wid):
        """
            * width: passed to every stage
            * id_wid: passed to every stage
        """
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are created here, added in elaborate()
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        """ registers the three stages as submodules and wires them up
        """
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe with num_rows inputs, passing FPADDBaseData
        through unmodified (PassThroughStage)
    """

    def __init__(self, width, id_wid, num_rows):
        """
            * width, id_wid: passed to FPADDBaseData
            * num_rows: number of inputs (p_len)
        """
        self.num_rows = num_rows

        def iospec():
            # spec factory: input and output share the same data layout
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe with num_rows outputs, passing FPPackData through
        unmodified (PassThroughStage)
    """

    def __init__(self, width, id_wid, num_rows):
        """
            * width, id_wid: passed to FPPackData
            * num_rows: number of outputs (n_len)
        """
        self.num_rows = num_rows

        def iospec():
            # spec factory: input and output share the same data layout
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=num_rows)
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """
    def __init__(self, width, id_wid, num_rows):
        """
            * width, id_wid: passed to all three sub-pipelines
            * num_rows: number of fan-in inputs / fan-out outputs
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        self.p = self.inpipe.p  # kinda annoying,
        self.n = self.outpipe.n # use pipe in/out as this class in/out
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipelines as submodules and connects
            fan-in -> adder -> fan-out
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m
1513 """ FPADD: stages as follows:
1519 FPAddBase---> FPAddBaseMod
1521 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1523 FPAddBase is tricky: it is both a stage and *has* stages.
1524 Connection to FPAddBaseMod therefore requires an in stb/ack
1525 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1526 needs to be the thing that raises the incoming stb.
1529 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1532 * width: bit-width of IEEE754. supported: 16, 32, 64
1533 * id_wid: an identifier that is sync-connected to the input
1534 * single_cycle: True indicates each stage to complete in 1 clock
1537 self
.id_wid
= id_wid
1538 self
.single_cycle
= single_cycle
1540 #self.out_z = FPOp(width)
1541 self
.ids
= FPID(id_wid
)
1544 for i
in range(rs_sz
):
1547 in_a
.name
= "in_a_%d" % i
1548 in_b
.name
= "in_b_%d" % i
1549 rs
.append((in_a
, in_b
))
1553 for i
in range(rs_sz
):
1555 out_z
.name
= "out_z_%d" % i
1557 self
.res
= Array(res
)
def add_state(self, state):
    """ appends state to self.states and returns it, so that callers can
        write "x = self.add_state(x)" and keep using x afterwards
    """
    self.states.append(state)
    return state
1565 def get_fragment(self
, platform
=None):
1566 """ creates the HDL code-fragment for FPAdd
1569 m
.submodules
+= self
.rs
1571 in_a
= self
.rs
[0][0]
1572 in_b
= self
.rs
[0][1]
1574 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1579 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1584 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1585 ab
= self
.add_state(ab
)
1586 abd
= ab
.ispec() # create an input spec object for FPADDBase
1587 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1588 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1591 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1594 with m
.FSM() as fsm
:
1596 for state
in self
.states
:
1597 with m
.State(state
.state_from
):
1603 if __name__
== "__main__":
1605 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1606 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1607 alu
.rs
[0][1].ports() + \
1608 alu
.res
[0].ports() + \
1609 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1611 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1612 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1613 alu
.in_t
.ports() + \
1614 alu
.out_z
.ports() + \
1615 [alu
.in_mid
, alu
.out_mid
])
1618 # works... but don't use, just do "python fname.py convert -t v"
1619 #print (verilog.convert(alu, ports=[
1620 # ports=alu.in_a.ports() + \
1621 # alu.in_b.ports() + \
1622 # alu.out_z.ports())