1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
, FPGet2OpMod
, FPGet2Op
)
19 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
22 class FPAddSpecialCasesMod
:
23 """ special cases: NaNs, infs, zeros, denormalised
24 NOTE: some of these are unique to add. see "Special Operations"
25 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
28 def __init__(self
, width
, id_wid
):
35 return FPADDBaseData(self
.width
, self
.id_wid
)
38 return FPSCData(self
.width
, self
.id_wid
)
40 def setup(self
, m
, i
):
41 """ links module to inputs and outputs
43 m
.submodules
.specialcases
= self
44 m
.d
.comb
+= self
.i
.eq(i
)
49 def elaborate(self
, platform
):
52 m
.submodules
.sc_out_z
= self
.o
.z
54 # decode: XXX really should move to separate stage
55 a1
= FPNumIn(None, self
.width
)
56 b1
= FPNumIn(None, self
.width
)
57 m
.submodules
.sc_decode_a
= a1
58 m
.submodules
.sc_decode_b
= b1
59 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
64 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
67 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
69 # if a is NaN or b is NaN return NaN
70 with m
.If(a1
.is_nan | b1
.is_nan
):
71 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
72 m
.d
.comb
+= self
.o
.z
.nan(0)
74 # XXX WEIRDNESS for FP16 non-canonical NaN handling
77 ## if a is zero and b is NaN return -b
78 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
79 # m.d.comb += self.o.out_do_z.eq(1)
80 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
82 ## if b is zero and a is NaN return -a
83 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
84 # m.d.comb += self.o.out_do_z.eq(1)
85 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
87 ## if a is -zero and b is NaN return -b
88 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
89 # m.d.comb += self.o.out_do_z.eq(1)
90 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
92 ## if b is -zero and a is NaN return -a
93 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
94 # m.d.comb += self.o.out_do_z.eq(1)
95 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
97 # if a is inf return inf (or NaN)
98 with m
.Elif(a1
.is_inf
):
99 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
100 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
101 # if a is inf and signs don't match return NaN
102 with m
.If(b1
.exp_128
& s_nomatch
):
103 m
.d
.comb
+= self
.o
.z
.nan(0)
105 # if b is inf return inf
106 with m
.Elif(b1
.is_inf
):
107 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
108 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
110 # if a is zero and b zero return signed-a/b
111 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
112 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
113 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
115 # if a is zero return b
116 with m
.Elif(a1
.is_zero
):
117 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
118 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
120 # if b is zero return a
121 with m
.Elif(b1
.is_zero
):
122 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
123 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
125 # if a equal to -b return zero (+ve zero)
126 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
127 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
128 m
.d
.comb
+= self
.o
.z
.zero(0)
130 # Denormalised Number checks next, so pass a/b data through
132 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
133 m
.d
.comb
+= self
.o
.a
.eq(a1
)
134 m
.d
.comb
+= self
.o
.b
.eq(b1
)
136 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
137 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
143 def __init__(self
, id_wid
):
146 self
.in_mid
= Signal(id_wid
, reset_less
=True)
147 self
.out_mid
= Signal(id_wid
, reset_less
=True)
153 if self
.id_wid
is not None:
154 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
157 class FPAddSpecialCases(FPState
):
158 """ special cases: NaNs, infs, zeros, denormalised
159 NOTE: some of these are unique to add. see "Special Operations"
160 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state and its combinatorial module

        width:  forwarded to FPAddSpecialCasesMod
        id_wid: forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid) with no
    # defaults: passing width alone raises TypeError on construction.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output record has whatever shape the module's ospec() declares
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
169 def setup(self
, m
, i
):
170 """ links module to inputs and outputs
172 self
.mod
.setup(m
, i
, self
.out_do_z
)
173 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
174 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
178 with m
.If(self
.out_do_z
):
181 m
.next
= "denormalise"
184 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
185 """ special cases: NaNs, infs, zeros, denormalised
186 NOTE: some of these are unique to add. see "Special Operations"
187 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
190 def __init__(self
, width
, id_wid
):
191 FPState
.__init
__(self
, "special_cases")
194 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
195 self
.out
= self
.ospec()
198 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
201 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
203 def setup(self
, m
, i
):
204 """ links module to inputs and outputs
206 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
207 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
209 chain
= StageChain([smod
, dmod
])
212 # only needed for break-out (early-out)
213 # self.out_do_z = smod.o.out_do_z
217 def process(self
, i
):
221 # for break-out (early-out)
222 #with m.If(self.out_do_z):
225 m
.d
.sync
+= self
.out
.eq(self
.process(None))
229 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ declares the operand ports of the multi-cycle align module

        width: forwarded to every FPNumBase / FPNumIn created here
    """
    # inputs: the two operands as handed over by the previous stage
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # outputs: the operands after (at most) one alignment step
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # single-bit flag driven by elaborate()
    self.exp_eq = Signal(reset_less=True)
238 def elaborate(self
, platform
):
239 # This one however (single-cycle) will do the shift
244 m
.submodules
.align_in_a
= self
.in_a
245 m
.submodules
.align_in_b
= self
.in_b
246 m
.submodules
.align_out_a
= self
.out_a
247 m
.submodules
.align_out_b
= self
.out_b
249 # NOTE: this does *not* do single-cycle multi-shifting,
250 # it *STAYS* in the align state until exponents match
252 # exponent of a greater than b: shift b down
253 m
.d
.comb
+= self
.exp_eq
.eq(0)
254 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
255 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
256 agtb
= Signal(reset_less
=True)
257 altb
= Signal(reset_less
=True)
258 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
259 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
261 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
262 # exponent of b greater than a: shift a down
264 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
265 # exponents equal: move to next stage.
267 m
.d
.comb
+= self
.exp_eq
.eq(1)
271 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state wrapping FPAddAlignMultiMod

        width:  forwarded to the module and to the FPNumIn outputs
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")

    # the combinatorial alignment logic lives in the module
    self.mod = FPAddAlignMultiMod(width)

    # state-level copies of the module's outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
280 def setup(self
, m
, in_a
, in_b
):
281 """ links module to inputs and outputs
283 m
.submodules
.align
= self
.mod
284 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
285 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
286 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
287 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
288 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
291 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ declares the fields of the two-operand data record

        width:  forwarded to FPNumIn / FPNumOut; also the bit-width of oz
        id_wid: bit-width of the mid signal
    """
    # the two operands
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)

    # result number, plus the flag and raw value carried alongside it
    self.z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    self.mid = Signal(id_wid, reset_less=True)
306 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
307 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
310 class FPAddAlignSingleMod
:
312 def __init__(self
, width
, id_wid
):
315 self
.i
= self
.ispec()
316 self
.o
= self
.ospec()
319 return FPSCData(self
.width
, self
.id_wid
)
322 return FPNumIn2Ops(self
.width
, self
.id_wid
)
324 def process(self
, i
):
327 def setup(self
, m
, i
):
328 """ links module to inputs and outputs
330 m
.submodules
.align
= self
331 m
.d
.comb
+= self
.i
.eq(i
)
333 def elaborate(self
, platform
):
334 """ Aligns A against B or B against A, depending on which has the
335 greater exponent. This is done in a *single* cycle using
336 variable-width bit-shift
338 the shifter used here is quite expensive in terms of gates.
339 Mux A or B in (and out) into temporaries, as only one of them
340 needs to be aligned against the other
344 m
.submodules
.align_in_a
= self
.i
.a
345 m
.submodules
.align_in_b
= self
.i
.b
346 m
.submodules
.align_out_a
= self
.o
.a
347 m
.submodules
.align_out_b
= self
.o
.b
349 # temporary (muxed) input and output to be shifted
350 t_inp
= FPNumBase(self
.width
)
351 t_out
= FPNumIn(None, self
.width
)
352 espec
= (len(self
.i
.a
.e
), True)
353 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
354 m
.submodules
.align_t_in
= t_inp
355 m
.submodules
.align_t_out
= t_out
356 m
.submodules
.multishift_r
= msr
358 ediff
= Signal(espec
, reset_less
=True)
359 ediffr
= Signal(espec
, reset_less
=True)
360 tdiff
= Signal(espec
, reset_less
=True)
361 elz
= Signal(reset_less
=True)
362 egz
= Signal(reset_less
=True)
364 # connect multi-shifter to t_inp/out mantissa (and tdiff)
365 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
366 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
367 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
368 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
369 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
371 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
372 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
373 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
374 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
376 # default: A-exp == B-exp, A and B untouched (fall through)
377 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
378 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
379 # only one shifter (muxed)
380 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
381 # exponent of a greater than b: shift b down
382 with m
.If(~self
.i
.out_do_z
):
384 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
387 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
389 # exponent of b greater than a: shift a down
391 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
394 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
397 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
398 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
399 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
400 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
405 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state wrapping FPAddAlignSingleMod

        width:  forwarded to the module and to the FPNumIn outputs
        id_wid: forwarded to the module
    """
    FPState.__init__(self, "align")

    # single-cycle alignment module
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # state-level operand outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
413 def setup(self
, m
, i
):
414 """ links module to inputs and outputs
418 # NOTE: could be done as comb
419 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
420 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
426 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
428 def __init__(self
, width
, id_wid
):
429 FPState
.__init
__(self
, "align")
432 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
433 self
.a1o
= self
.ospec()
436 return FPSCData(self
.width
, self
.id_wid
)
439 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
441 def setup(self
, m
, i
):
442 """ links module to inputs and outputs
445 # chain AddAlignSingle, AddStage0 and AddStage1
446 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
447 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
448 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
450 chain
= StageChain([mod
, a0mod
, a1mod
])
455 def process(self
, i
):
459 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
460 m
.next
= "normalise_1"
463 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ declares the fields of the add-stage-0 output record

        width:  forwarded to FPNumBase; also the bit-width of oz
        id_wid: bit-width of the mid signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # tot is four bits wider than z's mantissa width; this must come
    # after self.z is created because it reads self.z.m_width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)

    self.mid = Signal(id_wid, reset_less=True)
473 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
474 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
477 class FPAddStage0Mod
:
479 def __init__(self
, width
, id_wid
):
482 self
.i
= self
.ispec()
483 self
.o
= self
.ospec()
486 return FPSCData(self
.width
, self
.id_wid
)
489 return FPAddStage0Data(self
.width
, self
.id_wid
)
491 def process(self
, i
):
494 def setup(self
, m
, i
):
495 """ links module to inputs and outputs
497 m
.submodules
.add0
= self
498 m
.d
.comb
+= self
.i
.eq(i
)
500 def elaborate(self
, platform
):
502 m
.submodules
.add0_in_a
= self
.i
.a
503 m
.submodules
.add0_in_b
= self
.i
.b
504 m
.submodules
.add0_out_z
= self
.o
.z
506 # store intermediate tests (and zero-extended mantissas)
507 seq
= Signal(reset_less
=True)
508 mge
= Signal(reset_less
=True)
509 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
510 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
511 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
512 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
513 am0
.eq(Cat(self
.i
.a
.m
, 0)),
514 bm0
.eq(Cat(self
.i
.b
.m
, 0))
516 # same-sign (both negative or both positive) add mantissas
517 with m
.If(~self
.i
.out_do_z
):
518 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
521 self
.o
.tot
.eq(am0
+ bm0
),
522 self
.o
.z
.s
.eq(self
.i
.a
.s
)
524 # a mantissa greater than b, use a
527 self
.o
.tot
.eq(am0
- bm0
),
528 self
.o
.z
.s
.eq(self
.i
.a
.s
)
530 # b mantissa greater than a, use b
533 self
.o
.tot
.eq(bm0
- am0
),
534 self
.o
.z
.s
.eq(self
.i
.b
.s
)
537 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
538 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
539 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
543 class FPAddStage0(FPState
):
544 """ First stage of add. covers same-sign (add) and subtract
545 special-casing when mantissas are greater or equal, to
546 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state and its combinatorial module

        width:  forwarded to FPAddStage0Mod
        id_wid: forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid) with no defaults:
    # passing width alone raises TypeError on construction.
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record has whatever shape the module's ospec() declares
    self.o = self.mod.ospec()
554 def setup(self
, m
, i
):
555 """ links module to inputs and outputs
559 # NOTE: these could be done as combinatorial (merge add0+add1)
560 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
566 class FPAddStage1Data
:
568 def __init__(self
, width
, id_wid
):
569 self
.z
= FPNumBase(width
, False)
570 self
.out_do_z
= Signal(reset_less
=True)
571 self
.oz
= Signal(width
, reset_less
=True)
573 self
.mid
= Signal(id_wid
, reset_less
=True)
576 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
577 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
581 class FPAddStage1Mod(FPState
):
582 """ Second stage of add: preparation for normalisation.
583 detects when tot sum is too big (tot[27] is kinda a carry bit)
586 def __init__(self
, width
, id_wid
):
589 self
.i
= self
.ispec()
590 self
.o
= self
.ospec()
593 return FPAddStage0Data(self
.width
, self
.id_wid
)
596 return FPAddStage1Data(self
.width
, self
.id_wid
)
598 def process(self
, i
):
601 def setup(self
, m
, i
):
602 """ links module to inputs and outputs
604 m
.submodules
.add1
= self
605 m
.submodules
.add1_out_overflow
= self
.o
.of
607 m
.d
.comb
+= self
.i
.eq(i
)
609 def elaborate(self
, platform
):
611 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
612 # tot[-1] (MSB) gets set when the sum overflows. shift result down
613 with m
.If(~self
.i
.out_do_z
):
614 with m
.If(self
.i
.tot
[-1]):
616 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
617 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
618 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
619 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
620 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
621 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
623 # tot[-1] (MSB) zero case
626 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
627 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
628 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
629 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
630 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
633 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
634 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
635 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
640 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state and its combinatorial module

        width:  forwarded to FPAddStage1Mod and to the out_z FPNumBase
        id_wid: forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid) with no defaults:
    # passing width alone raises TypeError on construction.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # note: unlike most signals in this file, norm_stb is created
    # without reset_less=True
    self.norm_stb = Signal()
649 def setup(self
, m
, i
):
650 """ links module to inputs and outputs
654 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
656 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
657 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
658 m
.d
.sync
+= self
.norm_stb
.eq(1)
661 m
.next
= "normalise_1"
664 class FPNormaliseModSingle
:
666 def __init__(self
, width
):
668 self
.in_z
= self
.ispec()
669 self
.out_z
= self
.ospec()
672 return FPNumBase(self
.width
, False)
675 return FPNumBase(self
.width
, False)
677 def setup(self
, m
, i
):
678 """ links module to inputs and outputs
680 m
.submodules
.normalise
= self
681 m
.d
.comb
+= self
.i
.eq(i
)
683 def elaborate(self
, platform
):
686 mwid
= self
.out_z
.m_width
+2
687 pe
= PriorityEncoder(mwid
)
688 m
.submodules
.norm_pe
= pe
690 m
.submodules
.norm1_out_z
= self
.out_z
691 m
.submodules
.norm1_in_z
= self
.in_z
693 in_z
= FPNumBase(self
.width
, False)
695 m
.submodules
.norm1_insel_z
= in_z
696 m
.submodules
.norm1_insel_overflow
= in_of
698 espec
= (len(in_z
.e
), True)
699 ediff_n126
= Signal(espec
, reset_less
=True)
700 msr
= MultiShiftRMerge(mwid
, espec
)
701 m
.submodules
.multishift_r
= msr
703 m
.d
.comb
+= in_z
.eq(self
.in_z
)
704 m
.d
.comb
+= in_of
.eq(self
.in_of
)
705 # initialise out from in (overridden below)
706 m
.d
.comb
+= self
.out_z
.eq(in_z
)
707 m
.d
.comb
+= self
.out_of
.eq(in_of
)
708 # normalisation decrease condition
709 decrease
= Signal(reset_less
=True)
710 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
713 # *sigh* not entirely obvious: count leading zeros (clz)
714 # with a PriorityEncoder: to find from the MSB
715 # we reverse the order of the bits.
716 temp_m
= Signal(mwid
, reset_less
=True)
717 temp_s
= Signal(mwid
+1, reset_less
=True)
718 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
720 # cat round and guard bits back into the mantissa
721 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
722 pe
.i
.eq(temp_m
[::-1]), # inverted
723 clz
.eq(pe
.o
), # count zeros from MSB down
724 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
725 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
726 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ declares the fields of the normalisation output record

        width:  forwarded to FPNumBase; also the bit-width of oz
        id_wid: bit-width of the mid signal
    """
    # single-bit flag consumed by the rounding stage (i.roundz)
    self.roundz = Signal(reset_less=True)

    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    self.mid = Signal(id_wid, reset_less=True)
742 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
743 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
746 class FPNorm1ModSingle
:
748 def __init__(self
, width
, id_wid
):
751 self
.i
= self
.ispec()
752 self
.o
= self
.ospec()
755 return FPAddStage1Data(self
.width
, self
.id_wid
)
758 return FPNorm1Data(self
.width
, self
.id_wid
)
760 def setup(self
, m
, i
):
761 """ links module to inputs and outputs
763 m
.submodules
.normalise_1
= self
764 m
.d
.comb
+= self
.i
.eq(i
)
766 def process(self
, i
):
769 def elaborate(self
, platform
):
772 mwid
= self
.o
.z
.m_width
+2
773 pe
= PriorityEncoder(mwid
)
774 m
.submodules
.norm_pe
= pe
777 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
779 m
.submodules
.norm1_out_z
= self
.o
.z
780 m
.submodules
.norm1_out_overflow
= of
781 m
.submodules
.norm1_in_z
= self
.i
.z
782 m
.submodules
.norm1_in_overflow
= self
.i
.of
785 m
.submodules
.norm1_insel_z
= i
.z
786 m
.submodules
.norm1_insel_overflow
= i
.of
788 espec
= (len(i
.z
.e
), True)
789 ediff_n126
= Signal(espec
, reset_less
=True)
790 msr
= MultiShiftRMerge(mwid
, espec
)
791 m
.submodules
.multishift_r
= msr
793 m
.d
.comb
+= i
.eq(self
.i
)
794 # initialise out from in (overridden below)
795 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
796 m
.d
.comb
+= of
.eq(i
.of
)
797 # normalisation increase/decrease conditions
798 decrease
= Signal(reset_less
=True)
799 increase
= Signal(reset_less
=True)
800 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
801 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
803 with m
.If(~self
.i
.out_do_z
):
805 # *sigh* not entirely obvious: count leading zeros (clz)
806 # with a PriorityEncoder: to find from the MSB
807 # we reverse the order of the bits.
808 temp_m
= Signal(mwid
, reset_less
=True)
809 temp_s
= Signal(mwid
+1, reset_less
=True)
810 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
811 # make sure that the amount to decrease by does NOT
812 # go below the minimum non-INF/NaN exponent
813 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
816 # cat round and guard bits back into the mantissa
817 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
818 pe
.i
.eq(temp_m
[::-1]), # inverted
819 clz
.eq(limclz
), # count zeros from MSB down
820 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
821 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
822 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
823 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
824 # overflow in bits 0..1: got shifted too (leave sticky)
825 of
.guard
.eq(temp_s
[1]), # guard
826 of
.round_bit
.eq(temp_s
[0]), # round
829 with m
.Elif(increase
):
830 temp_m
= Signal(mwid
+1, reset_less
=True)
832 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
834 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
835 # connect multi-shifter to inp/out mantissa (and ediff)
837 msr
.diff
.eq(ediff_n126
),
838 self
.o
.z
.m
.eq(msr
.m
[3:]),
839 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
840 # overflow in bits 0..1: got shifted too (leave sticky)
841 of
.guard
.eq(temp_s
[2]), # guard
842 of
.round_bit
.eq(temp_s
[1]), # round
843 of
.sticky
.eq(temp_s
[0]), # sticky
844 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
847 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
848 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
849 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
854 class FPNorm1ModMulti
:
856 def __init__(self
, width
, single_cycle
=True):
858 self
.in_select
= Signal(reset_less
=True)
859 self
.in_z
= FPNumBase(width
, False)
860 self
.in_of
= Overflow()
861 self
.temp_z
= FPNumBase(width
, False)
862 self
.temp_of
= Overflow()
863 self
.out_z
= FPNumBase(width
, False)
864 self
.out_of
= Overflow()
866 def elaborate(self
, platform
):
869 m
.submodules
.norm1_out_z
= self
.out_z
870 m
.submodules
.norm1_out_overflow
= self
.out_of
871 m
.submodules
.norm1_temp_z
= self
.temp_z
872 m
.submodules
.norm1_temp_of
= self
.temp_of
873 m
.submodules
.norm1_in_z
= self
.in_z
874 m
.submodules
.norm1_in_overflow
= self
.in_of
876 in_z
= FPNumBase(self
.width
, False)
878 m
.submodules
.norm1_insel_z
= in_z
879 m
.submodules
.norm1_insel_overflow
= in_of
881 # select which of temp or in z/of to use
882 with m
.If(self
.in_select
):
883 m
.d
.comb
+= in_z
.eq(self
.in_z
)
884 m
.d
.comb
+= in_of
.eq(self
.in_of
)
886 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
887 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
888 # initialise out from in (overridden below)
889 m
.d
.comb
+= self
.out_z
.eq(in_z
)
890 m
.d
.comb
+= self
.out_of
.eq(in_of
)
891 # normalisation increase/decrease conditions
892 decrease
= Signal(reset_less
=True)
893 increase
= Signal(reset_less
=True)
894 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
895 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
896 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
900 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
901 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
902 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
903 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
904 self
.out_of
.round_bit
.eq(0), # reset round bit
905 self
.out_of
.m0
.eq(in_of
.guard
),
908 with m
.Elif(increase
):
910 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
911 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
912 self
.out_of
.guard
.eq(in_z
.m
[0]),
913 self
.out_of
.m0
.eq(in_z
.m
[1]),
914 self
.out_of
.round_bit
.eq(in_of
.guard
),
915 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
921 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ creates the "normalise_1" state and its single-cycle module

        width:        forwarded to FPNorm1ModSingle and to out_z
        id_wid:       forwarded to FPNorm1ModSingle
        single_cycle: accepted but not used by this constructor
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ takes (width, id_wid) with no defaults:
    # passing width alone raises TypeError on construction.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # self.ospec() is this class's own method, so self.mod must
    # already be assigned before this line runs
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
931 return self
.mod
.ispec()
934 return self
.mod
.ospec()
936 def setup(self
, m
, i
):
937 """ links module to inputs and outputs
945 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ creates the "normalise_1" state wrapping FPNorm1ModMulti

        width:  forwarded to the module and to the FPNumBase records
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # handshake / control flags
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # intermediate number and overflow record
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # results
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
959 def setup(self
, m
, in_z
, in_of
, norm_stb
):
960 """ links module to inputs and outputs
962 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
963 self
.in_accept
, self
.temp_z
, self
.temp_of
,
964 self
.out_z
, self
.out_norm
)
966 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
967 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
970 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
971 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
972 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
973 with m
.If(self
.out_norm
):
974 with m
.If(self
.in_accept
):
979 m
.d
.sync
+= self
.ack
.eq(0)
981 # normalisation not required (or done).
983 m
.d
.sync
+= self
.ack
.eq(1)
984 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
987 class FPNormToPack(FPState
, UnbufferedPipeline
):
989 def __init__(self
, width
, id_wid
):
990 FPState
.__init
__(self
, "normalise_1")
993 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
996 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
999 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1001 def setup(self
, m
, i
):
1002 """ links module to inputs and outputs
1005 # Normalisation, Rounding Corrections, Pack - in a chain
1006 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1007 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1008 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1009 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1010 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1012 self
.out_z
= pmod
.ospec()
1016 def process(self
, i
):
def action(self, m):
    """ registers the processed output into out_z on the sync domain,
        then selects "pack_put_z" as the next state
    """
    processed = self.process(None)
    m.d.sync += self.out_z.eq(processed)
    m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ declares the fields of the z / out_do_z / oz / mid data record

        width:  forwarded to FPNumBase; also the bit-width of oz
        id_wid: bit-width of the mid signal
    """
    self.z = FPNumBase(width, False)

    # flag and raw value carried alongside z
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    self.mid = Signal(id_wid, reset_less=True)
1033 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1039 def __init__(self
, width
, id_wid
):
1041 self
.id_wid
= id_wid
1042 self
.i
= self
.ispec()
1043 self
.out_z
= self
.ospec()
1046 return FPNorm1Data(self
.width
, self
.id_wid
)
1049 return FPRoundData(self
.width
, self
.id_wid
)
1051 def process(self
, i
):
1054 def setup(self
, m
, i
):
1055 m
.submodules
.roundz
= self
1056 m
.d
.comb
+= self
.i
.eq(i
)
1058 def elaborate(self
, platform
):
1060 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1061 with m
.If(~self
.i
.out_do_z
):
1062 with m
.If(self
.i
.roundz
):
1063 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
1064 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1065 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1070 class FPRound(FPState
):
def __init__(self, width, id_wid):
    """ creates the "round" state and its combinatorial module

        width:  forwarded to FPRoundMod
        id_wid: forwarded to FPRoundMod
    """
    FPState.__init__(self, "round")
    # FPRoundMod is constructed with (width, id_wid) elsewhere in this
    # file; passing width alone leaves the module one argument short.
    self.mod = FPRoundMod(width, id_wid)
    # self.ospec() is this class's own method, so self.mod must
    # already be assigned before this line runs
    self.out_z = self.ospec()
1078 return self
.mod
.ispec()
1081 return self
.mod
.ospec()
1083 def setup(self
, m
, i
):
1084 """ links module to inputs and outputs
1086 self
.mod
.setup(m
, i
)
1089 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1090 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """ selects "corrections" as the next state
    """
    following_state = "corrections"
    m.next = following_state
1096 class FPCorrectionsMod
:
1098 def __init__(self
, width
, id_wid
):
1100 self
.id_wid
= id_wid
1101 self
.i
= self
.ispec()
1102 self
.out_z
= self
.ospec()
1105 return FPRoundData(self
.width
, self
.id_wid
)
1108 return FPRoundData(self
.width
, self
.id_wid
)
1110 def process(self
, i
):
1113 def setup(self
, m
, i
):
1114 """ links module to inputs and outputs
1116 m
.submodules
.corrections
= self
1117 m
.d
.comb
+= self
.i
.eq(i
)
1119 def elaborate(self
, platform
):
1121 m
.submodules
.corr_in_z
= self
.i
.z
1122 m
.submodules
.corr_out_z
= self
.out_z
.z
1123 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1124 with m
.If(~self
.i
.out_do_z
):
1125 with m
.If(self
.i
.z
.is_denormalised
):
1126 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1130 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """ creates the "corrections" state and its combinatorial module

        width:  forwarded to FPCorrectionsMod
        id_wid: forwarded to FPCorrectionsMod
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod.__init__ takes (width, id_wid) with no defaults:
    # passing width alone raises TypeError on construction.
    self.mod = FPCorrectionsMod(width, id_wid)
    # self.ospec() is this class's own method, so self.mod must
    # already be assigned before this line runs
    self.out_z = self.ospec()
1138 return self
.mod
.ispec()
1141 return self
.mod
.ospec()
1143 def setup(self
, m
, in_z
):
1144 """ links module to inputs and outputs
1146 self
.mod
.setup(m
, in_z
)
1148 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1149 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1151 def action(self
, m
):
def __init__(self, width, id_wid):
    """ declares the two fields of the packed-result record

        width:  bit-width of the z signal
        id_wid: bit-width of the mid signal
    """
    # here z is a plain Signal, not an FPNumBase record
    self.z = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1162 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1165 return [self
.z
, self
.mid
]
1170 def __init__(self
, width
, id_wid
):
1172 self
.id_wid
= id_wid
1173 self
.i
= self
.ispec()
1174 self
.o
= self
.ospec()
1177 return FPRoundData(self
.width
, self
.id_wid
)
1180 return FPPackData(self
.width
, self
.id_wid
)
1182 def process(self
, i
):
1185 def setup(self
, m
, in_z
):
1186 """ links module to inputs and outputs
1188 m
.submodules
.pack
= self
1189 m
.d
.comb
+= self
.i
.eq(in_z
)
def elaborate(self, platform):
    """ creates the pack logic and returns it as a Module

        a local FPNumOut, z, is driven from exactly one of three sources:

        * out_do_z set: z.v is loaded directly from self.i.oz
        * self.i.z.is_overflowed: z.inf() is called with the sign of i.z
        * otherwise: z.create() is called with the s, e and m of i.z

        self.o.z always follows z.v and self.o.mid always follows
        self.i.mid.  platform is not referenced.
    """
    m = Module()
    z = FPNumOut(self.width, False)
    # both the incoming number and the local one are sub-modules
    m.submodules.pack_in_z = self.i.z
    m.submodules.pack_out_z = z
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(~self.i.out_do_z):
        with m.If(self.i.z.is_overflowed):
            m.d.comb += z.inf(self.i.z.s)
        with m.Else():
            # without this Else, create() would follow inf() in the same
            # branch and always take priority over it
            m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    with m.Else():
        # bypass: only used when out_do_z is set
        m.d.comb += z.v.eq(self.i.oz)
    m.d.comb += self.o.z.eq(z.v)
    return m
1208 class FPPack(FPState
):
1210 def __init__(self
, width
, id_wid
):
1211 FPState
.__init
__(self
, "pack")
1212 self
.mod
= FPPackMod(width
)
1213 self
.out_z
= self
.ospec()
1216 return self
.mod
.ispec()
1219 return self
.mod
.ospec()
1221 def setup(self
, m
, in_z
):
1222 """ links module to inputs and outputs
1224 self
.mod
.setup(m
, in_z
)
1226 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1227 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """ selects "pack_put_z" as the next FSM state
    """
    following = "pack_put_z"
    m.next = following
1233 class FPPutZ(FPState
):
1235 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1236 FPState
.__init
__(self
, state
)
1237 if to_state
is None:
1238 to_state
= "get_ops"
1239 self
.to_state
= to_state
1242 self
.in_mid
= in_mid
1243 self
.out_mid
= out_mid
1245 def action(self
, m
):
1246 if self
.in_mid
is not None:
1247 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1249 self
.out_z
.z
.v
.eq(self
.in_z
)
1251 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1252 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1253 m
.next
= self
.to_state
1255 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1258 class FPPutZIdx(FPState
):
1260 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1261 FPState
.__init
__(self
, state
)
1262 if to_state
is None:
1263 to_state
= "get_ops"
1264 self
.to_state
= to_state
1266 self
.out_zs
= out_zs
1267 self
.in_mid
= in_mid
1269 def action(self
, m
):
1270 outz_stb
= Signal(reset_less
=True)
1271 outz_ack
= Signal(reset_less
=True)
1272 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1273 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1276 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1278 with m
.If(outz_stb
& outz_ack
):
1279 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1280 m
.next
= self
.to_state
1282 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1286 def __init__(self
, width
, id_wid
):
1287 self
.z
= FPOp(width
)
1288 self
.mid
= Signal(id_wid
, reset_less
=True)
1291 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1294 return [self
.z
, self
.mid
]
1299 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1302 * width: bit-width of IEEE754. supported: 16, 32, 64
1303 * id_wid: an identifier that is sync-connected to the input
1304 * single_cycle: True indicates each stage to complete in 1 clock
1305 * compact: True indicates a reduced number of stages
1308 self
.id_wid
= id_wid
1309 self
.single_cycle
= single_cycle
1310 self
.compact
= compact
1312 self
.in_t
= Trigger()
1313 self
.i
= self
.ispec()
1314 self
.o
= self
.ospec()
1319 return FPADDBaseData(self
.width
, self
.id_wid
)
1322 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ appends state to self.states and returns that same object

        the return value lets a state be created and registered in one
        expression, e.g. st = self.add_state(SomeState(...)), after
        which st.setup(...) can be called.
    """
    self.states.append(state)
    return state
1328 def get_fragment(self
, platform
=None):
1329 """ creates the HDL code-fragment for FPAdd
1332 m
.submodules
.out_z
= self
.o
.z
1333 m
.submodules
.in_t
= self
.in_t
1335 self
.get_compact_fragment(m
, platform
)
1337 self
.get_longer_fragment(m
, platform
)
1339 with m
.FSM() as fsm
:
1341 for state
in self
.states
:
1342 with m
.State(state
.state_from
):
1347 def get_longer_fragment(self
, m
, platform
=None):
1349 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1351 get
.setup(m
, self
.i
)
1354 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
1356 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1357 sc
.setup(m
, a
, b
, self
.in_mid
)
1359 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1360 dn
.setup(m
, a
, b
, sc
.in_mid
)
1362 if self
.single_cycle
:
1363 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1364 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1366 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1367 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1369 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1370 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1372 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1373 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1375 if self
.single_cycle
:
1376 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1377 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1379 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1380 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1382 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1383 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1385 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1386 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1388 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1389 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1391 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1392 pa
.in_mid
, self
.out_mid
))
1394 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1395 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """ sets up the reduced-stage variant of the adder

        four stage objects are created (FPGet2Op, FPAddSpecialCasesDeNorm,
        FPAddAlignSingleAdd, FPNormToPack), linked with a StageChain fed
        from self.i, and each is registered through add_state.  a final
        FPPutZ state named "pack_put_z" takes the last stage's out_z and
        targets self.o.  platform is not referenced.
    """
    stage_args = (self.width, self.id_wid)
    get = FPGet2Op("get_ops", "special_cases", *stage_args)
    stages = [
        get,
        FPAddSpecialCasesDeNorm(*stage_args),
        FPAddAlignSingleAdd(*stage_args),
        FPNormToPack(*stage_args),
    ]
    last = stages[-1]

    # the first stage is triggered from this module's input strobe/ack
    get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

    StageChain(stages, specallocate=True).setup(m, self.i)

    for stage in stages:
        self.add_state(stage)

    self.add_state(FPPutZ("pack_put_z", last.out_z.z, self.o,
                          last.out_z.mid, self.o.mid))

    #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
    #                            sc.o.mid, self.o.mid))
1421 class FPADDBase(FPState
):
1423 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1426 * width: bit-width of IEEE754. supported: 16, 32, 64
1427 * id_wid: an identifier that is sync-connected to the input
1428 * single_cycle: True indicates each stage to complete in 1 clock
1430 FPState
.__init
__(self
, "fpadd")
1432 self
.single_cycle
= single_cycle
1433 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1434 self
.o
= self
.ospec()
1436 self
.in_t
= Trigger()
1437 self
.i
= self
.ispec()
1439 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1440 self
.in_accept
= Signal(reset_less
=True)
1441 self
.add_stb
= Signal(reset_less
=True)
1442 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1445 return self
.mod
.ispec()
1448 return self
.mod
.ospec()
1450 def setup(self
, m
, i
, add_stb
, in_mid
):
1451 m
.d
.comb
+= [self
.i
.eq(i
),
1452 self
.mod
.i
.eq(self
.i
),
1453 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1454 #self.add_stb.eq(add_stb),
1455 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1456 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1457 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1458 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1459 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1460 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1463 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1464 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1465 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1466 #m.d.sync += self.in_t.stb.eq(0)
1468 m
.submodules
.fpadd
= self
.mod
1470 def action(self
, m
):
1472 # in_accept is set on incoming strobe HIGH and ack LOW.
1473 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1475 #with m.If(self.in_t.ack):
1476 # m.d.sync += self.in_t.stb.eq(0)
1477 with m
.If(~self
.z_done
):
1478 # not done: test for accepting an incoming operand pair
1479 with m
.If(self
.in_accept
):
1481 self
.add_ack
.eq(1), # acknowledge receipt...
1482 self
.in_t
.stb
.eq(1), # initiate add
1485 m
.d
.sync
+= [self
.add_ack
.eq(0),
1486 self
.in_t
.stb
.eq(0),
1490 # done: acknowledge, and write out id and value
1491 m
.d
.sync
+= [self
.add_ack
.eq(1),
1498 if self
.in_mid
is not None:
1499 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1502 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1504 # move to output state on detecting z ack
1505 with m
.If(self
.out_z
.trigger
):
1506 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1509 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
class FPADDBasePipe(ControlBase):
    """ adder pipeline made of three chained stages:

        FPAddSpecialCasesDeNorm -> FPAddAlignSingleAdd -> FPNormToPack
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # result of connect() is kept and added to comb in elaborate()
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        """ registers the three stages as sub-modules, adds the stored
            connection statements, and returns the resulting Module
        """
        # NOTE(review): a fresh Module is assumed here; confirm that
        # ControlBase does not expect its own elaborate to create it
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ input-side mux pipe: a PriorityCombMuxInPipe with num_rows
        inputs, using a PassThroughStage whose spec is FPADDBaseData
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_spec():
            # called by the stage whenever a data record is needed
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(make_spec),
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ output-side mux pipe: a CombMuxOutPipe with num_rows outputs,
        using a PassThroughStage whose spec is FPPackData
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_spec():
            # called by the stage whenever a data record is needed
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(make_spec),
                                n_len=self.num_rows)
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        self.p = self.inpipe.p  # kinda annoying,
        self.n = self.outpipe.n # use pipe in/out as this class in/out
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes as sub-modules, connects
            inpipe -> fpadd -> outpipe combinatorially, and returns
            the resulting Module
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m
1581 """ FPADD: stages as follows:
1587 FPAddBase---> FPAddBaseMod
1589 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1591 FPAddBase is tricky: it is both a stage and *has* stages.
1592 Connection to FPAddBaseMod therefore requires an in stb/ack
1593 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1594 needs to be the thing that raises the incoming stb.
1597 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1600 * width: bit-width of IEEE754. supported: 16, 32, 64
1601 * id_wid: an identifier that is sync-connected to the input
1602 * single_cycle: True indicates each stage to complete in 1 clock
1605 self
.id_wid
= id_wid
1606 self
.single_cycle
= single_cycle
1608 #self.out_z = FPOp(width)
1609 self
.ids
= FPID(id_wid
)
1612 for i
in range(rs_sz
):
1615 in_a
.name
= "in_a_%d" % i
1616 in_b
.name
= "in_b_%d" % i
1617 rs
.append((in_a
, in_b
))
1621 for i
in range(rs_sz
):
1623 out_z
.name
= "out_z_%d" % i
1625 self
.res
= Array(res
)
def add_state(self, state):
    """ appends state to self.states and returns that same object

        get_fragment relies on the return value: the result of
        add_state is bound to a name and then used (e.g. ab.ispec()).
    """
    self.states.append(state)
    return state
1633 def get_fragment(self
, platform
=None):
1634 """ creates the HDL code-fragment for FPAdd
1637 m
.submodules
+= self
.rs
1639 in_a
= self
.rs
[0][0]
1640 in_b
= self
.rs
[0][1]
1642 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1647 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1652 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1653 ab
= self
.add_state(ab
)
1654 abd
= ab
.ispec() # create an input spec object for FPADDBase
1655 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1656 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1659 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1662 with m
.FSM() as fsm
:
1664 for state
in self
.states
:
1665 with m
.State(state
.state_from
):
1671 if __name__
== "__main__":
1673 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1674 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1675 alu
.rs
[0][1].ports() + \
1676 alu
.res
[0].ports() + \
1677 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1679 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1680 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1681 alu
.in_t
.ports() + \
1682 alu
.out_z
.ports() + \
1683 [alu
.in_mid
, alu
.out_mid
])
1686 # works... but don't use, just do "python fname.py convert -t v"
1687 #print (verilog.convert(alu, ports=[
1688 # ports=alu.in_a.ports() + \
1689 # alu.in_b.ports() + \
1690 # alu.out_z.ports())