1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
, FPID
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
, FPGet2OpMod
, FPGet2Op
)
19 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
20 from fpcommon
.postcalc
import FPAddStage1Data
21 from fpcommon
.postnormalise
import (FPNorm1Data
, FPNorm1ModSingle
,
22 FPNorm1ModMulti
, FPNorm1Single
, FPNorm1Multi
)
23 from fpcommon
.roundz
import (FPRoundData
, FPRoundMod
, FPRound
)
24 from fpcommon
.corrections
import (FPCorrectionsMod
, FPCorrections
)
25 from fpcommon
.pack
import (FPPackData
, FPPackMod
, FPPack
)
26 from fpcommon
.normtopack
import FPNormToPack
29 class FPAddSpecialCasesMod
:
30 """ special cases: NaNs, infs, zeros, denormalised
31 NOTE: some of these are unique to add. see "Special Operations"
32 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
35 def __init__(self
, width
, id_wid
):
42 return FPADDBaseData(self
.width
, self
.id_wid
)
45 return FPSCData(self
.width
, self
.id_wid
)
47 def setup(self
, m
, i
):
48 """ links module to inputs and outputs
50 m
.submodules
.specialcases
= self
51 m
.d
.comb
+= self
.i
.eq(i
)
56 def elaborate(self
, platform
):
59 m
.submodules
.sc_out_z
= self
.o
.z
61 # decode: XXX really should move to separate stage
62 a1
= FPNumIn(None, self
.width
)
63 b1
= FPNumIn(None, self
.width
)
64 m
.submodules
.sc_decode_a
= a1
65 m
.submodules
.sc_decode_b
= b1
66 m
.d
.comb
+= [a1
.decode(self
.i
.a
),
71 m
.d
.comb
+= s_nomatch
.eq(a1
.s
!= b1
.s
)
74 m
.d
.comb
+= m_match
.eq(a1
.m
== b1
.m
)
76 # if a is NaN or b is NaN return NaN
77 with m
.If(a1
.is_nan | b1
.is_nan
):
78 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
79 m
.d
.comb
+= self
.o
.z
.nan(0)
81 # XXX WEIRDNESS for FP16 non-canonical NaN handling
84 ## if a is zero and b is NaN return -b
85 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
86 # m.d.comb += self.o.out_do_z.eq(1)
87 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
89 ## if b is zero and a is NaN return -a
90 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
91 # m.d.comb += self.o.out_do_z.eq(1)
92 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
94 ## if a is -zero and b is NaN return -b
95 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
96 # m.d.comb += self.o.out_do_z.eq(1)
97 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
99 ## if b is -zero and a is NaN return -a
100 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
101 # m.d.comb += self.o.out_do_z.eq(1)
102 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
104 # if a is inf return inf (or NaN)
105 with m
.Elif(a1
.is_inf
):
106 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
107 m
.d
.comb
+= self
.o
.z
.inf(a1
.s
)
108 # if a is inf and signs don't match return NaN
109 with m
.If(b1
.exp_128
& s_nomatch
):
110 m
.d
.comb
+= self
.o
.z
.nan(0)
112 # if b is inf return inf
113 with m
.Elif(b1
.is_inf
):
114 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
115 m
.d
.comb
+= self
.o
.z
.inf(b1
.s
)
117 # if a is zero and b zero return signed-a/b
118 with m
.Elif(a1
.is_zero
& b1
.is_zero
):
119 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
120 m
.d
.comb
+= self
.o
.z
.create(a1
.s
& b1
.s
, b1
.e
, b1
.m
[3:-1])
122 # if a is zero return b
123 with m
.Elif(a1
.is_zero
):
124 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
125 m
.d
.comb
+= self
.o
.z
.create(b1
.s
, b1
.e
, b1
.m
[3:-1])
127 # if b is zero return a
128 with m
.Elif(b1
.is_zero
):
129 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
130 m
.d
.comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[3:-1])
132 # if a equal to -b return zero (+ve zero)
133 with m
.Elif(s_nomatch
& m_match
& (a1
.e
== b1
.e
)):
134 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
135 m
.d
.comb
+= self
.o
.z
.zero(0)
137 # Denormalised Number checks next, so pass a/b data through
139 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
140 m
.d
.comb
+= self
.o
.a
.eq(a1
)
141 m
.d
.comb
+= self
.o
.b
.eq(b1
)
143 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
144 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
149 class FPAddSpecialCases(FPState
):
150 """ special cases: NaNs, infs, zeros, denormalised
151 NOTE: some of these are unique to add. see "Special Operations"
152 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ sets up the "special_cases" state and its combinatorial module

        * width: passed to FPAddSpecialCasesMod
        * id_wid: passed to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ is declared as (width, id_wid):
    # leaving id_wid out raised TypeError on construction.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output data object comes from the module's own ospec()
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
161 def setup(self
, m
, i
):
162 """ links module to inputs and outputs
164 self
.mod
.setup(m
, i
, self
.out_do_z
)
165 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
166 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
170 with m
.If(self
.out_do_z
):
173 m
.next
= "denormalise"
176 class FPAddSpecialCasesDeNorm(FPState
, UnbufferedPipeline
):
177 """ special cases: NaNs, infs, zeros, denormalised
178 NOTE: some of these are unique to add. see "Special Operations"
179 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
182 def __init__(self
, width
, id_wid
):
183 FPState
.__init
__(self
, "special_cases")
186 UnbufferedPipeline
.__init
__(self
, self
) # pipe is its own stage
187 self
.out
= self
.ospec()
190 return FPADDBaseData(self
.width
, self
.id_wid
) # SpecialCases ispec
193 return FPSCData(self
.width
, self
.id_wid
) # DeNorm ospec
195 def setup(self
, m
, i
):
196 """ links module to inputs and outputs
198 smod
= FPAddSpecialCasesMod(self
.width
, self
.id_wid
)
199 dmod
= FPAddDeNormMod(self
.width
, self
.id_wid
)
201 chain
= StageChain([smod
, dmod
])
204 # only needed for break-out (early-out)
205 # self.out_do_z = smod.o.out_do_z
209 def process(self
, i
):
213 # for break-out (early-out)
214 #with m.If(self.out_do_z):
217 m
.d
.sync
+= self
.out
.eq(self
.process(None))
221 class FPAddAlignMultiMod(FPState
):
223 def __init__(self
, width
):
224 self
.in_a
= FPNumBase(width
)
225 self
.in_b
= FPNumBase(width
)
226 self
.out_a
= FPNumIn(None, width
)
227 self
.out_b
= FPNumIn(None, width
)
228 self
.exp_eq
= Signal(reset_less
=True)
230 def elaborate(self
, platform
):
231 # This one however (single-cycle) will do the shift
236 m
.submodules
.align_in_a
= self
.in_a
237 m
.submodules
.align_in_b
= self
.in_b
238 m
.submodules
.align_out_a
= self
.out_a
239 m
.submodules
.align_out_b
= self
.out_b
241 # NOTE: this does *not* do single-cycle multi-shifting,
242 # it *STAYS* in the align state until exponents match
244 # exponent of a greater than b: shift b down
245 m
.d
.comb
+= self
.exp_eq
.eq(0)
246 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
247 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
248 agtb
= Signal(reset_less
=True)
249 altb
= Signal(reset_less
=True)
250 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
251 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
253 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
254 # exponent of b greater than a: shift a down
256 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
257 # exponents equal: move to next stage.
259 m
.d
.comb
+= self
.exp_eq
.eq(1)
263 class FPAddAlignMulti(FPState
):
265 def __init__(self
, width
, id_wid
):
266 FPState
.__init
__(self
, "align")
267 self
.mod
= FPAddAlignMultiMod(width
)
268 self
.out_a
= FPNumIn(None, width
)
269 self
.out_b
= FPNumIn(None, width
)
270 self
.exp_eq
= Signal(reset_less
=True)
272 def setup(self
, m
, in_a
, in_b
):
273 """ links module to inputs and outputs
275 m
.submodules
.align
= self
.mod
276 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
277 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
278 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
279 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
280 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
283 with m
.If(self
.exp_eq
):
289 def __init__(self
, width
, id_wid
):
290 self
.a
= FPNumIn(None, width
)
291 self
.b
= FPNumIn(None, width
)
292 self
.z
= FPNumOut(width
, False)
293 self
.out_do_z
= Signal(reset_less
=True)
294 self
.oz
= Signal(width
, reset_less
=True)
295 self
.mid
= Signal(id_wid
, reset_less
=True)
298 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
299 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
302 class FPAddAlignSingleMod
:
304 def __init__(self
, width
, id_wid
):
307 self
.i
= self
.ispec()
308 self
.o
= self
.ospec()
311 return FPSCData(self
.width
, self
.id_wid
)
314 return FPNumIn2Ops(self
.width
, self
.id_wid
)
316 def process(self
, i
):
319 def setup(self
, m
, i
):
320 """ links module to inputs and outputs
322 m
.submodules
.align
= self
323 m
.d
.comb
+= self
.i
.eq(i
)
325 def elaborate(self
, platform
):
326 """ Aligns A against B or B against A, depending on which has the
327 greater exponent. This is done in a *single* cycle using
328 variable-width bit-shift
330 the shifter used here is quite expensive in terms of gates.
331 Mux A or B in (and out) into temporaries, as only one of them
332 needs to be aligned against the other
336 m
.submodules
.align_in_a
= self
.i
.a
337 m
.submodules
.align_in_b
= self
.i
.b
338 m
.submodules
.align_out_a
= self
.o
.a
339 m
.submodules
.align_out_b
= self
.o
.b
341 # temporary (muxed) input and output to be shifted
342 t_inp
= FPNumBase(self
.width
)
343 t_out
= FPNumIn(None, self
.width
)
344 espec
= (len(self
.i
.a
.e
), True)
345 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
346 m
.submodules
.align_t_in
= t_inp
347 m
.submodules
.align_t_out
= t_out
348 m
.submodules
.multishift_r
= msr
350 ediff
= Signal(espec
, reset_less
=True)
351 ediffr
= Signal(espec
, reset_less
=True)
352 tdiff
= Signal(espec
, reset_less
=True)
353 elz
= Signal(reset_less
=True)
354 egz
= Signal(reset_less
=True)
356 # connect multi-shifter to t_inp/out mantissa (and tdiff)
357 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
358 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
359 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
360 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
361 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
363 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
364 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
365 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
366 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
368 # default: A-exp == B-exp, A and B untouched (fall through)
369 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
370 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
371 # only one shifter (muxed)
372 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
373 # exponent of a greater than b: shift b down
374 with m
.If(~self
.i
.out_do_z
):
376 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
379 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
381 # exponent of b greater than a: shift a down
383 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
386 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
389 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
390 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
391 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
392 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
397 class FPAddAlignSingle(FPState
):
399 def __init__(self
, width
, id_wid
):
400 FPState
.__init
__(self
, "align")
401 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
402 self
.out_a
= FPNumIn(None, width
)
403 self
.out_b
= FPNumIn(None, width
)
405 def setup(self
, m
, i
):
406 """ links module to inputs and outputs
410 # NOTE: could be done as comb
411 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
412 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
418 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
420 def __init__(self
, width
, id_wid
):
421 FPState
.__init
__(self
, "align")
424 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
425 self
.a1o
= self
.ospec()
428 return FPSCData(self
.width
, self
.id_wid
)
431 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
433 def setup(self
, m
, i
):
434 """ links module to inputs and outputs
437 # chain AddAlignSingle, AddStage0 and AddStage1
438 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
439 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
440 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
442 chain
= StageChain([mod
, a0mod
, a1mod
])
447 def process(self
, i
):
451 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
452 m
.next
= "normalise_1"
455 class FPAddStage0Data
:
457 def __init__(self
, width
, id_wid
):
458 self
.z
= FPNumBase(width
, False)
459 self
.out_do_z
= Signal(reset_less
=True)
460 self
.oz
= Signal(width
, reset_less
=True)
461 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
462 self
.mid
= Signal(id_wid
, reset_less
=True)
465 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
466 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
469 class FPAddStage0Mod
:
471 def __init__(self
, width
, id_wid
):
474 self
.i
= self
.ispec()
475 self
.o
= self
.ospec()
478 return FPSCData(self
.width
, self
.id_wid
)
481 return FPAddStage0Data(self
.width
, self
.id_wid
)
483 def process(self
, i
):
486 def setup(self
, m
, i
):
487 """ links module to inputs and outputs
489 m
.submodules
.add0
= self
490 m
.d
.comb
+= self
.i
.eq(i
)
492 def elaborate(self
, platform
):
494 m
.submodules
.add0_in_a
= self
.i
.a
495 m
.submodules
.add0_in_b
= self
.i
.b
496 m
.submodules
.add0_out_z
= self
.o
.z
498 # store intermediate tests (and zero-extended mantissas)
499 seq
= Signal(reset_less
=True)
500 mge
= Signal(reset_less
=True)
501 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
502 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
503 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
504 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
505 am0
.eq(Cat(self
.i
.a
.m
, 0)),
506 bm0
.eq(Cat(self
.i
.b
.m
, 0))
508 # same-sign (both negative or both positive) add mantissas
509 with m
.If(~self
.i
.out_do_z
):
510 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
513 self
.o
.tot
.eq(am0
+ bm0
),
514 self
.o
.z
.s
.eq(self
.i
.a
.s
)
516 # a mantissa greater than b, use a
519 self
.o
.tot
.eq(am0
- bm0
),
520 self
.o
.z
.s
.eq(self
.i
.a
.s
)
522 # b mantissa greater than a, use b
525 self
.o
.tot
.eq(bm0
- am0
),
526 self
.o
.z
.s
.eq(self
.i
.b
.s
)
529 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
530 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
531 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
535 class FPAddStage0(FPState
):
536 """ First stage of add. covers same-sign (add) and subtract
537 special-casing when mantissas are greater or equal, to
538 give greatest accuracy.
def __init__(self, width, id_wid):
    """ sets up the "add_0" state and its combinatorial module

        * width: passed to FPAddStage0Mod
        * id_wid: passed to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ is declared as (width, id_wid):
    # leaving id_wid out raised TypeError on construction.
    self.mod = FPAddStage0Mod(width, id_wid)
    # output data object comes from the module's own ospec()
    self.o = self.mod.ospec()
546 def setup(self
, m
, i
):
547 """ links module to inputs and outputs
551 # NOTE: these could be done as combinatorial (merge add0+add1)
552 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
558 class FPAddStage1Mod(FPState
):
559 """ Second stage of add: preparation for normalisation.
560 detects when tot sum is too big (tot[27] is kinda a carry bit)
563 def __init__(self
, width
, id_wid
):
566 self
.i
= self
.ispec()
567 self
.o
= self
.ospec()
570 return FPAddStage0Data(self
.width
, self
.id_wid
)
573 return FPAddStage1Data(self
.width
, self
.id_wid
)
575 def process(self
, i
):
578 def setup(self
, m
, i
):
579 """ links module to inputs and outputs
581 m
.submodules
.add1
= self
582 m
.submodules
.add1_out_overflow
= self
.o
.of
584 m
.d
.comb
+= self
.i
.eq(i
)
586 def elaborate(self
, platform
):
588 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
589 # tot[-1] (MSB) gets set when the sum overflows. shift result down
590 with m
.If(~self
.i
.out_do_z
):
591 with m
.If(self
.i
.tot
[-1]):
593 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
594 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
595 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
596 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
597 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
598 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
600 # tot[-1] (MSB) zero case
603 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
604 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
605 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
606 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
607 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
610 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
611 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
612 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
617 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "add_1" state, its module and its outputs

        * width: passed to FPAddStage1Mod and to the out_z FPNumBase
        * id_wid: passed to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ is declared as (width, id_wid):
    # leaving id_wid out raised TypeError on construction.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
626 def setup(self
, m
, i
):
627 """ links module to inputs and outputs
631 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
633 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
634 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
635 m
.d
.sync
+= self
.norm_stb
.eq(1)
638 m
.next
= "normalise_1"
642 class FPPutZ(FPState
):
644 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
645 FPState
.__init
__(self
, state
)
648 self
.to_state
= to_state
652 self
.out_mid
= out_mid
655 if self
.in_mid
is not None:
656 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
658 self
.out_z
.z
.v
.eq(self
.in_z
)
660 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
661 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
662 m
.next
= self
.to_state
664 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
667 class FPPutZIdx(FPState
):
669 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
670 FPState
.__init
__(self
, state
)
673 self
.to_state
= to_state
679 outz_stb
= Signal(reset_less
=True)
680 outz_ack
= Signal(reset_less
=True)
681 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
682 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
685 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
687 with m
.If(outz_stb
& outz_ack
):
688 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
689 m
.next
= self
.to_state
691 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
695 def __init__(self
, width
, id_wid
):
697 self
.mid
= Signal(id_wid
, reset_less
=True)
700 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
703 return [self
.z
, self
.mid
]
708 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
711 * width: bit-width of IEEE754. supported: 16, 32, 64
712 * id_wid: an identifier that is sync-connected to the input
713 * single_cycle: True indicates each stage to complete in 1 clock
714 * compact: True indicates a reduced number of stages
718 self
.single_cycle
= single_cycle
719 self
.compact
= compact
721 self
.in_t
= Trigger()
722 self
.i
= self
.ispec()
723 self
.o
= self
.ospec()
728 return FPADDBaseData(self
.width
, self
.id_wid
)
731 return FPOpData(self
.width
, self
.id_wid
)
733 def add_state(self
, state
):
734 self
.states
.append(state
)
737 def get_fragment(self
, platform
=None):
738 """ creates the HDL code-fragment for FPAdd
741 m
.submodules
.out_z
= self
.o
.z
742 m
.submodules
.in_t
= self
.in_t
744 self
.get_compact_fragment(m
, platform
)
746 self
.get_longer_fragment(m
, platform
)
750 for state
in self
.states
:
751 with m
.State(state
.state_from
):
756 def get_longer_fragment(self
, m
, platform
=None):
758 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
763 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
765 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
766 sc
.setup(m
, a
, b
, self
.in_mid
)
768 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
769 dn
.setup(m
, a
, b
, sc
.in_mid
)
771 if self
.single_cycle
:
772 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
773 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
775 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
776 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
778 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
779 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
781 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
782 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
784 if self
.single_cycle
:
785 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
786 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
788 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
789 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
791 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
792 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
794 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
795 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
797 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
798 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
800 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
801 pa
.in_mid
, self
.out_mid
))
803 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
804 pa
.in_mid
, self
.out_mid
))
806 def get_compact_fragment(self
, m
, platform
=None):
809 get
= FPGet2Op("get_ops", "special_cases", self
.width
, self
.id_wid
)
810 sc
= FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
)
811 alm
= FPAddAlignSingleAdd(self
.width
, self
.id_wid
)
812 n1
= FPNormToPack(self
.width
, self
.id_wid
)
814 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
816 chainlist
= [get
, sc
, alm
, n1
]
817 chain
= StageChain(chainlist
, specallocate
=True)
818 chain
.setup(m
, self
.i
)
820 for mod
in chainlist
:
821 sc
= self
.add_state(mod
)
823 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
824 n1
.out_z
.mid
, self
.o
.mid
))
826 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
827 # sc.o.mid, self.o.mid))
830 class FPADDBase(FPState
):
832 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
835 * width: bit-width of IEEE754. supported: 16, 32, 64
836 * id_wid: an identifier that is sync-connected to the input
837 * single_cycle: True indicates each stage to complete in 1 clock
839 FPState
.__init
__(self
, "fpadd")
841 self
.single_cycle
= single_cycle
842 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
843 self
.o
= self
.ospec()
845 self
.in_t
= Trigger()
846 self
.i
= self
.ispec()
848 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
849 self
.in_accept
= Signal(reset_less
=True)
850 self
.add_stb
= Signal(reset_less
=True)
851 self
.add_ack
= Signal(reset
=0, reset_less
=True)
854 return self
.mod
.ispec()
857 return self
.mod
.ospec()
859 def setup(self
, m
, i
, add_stb
, in_mid
):
860 m
.d
.comb
+= [self
.i
.eq(i
),
861 self
.mod
.i
.eq(self
.i
),
862 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
863 #self.add_stb.eq(add_stb),
864 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
865 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
866 self
.o
.mid
.eq(self
.mod
.o
.mid
),
867 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
868 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
869 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
872 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
873 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
874 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
875 #m.d.sync += self.in_t.stb.eq(0)
877 m
.submodules
.fpadd
= self
.mod
881 # in_accept is set on incoming strobe HIGH and ack LOW.
882 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
884 #with m.If(self.in_t.ack):
885 # m.d.sync += self.in_t.stb.eq(0)
886 with m
.If(~self
.z_done
):
887 # not done: test for accepting an incoming operand pair
888 with m
.If(self
.in_accept
):
890 self
.add_ack
.eq(1), # acknowledge receipt...
891 self
.in_t
.stb
.eq(1), # initiate add
894 m
.d
.sync
+= [self
.add_ack
.eq(0),
899 # done: acknowledge, and write out id and value
900 m
.d
.sync
+= [self
.add_ack
.eq(1),
907 if self
.in_mid
is not None:
908 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
911 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
913 # move to output state on detecting z ack
914 with m
.If(self
.out_z
.trigger
):
915 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
918 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
921 class FPADDBasePipe(ControlBase
):
def __init__(self, width, id_wid):
    """ creates the three pipes (special-cases/denorm, align/add,
        norm-to-pack) and records the equations that connect them

        * width: passed to each of the three pipes
        * id_wid: passed to each of the three pipes
    """
    ControlBase.__init__(self)

    # built in the same order as they are connected
    pipe_classes = (FPAddSpecialCasesDeNorm, FPAddAlignSingleAdd,
                    FPNormToPack)
    pipes = [pipe_cls(width, id_wid) for pipe_cls in pipe_classes]
    self.pipe1, self.pipe2, self.pipe3 = pipes

    # result of connect() is kept for use in elaborate
    self._eqs = self.connect(pipes)
930 def elaborate(self
, platform
):
932 m
.submodules
.scnorm
= self
.pipe1
933 m
.submodules
.addalign
= self
.pipe2
934 m
.submodules
.normpack
= self
.pipe3
935 m
.d
.comb
+= self
._eqs
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe built around a PassThroughStage whose
        data objects are FPADDBaseData(width, id_wid)
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_spec():
            # spec factory handed to PassThroughStage
            return FPADDBaseData(width, id_wid)

        # p_len is set to the number of rows
        PriorityCombMuxInPipe.__init__(self, PassThroughStage(make_spec),
                                       p_len=num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe built around a PassThroughStage whose
        data objects are FPPackData(width, id_wid)
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_spec():
            # spec factory handed to PassThroughStage
            return FPPackData(width, id_wid)

        # n_len is set to the number of rows
        CombMuxOutPipe.__init__(self, PassThroughStage(make_spec),
                                n_len=num_rows)
956 """ Reservation-Station version of FPADD pipeline.
958 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
959 * 3-stage adder pipeline
960 * fan-out on outputs (an array of FPPackData: z,mid)
962 Fan-in and Fan-out are combinatorial.
def __init__(self, width, id_wid, num_rows):
    """ creates the fan-in pipe, the adder pipe and the fan-out pipe

        * width: passed to all three pipes
        * id_wid: passed to all three pipes
        * num_rows: passed to the fan-in and fan-out pipes
    """
    fan_in = FPADDInMuxPipe(width, id_wid, num_rows)
    adder = FPADDBasePipe(width, id_wid)
    fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)

    self.num_rows = num_rows
    self.inpipe = fan_in    # fan-in
    self.fpadd = adder      # add stage
    self.outpipe = fan_out  # fan-out

    # this class's own in/out are simply those of the end pipes
    self.p = fan_in.p
    self.n = fan_out.n
    self._ports = fan_in.ports() + fan_out.ports()
974 def elaborate(self
, platform
):
976 m
.submodules
.inpipe
= self
.inpipe
977 m
.submodules
.fpadd
= self
.fpadd
978 m
.submodules
.outpipe
= self
.outpipe
980 m
.d
.comb
+= self
.inpipe
.n
.connect_to_next(self
.fpadd
.p
)
981 m
.d
.comb
+= self
.fpadd
.connect_to_next(self
.outpipe
)
990 """ FPADD: stages as follows:
996 FPAddBase---> FPAddBaseMod
998 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1000 FPAddBase is tricky: it is both a stage and *has* stages.
1001 Connection to FPAddBaseMod therefore requires an in stb/ack
1002 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1003 needs to be the thing that raises the incoming stb.
1006 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1009 * width: bit-width of IEEE754. supported: 16, 32, 64
1010 * id_wid: an identifier that is sync-connected to the input
1011 * single_cycle: True indicates each stage to complete in 1 clock
1014 self
.id_wid
= id_wid
1015 self
.single_cycle
= single_cycle
1017 #self.out_z = FPOp(width)
1018 self
.ids
= FPID(id_wid
)
1021 for i
in range(rs_sz
):
1024 in_a
.name
= "in_a_%d" % i
1025 in_b
.name
= "in_b_%d" % i
1026 rs
.append((in_a
, in_b
))
1030 for i
in range(rs_sz
):
1032 out_z
.name
= "out_z_%d" % i
1034 self
.res
= Array(res
)
1038 def add_state(self
, state
):
1039 self
.states
.append(state
)
1042 def get_fragment(self
, platform
=None):
1043 """ creates the HDL code-fragment for FPAdd
1046 m
.submodules
+= self
.rs
1048 in_a
= self
.rs
[0][0]
1049 in_b
= self
.rs
[0][1]
1051 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1056 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1061 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1062 ab
= self
.add_state(ab
)
1063 abd
= ab
.ispec() # create an input spec object for FPADDBase
1064 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1065 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1068 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1071 with m
.FSM() as fsm
:
1073 for state
in self
.states
:
1074 with m
.State(state
.state_from
):
1080 if __name__
== "__main__":
1082 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1083 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1084 alu
.rs
[0][1].ports() + \
1085 alu
.res
[0].ports() + \
1086 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1088 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1089 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1090 alu
.in_t
.ports() + \
1091 alu
.out_z
.ports() + \
1092 [alu
.in_mid
, alu
.out_mid
])
1095 # works... but don't use, just do "python fname.py convert -t v"
1096 #print (verilog.convert(alu, ports=[
1097 # ports=alu.in_a.ports() + \
1098 # alu.in_b.ports() + \
1099 # alu.out_z.ports())