1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
, FPID
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
,
19 FPGet2OpMod
, FPGet2Op
)
20 from fpadd
.specialcases
import (FPAddSpecialCasesMod
, FPAddSpecialCases
,
21 FPAddSpecialCasesDeNorm
)
22 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
23 from fpcommon
.postcalc
import FPAddStage1Data
24 from fpcommon
.postnormalise
import (FPNorm1Data
, FPNorm1ModSingle
,
25 FPNorm1ModMulti
, FPNorm1Single
, FPNorm1Multi
)
26 from fpcommon
.roundz
import (FPRoundData
, FPRoundMod
, FPRound
)
27 from fpcommon
.corrections
import (FPCorrectionsMod
, FPCorrections
)
28 from fpcommon
.pack
import (FPPackData
, FPPackMod
, FPPack
)
29 from fpcommon
.normtopack
import FPNormToPack
30 from fpcommon
.putz
import (FPPutZ
, FPPutZIdx
)
class FPAddAlignMultiMod(FPState):
    """ Multi-cycle exponent alignment module.

        NOTE: this does *not* do single-cycle multi-shifting.  On each
        evaluation the operand with the smaller exponent is passed
        through shift_down(); exp_eq is only raised when the exponents
        of in_a and in_b are equal, so the caller *STAYS* in the align
        state until they match.

        * width: bit-width handed to FPNumBase / FPNumIn
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        """ creates the combinatorial alignment logic and returns it
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # defaults: exponents not (yet) equal, operands passed through
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)

        # exponent comparisons, used to select which operand is shifted
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)

        return m
75 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ Creates the "align" state together with its FPAddAlignMultiMod.

        * width: passed to FPAddAlignMultiMod and to the FPNumIn outputs
        * id_wid: accepted for signature parity; not used in this
          constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # registered copies of the module's out_a / out_b
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors the module's exp_eq flag
    self.exp_eq = Signal(reset_less=True)
84 def setup(self
, m
, in_a
, in_b
):
85 """ links module to inputs and outputs
87 m
.submodules
.align
= self
.mod
88 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
89 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
90 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
91 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
92 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
95 with m
.If(self
.exp_eq
):
101 def __init__(self
, width
, id_wid
):
102 self
.a
= FPNumIn(None, width
)
103 self
.b
= FPNumIn(None, width
)
104 self
.z
= FPNumOut(width
, False)
105 self
.out_do_z
= Signal(reset_less
=True)
106 self
.oz
= Signal(width
, reset_less
=True)
107 self
.mid
= Signal(id_wid
, reset_less
=True)
110 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
111 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
114 class FPAddAlignSingleMod
:
116 def __init__(self
, width
, id_wid
):
119 self
.i
= self
.ispec()
120 self
.o
= self
.ospec()
123 return FPSCData(self
.width
, self
.id_wid
)
126 return FPNumIn2Ops(self
.width
, self
.id_wid
)
128 def process(self
, i
):
131 def setup(self
, m
, i
):
132 """ links module to inputs and outputs
134 m
.submodules
.align
= self
135 m
.d
.comb
+= self
.i
.eq(i
)
137 def elaborate(self
, platform
):
138 """ Aligns A against B or B against A, depending on which has the
139 greater exponent. This is done in a *single* cycle using
140 variable-width bit-shift
142 the shifter used here is quite expensive in terms of gates.
143 Mux A or B in (and out) into temporaries, as only one of them
144 needs to be aligned against the other
148 m
.submodules
.align_in_a
= self
.i
.a
149 m
.submodules
.align_in_b
= self
.i
.b
150 m
.submodules
.align_out_a
= self
.o
.a
151 m
.submodules
.align_out_b
= self
.o
.b
153 # temporary (muxed) input and output to be shifted
154 t_inp
= FPNumBase(self
.width
)
155 t_out
= FPNumIn(None, self
.width
)
156 espec
= (len(self
.i
.a
.e
), True)
157 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
158 m
.submodules
.align_t_in
= t_inp
159 m
.submodules
.align_t_out
= t_out
160 m
.submodules
.multishift_r
= msr
162 ediff
= Signal(espec
, reset_less
=True)
163 ediffr
= Signal(espec
, reset_less
=True)
164 tdiff
= Signal(espec
, reset_less
=True)
165 elz
= Signal(reset_less
=True)
166 egz
= Signal(reset_less
=True)
168 # connect multi-shifter to t_inp/out mantissa (and tdiff)
169 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
170 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
171 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
172 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
173 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
175 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
176 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
177 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
178 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
180 # default: A-exp == B-exp, A and B untouched (fall through)
181 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
182 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
183 # only one shifter (muxed)
184 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
185 # exponent of a greater than b: shift b down
186 with m
.If(~self
.i
.out_do_z
):
188 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
191 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
193 # exponent of b greater than a: shift a down
195 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
198 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
201 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
202 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
203 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
204 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
209 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ Creates the "align" state together with its FPAddAlignSingleMod.

        * width: passed to FPAddAlignSingleMod and to the FPNumIn outputs
        * id_wid: passed to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # output operands held by this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
217 def setup(self
, m
, i
):
218 """ links module to inputs and outputs
222 # NOTE: could be done as comb
223 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
224 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
230 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
232 def __init__(self
, width
, id_wid
):
233 FPState
.__init
__(self
, "align")
236 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
237 self
.a1o
= self
.ospec()
240 return FPSCData(self
.width
, self
.id_wid
)
243 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
245 def setup(self
, m
, i
):
246 """ links module to inputs and outputs
249 # chain AddAlignSingle, AddStage0 and AddStage1
250 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
251 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
252 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
254 chain
= StageChain([mod
, a0mod
, a1mod
])
259 def process(self
, i
):
263 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
264 m
.next
= "normalise_1"
267 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ Data record produced by the first add stage.

        * width: bit-width used for z and for the oz signal
        * id_wid: bit-width of the mid identifier signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # tot is 4 bits wider than the mantissa of z
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
277 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
278 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
281 class FPAddStage0Mod
:
283 def __init__(self
, width
, id_wid
):
286 self
.i
= self
.ispec()
287 self
.o
= self
.ospec()
290 return FPSCData(self
.width
, self
.id_wid
)
293 return FPAddStage0Data(self
.width
, self
.id_wid
)
295 def process(self
, i
):
298 def setup(self
, m
, i
):
299 """ links module to inputs and outputs
301 m
.submodules
.add0
= self
302 m
.d
.comb
+= self
.i
.eq(i
)
304 def elaborate(self
, platform
):
306 m
.submodules
.add0_in_a
= self
.i
.a
307 m
.submodules
.add0_in_b
= self
.i
.b
308 m
.submodules
.add0_out_z
= self
.o
.z
310 # store intermediate tests (and zero-extended mantissas)
311 seq
= Signal(reset_less
=True)
312 mge
= Signal(reset_less
=True)
313 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
314 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
315 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
316 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
317 am0
.eq(Cat(self
.i
.a
.m
, 0)),
318 bm0
.eq(Cat(self
.i
.b
.m
, 0))
320 # same-sign (both negative or both positive) add mantissas
321 with m
.If(~self
.i
.out_do_z
):
322 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
325 self
.o
.tot
.eq(am0
+ bm0
),
326 self
.o
.z
.s
.eq(self
.i
.a
.s
)
328 # a mantissa greater than b, use a
331 self
.o
.tot
.eq(am0
- bm0
),
332 self
.o
.z
.s
.eq(self
.i
.a
.s
)
334 # b mantissa greater than a, use b
337 self
.o
.tot
.eq(bm0
- am0
),
338 self
.o
.z
.s
.eq(self
.i
.b
.s
)
341 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
342 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
343 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
347 class FPAddStage0(FPState
):
348 """ First stage of add. covers same-sign (add) and subtract
349 special-casing when mantissas are greater or equal, to
350 give greatest accuracy.
def __init__(self, width, id_wid):
    """ Creates the "add_0" state and its FPAddStage0Mod.

        * width: bit-width passed to FPAddStage0Mod
        * id_wid: identifier width passed to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ requires both width and id_wid: passing
    # width alone raises TypeError at construction time.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
358 def setup(self
, m
, i
):
359 """ links module to inputs and outputs
363 # NOTE: these could be done as combinatorial (merge add0+add1)
364 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
370 class FPAddStage1Mod(FPState
):
371 """ Second stage of add: preparation for normalisation.
372 detects when tot sum is too big (tot[27] is kinda a carry bit)
375 def __init__(self
, width
, id_wid
):
378 self
.i
= self
.ispec()
379 self
.o
= self
.ospec()
382 return FPAddStage0Data(self
.width
, self
.id_wid
)
385 return FPAddStage1Data(self
.width
, self
.id_wid
)
387 def process(self
, i
):
390 def setup(self
, m
, i
):
391 """ links module to inputs and outputs
393 m
.submodules
.add1
= self
394 m
.submodules
.add1_out_overflow
= self
.o
.of
396 m
.d
.comb
+= self
.i
.eq(i
)
398 def elaborate(self
, platform
):
400 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
401 # tot[-1] (MSB) gets set when the sum overflows. shift result down
402 with m
.If(~self
.i
.out_do_z
):
403 with m
.If(self
.i
.tot
[-1]):
405 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
406 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
407 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
408 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
409 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
410 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
412 # tot[-1] (MSB) zero case
415 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
416 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
417 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
418 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
419 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
422 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
423 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
424 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
429 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ Creates the "add_1" state and its FPAddStage1Mod.

        * width: bit-width passed to FPAddStage1Mod and to out_z
        * id_wid: identifier width passed to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ requires both width and id_wid: passing
    # width alone raises TypeError at construction time.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
438 def setup(self
, m
, i
):
439 """ links module to inputs and outputs
443 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
445 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
446 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
447 m
.d
.sync
+= self
.norm_stb
.eq(1)
450 m
.next
= "normalise_1"
456 def __init__(self
, width
, id_wid
):
458 self
.mid
= Signal(id_wid
, reset_less
=True)
461 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
464 return [self
.z
, self
.mid
]
469 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
472 * width: bit-width of IEEE754. supported: 16, 32, 64
473 * id_wid: an identifier that is sync-connected to the input
474 * single_cycle: True indicates each stage to complete in 1 clock
475 * compact: True indicates a reduced number of stages
479 self
.single_cycle
= single_cycle
480 self
.compact
= compact
482 self
.in_t
= Trigger()
483 self
.i
= self
.ispec()
484 self
.o
= self
.ospec()
489 return FPADDBaseData(self
.width
, self
.id_wid
)
492 return FPOpData(self
.width
, self
.id_wid
)
494 def add_state(self
, state
):
495 self
.states
.append(state
)
498 def get_fragment(self
, platform
=None):
499 """ creates the HDL code-fragment for FPAdd
502 m
.submodules
.out_z
= self
.o
.z
503 m
.submodules
.in_t
= self
.in_t
505 self
.get_compact_fragment(m
, platform
)
507 self
.get_longer_fragment(m
, platform
)
511 for state
in self
.states
:
512 with m
.State(state
.state_from
):
517 def get_longer_fragment(self
, m
, platform
=None):
519 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
524 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
526 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
527 sc
.setup(m
, a
, b
, self
.in_mid
)
529 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
530 dn
.setup(m
, a
, b
, sc
.in_mid
)
532 if self
.single_cycle
:
533 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
534 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
536 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
537 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
539 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
540 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
542 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
543 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
545 if self
.single_cycle
:
546 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
547 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
549 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
550 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
552 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
553 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
555 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
556 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
558 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
559 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
561 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
562 pa
.in_mid
, self
.out_mid
))
564 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
565 pa
.in_mid
, self
.out_mid
))
567 def get_compact_fragment(self
, m
, platform
=None):
570 get
= FPGet2Op("get_ops", "special_cases", self
.width
, self
.id_wid
)
571 sc
= FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
)
572 alm
= FPAddAlignSingleAdd(self
.width
, self
.id_wid
)
573 n1
= FPNormToPack(self
.width
, self
.id_wid
)
575 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
577 chainlist
= [get
, sc
, alm
, n1
]
578 chain
= StageChain(chainlist
, specallocate
=True)
579 chain
.setup(m
, self
.i
)
581 for mod
in chainlist
:
582 sc
= self
.add_state(mod
)
584 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
585 n1
.out_z
.mid
, self
.o
.mid
))
587 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
588 # sc.o.mid, self.o.mid))
591 class FPADDBase(FPState
):
593 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
596 * width: bit-width of IEEE754. supported: 16, 32, 64
597 * id_wid: an identifier that is sync-connected to the input
598 * single_cycle: True indicates each stage to complete in 1 clock
600 FPState
.__init
__(self
, "fpadd")
602 self
.single_cycle
= single_cycle
603 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
604 self
.o
= self
.ospec()
606 self
.in_t
= Trigger()
607 self
.i
= self
.ispec()
609 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
610 self
.in_accept
= Signal(reset_less
=True)
611 self
.add_stb
= Signal(reset_less
=True)
612 self
.add_ack
= Signal(reset
=0, reset_less
=True)
615 return self
.mod
.ispec()
618 return self
.mod
.ospec()
620 def setup(self
, m
, i
, add_stb
, in_mid
):
621 m
.d
.comb
+= [self
.i
.eq(i
),
622 self
.mod
.i
.eq(self
.i
),
623 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
624 #self.add_stb.eq(add_stb),
625 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
626 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
627 self
.o
.mid
.eq(self
.mod
.o
.mid
),
628 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
629 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
630 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
633 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
634 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
635 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
636 #m.d.sync += self.in_t.stb.eq(0)
638 m
.submodules
.fpadd
= self
.mod
642 # in_accept is set on incoming strobe HIGH and ack LOW.
643 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
645 #with m.If(self.in_t.ack):
646 # m.d.sync += self.in_t.stb.eq(0)
647 with m
.If(~self
.z_done
):
648 # not done: test for accepting an incoming operand pair
649 with m
.If(self
.in_accept
):
651 self
.add_ack
.eq(1), # acknowledge receipt...
652 self
.in_t
.stb
.eq(1), # initiate add
655 m
.d
.sync
+= [self
.add_ack
.eq(0),
660 # done: acknowledge, and write out id and value
661 m
.d
.sync
+= [self
.add_ack
.eq(1),
668 if self
.in_mid
is not None:
669 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
672 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
674 # move to output state on detecting z ack
675 with m
.If(self
.out_z
.trigger
):
676 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
679 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
class FPADDBasePipe(ControlBase):
    """ Adder pipeline built from three chained stages:

        * pipe1: FPAddSpecialCasesDeNorm
        * pipe2: FPAddAlignSingleAdd
        * pipe3: FPNormToPack

        * width: bit-width passed to every stage
        * id_wid: identifier width passed to every stage
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements, added to the comb domain in elaborate()
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        """ registers the three stages as submodules, applies the
            stored connections and returns the module
        """
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in pipe: a PriorityCombMuxInPipe with num_rows inputs whose
        pass-through stage carries FPADDBaseData(width, id_wid).
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # fresh data-spec object each time the stage asks for one
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out pipe: a CombMuxOutPipe with num_rows outputs whose
        pass-through stage carries FPPackData(width, id_wid).
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # fresh data-spec object each time the stage asks for one
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
717 """ Reservation-Station version of FPADD pipeline.
719 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
720 * 3-stage adder pipeline
721 * fan-out on outputs (an array of FPPackData: z,mid)
723 Fan-in and Fan-out are combinatorial.
def __init__(self, width, id_wid, num_rows):
    """ Builds the fan-in pipe, the adder pipeline and the fan-out pipe.

        * width: bit-width passed to all three pipes
        * id_wid: identifier width passed to all three pipes
        * num_rows: number of rows given to the fan-in and fan-out pipes
    """
    self.num_rows = num_rows
    self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
    self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
    self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

    self.p = self.inpipe.p  # kinda annoying,
    self.n = self.outpipe.n # use pipe in/out as this class in/out
    # port list: fan-in ports followed by fan-out ports
    self._ports = self.inpipe.ports() + self.outpipe.ports()
def elaborate(self, platform):
    """ registers the fan-in, adder and fan-out pipes as submodules,
        chains them together combinatorially and returns the module
    """
    m = Module()
    m.submodules.inpipe = self.inpipe
    m.submodules.fpadd = self.fpadd
    m.submodules.outpipe = self.outpipe

    # fan-in -> adder -> fan-out
    m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
    m.d.comb += self.fpadd.connect_to_next(self.outpipe)

    return m
751 """ FPADD: stages as follows:
757 FPAddBase---> FPAddBaseMod
759 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
761 FPAddBase is tricky: it is both a stage and *has* stages.
762 Connection to FPAddBaseMod therefore requires an in stb/ack
763 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
764 needs to be the thing that raises the incoming stb.
767 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
770 * width: bit-width of IEEE754. supported: 16, 32, 64
771 * id_wid: an identifier that is sync-connected to the input
772 * single_cycle: True indicates each stage to complete in 1 clock
776 self
.single_cycle
= single_cycle
778 #self.out_z = FPOp(width)
779 self
.ids
= FPID(id_wid
)
782 for i
in range(rs_sz
):
785 in_a
.name
= "in_a_%d" % i
786 in_b
.name
= "in_b_%d" % i
787 rs
.append((in_a
, in_b
))
791 for i
in range(rs_sz
):
793 out_z
.name
= "out_z_%d" % i
795 self
.res
= Array(res
)
799 def add_state(self
, state
):
800 self
.states
.append(state
)
803 def get_fragment(self
, platform
=None):
804 """ creates the HDL code-fragment for FPAdd
807 m
.submodules
+= self
.rs
812 geta
= self
.add_state(FPGetOp("get_a", "get_b",
817 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
822 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
823 ab
= self
.add_state(ab
)
824 abd
= ab
.ispec() # create an input spec object for FPADDBase
825 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
826 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
829 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
834 for state
in self
.states
:
835 with m
.State(state
.state_from
):
841 if __name__
== "__main__":
843 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
844 main(alu
, ports
=alu
.rs
[0][0].ports() + \
845 alu
.rs
[0][1].ports() + \
846 alu
.res
[0].ports() + \
847 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
849 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
850 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
852 alu
.out_z
.ports() + \
853 [alu
.in_mid
, alu
.out_mid
])
856 # works... but don't use, just do "python fname.py convert -t v"
857 #print (verilog.convert(alu, ports=[
858 # ports=alu.in_a.ports() + \
859 # alu.in_b.ports() + \