# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
, FPID
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
,
19 FPGet2OpMod
, FPGet2Op
)
20 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
21 from fpcommon
.postcalc
import FPAddStage1Data
22 from fpcommon
.postnormalise
import (FPNorm1Data
, FPNorm1ModSingle
,
23 FPNorm1ModMulti
, FPNorm1Single
, FPNorm1Multi
)
24 from fpcommon
.roundz
import (FPRoundData
, FPRoundMod
, FPRound
)
25 from fpcommon
.corrections
import (FPCorrectionsMod
, FPCorrections
)
26 from fpcommon
.pack
import (FPPackData
, FPPackMod
, FPPack
)
27 from fpcommon
.normtopack
import FPNormToPack
28 from fpcommon
.putz
import (FPPutZ
, FPPutZIdx
)
30 from fpadd
.specialcases
import (FPAddSpecialCasesMod
, FPAddSpecialCases
,
31 FPAddSpecialCasesDeNorm
)
32 from fpadd
.align
import (FPAddAlignMulti
, FPAddAlignMultiMod
, FPNumIn2Ops
,
33 FPAddAlignSingleMod
, FPAddAlignSingle
)
34 from fpadd
.add0
import (FPAddStage0Data
, FPAddStage0Mod
, FPAddStage0
)
35 from fpadd
.add1
import (FPAddStage1Mod
, FPAddStage1
)
36 from fpadd
.addstages
import FPAddAlignSingleAdd
40 def __init__(self
, width
, id_wid
):
42 self
.mid
= Signal(id_wid
, reset_less
=True)
45 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
48 return [self
.z
, self
.mid
]
53 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
56 * width: bit-width of IEEE754. supported: 16, 32, 64
57 * id_wid: an identifier that is sync-connected to the input
58 * single_cycle: True indicates each stage to complete in 1 clock
59 * compact: True indicates a reduced number of stages
63 self
.single_cycle
= single_cycle
64 self
.compact
= compact
73 return FPADDBaseData(self
.width
, self
.id_wid
)
76 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """Register *state* and hand the same object back.

    * state: the state object to append to ``self.states``

    Returns ``state`` unchanged, so that a caller can write
    ``x = self.add_state(SomeState(...))`` and keep configuring ``x``.
    """
    self.states.append(state)
    # callers assign the result and immediately call methods on it,
    # so the registered object must be returned rather than None.
    return state
82 def get_fragment(self
, platform
=None):
83 """ creates the HDL code-fragment for FPAdd
86 m
.submodules
.out_z
= self
.o
.z
87 m
.submodules
.in_t
= self
.in_t
89 self
.get_compact_fragment(m
, platform
)
91 self
.get_longer_fragment(m
, platform
)
95 for state
in self
.states
:
96 with m
.State(state
.state_from
):
101 def get_longer_fragment(self
, m
, platform
=None):
103 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
108 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
110 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
111 sc
.setup(m
, a
, b
, self
.in_mid
)
113 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
114 dn
.setup(m
, a
, b
, sc
.in_mid
)
116 if self
.single_cycle
:
117 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
118 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
120 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
121 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
123 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
124 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
126 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
127 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
129 if self
.single_cycle
:
130 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
131 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
133 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
134 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
136 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
137 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
139 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
140 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
142 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
143 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
145 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
146 pa
.in_mid
, self
.out_mid
))
148 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
149 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """Build the reduced-stage variant of the adder inside *m*.

    * m: the module being populated
    * platform: accepted for interface symmetry; not used here

    Four stage objects are created (FPGet2Op, FPAddSpecialCasesDeNorm,
    FPAddAlignSingleAdd, FPNormToPack), linked with a StageChain that is
    set up against ``self.i``, and each one is registered through
    ``add_state``.  A final FPPutZ state named "pack_put_z" is then
    registered, built from the last stage's ``out_z`` (``z`` and ``mid``)
    and from ``self.o`` / ``self.o.mid``.
    """
    width, id_wid = self.width, self.id_wid

    fetch = FPGet2Op("get_ops", "special_cases", width, id_wid)
    specials = FPAddSpecialCasesDeNorm(width, id_wid)
    align_add = FPAddAlignSingleAdd(width, id_wid)
    norm_pack = FPNormToPack(width, id_wid)

    # the first stage is driven by the incoming strobe/ack pair
    fetch.trigger_setup(m, self.in_t.stb, self.in_t.ack)

    stages = [fetch, specials, align_add, norm_pack]
    StageChain(stages, specallocate=True).setup(m, self.i)

    for stage in stages:
        self.add_state(stage)

    result = norm_pack.out_z
    self.add_state(FPPutZ("pack_put_z", result.z, self.o,
                          result.mid, self.o.mid))

    # disabled in the original source, kept for reference:
    #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
    #                            sc.o.mid, self.o.mid))
174 class FPADDBase(FPState
):
176 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
179 * width: bit-width of IEEE754. supported: 16, 32, 64
180 * id_wid: an identifier that is sync-connected to the input
181 * single_cycle: True indicates each stage to complete in 1 clock
183 FPState
.__init
__(self
, "fpadd")
185 self
.single_cycle
= single_cycle
186 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
187 self
.o
= self
.ospec()
189 self
.in_t
= Trigger()
190 self
.i
= self
.ispec()
192 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
193 self
.in_accept
= Signal(reset_less
=True)
194 self
.add_stb
= Signal(reset_less
=True)
195 self
.add_ack
= Signal(reset
=0, reset_less
=True)
198 return self
.mod
.ispec()
201 return self
.mod
.ospec()
def setup(self, m, i, add_stb, in_mid):
    """Wire this state to its inner adder module (``self.mod``).

    * m: the module being populated
    * i: input data, copied combinatorially into ``self.i``
    * add_stb: strobe, registered (sync) into ``self.add_stb``
    * in_mid: accepted but not referenced in this method

    Also registers ``self.mod`` as the ``fpadd`` submodule of *m*.
    """
    inner = self.mod

    m.d.comb += [
        # input path: caller -> self.i -> inner module
        self.i.eq(i),
        inner.i.eq(self.i),
        self.z_done.eq(inner.o.z.trigger),
        #self.add_stb.eq(add_stb),
        # input trigger handshake is passed straight through
        inner.in_t.stb.eq(self.in_t.stb),
        self.in_t.ack.eq(inner.in_t.ack),
        # output path: inner module -> self.o, with ack going back in
        self.o.mid.eq(inner.o.mid),
        self.o.z.v.eq(inner.o.z.v),
        self.o.z.stb.eq(inner.o.z.stb),
        inner.o.z.ack.eq(self.o.z.ack),
    ]

    m.d.sync += [
        self.add_stb.eq(add_stb),
        self.add_ack.eq(0),  # sets to zero when not in active state
        self.o.z.ack.eq(0),  # likewise
    ]
    #m.d.sync += self.in_t.stb.eq(0)

    m.submodules.fpadd = inner
225 # in_accept is set on incoming strobe HIGH and ack LOW.
226 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
228 #with m.If(self.in_t.ack):
229 # m.d.sync += self.in_t.stb.eq(0)
230 with m
.If(~self
.z_done
):
231 # not done: test for accepting an incoming operand pair
232 with m
.If(self
.in_accept
):
234 self
.add_ack
.eq(1), # acknowledge receipt...
235 self
.in_t
.stb
.eq(1), # initiate add
238 m
.d
.sync
+= [self
.add_ack
.eq(0),
243 # done: acknowledge, and write out id and value
244 m
.d
.sync
+= [self
.add_ack
.eq(1),
251 if self
.in_mid
is not None:
252 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
255 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
257 # move to output state on detecting z ack
258 with m
.If(self
.out_z
.trigger
):
259 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
262 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
class FPADDBasePipe(ControlBase):
    """Adder pipeline made of three chained stages.

    The stages are FPAddSpecialCasesDeNorm, FPAddAlignSingleAdd and
    FPNormToPack, held as ``pipe1``..``pipe3``.

    * width: passed to each stage constructor
    * id_wid: passed to each stage constructor
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connect() produces the statements linking the three stages;
        # they are stored here and applied in elaborate().
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        """Return a module holding the three stages and their links.

        * platform: not used here
        """
        # NOTE(review): no binding for ``m`` was visible in this method;
        # a fresh Module is created - confirm nothing from the base class
        # is expected to supply it instead.
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        # the populated module must be handed back to the caller
        return m
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """PriorityCombMuxInPipe specialised with a pass-through stage.

    * width, id_wid: used to build an FPADDBaseData each time the
      stage's spec function is called
    * num_rows: stored on the instance and given to the base class
      as ``p_len``
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # spec factory handed to PassThroughStage
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
class FPADDMuxOutPipe(CombMuxOutPipe):
    """CombMuxOutPipe specialised with a pass-through stage.

    * width, id_wid: used to build an FPPackData each time the
      stage's spec function is called
    * num_rows: stored on the instance and given to the base class
      as ``n_len``
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # spec factory handed to PassThroughStage
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
300 """ Reservation-Station version of FPADD pipeline.
302 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
303 * 3-stage adder pipeline
304 * fan-out on outputs (an array of FPPackData: z,mid)
306 Fan-in and Fan-out are combinatorial.
def __init__(self, width, id_wid, num_rows):
    """Create the fan-in pipe, the adder pipeline and the fan-out pipe.

    * width: passed to all three sub-pipes
    * id_wid: passed to all three sub-pipes
    * num_rows: passed to the fan-in and fan-out pipes
    """
    self.num_rows = num_rows

    fan_in = FPADDInMuxPipe(width, id_wid, num_rows)     # fan-in
    adder = FPADDBasePipe(width, id_wid)                 # add stage
    fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)   # fan-out
    self.inpipe, self.fpadd, self.outpipe = fan_in, adder, fan_out

    # this object's own in/out are simply those of the outer pipes
    self.p = fan_in.p
    self.n = fan_out.n
    self._ports = fan_in.ports() + fan_out.ports()
def elaborate(self, platform):
    """Return a module containing the three pipes, linked in order.

    * platform: not used here

    ``inpipe``, ``fpadd`` and ``outpipe`` become submodules; the
    fan-in's ``n`` side is connected to the adder's ``p`` side, and the
    adder is connected on to the fan-out pipe.
    """
    # NOTE(review): no binding for ``m`` was visible in this method;
    # a fresh Module is created so the statements below have a target.
    m = Module()
    m.submodules.inpipe = self.inpipe
    m.submodules.fpadd = self.fpadd
    m.submodules.outpipe = self.outpipe

    m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
    m.d.comb += self.fpadd.connect_to_next(self.outpipe)

    # the populated module must be handed back to the caller
    return m
334 """ FPADD: stages as follows:
340 FPAddBase---> FPAddBaseMod
342 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
344 FPAddBase is tricky: it is both a stage and *has* stages.
345 Connection to FPAddBaseMod therefore requires an in stb/ack
346 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
347 needs to be the thing that raises the incoming stb.
350 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
353 * width: bit-width of IEEE754. supported: 16, 32, 64
354 * id_wid: an identifier that is sync-connected to the input
355 * single_cycle: True indicates each stage to complete in 1 clock
359 self
.single_cycle
= single_cycle
361 #self.out_z = FPOp(width)
362 self
.ids
= FPID(id_wid
)
365 for i
in range(rs_sz
):
368 in_a
.name
= "in_a_%d" % i
369 in_b
.name
= "in_b_%d" % i
370 rs
.append((in_a
, in_b
))
374 for i
in range(rs_sz
):
376 out_z
.name
= "out_z_%d" % i
378 self
.res
= Array(res
)
def add_state(self, state):
    """Append *state* to ``self.states`` and return it.

    * state: the state object to register

    Returning the object lets callers keep a handle on what they have
    just registered (``x = self.add_state(...)``).
    """
    self.states.append(state)
    # the result is used by callers, so None must not be returned here
    return state
386 def get_fragment(self
, platform
=None):
387 """ creates the HDL code-fragment for FPAdd
390 m
.submodules
+= self
.rs
395 geta
= self
.add_state(FPGetOp("get_a", "get_b",
400 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
405 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
406 ab
= self
.add_state(ab
)
407 abd
= ab
.ispec() # create an input spec object for FPADDBase
408 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
409 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
412 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
417 for state
in self
.states
:
418 with m
.State(state
.state_from
):
424 if __name__
== "__main__":
426 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
427 main(alu
, ports
=alu
.rs
[0][0].ports() + \
428 alu
.rs
[0][1].ports() + \
429 alu
.res
[0].ports() + \
430 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
432 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
433 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
435 alu
.out_z
.ports() + \
436 [alu
.in_mid
, alu
.out_mid
])
439 # works... but don't use, just do "python fname.py convert -t v"
440 #print (verilog.convert(alu, ports=[
441 # ports=alu.in_a.ports() + \
442 # alu.in_b.ports() + \