0ca7ce7a490f761214ea13846e555b80d9e80205
[ieee754fpu.git] / src / ieee754 / fpadd / statemachine.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const, Elaboratable
6 from nmigen.cli import main, verilog
7 from math import log
8
9 from ieee754.fpcommon.fpbase import FPOpIn, FPOpOut
10 from ieee754.fpcommon.fpbase import Trigger
11 from nmutil.singlepipe import (StageChain, SimpleHandshake)
12
13 from ieee754.fpcommon.fpbase import FPState, FPID
14 from ieee754.fpcommon.getop import (FPGetOp, FPADDBaseData, FPGet2Op)
15 from ieee754.fpcommon.denorm import (FPSCData, FPAddDeNorm)
16 from ieee754.fpcommon.postcalc import FPAddStage1Data
17 from ieee754.fpcommon.postnormalise import (FPNorm1Data,
18 FPNorm1Single, FPNorm1Multi)
19 from ieee754.fpcommon.roundz import (FPRoundData, FPRound)
20 from ieee754.fpcommon.corrections import FPCorrections
21 from ieee754.fpcommon.pack import (FPPackData, FPPackMod, FPPack)
22 from ieee754.fpcommon.normtopack import FPNormToPack
23 from ieee754.fpcommon.putz import (FPPutZ, FPPutZIdx)
24
25 from .specialcases import (FPAddSpecialCases, FPAddSpecialCasesDeNorm)
26 from .align import (FPAddAlignMulti, FPAddAlignSingle)
27 from .add0 import (FPAddStage0Data, FPAddStage0)
28 from .add1 import (FPAddStage1Mod, FPAddStage1)
29 from .addstages import FPAddAlignSingleAdd
30
31
class FPOpData:
    """ Output bundle: the result operand (z) together with its id (mid)
    """

    def __init__(self, width, id_wid):
        """ Build the output operand and the id signal

            * width: bit-width handed to FPOpOut and to its data_o Signal
            * id_wid: bit-width of the mid Signal
        """
        self.z = FPOpOut(width)
        self.z.data_o = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def __iter__(self):
        """ yields the members in a fixed order: z first, then mid
        """
        yield from (self.z, self.mid)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid into self
        """
        assign_z = self.z.eq(i.z)
        assign_mid = self.mid.eq(i.mid)
        return [assign_z, assign_mid]

    def ports(self):
        """ returns the members as a list, in iteration order
        """
        return [*self]
47
48
class FPADDBaseMod(Elaboratable):
    """ FSM-based FP adder: collects a list of states, then emits one
        FSM state per entry when elaborated.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # filled in by add_state() while the fragment is being built
        self.states = []

    def ispec(self):
        """ returns a fresh input data object (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output data object (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ records a state for the FSM and hands the same object back,
            so that the call can be used inline
        """
        self.states.append(state)
        return state

    def elaborate(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with either the short or the long pipeline
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per recorded stage, named by its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the non-compact chain, one state per processing step

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__, and it
            calls FPGet2Op without id_wid (the compact path passes it).
            It looks stale - confirm before relying on compact=False.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)
        m.submodules.sc = sc

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)
        m.submodules.dn = dn

        # both aligners are set up identically: only the class differs
        if self.single_cycle:
            align_kls = FPAddAlignSingle
        else:
            align_kls = FPAddAlignMulti
        alm = self.add_state(align_kls(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        m.submodules.alm = alm

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)
        m.submodules.add0 = add0

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)
        m.submodules.add1 = add1

        # the multi-cycle normaliser additionally takes add1's norm_stb
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from the packer, one from special-cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced chain: get-ops, special-cases+denorm,
            align+add, norm-to-pack, followed by a single put-z state
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        # chain the stages together, fed from this module's input
        chainlist = [get, sc, alm, n1]
        StageChain(chainlist, specallocate=False).setup(m, self.i)
        m.submodules.sc = sc
        m.submodules.alm = alm
        m.submodules.n1 = n1

        # every chained stage also becomes an FSM state
        for mod in chainlist:
            self.add_state(mod)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
178
179
class FPADDBase(FPState):
    """ FSM state ("fpadd") that wraps an FPADDBaseMod instance: it hands
        operands to the inner module and waits for its result.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ returns a fresh input data object, as defined by the inner module
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a fresh output data object, as defined by the inner module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires this state to its input and to the inner module

            * m: the Module being built
            * i: input data, copied combinatorially into self.i
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted for interface compatibility; not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.valid_o.eq(self.mod.o.z.valid_o),
                     self.mod.o.z.ready_i.eq(self.o.z.ready_i_test),
                     ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ready_i.eq(0)  # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action for the "fpadd" state

            While the inner module's result is not yet flagged (z_done low),
            either accept a new operand pair or deassert the input strobe
            and raise ready_i on the output.  Once z_done is high,
            acknowledge and move on to the "put_z" state.

            (Unreachable statements that used to follow an unconditional
            return here have been removed: they referenced in_mid, out_mid
            and out_z, none of which this class assigns.)
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ready_i.eq(1),
                             ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                         ]
            m.next = "put_z"
269
270
class FPADD(FPID, Elaboratable):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: how many (in_a, in_b) operand pairs, and how many
                     out_z results, are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # input side: rs_sz pairs of operands, named in_a_<n> / in_b_<n>
        rs = []
        for idx in range(rs_sz):
            in_a = FPOpIn(width)
            in_b = FPOpIn(width)
            in_a.data_i = Signal(width)
            in_b.data_i = Signal(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # output side: rs_sz results, named out_z_<n>
        res = []
        for idx in range(rs_sz):
            out_z = FPOpOut(width)
            out_z.data_o = Signal(width)
            out_z.name = "out_z_%d" % idx
            res.append(out_z)
        self.res = Array(res)

        # filled in by add_state() during elaborate()
        self.states = []

    def add_state(self, state):
        """ records a state for the FSM and hands the same object back
        """
        self.states.append(state)
        return state

    def elaborate(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            Only the first operand pair (rs[0]) is connected to the
            get_a / get_b states.
        """
        m = Module()
        #m.submodules += self.rs

        in_a, in_b = self.rs[0][0], self.rs[0][1]

        # state "get_a" -> "get_b": fetch operand a
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        # state "get_b" -> "fpadd": fetch operand b
        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        # state "fpadd": the adder proper, fed through a registered copy
        # of both operands and the incoming id
        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # state "put_z": deliver the result, then return to "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                 o.mid, "get_a"))

        # one FSM state per recorded stage, named by its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
363
364
if __name__ == "__main__":
    # developer toggle: True builds the full FPADD, False the bare FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports()
                 + alu.rs[0][1].ports()
                 + alu.res[0].ports()
                 + [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=ports)
    else:
        # NOTE(review): in_a, in_b, out_z, in_mid and out_mid are not
        # assigned by FPADDBase in this file - this branch looks stale;
        # confirm before enabling it.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b]
                 + alu.in_t.ports()
                 + alu.out_z.ports()
                 + [alu.in_mid, alu.out_mid])
        main(alu, ports=ports)
378
379
380 # works... but don't use, just do "python fname.py convert -t v"
381 #print (verilog.convert(alu, ports=[
382 # ports=alu.in_a.ports() + \
383 # alu.in_b.ports() + \
384 # alu.out_z.ports())