split out pipeline and statemachine to separate modules
[ieee754fpu.git] / src / add / fpadd / statemachine.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,
19 FPGet2OpMod, FPGet2Op)
20 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
21 from fpcommon.postcalc import FPAddStage1Data
22 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
23 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
24 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
25 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
26 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
27 from fpcommon.normtopack import FPNormToPack
28 from fpcommon.putz import (FPPutZ, FPPutZIdx)
29
30 from fpadd.specialcases import (FPAddSpecialCasesMod, FPAddSpecialCases,
31 FPAddSpecialCasesDeNorm)
32 from fpadd.align import (FPAddAlignMulti, FPAddAlignMultiMod, FPNumIn2Ops,
33 FPAddAlignSingleMod, FPAddAlignSingle)
34 from fpadd.add0 import (FPAddStage0Data, FPAddStage0Mod, FPAddStage0)
35 from fpadd.add1 import (FPAddStage1Mod, FPAddStage1)
36 from fpadd.addstages import FPAddAlignSingleAdd
37
38
class FPOpData:
    """Output record of the FP adder: a result operand and its identifier.

    * z: an FPOp of the given width
    * mid: an identifier Signal, id_wid bits wide, created reset-less
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying both fields of i into this record
        """
        assign_z = self.z.eq(i.z)
        assign_mid = self.mid.eq(i.mid)
        return [assign_z, assign_mid]

    def ports(self):
        """ returns the members of the record as a list: z first, then mid
        """
        members = [self.z]
        members.append(self.mid)
        return members
49
50
class FPADDBaseMod:
    """FSM-based IEEE754 floating-point adder.

    Stages are registered with add_state() and get_fragment() turns each
    one into a state of a single nmigen FSM.  Two stage layouts exist:

    * compact (default): FPGet2Op, FPAddSpecialCasesDeNorm,
      FPAddAlignSingleAdd and FPNormToPack chained with StageChain,
      followed by a "pack_put_z" FPPutZ state.
    * longer: one state per individual stage (see get_longer_fragment).
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ returns a new input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ registers state for inclusion in the FSM and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # every registered stage becomes one FSM state, keyed by state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers one FSM state per individual adder stage

            NOTE(review): this path previously read self.in_mid, self.out_z
            and self.out_mid, none of which this class defines.  They are
            now taken from self.i / self.o, mirroring get_compact_fragment.
            The remaining stage wiring (setup() arguments, in_mid/out_*
            attributes of each stage) has not been checked against the
            current stage classes - confirm before relying on compact=False.
        """
        # id_wid is passed here exactly as in get_compact_fragment
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.i.mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # both alignment variants are set up identically; only the class
        # differs
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            # the multi-cycle normaliser additionally takes add1.norm_stb
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output states: normal result (from pack) and special-case result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.o,
                              pa.in_mid, self.o.mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.o,
                              pa.in_mid, self.o.mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced set of FSM states, chained via StageChain
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        for mod in chainlist:
            self.add_state(mod)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        # NOTE: unlike get_longer_fragment, no separate "put_z" state for
        # the special-cases output is registered on this path.
172
173
class FPADDBase(FPState):
    """The "fpadd" FSM state, wrapping an FPADDBaseMod as a submodule.

    setup() wires this state's input/output records and trigger to the
    wrapped module; action() implements the stb/ack handshake that starts
    an add and moves to "put_z" once the module's output triggers.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        # must exist before ospec()/ispec(), which delegate to it
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # follows mod.o.z.trigger
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ returns a new input record, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output record, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ connects this state to its input and to the wrapped module

            * m: the Module being built
            * i: input record, copied combinatorially into self.i
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: not used by this method; kept so that existing
              callers passing four arguments continue to work
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ emits the handshake logic for the "fpadd" state

            While z_done is low: an accepted input (add_stb high, add_ack
            low) raises add_ack and in_t.stb; otherwise both are lowered
            and o.z.ack is raised.  Once z_done is high: add_ack is raised,
            in_t.stb is lowered and the FSM moves to "put_z".
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
263
264
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: how many (in_a, in_b) operand pairs and out_z results
              are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        operand_pairs = []
        for idx in range(rs_sz):
            op_a = FPOp(width)
            op_b = FPOp(width)
            op_a.name = "in_a_%d" % idx
            op_b.name = "in_b_%d" % idx
            operand_pairs.append((op_a, op_b))
        self.rs = Array(operand_pairs)

        # result operands, named out_z_<n>
        results = []
        for idx in range(rs_sz):
            op_z = FPOp(width)
            op_z.name = "out_z_%d" % idx
            results.append(op_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ registers state for inclusion in the FSM and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        # NOTE(review): self.rs holds (in_a, in_b) tuples - confirm that
        # adding it to m.submodules like this is accepted
        m.submodules += self.rs

        # only the first operand pair is wired into the state machine
        first_pair = self.rs[0]
        in_a = first_pair[0]
        in_b = first_pair[1]

        # state "get_a" -> "get_b"
        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        # state "get_b" -> "fpadd"
        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # state "put_z", constructed with "get_a" as its final argument
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        # every registered stage becomes one FSM state, keyed by state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
354
355
if __name__ == "__main__":
    # hand-edited switch: True builds the full FPADD, False builds only
    # the FPADDBase stage
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = (alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=port_list)
    else:
        # NOTE(review): FPADDBase.__init__ in this file sets i / o / in_t;
        # unless FPState supplies in_a, in_b, out_z, in_mid and out_mid
        # this branch will fail if it is ever enabled - confirm.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = ([alu.in_a, alu.in_b] +
                     alu.in_t.ports() +
                     alu.out_z.ports() +
                     [alu.in_mid, alu.out_mid])
        main(alu, ports=port_list)
369
370
# works... but don't use, just do "python fname.py convert -t v"
#print (verilog.convert(alu, ports=alu.in_a.ports() + \
#                                   alu.in_b.ports() + \
#                                   alu.out_z.ports()))