split out addstages to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,
19 FPGet2OpMod, FPGet2Op)
20 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
21 from fpcommon.postcalc import FPAddStage1Data
22 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
23 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
24 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
25 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
26 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
27 from fpcommon.normtopack import FPNormToPack
28 from fpcommon.putz import (FPPutZ, FPPutZIdx)
29
30 from fpadd.specialcases import (FPAddSpecialCasesMod, FPAddSpecialCases,
31 FPAddSpecialCasesDeNorm)
32 from fpadd.align import (FPAddAlignMulti, FPAddAlignMultiMod, FPNumIn2Ops,
33 FPAddAlignSingleMod, FPAddAlignSingle)
34 from fpadd.add0 import (FPAddStage0Data, FPAddStage0Mod, FPAddStage0)
35 from fpadd.add1 import (FPAddStage1Mod, FPAddStage1)
36 from fpadd.addstages import FPAddAlignSingleAdd
37
38
class FPOpData:
    """ Output record: a result operand (z) paired with an identifier (mid).

        * z: an FPOp built for the given width
        * mid: an id_wid-wide Signal, created with reset_less=True
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid into self
        """
        assignments = [self.z.eq(i.z)]
        assignments.append(self.mid.eq(i.mid))
        return assignments

    def ports(self):
        """ returns [z, mid], in that order
        """
        fields = (self.z, self.mid)
        return list(fields)
49
50
class FPADDBaseMod:
    """ FSM-based IEEE754 FP adder module.

        Stages are collected through add_state(); get_fragment() then emits
        one FSM state per collected stage, named by the stage's state_from
        attribute and filled in by the stage's action() method.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ returns a new input-data object (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output-data object (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per stage, in the order the stages were added
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the long-form chain of states (used when compact is False)

            NOTE(review): this method reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned by __init__ in this
            class (which only creates self.i and self.o).  It also passes
            three arguments to FPGet2Op where get_compact_fragment passes
            four (including id_wid).  Looks stale relative to the compact
            path - confirm against the stage classes before relying on it.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # both alignment variants are wired up identically
        if self.single_cycle:
            align_cls = FPAddAlignSingle
        else:
            align_cls = FPAddAlignMulti
        alm = self.add_state(align_cls(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # the multi-cycle normaliser takes one extra argument: add1.norm_stb
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from the packer, one from special-cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced chain: get-ops, special-cases/denorm,
            align/add, norm-to-pack, followed by a "pack_put_z" output state
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # register every chained stage as an FSM state.  the result is
        # deliberately not bound to "sc": doing so would leave sc referring
        # to the last stage (n1) instead of the special-cases stage.
        for mod in chainlist:
            self.add_state(mod)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
172
173
class FPADDBase(FPState):
    """ FSM state ("fpadd") that drives an FPADDBaseMod submodule.

        This object is a state in an outer FSM (see action) and also owns a
        module that has states of its own.  setup() connects this object's
        input/output data and in_t trigger to the module's; action() runs
        the add_stb/add_ack handshake and moves to "put_z" when z_done is
        raised.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ returns a new input-data object, as specified by the module
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output-data object, as specified by the module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ connects this state to its FPADDBaseMod and registers the module

            * m: the Module being built
            * i: input data, copied combinatorially into self.i and on
                 into the module's input
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted for interface compatibility; not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ emits the logic for the "fpadd" state

            While z_done is low: on in_accept, raise add_ack and in_t.stb;
            otherwise lower both and raise o.z.ack.  When z_done is high:
            raise add_ack, lower in_t.stb and go to state "put_z".
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),   # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
        # the statements that used to follow an unconditional "return" here
        # could never execute and have been removed.
263
264
class FPADDBasePipe(ControlBase):
    """ Adder pipeline built from three connected stages:
        FPAddSpecialCasesDeNorm, FPAddAlignSingleAdd and FPNormToPack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are kept and added to comb in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages as submodules and applies the
            connection statements computed in the constructor
        """
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for label, stage in named_stages:
            setattr(m.submodules, label, stage)
        m.d.comb += self._eqs
        return m
281
282
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in pipe: a PriorityCombMuxInPipe with num_rows inputs wrapped
        around a pass-through stage that carries FPADDBaseData.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # fresh data object each time the stage asks for its spec
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
289
290
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out pipe: a CombMuxOutPipe with num_rows outputs wrapped around
        a pass-through stage that carries FPPackData.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # fresh data object each time the stage asks for its spec
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
297
298
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # this class's own in/out are simply those of the two mux pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        in_ports = self.inpipe.ports()
        out_ports = self.outpipe.ports()
        self._ports = in_ports + out_ports

    def elaborate(self, platform):
        """ registers the three pipes as submodules and chains them:
            inpipe -> fpadd -> outpipe
        """
        m = Module()
        for label in ("inpipe", "fpadd", "outpipe"):
            setattr(m.submodules, label, getattr(self, label))

        fan_in_link = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += fan_in_link
        fan_out_link = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += fan_out_link

        return m

    def ports(self):
        """ returns the port list assembled in the constructor
        """
        return self._ports
331
332
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of out_z
                     results that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        def named_op(label):
            # create one FPOp and give it an explicit name
            op = FPOp(width)
            op.name = label
            return op

        operand_pairs = [(named_op("in_a_%d" % i), named_op("in_b_%d" % i))
                         for i in range(rs_sz)]
        self.rs = Array(operand_pairs)

        results = [named_op("out_z_%d" % i) for i in range(rs_sz)]
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired into the FSM here
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(geta.out_op),
                     abd.b.eq(getb.out_op),
                     abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        # output state: hands the adder's z and mid over, then "get_a" again
        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        # one FSM state per stage, in the order the stages were added
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
422
423
if __name__ == "__main__":
    # developer toggle: the first branch builds the full FPADD, the second
    # builds FPADDBase on its own.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        operand_a, operand_b = alu.rs[0]
        main(alu, ports=(operand_a.ports() +
                         operand_b.ports() +
                         alu.res[0].ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        # NOTE(review): in_a, in_b, out_z, in_mid and out_mid are not
        # assigned by FPADDBase.__init__ as written in this file - confirm
        # before enabling this branch.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))
437
438
439 # works... but don't use, just do "python fname.py convert -t v"
440 #print (verilog.convert(alu, ports=[
441 # ports=alu.in_a.ports() + \
442 # alu.in_b.ports() + \
443 # alu.out_z.ports())