split out add1 to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
# 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,
19 FPGet2OpMod, FPGet2Op)
20 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
21 from fpcommon.postcalc import FPAddStage1Data
22 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
23 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
24 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
25 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
26 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
27 from fpcommon.normtopack import FPNormToPack
28 from fpcommon.putz import (FPPutZ, FPPutZIdx)
29
30 from fpadd.specialcases import (FPAddSpecialCasesMod, FPAddSpecialCases,
31 FPAddSpecialCasesDeNorm)
32 from fpadd.align import (FPAddAlignMulti, FPAddAlignMultiMod, FPNumIn2Ops,
33 FPAddAlignSingleMod, FPAddAlignSingle)
34 from fpadd.add0 import (FPAddStage0Data, FPAddStage0Mod, FPAddStage0)
35 from fpadd.add1 import (FPAddStage1Mod, FPAddStage1)
36
37
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Combined align + add-stage-0 + add-stage-1 stage.

        The class is both an FSM state (registered as "align") and an
        UnbufferedPipeline that uses itself as its own stage object.
    """

    def __init__(self, width, id_wid):
        """ * width: forwarded to the data specs and to the chained modules
            * id_wid: forwarded to the data specs and to the chained modules
        """
        FPState.__init__(self, "align")
        # width/id_wid are assigned before the pipeline base is initialised.
        # NOTE(review): presumably because UnbufferedPipeline.__init__ calls
        # ispec()/ospec() on its stage (which is this object) - confirm.
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ returns a fresh input-data object (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output-data object (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module the chained sub-modules are set up in
            * i: input data handed to the first module of the chain
        """

        # chain AddAlignSingle, AddStage0 and AddStage1
        mod = FPAddAlignSingleMod(self.width, self.id_wid)
        a0mod = FPAddStage0Mod(self.width, self.id_wid)
        a1mod = FPAddStage1Mod(self.width, self.id_wid)

        chain = StageChain([mod, a0mod, a1mod])
        chain.setup(m, i)

        # the output of the last chained module is this stage's output
        self.o = a1mod.o

    def process(self, i):
        """ returns the output recorded by setup(); the argument is ignored
        """
        return self.o

    def action(self, m):
        """ FSM action: latch the chain output into a1o, then move on to
            the "normalise_1" state
        """
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
73
74
class FPOpData:
    """ Result record: an FPOp (z) together with an id Signal (mid).
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPOp for the z operand
            * id_wid: width argument of the mid Signal
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid into
            this record (z first, then mid)
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]

    def ports(self):
        """ returns the members of the record as a list: [z, mid]
        """
        return [self.z, self.mid]
85
86
class FPADDBaseMod:
    """ FSM-based FP adder: collects a list of state objects and emits one
        FSM state per object (keyed on each object's state_from).
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # state objects, in the order they were registered with add_state()
        self.states = []

    def ispec(self):
        """ returns a fresh input-data object (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output-data object (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to self.states and returns it unchanged, so that
            construction and registration can be written as one expression
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the long (one state per step) sequence of stages

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__ (which
            only creates self.i and self.o).  Unless a caller sets them,
            compact=False raises AttributeError.  Also, FPGet2Op is built
            here without id_wid, unlike in get_compact_fragment - confirm
            which form is intended.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # only the alignment class differs; the wiring is the same
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # the multi-cycle normaliser takes an extra argument (add1.norm_stb)
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed by the packer, one by special-cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced sequence of stages: get-ops,
            special-cases+denorm, align+add, norm-to-pack, then put-z
        """

        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also an FSM state
        for mod in chainlist:
            self.add_state(mod)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        # NOTE: unlike get_longer_fragment, no separate "put_z" state fed
        # from the special-cases stage is registered here.
208
209
class FPADDBase(FPState):
    """ FSM state "fpadd" that wraps an FPADDBaseMod as a submodule and
        handshakes operands into it and the result out of it.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ returns a fresh input-data object, as defined by the wrapped mod
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a fresh output-data object, as defined by the wrapped mod
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires this state to the wrapped module and adds it to m

            * m: the Module to add connections and the submodule to
            * i: input data, copied combinatorially into self.i and mod.i
            * add_stb: strobe, registered (sync) into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action for the "fpadd" state

            While the result is not done: accept a pending operand pair
            (raise add_ack and in_t.stb), otherwise drop them and raise
            o.z.ack.  Once done: raise add_ack, drop in_t.stb and move to
            the "put_z" state.
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
299
300
class FPADDBasePipe(ControlBase):
    """ Three pipeline stages connected in sequence:
        special-cases+denorm, align+add, norm-to-pack.
    """

    def __init__(self, width, id_wid):
        """ * width: forwarded to each of the three stages
            * id_wid: forwarded to each of the three stages
        """
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are created once here, applied in elaborate()
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        """ adds the three stages as submodules and applies the stored
            connections combinatorially; returns the Module
        """
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
317
318
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in pipe: a PriorityCombMuxInPipe with num_rows inputs whose
        pass-through stage carries FPADDBaseData.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: arguments for each FPADDBaseData created
            * num_rows: number of inputs (passed on as p_len)
        """
        self.num_rows = num_rows

        def iospec():
            # factory: a fresh data object on every call
            return FPADDBaseData(width, id_wid)

        stage = PassThroughStage(iospec)
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
325
326
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out pipe: a CombMuxOutPipe with num_rows outputs whose
        pass-through stage carries FPPackData.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: arguments for each FPPackData created
            * num_rows: number of outputs (passed on as n_len)
        """
        self.num_rows = num_rows

        def iospec():
            # factory: a fresh data object on every call
            return FPPackData(width, id_wid)

        stage = PassThroughStage(iospec)
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
333
334
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: forwarded to the fan-in, adder and fan-out
            * num_rows: number of fan-in inputs and fan-out outputs
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        self.p = self.inpipe.p  # kinda annoying,
        self.n = self.outpipe.n # use pipe in/out as this class in/out
        # port list is computed once here and handed out by ports()
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ adds the three pipes as submodules and connects
            fan-in -> adder -> fan-out combinatorially; returns the Module
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list built in __init__ (fan-in ports followed
            by fan-out ports)
        """
        return self._ports
367
368
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of out_z
                     results that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        # NOTE(review): FPID.__init__ is not invoked on self; the id
        # signals are held in a separate FPID instance instead.
        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # results, named out_z_<n>
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # state objects, in the order they were registered with add_state()
        self.states = []

    def add_state(self, state):
        """ appends state to self.states and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            Registers the states get_a -> get_b -> fpadd -> put_z (which
            names "get_a" as its follow-on) and emits one FSM state per
            registered object.  Only the first operand pair, rs[0], is
            wired to the get_a / get_b states.
        """
        m = Module()
        m.submodules += self.rs

        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        # one FSM state per registered state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
458
459
if __name__ == "__main__":
    # The literal below selects which design is handed to nmigen's
    # command-line main(); the else branch runs only if it is edited.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports() +
                         alu.rs[0][1].ports() +
                         alu.res[0].ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        # NOTE(review): in_a, in_b, out_z, in_mid and out_mid are not
        # assigned by FPADDBase.__init__ as written in this file; confirm
        # they exist (e.g. via FPState) before enabling this branch.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))
473
474
    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())