split out add specialcases to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,
19 FPGet2OpMod, FPGet2Op)
20 from fpadd.specialcases import (FPAddSpecialCasesMod, FPAddSpecialCases,
21 FPAddSpecialCasesDeNorm)
22 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
23 from fpcommon.postcalc import FPAddStage1Data
24 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
25 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
26 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
27 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
28 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
29 from fpcommon.normtopack import FPNormToPack
30 from fpcommon.putz import (FPPutZ, FPPutZIdx)
31
32
class FPAddAlignMultiMod(FPState):
    """ Combinatorial alignment step used by the multi-cycle align state.

        Each evaluation compares the exponents of in_a and in_b.  The
        operand with the smaller exponent is passed through shift_down
        (presumably a one-position shift - confirm in fpbase); the other
        operand is copied unchanged.  exp_eq is raised only when the two
        exponents are already equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        # NOTE: this does *not* do single-cycle multi-shifting: the
        # enclosing state stays in "align" until the exponents match.
        m = Module()

        for name, num in (("align_in_a", self.in_a),
                          ("align_in_b", self.in_b),
                          ("align_out_a", self.out_a),
                          ("align_out_b", self.out_b)):
            setattr(m.submodules, name, num)

        # defaults: not yet equal, operands passed straight through
        m.d.comb += [self.exp_eq.eq(0),
                     self.out_a.eq(self.in_a),
                     self.out_b.eq(self.in_b)]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [agtb.eq(self.in_a.e > self.in_b.e),
                     altb.eq(self.in_a.e < self.in_b.e)]

        with m.If(agtb):
            # exponent of a greater than b: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # exponent of b greater than a: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: the next stage may proceed
            m.d.comb += self.exp_eq.eq(1)

        return m
73
74
class FPAddAlignMulti(FPState):
    """ FSM state "align" (multi-cycle variant).

        Owns an FPAddAlignMultiMod, registers its outputs into
        out_a / out_b and mirrors its exp_eq flag.  The state only
        advances to "add_0" once exp_eq is set.
    """

    def __init__(self, width, id_wid):
        # id_wid is accepted for signature parity but not used here
        super().__init__("align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [self.mod.in_a.eq(in_a),
                     self.mod.in_b.eq(in_b),
                     self.exp_eq.eq(self.mod.exp_eq)]
        m.d.sync += [self.out_a.eq(self.mod.out_a),
                     self.out_b.eq(self.mod.out_b)]

    def action(self, m):
        """ moves on to "add_0" only when the exponents are equal
        """
        with m.If(self.exp_eq):
            m.next = "add_0"
97
98
class FPNumIn2Ops:
    """ Data bundle: two FPNumIn operands (a, b) plus the bypass result
        fields (z, out_do_z, oz) and the mid identifier.
    """

    # order in which eq() emits its assignments
    _FIELDS = ("z", "out_do_z", "oz", "a", "b", "mid")

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of per-field assignments copying i into self
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in self._FIELDS]
112
113
class FPAddAlignSingleMod:
    """ Single-cycle exponent alignment stage.

        Input spec is FPSCData, output spec is FPNumIn2Ops.  One shared
        MultiShiftRMerge shifter is muxed onto whichever operand has the
        smaller exponent.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        # the stage output is always this module's own output bundle
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has
            the greater exponent, in a *single* cycle using a
            variable-width bit-shift.

            The shifter is expensive in gates, so only one is built:
            A or B is muxed into a temporary, shifted, and muxed back
            out, as only one of them ever needs aligning.
        """
        m = Module()

        for name, num in (("align_in_a", self.i.a),
                          ("align_in_b", self.i.b),
                          ("align_out_a", self.o.a),
                          ("align_out_b", self.o.b)):
            setattr(m.submodules, name, num)

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        for name, sub in (("align_t_in", t_inp),
                          ("align_t_out", t_out),
                          ("multishift_r", msr)):
            setattr(m.submodules, name, sub)

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [msr.inp.eq(t_inp.m),
                     msr.diff.eq(tdiff),
                     t_out.m.eq(msr.m),
                     t_out.e.eq(t_inp.e + tdiff),
                     t_out.s.eq(t_inp.s)]

        # exponent differences in both directions, and which is larger
        m.d.comb += [ediff.eq(self.i.a.e - self.i.b.e),
                     ediffr.eq(self.i.b.e - self.i.a.e),
                     elz.eq(self.i.a.e < self.i.b.e),
                     egz.eq(self.i.a.e > self.i.b.e)]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [self.o.a.eq(self.i.a),
                     self.o.b.eq(self.i.b)]

        # alignment is skipped entirely when out_do_z is set
        with m.If(~self.i.out_do_z):
            with m.If(egz):
                # exponent of a greater than b: shift b down
                m.d.comb += t_inp.eq(self.i.b)
                m.d.comb += tdiff.eq(ediff)
                m.d.comb += self.o.b.eq(t_out)
                # sign is re-applied from the original operand
                m.d.comb += self.o.b.s.eq(self.i.b.s)
            with m.Elif(elz):
                # exponent of b greater than a: shift a down
                m.d.comb += t_inp.eq(self.i.a)
                m.d.comb += tdiff.eq(ediffr)
                m.d.comb += self.o.a.eq(t_out)
                # sign is re-applied from the original operand
                m.d.comb += self.o.a.s.eq(self.i.a.s)

        # pass-through fields
        m.d.comb += [self.o.mid.eq(self.i.mid),
                     self.o.z.eq(self.i.z),
                     self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.oz.eq(self.i.oz)]

        return m
207
208
class FPAddAlignSingle(FPState):
    """ FSM state "align" (single-cycle variant).

        Wraps an FPAddAlignSingleMod and registers its aligned operands
        into out_a / out_b.  The state unconditionally advances to
        "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input data, forwarded to FPAddAlignSingleMod.setup
        """
        self.mod.setup(m, i)

        # FPAddAlignSingleMod publishes its results on its output
        # bundle "o" (fields a and b); it has no out_a/out_b attributes.
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """ alignment takes one cycle: always move on to "add_0"
        """
        m.next = "add_0"
228
229
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Combined align + add0 + add1 stage.

        Acts both as the FSM state "align" and as an UnbufferedPipeline
        whose stage is itself.  Input spec is FPSCData; output spec is
        FPAddStage1Data.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        # the pipeline is its own stage
        UnbufferedPipeline.__init__(self, self)
        self.a1o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        # same output spec as AddStage1
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        align_mod = FPAddAlignSingleMod(self.width, self.id_wid)
        add0_mod = FPAddStage0Mod(self.width, self.id_wid)
        add1_mod = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([align_mod, add0_mod, add1_mod]).setup(m, i)

        # the output of the last module in the chain is this stage's output
        self.o = add1_mod.o

    def process(self, i):
        return self.o

    def action(self, m):
        """ registers the chain output into a1o, then goes to "normalise_1"
        """
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
265
266
class FPAddStage0Data:
    """ Output bundle of add stage 0: partial result z, the mantissa
        total "tot" (z.m_width + 4 bits wide), the bypass fields
        (out_do_z, oz) and the mid identifier.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of per-field assignments copying i into self
        """
        pairs = ((self.z, i.z),
                 (self.out_do_z, i.out_do_z),
                 (self.oz, i.oz),
                 (self.tot, i.tot),
                 (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
279
280
class FPAddStage0Mod:
    """ Add stage 0: adds or subtracts the two (aligned) mantissas.

        Input spec is FPSCData, output spec is FPAddStage0Data.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        # the stage output is always this module's own output bundle
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        a, b = self.i.a, self.i.b
        z = self.o.z
        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = z

        # intermediate tests, and mantissas extended by one zero MSB
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += seq.eq(a.s == b.s)
        m.d.comb += mge.eq(a.m >= b.m)
        m.d.comb += am0.eq(Cat(a.m, 0))
        m.d.comb += bm0.eq(Cat(b.m, 0))

        # the sum is only computed when the bypass flag is clear
        with m.If(~self.i.out_do_z):
            m.d.comb += z.e.eq(a.e)
            with m.If(seq):
                # same sign (both negative or both positive): add
                m.d.comb += self.o.tot.eq(am0 + bm0)
                m.d.comb += z.s.eq(a.s)
            with m.Elif(mge):
                # a mantissa greater than or equal to b: a - b, sign of a
                m.d.comb += self.o.tot.eq(am0 - bm0)
                m.d.comb += z.s.eq(a.s)
            with m.Else():
                # b mantissa greater than a: b - a, sign of b
                m.d.comb += self.o.tot.eq(bm0 - am0)
                m.d.comb += z.s.eq(b.s)

        # pass-through fields
        m.d.comb += [self.o.oz.eq(self.i.oz),
                     self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.mid.eq(self.i.mid)]
        return m
345
346
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM state "add_0": wraps an FPAddStage0Mod, registers its output
        bundle into self.o and unconditionally advances to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input data, forwarded to FPAddStage0Mod.setup
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """ always move on to "add_1"
        """
        m.next = "add_1"
368
369
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.

        Detects when the mantissa total is too big: the MSB of tot
        (tot[-1]) acts as a carry bit.  Input spec is FPAddStage0Data,
        output spec is FPAddStage1Data.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        # the stage output is always this module's own output bundle
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z = self.o.z
        of = self.o.of

        m.d.comb += z.eq(self.i.z)

        # only applies when the bypass flag is clear
        with m.If(~self.i.out_do_z):
            with m.If(tot[-1]):
                # MSB set: the sum overflowed.  take the mantissa one
                # bit higher and bump the exponent
                m.d.comb += z.m.eq(tot[4:])
                m.d.comb += of.m0.eq(tot[4])
                m.d.comb += of.guard.eq(tot[3])
                m.d.comb += of.round_bit.eq(tot[2])
                m.d.comb += of.sticky.eq(tot[1] | tot[0])
                m.d.comb += z.e.eq(self.i.z.e + 1)
            with m.Else():
                # MSB clear: no overflow, exponent left as copied above
                m.d.comb += z.m.eq(tot[3:])
                m.d.comb += of.m0.eq(tot[3])
                m.d.comb += of.guard.eq(tot[2])
                m.d.comb += of.round_bit.eq(tot[1])
                m.d.comb += of.sticky.eq(tot[0])

        # pass-through fields
        m.d.comb += [self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.oz.eq(self.i.oz),
                     self.o.mid.eq(self.i.mid)]

        return m
427
428
class FPAddStage1(FPState):
    """ FSM state "add_1": wraps an FPAddStage1Mod and registers its
        result (z and the overflow bits) into out_z / out_of.  The state
        unconditionally advances to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input data, forwarded to FPAddStage1Mod.setup
        """
        self.mod.setup(m, i)

        # NOTE(review): both norm_stb assignments below are added
        # unconditionally, so the later eq(1) takes precedence; the
        # "zero when not in add1 state" intent probably needs the eq(1)
        # to be issued from action() instead - confirm before relying
        # on norm_stb.
        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # FPAddStage1Mod publishes its results on its output bundle "o"
        # (fields "of" and "z"); it has no out_of/out_z attributes.
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        """ always move on to "normalise_1"
        """
        m.next = "normalise_1"
451
452
453
454
class FPOpData:
    """ Result bundle: an FPOp "z" together with its mid identifier.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying i.z and i.mid into self
        """
        assignments = []
        assignments.append(self.z.eq(i.z))
        assignments.append(self.mid.eq(i.mid))
        return assignments

    def ports(self):
        """ returns [z, mid]
        """
        return list((self.z, self.mid))
465
466
class FPADDBaseMod:
    """ FSM-based IEEE754 FP adder core.

        Collects a list of state objects (each with a state_from name
        and an action(m) method) and emits them as one FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ records a state for the FSM and hands it back to the caller
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states using the selected stage layout
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        # one FSM state per recorded state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-step layout

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned by this class, and
            passes argument lists that differ from the setup() signatures
            of the state classes visible in this file.  It looks stale
            relative to get_compact_fragment - confirm before using
            compact=False.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant, same wiring
        if self.single_cycle:
            align_cls = FPAddAlignSingle
        else:
            align_cls = FPAddAlignMulti
        alm = self.add_state(align_cls(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant also takes norm_stb
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: packed result, and special-case result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced layout: get-ops, specialcases+denorm,
            align+add, normalise-to-pack, then the output state
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also an FSM state
        for mod in chainlist:
            self.add_state(mod)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))
589
590
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod (itself an FSM) and
        handles the strobe/ack handshake into and out of it.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec: same as the wrapped FPADDBaseMod
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: same as the wrapped FPADDBaseMod
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input data, copied into self.i and on into the core
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ handshake for the "fpadd" state

            While the core's result trigger (z_done) is low, an incoming
            operand pair is accepted when add_stb is high and add_ack is
            low.  Once z_done is high the state moves to "put_z".
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
680
681
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: specialcases+denorm, align+add,
        normalise-to-pack, connected in that order.
    """

    def __init__(self, width, id_wid):
        super().__init__()
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are kept until elaborate() adds them
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()
        for name, pipe in (("scnorm", self.pipe1),
                           ("addalign", self.pipe2),
                           ("normpack", self.pipe3)):
            setattr(m.submodules, name, pipe)
        m.d.comb += self._eqs
        return m
698
699
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in mux with num_rows inputs; each carries an FPADDBaseData
        through a pass-through stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # the stage builds a fresh FPADDBaseData whenever a spec is needed
        stage = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        super().__init__(stage, p_len=self.num_rows)
706
707
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out mux with num_rows outputs; each carries an FPPackData
        through a pass-through stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # the stage builds a fresh FPPackData whenever a spec is needed
        stage = PassThroughStage(lambda: FPPackData(width, id_wid))
        super().__init__(stage, n_len=self.num_rows)
714
715
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's in/out are simply the fan-in's p and the fan-out's n
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        for name, pipe in (("inpipe", self.inpipe),
                           ("fpadd", self.fpadd),
                           ("outpipe", self.outpipe)):
            setattr(m.submodules, name, pipe)

        # fan-in -> adder -> fan-out
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
748
749
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of out_z
                     results that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        operand_pairs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # results, named out_z_<n>
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ records a state for the FSM and hands it back to the caller
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired to the get_a/get_b states
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # output state: hands o.z / o.mid to FPPutZIdx along with self.res
        self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                 o.mid, "get_a"))

        # one FSM state per recorded state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
839
840
if __name__ == "__main__":
    # developer toggle: the first branch (full FPADD) is the one that runs
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports() +
                         alu.rs[0][1].ports() +
                         alu.res[0].ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))
854
855
856 # works... but don't use, just do "python fname.py convert -t v"
857 #print (verilog.convert(alu, ports=[
858 # ports=alu.in_a.ports() + \
859 # alu.in_b.ports() + \
860 # alu.out_z.ports())