split out putz to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
20 from fpcommon.postcalc import FPAddStage1Data
21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
22 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
23 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
24 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
25 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
26 from fpcommon.normtopack import FPNormToPack
27 from fpcommon.putz import (FPPutZ, FPPutZIdx)
28
29
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Decodes the two input operands and walks a single prioritised
        If/Elif chain.  When a special case matches, o.out_do_z is set
        to 1 and the result is built in o.z; otherwise o.out_do_z is 0
        and the decoded operands are copied to o.a / o.b.  o.oz always
        carries o.z.v and o.mid always carries i.mid.
    """

    def __init__(self, width, id_wid):
        # width and id_wid are forwarded to the input/output specs
        # (width is also handed to the operand decoders in elaborate)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input-data object (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output-data object (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs

            registers this object as submodule "specialcases" of m and
            combinatorially copies i into self.i
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output object.  the argument i is not used
        """
        return self.o

    def elaborate(self, platform):
        """ builds and returns the Module containing the special-case logic
        """
        m = Module()

        m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a),
                     b1.decode(self.i.b),
                    ]

        # s_nomatch: the two operand signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # m_match: the two operand mantissas are equal
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # NOTE: everything below is ONE If/Elif/Else chain: the first
        # matching condition wins, so the order of the tests matters.

        # if a is NaN or b is NaN return NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(a1.s)
            # if a is inf and signs don't match return NaN
            # (tested via b1.exp_128 - presumably "b has the all-ones
            # exponent"; b being NaN was already handled above)
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(b1.s)

        # if a is zero and b zero return signed-a/b
        # (result sign is a.s AND b.s; exponent/mantissa taken from b)
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)
            m.d.comb += self.o.a.eq(a1)
            m.d.comb += self.o.b.eq(b1)

        # unconditional: expose the packed z value and pass the id through
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
148
149
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM-state wrapper ("special_cases") around FPAddSpecialCasesMod.
        The module's early-out flag is exposed as out_do_z, and the
        module's result value and id are latched into out_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input data, copied into the module's input by mod.setup
        """
        # FPAddSpecialCasesMod.setup takes (m, i) only: the early-out
        # flag is read back from the module's output data instead
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid)

    def action(self, m):
        """ selects the next state: "put_z" when a special case produced
            the result, otherwise "denormalise"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
175
176
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Combined stage: chains FPAddSpecialCasesMod into FPAddDeNormMod.
        Acts both as the "special_cases" FSM state and as a pipeline
        stage (the pipeline uses this object as its own stage).
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.width = width
        self.id_wid = id_wid
        # width/id_wid are assigned before the pipeline is initialised
        # because the pipe is its own stage
        UnbufferedPipeline.__init__(self, self)
        self.out = self.ospec()

    def ispec(self):
        """ input spec (same as that of the special-cases module)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output spec (same as that of the de-normalise module)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        special = FPAddSpecialCasesMod(self.width, self.id_wid)
        denorm = FPAddDeNormMod(self.width, self.id_wid)

        StageChain([special, denorm]).setup(m, i)

        # break-out (early-out) is not wired up: it would need
        # self.out_do_z = special.o.out_do_z

        # the output of the chain is the output of its last module
        self.o = denorm.o

    def process(self, i):
        """ returns the chained output; i is not used
        """
        return self.o

    def action(self, m):
        """ latches the chained output and moves to "align"
            (no early-out branch to "put_z" is taken here)
        """
        result = self.process(None)
        m.d.sync += self.out.eq(result)
        m.next = "align"
220
221
class FPAddAlignMultiMod(FPState):
    """ Multi-cycle alignment step: compares the exponents of in_a and
        in_b and applies shift_down to whichever operand has the smaller
        exponent.  exp_eq is raised when the exponents are equal.
    """

    def __init__(self, width):
        # NOTE(review): FPState.__init__ is not called here - confirm
        # that this is intentional
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        """ builds and returns the Module containing the alignment logic
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match
        #       (the state using this module only advances on exp_eq)

        # defaults: exponents not (yet) equal, operands passed through.
        # the If/Elif/Else below overrides these, so order matters.
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
262
263
class FPAddAlignMulti(FPState):
    """ FSM-state wrapper ("align") around FPAddAlignMultiMod.
        Remains in the align state until the module reports that the
        exponents are equal.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        aligner = self.mod
        m.submodules.align = aligner
        # combinatorial: feed the operands in, read the "equal" flag back
        m.d.comb += [aligner.in_a.eq(in_a),
                     aligner.in_b.eq(in_b),
                     self.exp_eq.eq(aligner.exp_eq),
                    ]
        # synchronous: latch the module's outputs
        m.d.sync += [self.out_a.eq(aligner.out_a),
                     self.out_b.eq(aligner.out_b),
                    ]

    def action(self, m):
        """ advances to "add_0" only once the exponents match
        """
        with m.If(self.exp_eq):
            m.next = "add_0"
286
287
class FPNumIn2Ops:
    """ Data bundle: two operands (a, b), a result number (z), the
        early-out flag and value (out_do_z, oz) and an id (mid).
    """

    # attribute names in the order that eq() assigns them
    _eq_fields = ("z", "out_do_z", "oz", "a", "b", "mid")

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into the matching field of this object
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in self._eq_fields]
301
302
class FPAddAlignSingleMod:
    """ Single-cycle alignment stage: the operand with the smaller
        exponent is shifted (through one shared shifter) so that both
        outputs end up with the same exponent.
    """

    def __init__(self, width, id_wid):
        # width and id_wid are forwarded to the input/output specs
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input-data object (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output-data object (FPNumIn2Ops)
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object.  the argument i is not used
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs

            registers this object as submodule "align" of m and
            combinatorially copies i into self.i
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        # signals below use the exponent's width, declared as signed
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)   # a.e - b.e
        ediffr = Signal(espec, reset_less=True)  # b.e - a.e
        tdiff = Signal(espec, reset_less=True)   # shift amount in use
        elz = Signal(reset_less=True)            # a.e < b.e
        egz = Signal(reset_less=True)            # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        # the exponent is raised by the same amount as the shift
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
        m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
        m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
        m.d.comb += egz.eq(self.i.a.e > self.i.b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        # (overridden below, so these two must come first)
        m.d.comb += self.o.a.eq(self.i.a)
        m.d.comb += self.o.b.eq(self.i.b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # alignment is skipped entirely when the early-out flag is set
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(self.i.b),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(self.i.b.s), # whoops forgot sign
                            ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(self.i.a),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(self.i.a.s), # whoops forgot sign
                            ]

        # pass-through of id, early-out result, flag and value
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
396
397
class FPAddAlignSingle(FPState):
    """ FSM-state wrapper ("align") around FPAddAlignSingleMod: latches
        the aligned operands and always advances to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input data, copied into the module's input by mod.setup
        """
        self.mod.setup(m, i)

        # the module's aligned operands live in its output data object
        # (mod.o.a / mod.o.b): the module has no out_a / out_b attributes
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """ alignment completes in one cycle: go straight to "add_0"
        """
        m.next = "add_0"
417
418
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Combined stage: chains single-cycle align, add stage 0 and add
        stage 1.  Acts both as the "align" FSM state and as a pipeline
        stage (the pipeline uses this object as its own stage).
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        # width/id_wid are assigned before the pipeline is initialised
        # because the pipeline is its own stage
        UnbufferedPipeline.__init__(self, self)
        self.a1o = self.ospec()

    def ispec(self):
        """ input spec: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output spec: same as that of add stage 1
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1 (in that order)
        align = FPAddAlignSingleMod(self.width, self.id_wid)
        stage0 = FPAddStage0Mod(self.width, self.id_wid)
        stage1 = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([align, stage0, stage1]).setup(m, i)

        # the output of the chain is the output of its last module
        self.o = stage1.o

    def process(self, i):
        """ returns the chained output; i is not used
        """
        return self.o

    def action(self, m):
        """ latches the chained output and moves to "normalise_1"
        """
        result = self.process(None)
        m.d.sync += self.a1o.eq(result)
        m.next = "normalise_1"
454
455
class FPAddStage0Data:
    """ Data bundle produced by add stage 0: result number (z), early-out
        flag and value (out_do_z, oz), mantissa total (tot) and id (mid).
    """

    # attribute names in the order that eq() assigns them
    _eq_fields = ("z", "out_do_z", "oz", "tot", "mid")

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # tot is 4 bits wider than z's mantissa width
        tot_width = self.z.m_width + 4
        self.tot = Signal(tot_width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into the matching field of this object
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in self._eq_fields]
468
469
class FPAddStage0Mod:
    """ First stage of add: adds the mantissas when the signs match,
        otherwise subtracts the smaller mantissa from the larger one.
        Nothing is computed when the early-out flag (i.out_do_z) is set.
    """

    def __init__(self, width, id_wid):
        # width and id_wid are forwarded to the input/output specs
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input-data object (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output-data object (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object.  the argument i is not used
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs

            registers this object as submodule "add0" of m and
            combinatorially copies i into self.i
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds and returns the Module containing the add logic
        """
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs equal
        mge = Signal(reset_less=True)  # a mantissa >= b mantissa
        # am0/bm0: mantissas with one extra zero bit on top
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
                     mge.eq(self.i.a.m >= self.i.b.m),
                     am0.eq(Cat(self.i.a.m, 0)),
                     bm0.eq(Cat(self.i.b.m, 0))
                    ]
        # only compute when no early-out result is pending
        with m.If(~self.i.out_do_z):
            # the result takes a's exponent
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [
                    self.o.tot.eq(am0 + bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [
                    self.o.tot.eq(am0 - bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [
                    self.o.tot.eq(bm0 - am0),
                    self.o.z.s.eq(self.i.b.s)
                ]

        # pass-through of early-out value, flag and id
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        return m
534
535
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM-state wrapper ("add_0") around FPAddStage0Mod: latches the
        module's output and always advances to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input data, copied into the module's input by mod.setup
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """ unconditionally moves on to "add_1"
        """
        m.next = "add_1"
557
558
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot, tot[-1], is
        kinda a carry bit) and, if so, shifts the result down by one
        and increments the exponent.
    """

    def __init__(self, width, id_wid):
        # NOTE(review): FPState.__init__ is not called here - confirm
        # that this is intentional
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input-data object (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output-data object (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object.  the argument i is not used
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs

            registers this object as submodule "add1" of m (and its
            overflow output as "add1_out_overflow"), then
            combinatorially copies i into self.i
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds and returns the Module containing the stage-1 logic
        """
        m = Module()
        # default: copy z across.  m and e are overridden below, so this
        # assignment must stay first.
        m.d.comb += self.o.z.eq(self.i.z)
        # tot[-1] (MSB) gets set when the sum overflows. shift result down
        with m.If(~self.i.out_do_z):
            with m.If(self.i.tot[-1]):
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[4:]),
                    self.o.of.m0.eq(self.i.tot[4]),
                    self.o.of.guard.eq(self.i.tot[3]),
                    self.o.of.round_bit.eq(self.i.tot[2]),
                    # sticky collects both bits that fall off the end
                    self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
                    self.o.z.e.eq(self.i.z.e + 1)
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[3:]),
                    self.o.of.m0.eq(self.i.tot[3]),
                    self.o.of.guard.eq(self.i.tot[2]),
                    self.o.of.round_bit.eq(self.i.tot[1]),
                    self.o.of.sticky.eq(self.i.tot[0])
                ]

        # pass-through of early-out flag, value and id
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
616
617
class FPAddStage1(FPState):
    """ FSM-state wrapper ("add_1") around FPAddStage1Mod: latches the
        module's z and overflow outputs, raises norm_stb while in the
        add_1 state, and always advances to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input data, copied into the module's input by mod.setup
        """
        self.mod.setup(m, i)

        # default for the strobe.  The override to 1 lives in action()
        # so that it only applies while the FSM is in this state: had it
        # been placed here (after this line) it would always win and
        # this default would never take effect.
        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module's results live in its output data object
        # (mod.o.of / mod.o.z): the module has no out_of / out_z attributes
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)

    def action(self, m):
        """ raises norm_stb (this state only) and moves to "normalise_1"
        """
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
640
641
642
643
class FPOpData:
    """ Data bundle: an output operand (z, an FPOp) plus an id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying i.z and i.mid into this object
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the members of this bundle as a list: z then mid
        """
        return [self.z, self.mid]
654
655
class FPADDBaseMod:
    """ FSM-based FP adder core: builds a list of states (either the
        compact chain or the longer one-state-per-step version) and
        emits one FSM state per entry.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # filled in by add_state(); one FSM state is emitted per entry
        self.states = []

    def ispec(self):
        """ returns a new input-data object (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output-data object (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # each registered state contributes its own action() to the FSM,
        # under the name held in its state_from attribute
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-step version (used when compact
            is False)

            NOTE(review): this path looks stale.  It reads self.in_mid,
            self.out_z and self.out_mid, none of which are set in
            __init__; it passes more arguments to several setup() calls
            than the setup() methods defined in this file accept; and it
            omits id_wid from FPGet2Op, unlike get_compact_fragment.
            Confirm before using compact=False.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                   pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced version: get-ops, special-cases+denorm,
            align+add, norm-to-pack, chained together, followed by a
            final "pack_put_z" output state
        """


        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also an FSM state
        # (the loop rebinds sc; the value is not used afterwards)
        for mod in chainlist:
            sc = self.add_state(mod)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                        sc.o.mid, self.o.mid))
778
779
class FPADDBase(FPState):
    """ FSM state ("fpadd") that wraps an FPADDBaseMod: hands an operand
        pair to the inner adder via in_t, waits for its result to be
        triggered, then moves on to "put_z".
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec: same as that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: same as that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links the wrapped module to inputs and outputs

            * m: the Module being built
            * i: input data, copied to self.i and on to the module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: not used here (the id travels inside i / o)
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ per-state behaviour: accept an operand pair while the inner
            adder is not done, and move to "put_z" once it is
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"

        # (an unreachable block that followed an early "return" here was
        #  removed: it referred to in_mid / out_mid / out_z attributes
        #  that this class never defines)
869
870
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: special-cases/denorm, align/add and
        normalise-to-pack, connected in that order.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are created once here and added to the
        # combinatorial domain in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ returns a Module holding the three stages as submodules
            plus the connections between them
        """
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
887
888
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in: a PriorityCombMuxInPipe with num_rows inputs whose
        pass-through stage carries FPADDBaseData.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_spec():
            # a fresh data object each time the stage asks for one
            return FPADDBaseData(width, id_wid)

        passthru = PassThroughStage(make_spec)
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=self.num_rows)
895
896
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out: a CombMuxOutPipe with num_rows outputs whose
        pass-through stage carries FPPackData.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_spec():
            # a fresh data object each time the stage asks for one
            return FPPackData(width, id_wid)

        passthru = PassThroughStage(make_spec)
        CombMuxOutPipe.__init__(self, passthru, n_len=self.num_rows)
903
904
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # this class's own in/out are simply those of the outer pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ returns a Module holding fan-in, adder and fan-out, with
            fan-in connected to the adder and the adder to fan-out
        """
        m = Module()
        named_parts = (("inpipe", self.inpipe),
                       ("fpadd", self.fpadd),
                       ("outpipe", self.outpipe))
        for name, part in named_parts:
            setattr(m.submodules, name, part)

        into_adder = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += into_adder
        out_of_adder = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += out_of_adder

        return m

    def ports(self):
        """ returns the port list gathered at construction time
            (fan-in ports followed by fan-out ports)
        """
        return self._ports
937
938
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of out_z
                     results that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        def named_op(fmt, idx):
            # create one FPOp and give it an indexed name
            op = FPOp(width)
            op.name = fmt % idx
            return op

        # operand pairs: (in_a_0, in_b_0), (in_a_1, in_b_1), ...
        pairs = []
        for idx in range(rs_sz):
            op_a = named_op("in_a_%d", idx)
            op_b = named_op("in_b_%d", idx)
            pairs.append((op_a, op_b))
        self.rs = Array(pairs)

        # results: out_z_0, out_z_1, ...
        self.res = Array([named_op("out_z_%d", idx) for idx in range(rs_sz)])

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is used as the source
        first_a = self.rs[0][0]
        first_b = self.rs[0][1]

        fetch_a = self.add_state(FPGetOp("get_a", "get_b",
                                         first_a, self.width))
        fetch_a.setup(m, first_a)
        op_a = fetch_a.out_op

        fetch_b = self.add_state(FPGetOp("get_b", "fpadd",
                                         first_b, self.width))
        fetch_b.setup(m, first_b)
        op_b = fetch_b.out_op

        adder = self.add_state(FPADDBase(self.width, self.id_wid,
                                         self.single_cycle))
        # create an input spec object for FPADDBase and latch a, b, mid
        adder_in = adder.ispec()
        m.d.sync += [adder_in.a.eq(op_a),
                     adder_in.b.eq(op_b),
                     adder_in.mid.eq(self.ids.in_mid)]
        adder.setup(m, adder_in, fetch_b.out_decode, self.ids.in_mid)
        result = adder.o

        self.add_state(FPPutZIdx("put_z", result.z, self.res,
                                 result.mid, "get_a"))

        # one FSM state per registered state object
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1028
1029
if __name__ == "__main__":
    # the first branch is the one that runs; the alternative (driving
    # FPADDBase directly) is kept below it but is never selected
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports()
                         + alu.rs[0][1].ports()
                         + alu.res[0].ports()
                         + [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b]
                         + alu.in_t.ports()
                         + alu.out_z.ports()
                         + [alu.in_mid, alu.out_mid]))
1043
1044
1045 # works... but don't use, just do "python fname.py convert -t v"
1046 #print (verilog.convert(alu, ports=[
1047 # ports=alu.in_a.ports() + \
1048 # alu.in_b.ports() + \
1049 # alu.out_z.ports())