split out corrections to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
20 from fpcommon.postcalc import FPAddStage1Data
21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
22 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
23 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
24 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
25
26
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros and exact cancellation

        Purely combinatorial.  Both operands are decoded here; when one
        of the special cases below applies the answer is built directly
        in o.z and o.out_do_z is raised.  Otherwise out_do_z is driven
        low and the decoded operands are forwarded in o.a / o.b.

        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as m.submodules.specialcases and
            combinatorially connects the input data to it
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output data (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += a1.decode(self.i.a)
        m.d.comb += b1.decode(self.i.b)

        # intermediate comparisons used by the tests below
        s_nomatch = Signal()    # operand signs differ
        m_match = Signal()      # operand mantissas are identical
        m.d.comb += [s_nomatch.eq(a1.s != b1.s),
                     m_match.eq(a1.m == b1.m),
                     ]

        # the tests form one priority chain: the first match wins

        # a or b is NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [self.o.out_do_z.eq(1),
                         self.o.z.nan(0),
                         ]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling: under review.
        # Four extra cases for a (+/-)zero operand paired with a NaN
        # operand, building the result with z.create() from the NaN's
        # sign/exponent/mantissa bits, were tried here and are disabled.

        # a is inf: result is inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += [self.o.out_do_z.eq(1),
                         self.o.z.inf(a1.s),
                         ]
            # a is inf and signs don't match: override with NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # b is inf: result is inf
        with m.Elif(b1.is_inf):
            m.d.comb += [self.o.out_do_z.eq(1),
                         self.o.z.inf(b1.s),
                         ]

        # a and b both zero: result is signed-a/b
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [self.o.out_do_z.eq(1),
                         self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1]),
                         ]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [self.o.out_do_z.eq(1),
                         self.o.z.create(b1.s, b1.e, b1.m[3:-1]),
                         ]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [self.o.out_do_z.eq(1),
                         self.o.z.create(a1.s, a1.e, a1.m[3:-1]),
                         ]

        # a equal to -b: result is zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [self.o.out_do_z.eq(1),
                         self.o.z.zero(0),
                         ]

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += [self.o.out_do_z.eq(0),
                         self.o.a.eq(a1),
                         self.o.b.eq(b1),
                         ]

        # always: expose the packed result value and pass the id through
        m.d.comb += [self.o.oz.eq(self.o.z.v),
                     self.o.mid.eq(self.i.mid),
                     ]

        return m
145
146
class FPAddSpecialCases(FPState):
    """ FSM state wrapper ("special_cases") around FPAddSpecialCasesMod.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        * out_z: registered copy of the module's result value and id
        * out_do_z: high when the module produced the result directly
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup() only accepts (m, i); its results are all
        # found on self.mod.o
        self.mod.setup(m, i)
        # action() branches on out_do_z, so it has to follow the module
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # only take the output (and mid)
        # NOTE(review): out_z has the same format as self.mod.o, so the
        # value is latched into out_z.z.v - confirm against FPSCData
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ early-out to "put_z" when the result is already known,
            otherwise carry on to "denormalise"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
172
173
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special-cases and de-normalisation combined in a single stage.

        Chains FPAddSpecialCasesMod into FPAddDeNormMod.  The object is
        both an FSM state ("special_cases") and a pipeline that uses
        itself as its stage.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        """ input format (that of SpecialCases)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format (that of DeNorm)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        stages = [FPAddSpecialCasesMod(self.width, self.id_wid),
                  FPAddDeNormMod(self.width, self.id_wid),
                  ]
        StageChain(stages).setup(m, i)

        # NOTE: a break-out (early-out) would additionally need the
        # special-cases module's o.out_do_z here.

        # the output of the chain is the output of its last module
        self.o = stages[-1].o

    def process(self, i):
        """ returns the chain output (the argument is not used)
        """
        return self.o

    def action(self, m):
        """ latches the chain output and moves on to "align"
            (no early-out to "put_z" is taken here)
        """
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
217
218
class FPAddAlignMultiMod(FPState):
    """ one step of exponent alignment.

        Whichever operand has the smaller exponent is passed through
        shift_down(); exp_eq is raised once the exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # defaults: not (yet) equal, operands passed through untouched.
        # the branches below override these where needed.
        m.d.comb += [self.exp_eq.eq(0),
                     self.out_a.eq(self.in_a),
                     self.out_b.eq(self.in_b),
                     ]

        agtb = Signal(reset_less=True)  # a's exponent greater than b's
        altb = Signal(reset_less=True)  # a's exponent less than b's
        m.d.comb += [agtb.eq(self.in_a.e > self.in_b.e),
                     altb.eq(self.in_a.e < self.in_b.e),
                     ]

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)

        return m
259
260
class FPAddAlignMulti(FPState):
    """ FSM state wrapper ("align") around FPAddAlignMultiMod: the
        state is only left (for "add_0") once exp_eq is high.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        # inputs and the "exponents equal" flag are combinatorial...
        m.d.comb += [self.mod.in_a.eq(in_a),
                     self.mod.in_b.eq(in_b),
                     self.exp_eq.eq(self.mod.exp_eq),
                     ]
        # ... whilst the operand outputs are registered
        m.d.sync += [self.out_a.eq(self.mod.out_a),
                     self.out_b.eq(self.mod.out_b),
                     ]

    def action(self, m):
        """ stays in this state until the exponents are equal
        """
        with m.If(self.exp_eq):
            m.next = "add_0"
283
284
class FPNumIn2Ops:
    """ data record: two FPNumIn operands (a, b), the early-out result
        (z, oz, out_do_z) and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into this record
        """
        assignments = [self.z.eq(i.z)]
        assignments.append(self.out_do_z.eq(i.out_do_z))
        assignments.append(self.oz.eq(i.oz))
        assignments.append(self.a.eq(i.a))
        assignments.append(self.b.eq(i.b))
        assignments.append(self.mid.eq(i.mid))
        return assignments
298
299
class FPAddAlignSingleMod:
    """ single-cycle exponent alignment of the two operands
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPNumIn2Ops
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the output data (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)   # a.e - b.e
        ediffr = Signal(espec, reset_less=True)  # b.e - a.e
        tdiff = Signal(espec, reset_less=True)   # shift amount in use
        elz = Signal(reset_less=True)            # a.e < b.e
        egz = Signal(reset_less=True)            # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [msr.inp.eq(t_inp.m),
                     msr.diff.eq(tdiff),
                     t_out.m.eq(msr.m),
                     t_out.e.eq(t_inp.e + tdiff),
                     t_out.s.eq(t_inp.s),
                     ]

        # exponent differences (both directions) and comparisons
        m.d.comb += [ediff.eq(self.i.a.e - self.i.b.e),
                     ediffr.eq(self.i.b.e - self.i.a.e),
                     elz.eq(self.i.a.e < self.i.b.e),
                     egz.eq(self.i.a.e > self.i.b.e),
                     ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [self.o.a.eq(self.i.a),
                     self.o.b.eq(self.i.b),
                     ]

        # only one shifter (muxed): select which operand goes through it
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += t_inp.eq(self.i.b)
                m.d.comb += tdiff.eq(ediff)
                m.d.comb += self.o.b.eq(t_out)
                # the sign is re-applied after the copy from t_out
                m.d.comb += self.o.b.s.eq(self.i.b.s)
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += t_inp.eq(self.i.a)
                m.d.comb += tdiff.eq(ediffr)
                m.d.comb += self.o.a.eq(t_out)
                # the sign is re-applied after the copy from t_out
                m.d.comb += self.o.a.s.eq(self.i.a.s)

        # id and early-out result are passed straight through
        m.d.comb += [self.o.mid.eq(self.i.mid),
                     self.o.z.eq(self.i.z),
                     self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.oz.eq(self.i.oz),
                     ]

        return m
393
394
class FPAddAlignSingle(FPState):
    """ FSM state wrapper ("align") around FPAddAlignSingleMod:
        alignment completes in one step, then moves to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's aligned operands live on its output record
        # (mod.o.a / mod.o.b): it has no out_a / out_b attributes.
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """ unconditionally moves on to "add_0"
        """
        m.next = "add_0"
414
415
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ alignment and both add stages combined in a single stage.

        Chains FPAddAlignSingleMod, FPAddStage0Mod and FPAddStage1Mod.
        The object is both an FSM state ("align") and a pipeline that
        uses itself as its stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format (that of AddStage1)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        stages = [FPAddAlignSingleMod(self.width, self.id_wid),
                  FPAddStage0Mod(self.width, self.id_wid),
                  FPAddStage1Mod(self.width, self.id_wid),
                  ]
        StageChain(stages).setup(m, i)

        # the output of the chain is the output of its last module
        self.o = stages[-1].o

    def process(self, i):
        """ returns the chain output (the argument is not used)
        """
        return self.o

    def action(self, m):
        """ latches the chain output and moves on to "normalise_1"
        """
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
451
452
class FPAddStage0Data:
    """ data record produced by the first add stage: the partial
        result (z), the mantissa total (tot, 4 bits wider than z's
        m_width), the early-out result (oz, out_do_z) and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into this record
        """
        assignments = [self.z.eq(i.z)]
        assignments.append(self.out_do_z.eq(i.out_do_z))
        assignments.append(self.oz.eq(i.oz))
        assignments.append(self.tot.eq(i.tot))
        assignments.append(self.mid.eq(i.mid))
        return assignments
465
466
class FPAddStage0Mod:
    """ first stage of add: adds or subtracts the operand mantissas,
        depending on the operand signs, into o.tot
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPAddStage0Data
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output data (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)   # signs equal
        mge = Signal(reset_less=True)   # a mantissa >= b mantissa
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += seq.eq(self.i.a.s == self.i.b.s)
        m.d.comb += mge.eq(self.i.a.m >= self.i.b.m)
        m.d.comb += am0.eq(Cat(self.i.a.m, 0))
        m.d.comb += bm0.eq(Cat(self.i.b.m, 0))

        # nothing is computed when the result is already known
        with m.If(~self.i.out_do_z):
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += self.o.tot.eq(am0 + bm0)
                m.d.comb += self.o.z.s.eq(self.i.a.s)
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += self.o.tot.eq(am0 - bm0)
                m.d.comb += self.o.z.s.eq(self.i.a.s)
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += self.o.tot.eq(bm0 - am0)
                m.d.comb += self.o.z.s.eq(self.i.b.s)

        # early-out result and id are passed straight through
        m.d.comb += [self.o.oz.eq(self.i.oz),
                     self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.mid.eq(self.i.mid),
                     ]
        return m
531
532
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM state wrapper ("add_0") around FPAddStage0Mod: self.o is a
        registered copy of the module's output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """ unconditionally moves on to "add_1"
        """
        m.next = "add_1"
554
555
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot is kinda a
        carry bit) and splits tot into mantissa plus the
        m0/guard/round/sticky bits of the overflow record
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPAddStage0Data
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output data (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z_out = self.o.z
        of = self.o.of

        # default: z passes through; mantissa (and, on overflow, the
        # exponent) are overridden below
        m.d.comb += z_out.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows. shift result down
            with m.If(tot[-1]):
                m.d.comb += z_out.m.eq(tot[4:])
                m.d.comb += of.m0.eq(tot[4])
                m.d.comb += of.guard.eq(tot[3])
                m.d.comb += of.round_bit.eq(tot[2])
                # the two bits that drop off are merged into sticky
                m.d.comb += of.sticky.eq(tot[1] | tot[0])
                m.d.comb += z_out.e.eq(self.i.z.e + 1)
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += z_out.m.eq(tot[3:])
                m.d.comb += of.m0.eq(tot[3])
                m.d.comb += of.guard.eq(tot[2])
                m.d.comb += of.round_bit.eq(tot[1])
                m.d.comb += of.sticky.eq(tot[0])

        # early-out result and id are passed straight through
        m.d.comb += [self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.oz.eq(self.i.oz),
                     self.o.mid.eq(self.i.mid),
                     ]

        return m
613
614
class FPAddStage1(FPState):
    """ FSM state wrapper ("add_1") around FPAddStage1Mod.

        * out_z, out_of: registered copies of the module's z and
          overflow outputs
        * norm_stb: registered strobe, set whilst in this state
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # the module's results live on its output record (mod.o.of /
        # mod.o.z): it has no out_of / out_z attributes
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)

    def action(self, m):
        """ raises norm_stb and moves on to "normalise_1"
        """
        # set here rather than in setup(): an unconditional set-to-1
        # following the set-to-0 would leave norm_stb permanently high
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
637
638
class FPNormToPack(FPState, UnbufferedPipeline):
    """ normalisation, rounding, corrections and packing combined in a
        single stage.

        The object is both an FSM state ("normalise_1") and a pipeline
        that uses itself as its stage.  NOTE: out_z only exists once
        setup() has been called.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        """ input format (that of Norm1ModSingle)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format (that of FPPackMod)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stages = [FPNorm1ModSingle(self.width, self.id_wid),
                  FPRoundMod(self.width, self.id_wid),
                  FPCorrectionsMod(self.width, self.id_wid),
                  FPPackMod(self.width, self.id_wid),
                  ]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()  # register for the packed result
        self.o = packer.o            # combinatorial output of the chain

    def process(self, i):
        """ returns the chain output (the argument is not used)
        """
        return self.o

    def action(self, m):
        """ latches the chain output and moves on to "pack_put_z"
        """
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
674
675
class FPPackData:
    """ data record: the packed result value (z, width bits) and the
        id (mid, id_wid bits)
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's signals as a list: z, then mid
        """
        return list((self.z, self.mid))
687
688
class FPPackMod:
    """ packs the rounded result into its final value; when out_do_z
        is set the early-out value (oz) is used instead
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPPackData
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output data (the argument is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = self.i.z
        m.submodules.pack_out_z = z

        src = self.i.z
        m.d.comb += self.o.mid.eq(self.i.mid)

        # normal path: build the result from the number's fields
        with m.If(~self.i.out_do_z):
            # overflowed: result is infinity of the same sign
            with m.If(src.is_overflowed):
                m.d.comb += z.inf(src.s)
            with m.Else():
                m.d.comb += z.create(src.s, src.e, src.m)
        # early-out path: the value was already decided
        with m.Else():
            m.d.comb += z.v.eq(self.i.oz)

        m.d.comb += self.o.z.eq(z.v)
        return m
727
728
class FPPack(FPState):
    """ FSM state wrapper ("pack") around FPPackMod: out_z is a
        registered copy (FPPackData) of the module's output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format (that of the module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format (that of the module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module's result lives on its output record (mod.o), and
        # FPPackData carries the value in .z (it has no .v): its eq()
        # copies both z and mid
        m.d.sync += self.out_z.eq(self.mod.o)

    def action(self, m):
        """ unconditionally moves on to "pack_put_z"
        """
        m.next = "pack_put_z"
752
753
class FPPutZ(FPState):
    """ output state: presents in_z on out_z.z and holds its strobe
        high until it is acknowledged, then moves to to_state
        (default: "get_ops")
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        # the id is only copied across when one was supplied
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        zport = self.out_z.z
        m.d.sync += zport.v.eq(self.in_z)

        # strobe and ack both high: transfer done, drop the strobe
        with m.If(zport.stb & zport.ack):
            m.d.sync += zport.stb.eq(0)
            m.next = self.to_state
        # otherwise keep (or start) strobing
        with m.Else():
            m.d.sync += zport.stb.eq(1)
777
778
class FPPutZIdx(FPState):
    """ output state: as FPPutZ, except that the output port is
        selected from out_zs using in_mid as the index
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)

        # the output port selected by the id
        port = self.out_zs[self.in_mid]
        m.d.comb += outz_stb.eq(port.stb)
        m.d.comb += outz_ack.eq(port.ack)

        m.d.sync += port.v.eq(self.in_z.v)

        # strobe and ack both high: transfer done, drop the strobe
        with m.If(outz_stb & outz_ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        # otherwise keep (or start) strobing
        with m.Else():
            m.d.sync += port.stb.eq(1)
804
805
class FPOpData:
    """ data record: an FPOp result port (z) and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z, then mid
        """
        return list((self.z, self.mid))
816
817
class FPADDBaseMod:
    """ FSM-based FP adder: a list of states, each contributing its
        action to a single FSM built in get_fragment()
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPOpData
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states, and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with either the compact or longer variant
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        # one FSM state per registered state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ one FSM state per processing step.

            NOTE(review): this variant reads self.in_mid, self.out_z and
            self.out_mid, none of which this class sets, and calls
            setup() with argument lists that the state classes visible
            in this file do not accept.  It looks stale relative to
            get_compact_fragment - confirm before using compact=False.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-step or multi-step, wired up identically
        if self.single_cycle:
            align_cls = FPAddAlignSingle
        else:
            align_cls = FPAddAlignMulti
        alm = self.add_state(align_cls(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-step version also takes the strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # the two output states: normal result and early-out result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ reduced number of states: get-ops, special-cases+denorm,
            align+add, normalise-to-pack, then the output state
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        StageChain(chainlist, specallocate=True).setup(m, self.i)

        # every stage of the chain is also an FSM state
        for stage in chainlist:
            self.add_state(stage)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        # NOTE: there is no "put_z" (early-out) state in this variant
940
941
class FPADDBase(FPState):
    """ FSM state ("fpadd") that wraps a complete FPADDBaseMod: it
        hands an operand pair to the adder via in_t, waits for the
        adder's output to trigger, then moves on to "put_z".
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input format (that of the wrapped adder)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format (that of the wrapped adder)
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links the wrapped adder to inputs and outputs

            * i: input data, copied into self.i and on into the adder
            * add_stb: incoming strobe (registered into self.add_stb)
            * in_mid: not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                     ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),   # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                             ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0),
                         ]
            m.next = "put_z"

        # NOTE: statements that used to follow an unconditional return
        # here could never execute and have been removed.
1031
1032
class FPADDBasePipe(ControlBase):
    """ three-stage adder pipeline: special-cases+denorm, align+add,
        normalise-to-pack
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connect() returns the assignments linking the stages together:
        # they are only added to a module in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
1049
1050
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ fan-in: a PriorityCombMuxInPipe with num_rows inputs, passing
        FPADDBaseData straight through
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # input and output formats are the same (pass-through)
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
1057
1058
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ fan-out: a CombMuxOutPipe with num_rows outputs, passing
        FPPackData straight through
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # input and output formats are the same (pass-through)
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
1065
1066
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # use pipe in/out as this class in/out (kinda annoying)
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder -> fan-out
        m.d.comb += [self.inpipe.n.connect_to_next(self.fpadd.p),
                     self.fpadd.connect_to_next(self.outpipe),
                     ]

        return m

    def ports(self):
        """ returns the fan-in ports followed by the fan-out ports
        """
        return self._ports
1099
1100
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs, and of results
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs, named in_a_N / in_b_N
        operands = []
        for idx in range(rs_sz):
            pair = (FPOp(width), FPOp(width))
            pair[0].name = "in_a_%d" % idx
            pair[1].name = "in_b_%d" % idx
            operands.append(pair)
        self.rs = Array(operands)

        # result ports, named out_z_N
        results = []
        for idx in range(rs_sz):
            port = FPOp(width)
            port.name = "out_z_%d" % idx
            results.append(port)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states, and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is read
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # the result goes to the port selected by the id, then back to
        # "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                 o.mid, "get_a"))

        # one FSM state per registered state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1190
1191
if __name__ == "__main__":
    # hand-edited switch: True builds the full FPADD, False the
    # FPADDBase on its own
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        a_ports = alu.rs[0][0].ports()
        b_ports = alu.rs[0][1].ports()
        z_ports = alu.res[0].ports()
        id_ports = [alu.ids.in_mid, alu.ids.out_mid]
        main(alu, ports=a_ports + b_ports + z_ports + id_ports)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        op_ports = [alu.in_a, alu.in_b]
        t_ports = alu.in_t.ports()
        z_ports = alu.out_z.ports()
        id_ports = [alu.in_mid, alu.out_mid]
        main(alu, ports=op_ports + t_ports + z_ports + id_ports)
1205
1206
1207 # works... but don't use, just do "python fname.py convert -t v"
1208 #print (verilog.convert(alu, ports=[
1209 # ports=alu.in_a.ports() + \
1210 # alu.in_b.ports() + \
1211 # alu.out_z.ports())