split out rounding to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
20 from fpcommon.postcalc import FPAddStage1Data
21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
22 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
23 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
24
25
class FPAddSpecialCasesMod:
    """ Combinatorial special-case handling for FP add.

        Covers the operand combinations that are resolved without doing
        any arithmetic: NaNs, infinities, zeros and "a == -b".
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        * width:  bit-width, passed on to the input/output data specs
        * id_wid: width of the "mid" identifier carried alongside the data
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        # input and output records are allocated from the specs below
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPADDBaseData (fields a, b and mid are used here)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPSCData (a, b, z, oz, out_do_z, mid are driven)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as a submodule of m and wires the
            incoming data i onto self.i (combinatorially)
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ the result is always self.o; the argument is not used
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        o = self.o
        z = o.z

        m.submodules.sc_out_z = z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a), b1.decode(self.i.b)]

        # signs differ / mantissas identical
        s_nomatch = Signal()
        m_match = Signal()
        m.d.comb += [s_nomatch.eq(a1.s != b1.s),
                     m_match.eq(a1.m == b1.m)]

        # The chain below is priority-ordered: first matching case wins.

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [o.out_do_z.eq(1), z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # a is inf: result is inf with a's sign...
        with m.Elif(a1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), z.inf(a1.s)]
            # ...unless b has exp_128 set and the signs differ: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), z.inf(b1.s)]

        # both zero: sign is the AND of the two signs
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         z.create(b1.s, b1.e, b1.m[3:-1])]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         z.create(a1.s, a1.e, a1.m[3:-1])]

        # a equal to -b: result is zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [o.out_do_z.eq(1), z.zero(0)]

        # no special case: denormalised-number checks come next,
        # so hand the decoded a/b through
        with m.Else():
            m.d.comb += [o.out_do_z.eq(0),
                         o.a.eq(a1),
                         o.b.eq(b1)]

        # always present the packed z value and the id on the output
        m.d.comb += [o.oz.eq(z.v), o.mid.eq(self.i.mid)]

        return m
144
145
class FPAddSpecialCases(FPState):
    """ FSM-state wrapper ("special_cases") around FPAddSpecialCasesMod.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        * width:  bit-width passed to the wrapped module
        * id_wid: width of the "mid" identifier passed to the wrapped module
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: Module under construction
            * i: input data, forwarded to the wrapped module's setup
        """
        # the module's setup takes (m, i) only; its results live in mod.o
        self.mod.setup(m, i)
        # mirror the module's "special case hit" flag so action() can test it
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        """ next state: "put_z" on a special case, otherwise "denormalise"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
171
172
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ Special-cases and de-normalisation modules chained into one
        stage, usable as an FSM state ("special_cases") and as a pipeline.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # width/id_wid are set before the pipeline base is initialised,
        # as this object is passed to it as its own stage
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        """ input format (same as FPAddSpecialCasesMod.ispec)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format (the DeNorm module's output spec)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the special-cases -> denorm chain inside m, fed by i,
            and records the final module's output as self.o
        """
        stages = [FPAddSpecialCasesMod(self.width, self.id_wid),
                  FPAddDeNormMod(self.width, self.id_wid)]
        StageChain(stages).setup(m, i)

        # only needed for break-out (early-out)
        # self.out_do_z = smod.o.out_do_z

        self.o = stages[-1].o

    def process(self, i):
        """ the result is self.o (set by setup); the argument is not used
        """
        return self.o

    def action(self, m):
        """ registers the chain output into self.out, then goes to "align"
        """
        # for break-out (early-out)
        #with m.If(self.out_do_z):
        #    m.next = "put_z"
        #with m.Else():
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
216
217
class FPAddAlignMultiMod(FPState):
    """ One alignment step: whichever operand has the smaller exponent is
        passed through shift_down; exp_eq is raised when the exponents
        already match.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        # it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed straight through
        m.d.comb += [self.exp_eq.eq(0),
                     self.out_a.eq(self.in_a),
                     self.out_b.eq(self.in_b)]

        # exponent comparisons
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [agtb.eq(self.in_a.e > self.in_b.e),
                     altb.eq(self.in_a.e < self.in_b.e)]

        with m.If(agtb):
            # a's exponent is the larger one: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # b's exponent is the larger one: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: signal that the next stage may proceed
            m.d.comb += self.exp_eq.eq(1)

        return m
258
259
class FPAddAlignMulti(FPState):
    """ FSM state "align" built on FPAddAlignMultiMod: remains in this
        state until the module reports that the exponents are equal.
        (id_wid is accepted but not used by this class.)
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ adds the align module to m, feeds it in_a/in_b, and captures
            its results: exp_eq combinatorially, out_a/out_b registered
        """
        mod = self.mod
        m.submodules.align = mod
        m.d.comb += [mod.in_a.eq(in_a),
                     mod.in_b.eq(in_b),
                     self.exp_eq.eq(mod.exp_eq)]
        m.d.sync += [self.out_a.eq(mod.out_a),
                     self.out_b.eq(mod.out_b)]

    def action(self, m):
        """ moves on to "add_0" only once the exponents match
        """
        with m.If(self.exp_eq):
            m.next = "add_0"
282
283
class FPNumIn2Ops:
    """ Data record: two operands (a, b), a result number z, the
        early-out flag/value (out_do_z, oz) and the identifier mid.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns a list of field-by-field assignments from i, in the
            order z, out_do_z, oz, a, b, mid
        """
        pairs = ((self.z, i.z), (self.out_do_z, i.out_do_z),
                 (self.oz, i.oz), (self.a, i.a),
                 (self.b, i.b), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
297
298
class FPAddAlignSingleMod:
    """ Combinatorial alignment of A against B (or B against A) using a
        single shared multi-bit right-shifter.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPNumIn2Ops
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ the result is always self.o; the argument is not used
        """
        return self.o

    def setup(self, m, i):
        """ registers this module in m and wires i onto self.i
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        inp, out = self.i, self.o

        m.submodules.align_in_a = inp.a
        m.submodules.align_in_b = inp.b
        m.submodules.align_out_a = out.a
        m.submodules.align_out_b = out.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(inp.a.e), True)
        msr = MultiShiftRMerge(inp.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [msr.inp.eq(t_inp.m),
                     msr.diff.eq(tdiff),
                     t_out.m.eq(msr.m),
                     t_out.e.eq(t_inp.e + tdiff),
                     t_out.s.eq(t_inp.s)]

        # exponent differences in both directions, plus comparisons
        m.d.comb += [ediff.eq(inp.a.e - inp.b.e),
                     ediffr.eq(inp.b.e - inp.a.e),
                     elz.eq(inp.a.e < inp.b.e),
                     egz.eq(inp.a.e > inp.b.e)]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [out.a.eq(inp.a), out.b.eq(inp.b)]

        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        with m.If(~inp.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(inp.b),
                             tdiff.eq(ediff),
                             out.b.eq(t_out),
                             out.b.s.eq(inp.b.s),  # whoops forgot sign
                             ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(inp.a),
                             tdiff.eq(ediffr),
                             out.a.eq(t_out),
                             out.a.s.eq(inp.a.s),  # whoops forgot sign
                             ]

        # remaining fields are passed through unmodified
        m.d.comb += [out.mid.eq(inp.mid),
                     out.z.eq(inp.z),
                     out.out_do_z.eq(inp.out_do_z),
                     out.oz.eq(inp.oz)]

        return m
392
393
class FPAddAlignSingle(FPState):
    """ FSM state "align" using the single-cycle FPAddAlignSingleMod.

        * width:  bit-width passed to the module and the output operands
        * id_wid: identifier width passed to the module
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: Module under construction
            * i: input data, forwarded to the align module's setup
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module exposes its aligned operands on its output record
        # (mod.o.a / mod.o.b); it has no out_a / out_b attributes
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """ unconditionally moves to "add_0"
        """
        m.next = "add_0"
413
414
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Align, add-stage-0 and add-stage-1 modules chained into one
        stage, usable as an FSM state ("align") and as a pipeline.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        # width/id_wid are set before the pipeline base is initialised,
        # as this object is passed to it as its own stage
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format (the AddStage1 output spec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the align -> add0 -> add1 chain inside m, fed by i,
            and records the final module's output as self.o
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        stages = [FPAddAlignSingleMod(self.width, self.id_wid),
                  FPAddStage0Mod(self.width, self.id_wid),
                  FPAddStage1Mod(self.width, self.id_wid)]
        StageChain(stages).setup(m, i)

        self.o = stages[-1].o

    def process(self, i):
        """ the result is self.o (set by setup); the argument is not used
        """
        return self.o

    def action(self, m):
        """ registers the chain output into self.a1o, then goes to
            "normalise_1"
        """
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
450
451
class FPAddStage0Data:
    """ Data record produced by add stage 0: result number z, the
        mantissa total tot (4 bits wider than z.m_width), the early-out
        flag/value (out_do_z, oz) and the identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns a list of field-by-field assignments from i, in the
            order z, out_do_z, oz, tot, mid
        """
        pairs = ((self.z, i.z), (self.out_do_z, i.out_do_z),
                 (self.oz, i.oz), (self.tot, i.tot), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
464
465
class FPAddStage0Mod:
    """ First stage of add (combinatorial): adds the mantissas when the
        signs match, otherwise subtracts the smaller from the larger.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPAddStage0Data
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ the result is always self.o; the argument is not used
        """
        return self.o

    def setup(self, m, i):
        """ registers this module in m and wires i onto self.i
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o
        m.submodules.add0_in_a = inp.a
        m.submodules.add0_in_b = inp.b
        m.submodules.add0_out_z = out.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(inp.a.m)+1, reset_less=True)
        bm0 = Signal(len(inp.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(inp.a.s == inp.b.s),
                     mge.eq(inp.a.m >= inp.b.m),
                     am0.eq(Cat(inp.a.m, 0)),
                     bm0.eq(Cat(inp.b.m, 0))]

        # arithmetic is only performed when no special-case result exists
        with m.If(~inp.out_do_z):
            m.d.comb += out.z.e.eq(inp.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [out.tot.eq(am0 + bm0),
                             out.z.s.eq(inp.a.s)]
            # a mantissa greater than (or equal to) b: a - b, sign of a
            with m.Elif(mge):
                m.d.comb += [out.tot.eq(am0 - bm0),
                             out.z.s.eq(inp.a.s)]
            # b mantissa greater than a: b - a, sign of b
            with m.Else():
                m.d.comb += [out.tot.eq(bm0 - am0),
                             out.z.s.eq(inp.b.s)]

        # early-out data and identifier are passed straight through
        m.d.comb += [out.oz.eq(inp.oz),
                     out.out_do_z.eq(inp.out_do_z),
                     out.mid.eq(inp.mid)]
        return m
530
531
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        * width:  bit-width passed to the wrapped FPAddStage0Mod
        * id_wid: identifier width passed to the wrapped FPAddStage0Mod
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: Module under construction
            * i: input data, forwarded to the module's setup
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """ unconditionally moves to "add_1"
        """
        m.next = "add_1"
553
554
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot, tot[-1], acts
        as a kind of carry bit) and picks the mantissa and
        guard/round/sticky bits accordingly.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPAddStage0Data
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ the result is always self.o; the argument is not used
        """
        return self.o

    def setup(self, m, i):
        """ registers this module (and its output overflow object) in m
            and wires i onto self.i
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z, of = self.o.z, self.o.of

        # default: z copied through; fields are overridden below
        m.d.comb += z.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows: take the
            # mantissa one bit higher up and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += [z.m.eq(tot[4:]),
                             of.m0.eq(tot[4]),
                             of.guard.eq(tot[3]),
                             of.round_bit.eq(tot[2]),
                             of.sticky.eq(tot[1] | tot[0]),
                             z.e.eq(self.i.z.e + 1)]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [z.m.eq(tot[3:]),
                             of.m0.eq(tot[3]),
                             of.guard.eq(tot[2]),
                             of.round_bit.eq(tot[1]),
                             of.sticky.eq(tot[0])]

        # early-out data and identifier are passed straight through
        m.d.comb += [self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.oz.eq(self.i.oz),
                     self.o.mid.eq(self.i.mid)]

        return m
612
613
class FPAddStage1(FPState):
    """ FSM state "add_1" wrapping FPAddStage1Mod.

        * width:  bit-width passed to the module and to out_z
        * id_wid: identifier width passed to the module

        norm_stb is a registered strobe: it defaults to 0 and is set to 1
        only from within this state's action().
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: Module under construction
            * i: input data, forwarded to the module's setup
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # the module's results live on its output record (mod.o.of, mod.o.z)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)

    def action(self, m):
        """ raises norm_stb and moves to "normalise_1"
        """
        # asserted here rather than in setup(), where an unconditional
        # eq(1) would override the eq(0) default in every state
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
636
637
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalise, round, corrections and pack modules chained into one
        stage, usable as an FSM state ("normalise_1") and as a pipeline.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        # id_wid/width are set before the pipeline base is initialised,
        # as this object is passed to it as its own stage
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        """ input format (the Norm1ModSingle input spec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format (the FPPackMod output spec)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the norm -> round -> corrections -> pack chain inside
            m, fed by i; allocates self.out_z and records the pack
            module's output as self.o
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stages = [FPNorm1ModSingle(self.width, self.id_wid),
                  FPRoundMod(self.width, self.id_wid),
                  FPCorrectionsMod(self.width, self.id_wid),
                  FPPackMod(self.width, self.id_wid)]
        StageChain(stages).setup(m, i)

        pmod = stages[-1]
        self.out_z = pmod.ospec()
        self.o = pmod.o

    def process(self, i):
        """ the result is self.o (set by setup); the argument is not used
        """
        return self.o

    def action(self, m):
        """ registers the chain output into self.out_z, then goes to
            "pack_put_z"
        """
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
673
674
class FPCorrectionsMod:
    """ Post-rounding correction (combinatorial): copies the input
        through and, for a denormalised z, replaces the exponent with
        the input's N127 value.  The output record is named out_z.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPRoundData (same as the input)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ the result is always self.out_z; the argument is not used
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module in m and wires i onto self.i
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.out_z
        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = out.z

        # default: straight copy (copies mid, z, out_do_z)
        m.d.comb += out.eq(inp)

        # override only the exponent, and only on the non-early-out path
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                m.d.comb += out.z.e.eq(inp.z.N127)
        return m
707
708
class FPCorrections(FPState):
    """ FSM state "corrections" wrapping FPCorrectionsMod.

        * width:  bit-width passed to the wrapped module
        * id_wid: identifier width passed to the wrapped module
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format, delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format, delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs

            * m:    Module under construction
            * in_z: input data, forwarded to the module's setup
        """
        self.mod.setup(m, in_z)

        # FPCorrectionsMod names its output record out_z (it has no .o)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ unconditionally moves to "pack"
        """
        m.next = "pack"
732
733
class FPPackData:
    """ Data record for a packed result: value z (width bits) and the
        identifier mid (id_wid bits).
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments of i.z and i.mid onto this record
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.mid.eq(i.mid))
        return assigns

    def ports(self):
        """ returns the record's signals as a list: [z, mid]
        """
        return list((self.z, self.mid))
745
746
class FPPackMod:
    """ Packs the result (combinatorial): on the normal path builds z
        from sign/exponent/mantissa (or inf when overflowed); on the
        early-out path the precomputed oz value is used instead.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPPackData
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ the result is always self.o; the argument is not used
        """
        return self.o

    def setup(self, m, in_z):
        """ registers this module in m and wires in_z onto self.i
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        inp = self.i
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = inp.z
        m.submodules.pack_out_z = z

        m.d.comb += self.o.mid.eq(inp.mid)

        with m.If(~inp.out_do_z):
            # normal path: overflow becomes a signed infinity
            with m.If(inp.z.is_overflowed):
                m.d.comb += z.inf(inp.z.s)
            with m.Else():
                m.d.comb += z.create(inp.z.s, inp.z.e, inp.z.m)
        with m.Else():
            # early-out path: take the already-packed special-case value
            m.d.comb += z.v.eq(inp.oz)

        m.d.comb += self.o.z.eq(z.v)
        return m
785
786
class FPPack(FPState):
    """ FSM state "pack" wrapping FPPackMod.

        * width:  bit-width passed to the wrapped module
        * id_wid: identifier width passed to the wrapped module
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format, delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format, delegated to the wrapped module (FPPackData)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs

            * m:    Module under construction
            * in_z: input data, forwarded to the module's setup
        """
        self.mod.setup(m, in_z)

        # FPPackMod's result is its output record mod.o (an FPPackData,
        # whose fields are z and mid)
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ unconditionally moves to "pack_put_z"
        """
        m.next = "pack_put_z"
810
811
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and waits for the
        stb/ack handshake before moving to to_state.

        * state:    name of this FSM state
        * in_z:     value to be written to out_z.z.v
        * out_z:    destination; its .z must provide v, stb and ack
        * in_mid:   identifier to copy, or None to skip the copy
        * out_mid:  destination for the identifier
        * to_state: state to go to afterwards (default "get_ops")
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        port = self.out_z.z
        m.d.sync += port.v.eq(self.in_z)

        with m.If(port.stb & port.ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep the strobe raised until acknowledged
            m.d.sync += port.stb.eq(1)
835
836
class FPPutZIdx(FPState):
    """ As FPPutZ, but the destination is selected from out_zs by
        indexing it with in_mid.

        * state:    name of this FSM state
        * in_z:     source; its .v is written to the selected output
        * out_zs:   indexable collection of outputs (each with v, stb, ack)
        * in_mid:   index into out_zs
        * to_state: state to go to afterwards (default "get_ops")
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # combinatorial copies of the selected output's handshake signals
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack)]

        m.d.sync += self.out_zs[self.in_mid].v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep the strobe raised until acknowledged
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
862
863
class FPOpData:
    """ Data record: an FPOp result z plus the identifier mid
        (id_wid bits).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments of i.z and i.mid onto this record
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.mid.eq(i.mid))
        return assigns

    def ports(self):
        """ returns the record's members as a list: [z, mid]
        """
        return list((self.z, self.mid))
874
875
class FPADDBaseMod:
    """ FSM-based IEEE754 FP adder: a list of FPState objects is
        collected and each becomes one state of a single FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPOpData
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the FSM state list and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per collected stage, keyed on its state_from name
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-stage variant of the adder.

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are created in __init__, and the
            setup() calls pass more arguments than the setup() methods
            visible in this file accept.  It looks stale relative to
            get_compact_fragment - confirm before use.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output states: normal result, and special-case (early-out) result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced-stage variant: get-ops, special-cases +
            denorm, align + add, normalise-to-pack, then the output state
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also an FSM state
        for mod in chainlist:
            self.add_state(mod)

        # n1.out_z is allocated by n1.setup(), which chain.setup() invoked
        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
998
999
class FPADDBase(FPState):
    """ FSM state "fpadd" that wraps a complete FPADDBaseMod adder and
        manages the strobe/ack handshakes into and out of it.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input format, delegated to the wrapped adder module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format, delegated to the wrapped adder module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires the wrapped adder into m.

            * m:       Module under construction
            * i:       input data, copied to self.i and on to the adder
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid:  accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                     ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ per-state behaviour: accept an operand pair while the adder
            is not done; once z_done is seen, acknowledge and go to
            "put_z"
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),   # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                             ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                         ]
            m.next = "put_z"
1089
1090
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: special-cases/denorm, align/add,
        normalise-to-pack, connected in that order.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are built now and applied in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1107
1108
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in front end: a PriorityCombMuxInPipe with num_rows inputs
        (p_len) round a PassThroughStage whose data spec is
        FPADDBaseData(width, id_wid).
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_spec():
            # called by the stage whenever it needs a fresh data object
            return FPADDBaseData(width, id_wid)

        super().__init__(PassThroughStage(make_spec), p_len=self.num_rows)
1115
1116
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out back end: a CombMuxOutPipe with num_rows outputs (n_len)
        round a PassThroughStage whose data spec is
        FPPackData(width, id_wid).
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def make_spec():
            # called by the stage whenever it needs a fresh data object
            return FPPackData(width, id_wid)

        super().__init__(PassThroughStage(make_spec), n_len=self.num_rows)
1123
1124
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed on to all three sub-pipelines
            * num_rows: number of fan-in inputs and fan-out outputs
        """
        self.num_rows = num_rows

        fan_in = FPADDInMuxPipe(width, id_wid, num_rows)
        adder = FPADDBasePipe(width, id_wid)
        fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)
        self.inpipe = fan_in     # fan-in
        self.fpadd = adder       # add stage
        self.outpipe = fan_out   # fan-out

        # this class has no in/out of its own: it re-exports the
        # fan-in's "p" and the fan-out's "n"
        self.p = fan_in.p
        self.n = fan_out.n
        self._ports = fan_in.ports() + fan_out.ports()

    def elaborate(self, platform):
        """ registers the three pipes as submodules and links
            fan-in -> adder -> fan-out. returns the Module.
        """
        m = Module()
        for label in ("inpipe", "fpadd", "outpipe"):
            # submodule name and attribute name are the same
            setattr(m.submodules, label, getattr(self, label))

        in_to_add = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += in_to_add
        add_to_out = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += add_to_out

        return m

    def ports(self):
        """ returns the port list gathered at construction time
            (fan-in ports followed by fan-out ports)
        """
        return self._ports
1157
1158
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: how many (in_a, in_b) operand pairs and how many
              out_z results are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        operand_pairs = []
        for idx in range(rs_sz):
            op_a = FPOp(width)
            op_b = FPOp(width)
            op_a.name = "in_a_%d" % idx
            op_b.name = "in_b_%d" % idx
            operand_pairs.append((op_a, op_b))
        self.rs = Array(operand_pairs)

        # results, named out_z_<n>
        results = []
        for idx in range(rs_sz):
            z_op = FPOp(width)
            z_op.name = "out_z_%d" % idx
            results.append(z_op)
        self.res = Array(results)

        # FSM states, filled in by add_state()
        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and hands the same
            object back, so that creation and registration can be chained
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired into the state machine
        first_a, first_b = self.rs[0]

        # state "get_a" (next: "get_b")
        get_a = FPGetOp("get_a", "get_b", first_a, self.width)
        self.add_state(get_a)
        get_a.setup(m, first_a)
        op_a = get_a.out_op

        # state "get_b" (next: "fpadd")
        get_b = FPGetOp("get_b", "fpadd", first_b, self.width)
        self.add_state(get_b)
        get_b.setup(m, first_b)
        op_b = get_b.out_op

        # the adder itself, fed from a freshly created input spec object
        adder = FPADDBase(self.width, self.id_wid, self.single_cycle)
        self.add_state(adder)
        adder_in = adder.ispec()
        m.d.sync += [
            adder_in.a.eq(op_a),
            adder_in.b.eq(op_b),
            adder_in.mid.eq(self.ids.in_mid),
        ]
        adder.setup(m, adder_in, get_b.out_decode, self.ids.in_mid)
        adder_out = adder.o

        # state "put_z" (next: "get_a")
        self.add_state(FPPutZIdx("put_z", adder_out.z, self.res,
                                 adder_out.mid, "get_a"))

        # one FSM state per registered object, in registration order
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1248
1249
if __name__ == "__main__":
    # developer toggle: True builds the FSM-based FPADD, False builds
    # FPADDBase on its own
    build_fpadd = True
    if build_fpadd:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = (alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid])
    else:
        # NOTE(review): the attribute names used here (in_a, in_b,
        # out_z, in_mid, out_mid) should be checked against FPADDBase
        # before enabling this branch.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = ([alu.in_a, alu.in_b] +
                     alu.in_t.ports() +
                     alu.out_z.ports() +
                     [alu.in_mid, alu.out_mid])
    main(alu, ports=port_list)
1263
1264
1265 # works... but don't use, just do "python fname.py convert -t v"
1266 #print (verilog.convert(alu, ports=[
1267 # ports=alu.in_a.ports() + \
1268 # alu.in_b.ports() + \
1269 # alu.out_z.ports())