remove accidentally-included code
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
20 from fpcommon.postcalc import FPAddStage1Data
21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
22 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
23
24
class FPAddSpecialCasesMod:
    """Special-case handling for add: NaNs, infinities and zeros.

    Decodes both operands and, whenever the answer can be decided
    without any arithmetic, writes it to ``o.z`` and raises
    ``o.out_do_z``.  Otherwise the decoded operands are forwarded on
    ``o.a`` / ``o.b`` for the stages that follow.

    NOTE: some of these rules are unique to add.  See "Special Operations"
    https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Build a fresh input record (FPADDBaseData)."""
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """Build a fresh output record (FPSCData)."""
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """Register this module under ``m`` and drive its input from ``i``."""
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """Stage API: the result is always this module's own output record."""
        return self.o

    def elaborate(self, platform):
        m = Module()
        out = self.o

        m.submodules.sc_out_z = out.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a), b1.decode(self.i.b)]

        # shared comparisons used by the priority chain below
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # Priority chain: the first matching rule decides the result.

        # either operand NaN -> NaN
        # (XXX the FP16 non-canonical NaN / signed-zero variants that used
        # to follow this rule are disabled and still under review)
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [out.out_do_z.eq(1), out.z.nan(0)]

        # a is inf -> inf with a's sign ...
        with m.Elif(a1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(a1.s)]
            # ... unless b has the all-ones exponent and the opposite sign,
            # in which case the NaN assignment overrides the inf above
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += out.z.nan(0)

        # b is inf -> inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(b1.s)]

        # both zero -> zero, negative only when both inputs are negative
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # a is zero -> b
        with m.Elif(a1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(b1.s, b1.e, b1.m[3:-1])]

        # b is zero -> a
        with m.Elif(b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s, a1.e, a1.m[3:-1])]

        # a == -b -> +ve zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [out.out_do_z.eq(1), out.z.zero(0)]

        # no special case: denormalised-number checks come next, so pass
        # the decoded a/b data through
        with m.Else():
            m.d.comb += [out.out_do_z.eq(0),
                         out.a.eq(a1),
                         out.b.eq(b1)]

        m.d.comb += [out.oz.eq(out.z.v), out.mid.eq(self.i.mid)]

        return m
143
144
class FPAddSpecialCases(FPState):
    """FSM state wrapping FPAddSpecialCasesMod ("special_cases").

    Handles NaNs, infs and zeros.  NOTE: some of these are unique to add.
    See "Special Operations"
    https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup() only takes (m, i); its early-out flag is
        # exposed on its output record, so mirror it here for action()
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # only take the output value (and mid).
        # NOTE(review): the module publishes its result as ``o.z.v``; the
        # registered copy is kept in ``out_z.z.v`` - confirm with consumers.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """Go to "put_z" on an early-out result, else to "denormalise"."""
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
170
171
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """Special-cases and de-normalisation chained into one stage.

    Acts as an FSM state ("special_cases") and as an unbuffered pipeline
    that is its own stage.  NOTE: some of the special cases are unique to
    add.  See "Special Operations"
    https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.width, self.id_wid = width, id_wid
        # the pipeline uses this very object as its stage
        UnbufferedPipeline.__init__(self, self)
        self.out = self.ospec()

    def ispec(self):
        """Input record: same as the special-cases module's input."""
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """Output record: same as the de-normalisation module's output."""
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """Instantiate and chain the two modules, feeding them from ``i``."""
        special = FPAddSpecialCasesMod(self.width, self.id_wid)
        denorm = FPAddDeNormMod(self.width, self.id_wid)

        StageChain([special, denorm]).setup(m, i)

        # (special.o.out_do_z would only be needed for an early break-out)
        self.o = denorm.o

    def process(self, i):
        """Stage API: return the chained output set up by setup()."""
        return self.o

    def action(self, m):
        """Register the chained result and always proceed to "align"."""
        # an early-out to "put_z" on out_do_z is not implemented here
        result = self.process(None)
        m.d.sync += self.out.eq(result)
        m.next = "align"
215
216
class FPAddAlignMultiMod(FPState):
    """Combinatorial alignment step for the multi-cycle aligner.

    Compares the two input exponents; the operand with the smaller one
    has ``shift_down`` applied, and ``exp_eq`` is raised when the
    exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through untouched
        m.d.comb += [self.exp_eq.eq(0),
                     self.out_a.eq(self.in_a),
                     self.out_b.eq(self.in_b)]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [agtb.eq(self.in_a.e > self.in_b.e),
                     altb.eq(self.in_a.e < self.in_b.e)]

        with m.If(agtb):
            # a's exponent is the larger one: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # b's exponent is the larger one: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: move to next stage.
            m.d.comb += self.exp_eq.eq(1)

        return m
257
258
class FPAddAlignMulti(FPState):
    """FSM state "align" built around FPAddAlignMultiMod.

    Stays in "align" until the module reports equal exponents, then
    moves on to "add_0".  ``id_wid`` is accepted but not used here.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """Attach the align module to ``m`` and wire it to in_a / in_b."""
        m.submodules.align = self.mod
        # combinatorial: operands in, "exponents equal" flag out
        m.d.comb += [self.mod.in_a.eq(in_a),
                     self.mod.in_b.eq(in_b),
                     self.exp_eq.eq(self.mod.exp_eq)]
        # the (possibly shifted) operands are registered
        m.d.sync += [self.out_a.eq(self.mod.out_a),
                     self.out_b.eq(self.mod.out_b)]

    def action(self, m):
        """Advance to "add_0" only when the exponents are equal."""
        with m.If(self.exp_eq):
            m.next = "add_0"
281
282
class FPNumIn2Ops:
    """Record of two FPNumIn operands plus the early-out result fields."""

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the list of assignments copying every field from ``i``."""
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
296
297
class FPAddAlignSingleMod:
    """Single-cycle operand alignment using one shared multi-bit shifter."""

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Build a fresh input record (FPSCData)."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Build a fresh output record (FPNumIn2Ops)."""
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's own output record."""
        return self.o

    def setup(self, m, i):
        """Register this module under ``m`` and drive its input from ``i``."""
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        inp, out = self.i, self.o

        m.submodules.align_in_a = inp.a
        m.submodules.align_in_b = inp.b
        m.submodules.align_out_a = out.a
        m.submodules.align_out_b = out.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(inp.a.e), True)
        msr = MultiShiftRMerge(inp.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [msr.inp.eq(t_inp.m),
                     msr.diff.eq(tdiff),
                     t_out.m.eq(msr.m),
                     t_out.e.eq(t_inp.e + tdiff),
                     t_out.s.eq(t_inp.s)]

        # exponent differences in both directions, and which is larger
        m.d.comb += [ediff.eq(inp.a.e - inp.b.e),
                     ediffr.eq(inp.b.e - inp.a.e),
                     elz.eq(inp.a.e < inp.b.e),
                     egz.eq(inp.a.e > inp.b.e)]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [out.a.eq(inp.a), out.b.eq(inp.b)]

        # only one shifter (muxed): route whichever operand has the
        # smaller exponent through it
        with m.If(~inp.out_do_z):
            with m.If(egz):
                # exponent of a greater than b: shift b down
                m.d.comb += [t_inp.eq(inp.b),
                             tdiff.eq(ediff),
                             out.b.eq(t_out),
                             out.b.s.eq(inp.b.s),  # sign taken from input
                             ]
            with m.Elif(elz):
                # exponent of b greater than a: shift a down
                m.d.comb += [t_inp.eq(inp.a),
                             tdiff.eq(ediffr),
                             out.a.eq(t_out),
                             out.a.s.eq(inp.a.s),  # sign taken from input
                             ]

        # pass-through fields
        m.d.comb += [out.mid.eq(inp.mid),
                     out.z.eq(inp.z),
                     out.out_do_z.eq(inp.out_do_z),
                     out.oz.eq(inp.oz)]

        return m
391
392
class FPAddAlignSingle(FPState):
    """FSM state "align" wrapping the single-cycle FPAddAlignSingleMod."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # The module publishes its aligned operands on its output record
        # ``o`` (it has no out_a / out_b attributes of its own).
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """Alignment takes one cycle: always proceed to "add_0"."""
        m.next = "add_0"
412
413
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """Align, add stage 0 and add stage 1 chained into a single stage.

    Acts as an FSM state ("align") and as an unbuffered pipeline that
    is its own stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width, self.id_wid = width, id_wid
        # the pipeline uses this very object as its stage
        UnbufferedPipeline.__init__(self, self)
        self.a1o = self.ospec()

    def ispec(self):
        """Input record: FPSCData."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Output record: same as add stage 1's output."""
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """Instantiate the three modules and chain them, fed from ``i``."""
        # chain AddAlignSingle, AddStage0 and AddStage1 (in that order)
        stage_types = (FPAddAlignSingleMod, FPAddStage0Mod, FPAddStage1Mod)
        stages = [cls(self.width, self.id_wid) for cls in stage_types]

        StageChain(stages).setup(m, i)

        # the last module in the chain provides the result
        self.o = stages[-1].o

    def process(self, i):
        """Stage API: return the chained output set up by setup()."""
        return self.o

    def action(self, m):
        """Register the chained result and proceed to "normalise_1"."""
        result = self.process(None)
        m.d.sync += self.a1o.eq(result)
        m.next = "normalise_1"
449
450
class FPAddStage0Data:
    """Output record of add stage 0: partial result ``z`` and the sum."""

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # the sum is four bits wider than z's mantissa width
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the list of assignments copying every field from ``i``."""
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
463
464
class FPAddStage0Mod:
    """Add stage 0: adds or subtracts the two aligned mantissas."""

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Build a fresh input record (FPSCData)."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Build a fresh output record (FPAddStage0Data)."""
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's own output record."""
        return self.o

    def setup(self, m, i):
        """Register this module under ``m`` and drive its input from ``i``."""
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o

        m.submodules.add0_in_a = inp.a
        m.submodules.add0_in_b = inp.b
        m.submodules.add0_out_z = out.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)   # signs equal
        mge = Signal(reset_less=True)   # a's mantissa >= b's mantissa
        am0 = Signal(len(inp.a.m)+1, reset_less=True)
        bm0 = Signal(len(inp.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(inp.a.s == inp.b.s),
                     mge.eq(inp.a.m >= inp.b.m),
                     am0.eq(Cat(inp.a.m, 0)),
                     bm0.eq(Cat(inp.b.m, 0)),
                     ]

        with m.If(~inp.out_do_z):
            m.d.comb += out.z.e.eq(inp.a.e)
            with m.If(seq):
                # same-sign (both negative or both positive): add mantissas
                m.d.comb += [out.tot.eq(am0 + bm0),
                             out.z.s.eq(inp.a.s)]
            with m.Elif(mge):
                # a mantissa greater than (or equal to) b: a - b, a's sign
                m.d.comb += [out.tot.eq(am0 - bm0),
                             out.z.s.eq(inp.a.s)]
            with m.Else():
                # b mantissa greater than a: b - a, b's sign
                m.d.comb += [out.tot.eq(bm0 - am0),
                             out.z.s.eq(inp.b.s)]

        # pass-through fields
        m.d.comb += [out.oz.eq(inp.oz),
                     out.out_do_z.eq(inp.out_do_z),
                     out.mid.eq(inp.mid)]
        return m
529
530
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """Always proceed to "add_1"."""
        m.next = "add_1"
552
553
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot is kinda a
        carry bit) and splits tot into mantissa and overflow bits
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Build a fresh input record (FPAddStage0Data)."""
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """Build a fresh output record (FPAddStage1Data)."""
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's own output record."""
        return self.o

    def setup(self, m, i):
        """Register this module (and its overflow record) under ``m`` and
        drive its input from ``i``."""
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o
        tot = inp.tot

        # default: z passes straight through
        m.d.comb += out.z.eq(inp.z)

        with m.If(~inp.out_do_z):
            with m.If(tot[-1]):
                # tot[-1] (MSB) gets set when the sum overflows:
                # take the mantissa one bit higher and bump the exponent
                m.d.comb += [out.z.m.eq(tot[4:]),
                             out.of.m0.eq(tot[4]),
                             out.of.guard.eq(tot[3]),
                             out.of.round_bit.eq(tot[2]),
                             out.of.sticky.eq(tot[1] | tot[0]),
                             out.z.e.eq(inp.z.e + 1)]
            with m.Else():
                # tot[-1] (MSB) zero case
                m.d.comb += [out.z.m.eq(tot[3:]),
                             out.of.m0.eq(tot[3]),
                             out.of.guard.eq(tot[2]),
                             out.of.round_bit.eq(tot[1]),
                             out.of.sticky.eq(tot[0])]

        # pass-through fields
        m.d.comb += [out.out_do_z.eq(inp.out_do_z),
                     out.oz.eq(inp.oz),
                     out.mid.eq(inp.mid)]

        return m
611
612
class FPAddStage1(FPState):
    """FSM state "add_1" wrapping FPAddStage1Mod."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # default: norm_stb is zero whenever the FSM is not in add_1
        # (action() overrides this while the state is active)
        m.d.sync += self.norm_stb.eq(0)

        # the module publishes its results on its output record ``o``
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)

    def action(self, m):
        """Raise norm_stb for this state and proceed to "normalise_1"."""
        # Setting the strobe here (inside the FSM state) rather than
        # unconditionally in setup() is what lets the zero default above
        # take effect in every other state.
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
635
636
class FPNormToPack(FPState, UnbufferedPipeline):
    """Normalise, round, correct and pack chained into a single stage.

    Acts as an FSM state ("normalise_1") and as an unbuffered pipeline
    that is its own stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid, self.width = id_wid, width
        # the pipeline uses this very object as its stage
        UnbufferedPipeline.__init__(self, self)

    def ispec(self):
        """Input record: same as the single-cycle normaliser's input."""
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """Output record: same as the pack module's output."""
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """Instantiate the four modules and chain them, fed from ``i``."""
        # Normalisation, Rounding, Corrections, Pack - in a chain
        stage_types = (FPNorm1ModSingle, FPRoundMod,
                       FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_types]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        # register for the packed result, written by action()
        self.out_z = packer.ospec()
        self.o = packer.o

    def process(self, i):
        """Stage API: return the chained output set up by setup()."""
        return self.o

    def action(self, m):
        """Register the packed result and proceed to "pack_put_z"."""
        result = self.process(None)
        m.d.sync += self.out_z.eq(result)
        m.next = "pack_put_z"
672
673
class FPRoundData:
    """Record carrying ``z`` plus the early-out fields and the id."""

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the list of assignments copying every field from ``i``."""
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
685
686
class FPRoundMod:
    """Rounding: bumps the mantissa (and maybe exponent) when asked to."""

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """Build a fresh input record (FPNorm1Data)."""
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """Build a fresh output record (FPRoundData)."""
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's ``out_z`` record."""
        return self.out_z

    def setup(self, m, i):
        """Register this module under ``m`` and drive its input from ``i``."""
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.out_z

        # default: copies mid, z, out_do_z (and oz) straight through
        m.d.comb += out.eq(inp)

        with m.If(~inp.out_do_z):
            with m.If(inp.roundz):
                # mantissa up
                m.d.comb += out.z.m.eq(inp.z.m + 1)
                with m.If(inp.z.m == inp.z.m1s):
                    # mantissa was all 1s: exponent up
                    m.d.comb += out.z.e.eq(inp.z.e + 1)

        return m
718
719
class FPRound(FPState):
    """FSM state "round" wrapping FPRoundMod."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Input record: whatever the wrapped module takes."""
        return self.mod.ispec()

    def ospec(self):
        """Output record: whatever the wrapped module produces."""
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        self.idsync(m)
        # FPRoundMod names its output record ``out_z`` (it has no ``o``)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """Always proceed to "corrections"."""
        m.next = "corrections"
744
745
class FPCorrectionsMod:
    """Post-rounding correction: fixes the exponent of denormalised z."""

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """Build a fresh input record (FPRoundData)."""
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """Build a fresh output record (FPRoundData)."""
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's ``out_z`` record."""
        return self.out_z

    def setup(self, m, i):
        """Register this module under ``m`` and drive its input from ``i``."""
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.out_z

        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = out.z

        # default: copies mid, z, out_do_z (and oz) straight through
        m.d.comb += out.eq(inp)

        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                # replace the exponent with z's N127 constant
                m.d.comb += out.z.e.eq(inp.z.N127)
        return m
778
779
class FPCorrections(FPState):
    """FSM state "corrections" wrapping FPCorrectionsMod."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Input record: whatever the wrapped module takes."""
        return self.mod.ispec()

    def ospec(self):
        """Output record: whatever the wrapped module produces."""
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPCorrectionsMod names its output record ``out_z`` (it has no ``o``)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """Always proceed to "pack"."""
        m.next = "pack"
803
804
class FPPackData:
    """Record holding the packed result ``z`` and its id ``mid``."""

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the list of assignments copying z and mid from ``i``."""
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """Return the record's signals as a list: z, then mid."""
        return list((self.z, self.mid))
816
817
class FPPackMod:
    """Pack: turns the final ``z`` (or the early-out ``oz``) into bits."""

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Build a fresh input record (FPRoundData)."""
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """Build a fresh output record (FPPackData)."""
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's own output record."""
        return self.o

    def setup(self, m, in_z):
        """Register this module under ``m`` and drive its input from in_z."""
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        inp = self.i

        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = inp.z
        m.submodules.pack_out_z = z

        m.d.comb += self.o.mid.eq(inp.mid)

        with m.If(~inp.out_do_z):
            # normal path: overflow becomes infinity, else pack s/e/m
            with m.If(inp.z.is_overflowed):
                m.d.comb += z.inf(inp.z.s)
            with m.Else():
                m.d.comb += z.create(inp.z.s, inp.z.e, inp.z.m)
        with m.Else():
            # early-out path: the answer was already decided upstream
            m.d.comb += z.v.eq(inp.oz)

        m.d.comb += self.o.z.eq(z.v)
        return m
856
857
class FPPack(FPState):
    """FSM state "pack" wrapping FPPackMod."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Input record: whatever the wrapped module takes."""
        return self.mod.ispec()

    def ospec(self):
        """Output record: whatever the wrapped module produces."""
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod publishes an FPPackData record on ``o``; that record
        # holds the packed bits in ``z`` and the id in ``mid``.
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """Always proceed to "pack_put_z"."""
        m.next = "pack_put_z"
881
882
class FPPutZ(FPState):
    """FSM state that hands a result to ``out_z`` with a stb/ack handshake.

    * state: name of this FSM state
    * in_z: value to be written to ``out_z.z.v``
    * out_z: output record; its ``z`` port provides v, stb and ack
    * in_mid / out_mid: id to copy across (skipped if in_mid is None)
    * to_state: state to go to once acknowledged (default "get_ops")
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """Present the value and strobe; move on once stb and ack are set."""
        port = self.out_z.z

        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [port.v.eq(self.in_z)]

        with m.If(port.stb & port.ack):
            # acknowledged: drop the strobe and leave this state
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # not yet acknowledged: keep (or start) strobing
            m.d.sync += port.stb.eq(1)
906
907
class FPPutZIdx(FPState):
    """Like FPPutZ, but the output port is ``out_zs[in_mid]``.

    * state: name of this FSM state
    * in_z: record whose ``v`` is written to the selected output
    * out_zs: indexable collection of output ports (each with v/stb/ack)
    * in_mid: index selecting which output port to use
    * to_state: state to go to once acknowledged (default "get_ops")
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """Present the value and strobe; move on once stb and ack are set."""
        port = self.out_zs[self.in_mid]

        # local copies of the selected port's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(port.stb),
                     outz_ack.eq(port.ack),
                     ]
        m.d.sync += [port.v.eq(self.in_z.v)]

        with m.If(outz_stb & outz_ack):
            # acknowledged: drop the strobe and leave this state
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # not yet acknowledged: keep (or start) strobing
            m.d.sync += port.stb.eq(1)
933
934
class FPOpData:
    """Record pairing an FPOp result port ``z`` with its id ``mid``."""

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the list of assignments copying z and mid from ``i``."""
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """Return the record's members as a list: z, then mid."""
        return list((self.z, self.mid))
945
946
class FPADDBaseMod:
    """FSM-based IEEE754 adder assembled from the state classes."""

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width, self.id_wid = width, id_wid
        self.single_cycle, self.compact = single_cycle, compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FSM states, in the order they were added
        self.states = []

    def ispec(self):
        """Build a fresh input record (FPADDBaseData)."""
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """Build a fresh output record (FPOpData)."""
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """Append ``state`` to the FSM's state list and hand it back."""
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states using the selected construction method
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        # one FSM state per registered state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """Build the one-state-per-step version of the adder.

        NOTE(review): this path reads self.in_mid, self.out_z and
        self.out_mid, none of which __init__ assigns, and hands several
        setup() methods more arguments than the classes defined in this
        file accept - it looks out of date; confirm before relying on it.
        """
        def new_state(cls):
            # every state class here is constructed from (width, id_wid)
            return self.add_state(cls(self.width, self.id_wid))

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a, b = get.out_op1, get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = new_state(FPAddSpecialCases)
        sc.setup(m, a, b, self.in_mid)

        dn = new_state(FPAddDeNorm)
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant, same wiring
        alm = new_state(FPAddAlignSingle if self.single_cycle
                        else FPAddAlignMulti)
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = new_state(FPAddStage0)
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = new_state(FPAddStage1)
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant also takes norm_stb
        if self.single_cycle:
            n1 = new_state(FPNorm1Single)
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = new_state(FPNorm1Multi)
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = new_state(FPRound)
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = new_state(FPCorrections)
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = new_state(FPPack)
        pa.setup(m, cor.out_z, rn.in_mid)

        # output states: the normal result, then the special-case result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """Build the reduced-stage version: get, special-cases+denorm,
        align+add, normalise-to-pack, followed by the output state."""
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        StageChain(chainlist, specallocate=True).setup(m, self.i)

        # every chained stage is also an FSM state
        for stage in chainlist:
            self.add_state(stage)

        # n1.out_z only exists once the chain's setup() has run
        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        # (a separate "put_z" early-out state is not used in this version)
1069
1070
class FPADDBase(FPState):
    """FSM state "fpadd" that drives an FPADDBaseMod sub-module."""

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """Input record: whatever the wrapped module takes."""
        return self.mod.ispec()

    def ospec(self):
        """Output record: whatever the wrapped module produces."""
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """Wire the wrapped adder to this state's inputs and outputs.

        * i: operand record driving the adder's input
        * add_stb: incoming strobe; registered into ``self.add_stb``
        * in_mid: accepted for interface compatibility; not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                     ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """Accept an operand pair, wait for the result, then go to "put_z".

        The statements that used to follow the final ``return`` could
        never run (and referred to attributes this class does not
        define), so they have been removed.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                             ]
        with m.Else():
            # done: acknowledge, and move on to write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                         ]
            m.next = "put_z"
1160
1161
class FPADDBasePipe(ControlBase):
    """ Three chained pipeline stages: special-cases/denormalise,
        align/add, then normalise/pack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # the connecting assignments are only recorded here; they are
        # added to the comb domain in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages as submodules and wires them up
        """
        module = Module()
        module.submodules.scnorm = self.pipe1
        module.submodules.addalign = self.pipe2
        module.submodules.normpack = self.pipe3
        module.d.comb += self._eqs
        return module
1178
1179
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in: num_rows inputs, each carrying an FPADDBaseData,
        passed through unmodified.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # a fresh data object each time the stage asks for its spec
            return FPADDBaseData(width, id_wid)

        passthru = PassThroughStage(iospec)
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=self.num_rows)
1186
1187
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out: num_rows outputs, each carrying an FPPackData,
        passed through unmodified.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # a fresh data object each time the stage asks for its spec
            return FPPackData(width, id_wid)

        passthru = PassThroughStage(iospec)
        CombMuxOutPipe.__init__(self, passthru, n_len=self.num_rows)
1194
1195
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        # the three sections, in data-flow order
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class has no in/out of its own: borrow those of the
        # fan-in (previous side) and fan-out (next side) pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n

        in_ports = self.inpipe.ports()
        out_ports = self.outpipe.ports()
        self._ports = in_ports + out_ports

    def elaborate(self, platform):
        """ registers the three sections and chains them together
        """
        module = Module()
        module.submodules.inpipe = self.inpipe
        module.submodules.fpadd = self.fpadd
        module.submodules.outpipe = self.outpipe

        # fan-in -> adder, then adder -> fan-out
        module.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        module.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return module

    def ports(self):
        """ the port list gathered at construction time
        """
        return self._ports
1228
1229
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: how many (in_a, in_b) operand pairs and out_z
                     results are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs, one per row
        operand_pairs = []
        for row in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % row
            in_b.name = "in_b_%d" % row
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # results, one per row
        results = []
        for row in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % row
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ records a state for the FSM and hands it straight back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up here
        first_pair = self.rs[0]
        in_a = first_pair[0]
        in_b = first_pair[1]

        # states are registered in the order: get_a, get_b, fpadd, put_z
        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(geta.out_op),
                     abd.b.eq(getb.out_op),
                     abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        # one FSM state per registered stage, keyed on its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1319
1320
if __name__ == "__main__":
    # hard-wired developer toggle: True builds the full FPADD,
    # the other branch builds FPADDBase on its own.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        alu_ports = (alu.rs[0][0].ports()
                     + alu.rs[0][1].ports()
                     + alu.res[0].ports()
                     + [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=alu_ports)
    else:
        # NOTE(review): the attributes read below (in_a, in_b, out_z,
        # in_mid, out_mid) are presumably from an older FPADDBase
        # interface - confirm before enabling this branch.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        alu_ports = ([alu.in_a, alu.in_b]
                     + alu.in_t.ports()
                     + alu.out_z.ports()
                     + [alu.in_mid, alu.out_mid])
        main(alu, ports=alu_ports)
1334
1335
1336 # works... but don't use, just do "python fname.py convert -t v"
1337 #print (verilog.convert(alu, ports=[
1338 # ports=alu.in_a.ports() + \
1339 # alu.in_b.ports() + \
1340 # alu.out_z.ports())