split out pack to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
20 from fpcommon.postcalc import FPAddStage1Data
21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
22 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
23 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
24 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
25 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
26
27
class FPAddSpecialCasesMod:
    """ Combinatorial detection of the add special cases.

        Handles NaN, infinity, zero and exact-cancellation operands.
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as a submodule of m and drives its
            input record from i
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        """ builds the operand decode and the special-case priority chain
        """
        m = Module()
        res = self.o

        m.submodules.sc_out_z = res.z

        # decode: XXX really should move to separate stage
        op_a = FPNumIn(None, self.width)
        op_b = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = op_a
        m.submodules.sc_decode_b = op_b
        m.d.comb += [op_a.decode(self.i.a), op_b.decode(self.i.b)]

        # set when the operand signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(op_a.s != op_b.s)

        # set when the operand mantissas are identical
        m_match = Signal()
        m.d.comb += m_match.eq(op_a.m == op_b.m)

        # Priority chain: the first matching branch decides the result.

        # either operand NaN: result is NaN
        with m.If(op_a.is_nan | op_b.is_nan):
            m.d.comb += [res.out_do_z.eq(1), res.z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling: extra
        # zero-plus-NaN cases were drafted here and are under review.

        # a is inf: result is inf with a's sign ...
        with m.Elif(op_a.is_inf):
            m.d.comb += [res.out_do_z.eq(1), res.z.inf(op_a.s)]
            # ... unless b has the top exponent and the signs differ: NaN
            with m.If(op_b.exp_128 & s_nomatch):
                m.d.comb += res.z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(op_b.is_inf):
            m.d.comb += [res.out_do_z.eq(1), res.z.inf(op_b.s)]

        # both zero: sign is the AND of both signs
        with m.Elif(op_a.is_zero & op_b.is_zero):
            m.d.comb += [
                res.out_do_z.eq(1),
                res.z.create(op_a.s & op_b.s, op_b.e, op_b.m[3:-1]),
            ]

        # only a is zero: result is b
        with m.Elif(op_a.is_zero):
            m.d.comb += [
                res.out_do_z.eq(1),
                res.z.create(op_b.s, op_b.e, op_b.m[3:-1]),
            ]

        # only b is zero: result is a
        with m.Elif(op_b.is_zero):
            m.d.comb += [
                res.out_do_z.eq(1),
                res.z.create(op_a.s, op_a.e, op_a.m[3:-1]),
            ]

        # a equal to -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (op_a.e == op_b.e)):
            m.d.comb += [res.out_do_z.eq(1), res.z.zero(0)]

        # no special case: denormalised-number checks come next, so
        # pass the decoded a/b through
        with m.Else():
            m.d.comb += [
                res.out_do_z.eq(0),
                res.a.eq(op_a),
                res.b.eq(op_b),
            ]

        # always forward the packed z value and the id
        m.d.comb += [res.oz.eq(res.z.v), res.mid.eq(self.i.mid)]

        return m
146
147
class FPAddSpecialCases(FPState):
    """ FSM state wrapper around FPAddSpecialCasesMod.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input record, handed to FPAddSpecialCasesMod.setup
        """
        # the module's setup takes (m, i) only; its "result is already
        # decided" flag is read back from the module's output record
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # NOTE(review): out_z is the module's output record type; only the
        # packed value of its z field is latched, on the assumption that
        # the record itself has no top-level "v" - confirm against FPSCData
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        """ next state: "put_z" when a special case produced the result,
            otherwise "denormalise"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
173
174
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ Special-cases and denormalise modules chained as one stage.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # width/id_wid are set before the pipeline base is initialised,
        # as in the original ordering
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        """ input record: same type as the special-cases module input
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record: FPSCData (the denormalise output per the
            original comment)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the two modules, chains them, and exposes the output
            of the last one as self.o
        """
        special = FPAddSpecialCasesMod(self.width, self.id_wid)
        denorm = FPAddDeNormMod(self.width, self.id_wid)

        StageChain([special, denorm]).setup(m, i)

        # a break-out (early-out) variant would also need
        # special.o.out_do_z; it is not used here

        self.o = denorm.o

    def process(self, i):
        """ returns the chained output record (the argument is not used)
        """
        return self.o

    def action(self, m):
        """ latches the chained output and moves to the "align" state
        """
        # the break-out (early-out) path to "put_z" is disabled
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
218
219
class FPAddAlignMultiMod(FPState):
    """ Exponent-alignment step.

        Calls shift_down on whichever operand has the smaller exponent
        and raises exp_eq once the exponents match.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        """ builds the comparison and the shift-down step
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: exponents not (yet) equal, operands passed through
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        with m.If(agtb):
            # exponent of a greater than b: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # exponent of b greater than a: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: move to next stage.
            m.d.comb += self.exp_eq.eq(1)

        return m
260
261
class FPAddAlignMulti(FPState):
    """ FSM "align" state wrapping FPAddAlignMultiMod.
    """

    def __init__(self, width, id_wid):
        # id_wid is accepted for signature parity but is not used here
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        aligner = self.mod
        m.submodules.align = aligner
        m.d.comb += [
            aligner.in_a.eq(in_a),
            aligner.in_b.eq(in_b),
            self.exp_eq.eq(aligner.exp_eq),
        ]
        # operands are registered
        m.d.sync += [
            self.out_a.eq(aligner.out_a),
            self.out_b.eq(aligner.out_b),
        ]

    def action(self, m):
        """ moves to "add_0" once exp_eq is set; no other transition is
            requested here
        """
        with m.If(self.exp_eq):
            m.next = "add_0"
284
285
class FPNumIn2Ops:
    """ Record carrying two operands (a, b), a result number z, the
        "result already decided" flag with its packed value, and an id.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of per-field assignments from i, in the
            order z, out_do_z, oz, a, b, mid
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
299
300
class FPAddAlignSingleMod:
    """ Single-cycle operand alignment using one shared multi-bit shifter.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPNumIn2Ops)
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        src, dst = self.i, self.o

        m.submodules.align_in_a = src.a
        m.submodules.align_in_b = src.b
        m.submodules.align_out_a = dst.a
        m.submodules.align_out_b = dst.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(src.a.e), True)
        msr = MultiShiftRMerge(src.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences in both directions, plus comparisons
        m.d.comb += [
            ediff.eq(src.a.e - src.b.e),
            ediffr.eq(src.b.e - src.a.e),
            elz.eq(src.a.e < src.b.e),
            egz.eq(src.a.e > src.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [dst.a.eq(src.a), dst.b.eq(src.b)]

        # only one shifter (muxed); skipped when the result is already
        # decided (out_do_z set)
        with m.If(~src.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [
                    t_inp.eq(src.b),
                    tdiff.eq(ediff),
                    dst.b.eq(t_out),
                    dst.b.s.eq(src.b.s),  # whoops forgot sign
                ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [
                    t_inp.eq(src.a),
                    tdiff.eq(ediffr),
                    dst.a.eq(t_out),
                    dst.a.s.eq(src.a.s),  # whoops forgot sign
                ]

        # pass-through fields
        m.d.comb += [
            dst.mid.eq(src.mid),
            dst.z.eq(src.z),
            dst.out_do_z.eq(src.out_do_z),
            dst.oz.eq(src.oz),
        ]

        return m
394
395
class FPAddAlignSingle(FPState):
    """ FSM "align" state wrapping FPAddAlignSingleMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input record, handed to FPAddAlignSingleMod.setup
        """
        self.mod.setup(m, i)

        # the aligned operands live on the module's output record
        # (self.mod.o); the module has no out_a/out_b attributes.
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """ unconditionally moves to the "add_0" state
        """
        m.next = "add_0"
415
416
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Align, add stage 0 and add stage 1 chained as a single stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        # width/id_wid are set before the pipeline base is initialised,
        # as in the original ordering
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPAddStage1Data, i.e. the
            add-stage-1 output type)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        align = FPAddAlignSingleMod(self.width, self.id_wid)
        add0 = FPAddStage0Mod(self.width, self.id_wid)
        add1 = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([align, add0, add1]).setup(m, i)

        self.o = add1.o

    def process(self, i):
        """ returns the chained output record (the argument is not used)
        """
        return self.o

    def action(self, m):
        """ latches the chained output and moves to "normalise_1"
        """
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
452
453
class FPAddStage0Data:
    """ Output record of add stage 0: result number z, mantissa total,
        the "result already decided" flag with its packed value, and id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # total is four bits wider than z's mantissa width
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of per-field assignments from i, in the
            order z, out_do_z, oz, tot, mid
        """
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
466
467
class FPAddStage0Mod:
    """ First add stage: adds or subtracts the aligned mantissas.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the same-sign add / different-sign subtract logic
        """
        m = Module()
        src, dst = self.i, self.o

        m.submodules.add0_in_a = src.a
        m.submodules.add0_in_b = src.b
        m.submodules.add0_out_z = dst.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(src.a.m)+1, reset_less=True)
        bm0 = Signal(len(src.b.m)+1, reset_less=True)
        m.d.comb += [
            seq.eq(src.a.s == src.b.s),
            mge.eq(src.a.m >= src.b.m),
            am0.eq(Cat(src.a.m, 0)),
            bm0.eq(Cat(src.b.m, 0)),
        ]

        # only computed when the result is not already decided
        with m.If(~src.out_do_z):
            m.d.comb += dst.z.e.eq(src.a.e)
            with m.If(seq):
                # same-sign (both negative or both positive): add
                m.d.comb += [dst.tot.eq(am0 + bm0), dst.z.s.eq(src.a.s)]
            with m.Elif(mge):
                # a mantissa greater or equal: a - b, take a's sign
                m.d.comb += [dst.tot.eq(am0 - bm0), dst.z.s.eq(src.a.s)]
            with m.Else():
                # b mantissa greater than a: b - a, take b's sign
                m.d.comb += [dst.tot.eq(bm0 - am0), dst.z.s.eq(src.b.s)]

        # pass-through fields
        m.d.comb += [
            dst.oz.eq(src.oz),
            dst.out_do_z.eq(src.out_do_z),
            dst.mid.eq(src.mid),
        ]
        return m
532
533
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input record, handed to FPAddStage0Mod.setup
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """ unconditionally moves to the "add_1" state
        """
        m.next = "add_1"
555
556
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ splits the total into mantissa plus guard/round/sticky bits
        """
        m = Module()
        tot = self.i.tot
        res_z = self.o.z
        ovf = self.o.of

        # default: z passes through; mantissa/exponent overridden below
        m.d.comb += res_z.eq(self.i.z)

        # only computed when the result is not already decided
        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows. shift
            # result down and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += [
                    res_z.m.eq(tot[4:]),
                    ovf.m0.eq(tot[4]),
                    ovf.guard.eq(tot[3]),
                    ovf.round_bit.eq(tot[2]),
                    ovf.sticky.eq(tot[1] | tot[0]),
                    res_z.e.eq(self.i.z.e + 1),
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    res_z.m.eq(tot[3:]),
                    ovf.m0.eq(tot[3]),
                    ovf.guard.eq(tot[2]),
                    ovf.round_bit.eq(tot[1]),
                    ovf.sticky.eq(tot[0]),
                ]

        # pass-through fields
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
614
615
class FPAddStage1(FPState):
    """ FSM "add_1" state wrapping FPAddStage1Mod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input record, handed to FPAddStage1Mod.setup
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # results live on the module's output record (self.mod.o); the
        # module has no out_of/out_z attributes
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): this unconditional assignment follows the reset to
        # zero above in the same domain, so it presumably always wins and
        # the reset has no effect - confirm whether it belongs in action()
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        """ unconditionally moves to the "normalise_1" state
        """
        m.next = "normalise_1"
638
639
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalise, round, corrections and pack chained as one stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        # id_wid/width are set before the pipeline base is initialised,
        # as in the original ordering
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        """ returns a fresh input record (FPAddStage1Data, which the
            original comment gives as the Norm1ModSingle input type)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPPackData, which the original
            comment gives as the FPPackMod output type)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        norm = FPNorm1ModSingle(self.width, self.id_wid)
        rnd = FPRoundMod(self.width, self.id_wid)
        corr = FPCorrectionsMod(self.width, self.id_wid)
        pack = FPPackMod(self.width, self.id_wid)
        StageChain([norm, rnd, corr, pack]).setup(m, i)

        # out_z only exists once setup() has run
        self.out_z = pack.ospec()

        self.o = pack.o

    def process(self, i):
        """ returns the chained output record (the argument is not used)
        """
        return self.o

    def action(self, m):
        """ latches the chained output and moves to "pack_put_z"
        """
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
675
676
677
class FPPutZ(FPState):
    """ Output state: presents a value on out_z.z using its stb/ack
        pair, then moves on to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        # with no explicit target, go back to fetching operands
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ drives the output value and performs the stb/ack handshake
        """
        port = self.out_z.z

        # the id is only copied across when one was supplied
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        m.d.sync += [port.v.eq(self.in_z)]

        with m.If(port.stb & port.ack):
            # strobe and ack both set: drop the strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # otherwise keep the strobe raised
            m.d.sync += port.stb.eq(1)
701
702
class FPPutZIdx(FPState):
    """ Output state that writes to one entry of out_zs, selected by
        in_mid, then moves on to to_state.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        # with no explicit target, go back to fetching operands
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ drives the selected output and performs the stb/ack handshake
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)

        # mirror the selected entry's strobe and ack into local signals
        m.d.comb += [
            outz_stb.eq(self.out_zs[self.in_mid].stb),
            outz_ack.eq(self.out_zs[self.in_mid].ack),
        ]

        m.d.sync += [self.out_zs[self.in_mid].v.eq(self.in_z.v)]

        with m.If(outz_stb & outz_ack):
            # strobe and ack both set: drop the strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # otherwise keep the strobe raised
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
728
729
class FPOpData:
    """ Record pairing an FPOp (z) with an id signal (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments of z and mid from i, in that order
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns [z, mid]
        """
        return list((self.z, self.mid))
740
741
class FPADDBaseMod:
    """ FSM-based floating-point adder: collects a list of states and
        emits one FSM state per entry.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ returns a fresh input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the state list and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with the selected stage layout
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered stage, named by its state_from
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-stage layout

            NOTE(review): several calls below (multi-argument setup()
            calls, self.in_mid, self.out_z, self.out_mid, out_op1/out_op2,
            out_tot) do not match the classes and attributes visible in
            this file; this path looks stale relative to the compact
            layout - confirm before using compact=False.
        """
        getter = self.add_state(FPGet2Op("get_ops", "special_cases",
                                         self.width))
        getter.setup(m, self.i)
        a = getter.out_op1
        b = getter.out_op2
        getter.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # single-cycle or iterating aligner; both are set up the same way
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            # the multi-cycle normaliser is additionally given norm_stb
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output state fed from the pack stage
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output state fed from the special-cases stage
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced layout: get-ops, special-cases+denorm,
            align+add, normalise-to-pack, then the output state
        """
        getter = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        specials = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        align_add = FPAddAlignSingleAdd(self.width, self.id_wid)
        norm_pack = FPNormToPack(self.width, self.id_wid)

        getter.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        stages = [getter, specials, align_add, norm_pack]
        StageChain(stages, specallocate=True).setup(m, self.i)

        # every chained stage is also an FSM state
        for stage in stages:
            self.add_state(stage)

        # norm_pack.out_z is created by its setup(), which the chain
        # setup above has already run
        self.add_state(FPPutZ("pack_put_z", norm_pack.out_z.z, self.o,
                              norm_pack.out_z.mid, self.o.mid))

        # the early-out "put_z" state from the special-cases stage is
        # disabled in this layout
864
865
class FPADDBase(FPState):
    """ FSM "fpadd" state that wraps a complete FPADDBaseMod adder and
        bridges the outer strobe/ack signals to the inner trigger.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ returns a fresh input record of the wrapped adder's type
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a fresh output record of the wrapped adder's type
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links the wrapped adder to this state's inputs and outputs

            * m: the Module being built
            * i: input record copied into self.i and on into the adder
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ starts the wrapped adder on an accepted strobe and moves to
            "put_z" once z_done is set
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),   # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
        # The statements that used to follow an unconditional return here
        # could never run and referenced attributes this class does not
        # define (in_mid, out_mid, out_z); they have been removed.
955
956
class FPADDBasePipe(ControlBase):
    """ Three pipeline stages connected in sequence: special-cases and
        denormalise, align and add, normalise through to pack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # the connection statements are created now and added to the
        # module in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages and applies their connections
        """
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
973
974
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in stage: a PriorityCombMuxInPipe with num_rows inputs whose
        pass-through stage carries FPADDBaseData records.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # the stage is handed a factory that builds a fresh record
        stage = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
981
982
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out stage: a CombMuxOutPipe with num_rows outputs whose
        pass-through stage carries FPPackData records.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # the stage is handed a factory that builds a fresh record
        stage = PassThroughStage(lambda: FPPackData(width, id_wid))
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
989
990
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # this class's in/out are the fan-in input and fan-out output
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes and connects them in sequence
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder, then adder -> fan-out
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
1023
1024
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand pairs (and result ports) created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # rs_sz operand pairs, each port given an indexed name
        operand_pairs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # one result port per operand pair
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the state list and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up here
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [
            abd.a.eq(a),
            abd.b.eq(b),
            abd.mid.eq(self.ids.in_mid),
        ]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # the output state returns to "get_a" when finished
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        # one FSM state per registered stage, named by its state_from
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1114
1115
if __name__ == "__main__":
    # The first branch is hard-wired on; the second is kept as the
    # alternative entry point for the bare FPADDBase.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = (alu.rs[0][0].ports()
                     + alu.rs[0][1].ports()
                     + alu.res[0].ports()
                     + [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=port_list)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = ([alu.in_a, alu.in_b]
                     + alu.in_t.ports()
                     + alu.out_z.ports()
                     + [alu.in_mid, alu.out_mid])
        main(alu, ports=port_list)
1129
1130
1131 # works... but don't use, just do "python fname.py convert -t v"
1132 #print (verilog.convert(alu, ports=[
1133 # ports=alu.in_a.ports() + \
1134 # alu.in_b.ports() + \
1135 # alu.out_z.ports())