split out normtopack to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState, FPID
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
20 from fpcommon.postcalc import FPAddStage1Data
21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
22 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
23 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
24 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
25 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
26 from fpcommon.normtopack import FPNormToPack
27
28
class FPAddSpecialCasesMod:
    """ Combinatorial special-case handling for FP add.

        Both operands are decoded and tested for NaN, infinity, zero and
        exact cancellation (a == -b).  When one of those cases matches,
        the result is created directly in o.z and o.out_do_z is set.
        Otherwise o.out_do_z is cleared and the decoded operands are
        passed on through o.a / o.b.

        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as a submodule of m and drives its
            input from i
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ the argument is ignored: the result is always self.o
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        out = self.o

        m.submodules.sc_out_z = out.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a), b1.decode(self.i.b)]

        # signs differ / mantissas identical
        s_nomatch = Signal()
        m_match = Signal()
        m.d.comb += [s_nomatch.eq(a1.s != b1.s),
                     m_match.eq(a1.m == b1.m)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling: under review.
        # Dedicated (+/-zero, NaN) operand-pair cases, which built the
        # result from the NaN operand's own sign/exponent/mantissa, are
        # currently disabled: any NaN operand gives nan(0) below.

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [out.out_do_z.eq(1), out.z.nan(0)]

        # a is inf: result is inf with a's sign ...
        with m.Elif(a1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(a1.s)]
            # ... unless b has exp_128 set and the signs differ: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += out.z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(b1.s)]

        # both zero: sign is the AND of the two signs
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(b1.s, b1.e, b1.m[3:-1])]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s, a1.e, a1.m[3:-1])]

        # a == -b: result is (+ve) zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [out.out_do_z.eq(1), out.z.zero(0)]

        # no special case: denormalised-number checks come next, so
        # hand the decoded a/b on
        with m.Else():
            m.d.comb += [out.out_do_z.eq(0),
                         out.a.eq(a1),
                         out.b.eq(b1)]

        # unconditional: the value of z, plus the id
        m.d.comb += [out.oz.eq(out.z.v),
                     out.mid.eq(self.i.mid)]

        return m
147
148
class FPAddSpecialCases(FPState):
    """ FSM state wrapper around FPAddSpecialCasesMod.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPAddSpecialCasesMod.setup takes (m, i) only; the "special case
        # hit" flag is one of the module's outputs, so mirror it here.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)

        # the module exposes its result as self.mod.o (it has no out_z)
        # NOTE(review): assumes the registered copy should hold z.v and
        # mid of the module output - confirm against FPSCData.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        """ a special case goes straight to "put_z"; anything else goes
            on to "denormalise"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
174
175
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ Special-cases and de-normalisation chained into one stage.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.width = width
        self.id_wid = id_wid
        # the pipeline uses this object as its own stage
        UnbufferedPipeline.__init__(self, self)
        self.out = self.ospec()

    def ispec(self):
        """ same input format as the special-cases module
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ same output format as the de-normalisation module
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the special-cases -> denorm chain, feeds it from i,
            and records the denorm module's output as self.o
        """
        stages = [FPAddSpecialCasesMod(self.width, self.id_wid),
                  FPAddDeNormMod(self.width, self.id_wid)]
        StageChain(stages).setup(m, i)

        # a break-out (early-out) would additionally need the first
        # stage's o.out_do_z; not used at present

        self.o = stages[-1].o

    def process(self, i):
        """ the argument is ignored: the result is the chain output
        """
        return self.o

    def action(self, m):
        # no break-out (early-out) to "put_z": always register the chain
        # output and move on to "align"
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
219
220
class FPAddAlignMultiMod(FPState):
    """ One combinatorial alignment step for the multi-cycle aligner.

        The operand with the smaller exponent is passed through
        shift_down(); the other is copied across untouched.  exp_eq is
        raised only when the two exponents are already equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting: the
        # enclosing state *STAYS* in align until the exponents match

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)

        # defaults (exponents differ, operands copied), then the
        # exponent comparisons
        m.d.comb += [self.exp_eq.eq(0),
                     self.out_a.eq(self.in_a),
                     self.out_b.eq(self.in_b),
                     agtb.eq(self.in_a.e > self.in_b.e),
                     altb.eq(self.in_a.e < self.in_b.e)]

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: flag it so the next stage can be entered
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)

        return m
261
262
class FPAddAlignMulti(FPState):
    """ "align" FSM state for the multi-cycle aligner: registers the
        output of FPAddAlignMultiMod and only leaves the state once the
        module reports equal exponents.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        # combinatorial: operands in, exponents-equal flag out
        m.d.comb += [self.mod.in_a.eq(in_a),
                     self.mod.in_b.eq(in_b),
                     self.exp_eq.eq(self.mod.exp_eq)]
        # clocked: latch the module's a/b outputs
        m.d.sync += [self.out_a.eq(self.mod.out_a),
                     self.out_b.eq(self.mod.out_b)]

    def action(self, m):
        # no transition is requested until the exponents are equal
        with m.If(self.exp_eq):
            m.next = "add_0"
285
286
class FPNumIn2Ops:
    """ Data bundle: two operands (a, b), the result number z, the
        out_do_z flag, the oz value and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy i into this object
        """
        assigns = [
            self.z.eq(i.z),
            self.out_do_z.eq(i.out_do_z),
            self.oz.eq(i.oz),
        ]
        assigns.append(self.a.eq(i.a))
        assigns.append(self.b.eq(i.b))
        assigns.append(self.mid.eq(i.mid))
        return assigns
300
301
class FPAddAlignSingleMod:
    """ Single-cycle operand alignment, using one shared (muxed)
        MultiShiftRMerge shifter.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (FPNumIn2Ops)
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ the argument is ignored: the result is always self.o
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        a_in, b_in = self.i.a, self.i.b

        m.submodules.align_in_a = a_in
        m.submodules.align_in_b = b_in
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(a_in.e), True)
        msr = MultiShiftRMerge(a_in.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        m.d.comb += [
            # connect multi-shifter to t_inp/out mantissa (and tdiff)
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
            # exponent differences (both directions) and comparisons
            ediff.eq(a_in.e - b_in.e),
            ediffr.eq(b_in.e - a_in.e),
            elz.eq(a_in.e < b_in.e),
            egz.eq(a_in.e > b_in.e),
            # default: A-exp == B-exp, A and B untouched (fall through)
            self.o.a.eq(a_in),
            self.o.b.eq(b_in),
        ]

        # only one shifter (muxed); skipped entirely when the result
        # has already been decided (out_do_z)
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(b_in),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(b_in.s),  # keep b's sign
                             ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(a_in),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(a_in.s),  # keep a's sign
                             ]

        # everything else is passed through unmodified
        m.d.comb += [self.o.mid.eq(self.i.mid),
                     self.o.z.eq(self.i.z),
                     self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.oz.eq(self.i.oz)]

        return m
395
396
class FPAddAlignSingle(FPState):
    """ "align" FSM state for the single-cycle aligner: registers the
        a/b outputs of FPAddAlignSingleMod, then always moves to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # The aligned operands live in the module's output bundle
        # (self.mod.o.a / self.mod.o.b): FPAddAlignSingleMod defines no
        # out_a / out_b attributes.
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
416
417
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Single-cycle align, add stage 0 and add stage 1 chained into one
        stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        # the pipeline uses this object as its own stage
        UnbufferedPipeline.__init__(self, self)
        self.a1o = self.ospec()

    def ispec(self):
        """ input data format (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: same as add stage 1 (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the AddAlignSingle -> AddStage0 -> AddStage1 chain,
            feeds it from i, and records the last module's output as
            self.o
        """
        stages = [FPAddAlignSingleMod(self.width, self.id_wid),
                  FPAddStage0Mod(self.width, self.id_wid),
                  FPAddStage1Mod(self.width, self.id_wid)]
        StageChain(stages).setup(m, i)

        self.o = stages[-1].o

    def process(self, i):
        """ the argument is ignored: the result is the chain output
        """
        return self.o

    def action(self, m):
        # register the chain output, then go on to normalisation
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
453
454
class FPAddStage0Data:
    """ Output bundle of add stage 0: the result number z, the
        out_do_z flag, the oz value, the mantissa total (tot, four bits
        wider than z's mantissa) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy i into this object
        """
        assigns = [
            self.z.eq(i.z),
            self.out_do_z.eq(i.out_do_z),
            self.oz.eq(i.oz),
        ]
        assigns.append(self.tot.eq(i.tot))
        assigns.append(self.mid.eq(i.mid))
        return assigns
467
468
class FPAddStage0Mod:
    """ First stage of add (combinatorial): adds the mantissas when the
        signs are equal, otherwise subtracts the smaller mantissa from
        the larger.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ the argument is ignored: the result is always self.o
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        a_in, b_in = self.i.a, self.i.b

        m.submodules.add0_in_a = a_in
        m.submodules.add0_in_b = b_in
        m.submodules.add0_out_z = self.o.z

        # intermediate tests: signs equal, a-mantissa >= b-mantissa
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        # mantissas zero-extended by one bit
        am0 = Signal(len(a_in.m)+1, reset_less=True)
        bm0 = Signal(len(b_in.m)+1, reset_less=True)
        m.d.comb += [seq.eq(a_in.s == b_in.s),
                     mge.eq(a_in.m >= b_in.m),
                     am0.eq(Cat(a_in.m, 0)),
                     bm0.eq(Cat(b_in.m, 0))
                     ]

        # nothing is computed when the result is already decided
        with m.If(~self.i.out_do_z):
            m.d.comb += self.o.z.e.eq(a_in.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [self.o.tot.eq(am0 + bm0),
                             self.o.z.s.eq(a_in.s)]
            # a mantissa greater than (or equal to) b: a - b, a's sign
            with m.Elif(mge):
                m.d.comb += [self.o.tot.eq(am0 - bm0),
                             self.o.z.s.eq(a_in.s)]
            # b mantissa greater than a: b - a, b's sign
            with m.Else():
                m.d.comb += [self.o.tot.eq(bm0 - am0),
                             self.o.z.s.eq(b_in.s)]

        # pass-through of the early-out data and the id
        m.d.comb += [self.o.oz.eq(self.i.oz),
                     self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.mid.eq(self.i.mid)]
        return m
533
534
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        This is the "add_0" FSM state: it registers the output of
        FPAddStage0Mod and then always moves to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
556
557
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ the argument is ignored: the result is always self.o
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z_out, of = self.o.z, self.o.of

        # start from the incoming z; mantissa (and possibly exponent)
        # are overridden below
        m.d.comb += z_out.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows: take the
            # result from one bit further up and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += [
                    z_out.m.eq(tot[4:]),
                    of.m0.eq(tot[4]),
                    of.guard.eq(tot[3]),
                    of.round_bit.eq(tot[2]),
                    of.sticky.eq(tot[1] | tot[0]),
                    z_out.e.eq(self.i.z.e + 1),
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    z_out.m.eq(tot[3:]),
                    of.m0.eq(tot[3]),
                    of.guard.eq(tot[2]),
                    of.round_bit.eq(tot[1]),
                    of.sticky.eq(tot[0]),
                ]

        # pass-through of the early-out data and the id
        m.d.comb += [self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.oz.eq(self.i.oz),
                     self.o.mid.eq(self.i.mid)]

        return m
615
616
class FPAddStage1(FPState):
    """ "add_1" FSM state: registers the z and overflow outputs of
        FPAddStage1Mod and raises norm_stb while the state is active,
        then moves to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # default: norm_stb is zero when not in the add1 state.  The
        # matching "set to one" is in action() so that it only applies
        # inside the state; when both assignments sat here the later one
        # always won and this default never took effect.
        m.d.sync += self.norm_stb.eq(0)

        # the module's results live in its output bundle (mod.o.of and
        # mod.o.z): FPAddStage1Mod defines no out_of / out_z attributes
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)

    def action(self, m):
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
639
640
641
class FPPutZ(FPState):
    """ Output state: drives in_z onto out_z.z.v and holds out_z.z.stb
        high until stb and ack are both seen, then moves to to_state
        ("get_ops" unless given).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        zport = self.out_z.z

        # the id is only copied across when one was supplied
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [zport.v.eq(self.in_z)]

        # handshake complete: drop the strobe and leave the state
        with m.If(zport.stb & zport.ack):
            m.d.sync += zport.stb.eq(0)
            m.next = self.to_state
        # otherwise keep (or start) strobing
        with m.Else():
            m.d.sync += zport.stb.eq(1)
665
666
class FPPutZIdx(FPState):
    """ Output state for an array of outputs: in_mid selects which
        entry of out_zs receives in_z.v and is strobed.  Moves to
        to_state ("get_ops" unless given) once stb and ack are both seen
        on the selected entry.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # the output entry selected by the id
        dest = self.out_zs[self.in_mid]

        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(dest.stb),
                     outz_ack.eq(dest.ack),
                     ]
        m.d.sync += [dest.v.eq(self.in_z.v)]

        # handshake complete: drop the strobe and leave the state
        with m.If(outz_stb & outz_ack):
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        # otherwise keep (or start) strobing
        with m.Else():
            m.d.sync += dest.stb.eq(1)
692
693
class FPOpData:
    """ Data bundle: an FPOp (z) together with an id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy i into this object
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.mid.eq(i.mid))
        return assigns

    def ports(self):
        """ returns [z, mid]
        """
        return [self.z, self.mid]
704
705
class FPADDBaseMod:
    """ FSM-based FP adder: builds a list of states (compact or longer
        form) and emits one FSM state per entry.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input data format (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered state object, named after its
        # state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the non-compact (one state per step) variant.

            NOTE(review): this path looks stale.  It reads self.in_mid,
            self.out_z and self.out_mid, none of which are set by
            __init__, and several setup() calls pass argument lists that
            differ from the setup(m, i) methods defined in this file.
            The calls are left exactly as they were; confirm before
            using compact=False.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # normal-path output state
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # special-case (early-out) output state
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the compact variant: get-ops, special-cases+denorm,
            align+add, norm-to-pack, chained together, followed by a
            single output state.
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also an FSM state (the loop no longer
        # rebinds "sc", which previously shadowed the stage above)
        for mod in chainlist:
            self.add_state(mod)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        # the early-out "put_z" state is currently disabled
828
829
class FPADDBase(FPState):
    """ "fpadd" FSM state that wraps a complete FPADDBaseMod, handing
        operands to it and waiting for its result.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ same input format as the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ same output format as the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires the wrapped module's input, trigger and output to this
            state's own, and adds the module as submodule "fpadd".

            in_mid is accepted but not used here.
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                     ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ while the result is not done: accept an operand pair if one
            is being offered, otherwise acknowledge the output.  Once
            done: move to "put_z".
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                             ]
        with m.Else():
            # done: acknowledge, and go on to write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                         ]
            m.next = "put_z"

        # The statements that used to follow an unconditional "return"
        # here were unreachable (and read attributes this class never
        # defines), so they have been removed.
919
920
class FPADDBasePipe(ControlBase):
    """ Three connected pipeline stages: special-cases/denorm,
        align/add, and norm-to-pack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # the connection assignments are created now, and only added to
        # the module at elaboration time
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
937
938
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in: a PriorityCombMuxInPipe with num_rows inputs, passing
        FPADDBaseData straight through.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # pass-through stage whose data format is FPADDBaseData
        stage = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
945
946
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out: a CombMuxOutPipe with num_rows outputs, passing
        FPPackData straight through.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # pass-through stage whose data format is FPPackData
        stage = PassThroughStage(lambda: FPPackData(width, id_wid))
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
953
954
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # this class's own in/out are simply those of the fan-in and
        # fan-out pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder -> fan-out
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the fan-in ports followed by the fan-out ports
        """
        return self._ports
987
988
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) input pairs and of out_z
                     outputs that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # input side: rs_sz named (a, b) operand pairs
        operand_pairs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # output side: rs_sz named results
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        # input spec object for FPADDBase, loaded (sync) with a, b and id
        abd = ab.ispec()
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # output state: writes to the result selected by the id, then
        # goes back to "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                 o.mid, "get_a"))

        # one FSM state per registered state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1078
1079
if __name__ == "__main__":
    # Build a 32-bit single-cycle FPADD and hand it to the nmigen
    # command-line front end, exposing the first operand pair, the first
    # result and the in/out ids as ports.
    #
    # (An alternative harness around FPADDBase used to sit in a dead
    # "else" branch here; it read attributes such as in_a / out_z /
    # in_mid that FPADDBase does not define, so it has been removed.)
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=(alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid]))

    # calling verilog.convert(alu, ports=...) directly works... but
    # don't use it, just do "python fname.py convert -t v"