split out denorm to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
20
21
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Combinatorial stage.  Decodes operands a and b from self.i and
        walks a priority-ordered If/Elif chain.  When a special case
        matches, o.out_do_z is set and the result is built in o.z.
        Otherwise out_do_z is cleared and the decoded operands are passed
        through in o.a / o.b.  o.oz always mirrors o.z.v and o.mid always
        mirrors i.mid.
    """

    def __init__(self, width, id_wid):
        # width:  operand width, forwarded to the ispec/ospec data classes
        # id_wid: width of the "mid" field carried alongside the data
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input-data object (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output-data object (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output object; the argument is not used
        """
        return self.o

    def elaborate(self, platform):
        """ builds the special-case detection logic (all combinatorial)
        """
        m = Module()

        m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a),
                     b1.decode(self.i.b),
                    ]

        # set when the two operands have opposite signs
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # set when the two mantissas are identical
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # NOTE: the chain below is priority-ordered: the first condition
        # that matches wins, so the order of the branches is significant.

        # if a is NaN or b is NaN return NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(a1.s)
            # if a is inf and signs don't match return NaN
            # (NaN was excluded above, so b1.exp_128 here presumably
            #  means b is inf too -- TODO confirm against fpbase)
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(b1.s)

        # if a is zero and b zero return signed-a/b
        # (the result sign is a.s AND b.s: negative only if both are)
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)
            m.d.comb += self.o.a.eq(a1)
            m.d.comb += self.o.b.eq(b1)

        # unconditional: raw result bits and the id are always forwarded
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
140
141
class FPID:
    """ optional "mid" (id) input/output pair

        When id_wid is falsy (None or 0) no signals are created and
        in_mid / out_mid are None; idsync() is then a no-op.
    """

    def __init__(self, id_wid):
        # id_wid: width in bits of the id signals (None or 0: no id)
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid on the sync domain of module m
        """
        # same truthiness test as __init__: the previous "is not None"
        # test let id_wid == 0 through, where in_mid/out_mid are None
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
155
156
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM-state wrapper around FPAddSpecialCasesMod: registers the
        module's raw result (oz) and id (mid), and branches on out_do_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # the module requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPAddSpecialCasesMod.setup takes (m, i) only; out_do_z is read
        # back from the module's output object instead of being passed in
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # the module mirrors its result bits (o.z.v) into o.oz
        m.d.sync += self.out_z.oz.eq(self.mod.o.oz)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        """ FSM transition: early-out to "put_z" on a special case,
            otherwise continue to "denormalise"
        """
        # NOTE(review): idsync is not defined in this class; presumably
        # inherited from FPState (or an FPID mix-in) -- TODO confirm
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
182
183
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Chains the special-cases module and the denormalisation module
        into one stage, which is used both as an FSM state and as its
        own pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # width/id_wid must be stored before the pipeline base class is
        # initialised: this object is handed to it as the stage
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        """ input format: that of the special-cases module
        """
        return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec

    def ospec(self):
        """ output format: that of the denormalisation module
        """
        return FPSCData(self.width, self.id_wid) # DeNorm ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        special = FPAddSpecialCasesMod(self.width, self.id_wid)
        denorm = FPAddDeNormMod(self.width, self.id_wid)

        StageChain([special, denorm]).setup(m, i)

        # only needed for break-out (early-out)
        # self.out_do_z = special.o.out_do_z

        # output of the last module in the chain is the stage output
        self.o = denorm.o

    def process(self, i):
        """ returns the chained output; the argument is not used
        """
        return self.o

    def action(self, m):
        """ FSM action: latch the chain output and move to "align"
        """
        # for break-out (early-out)
        #with m.If(self.out_do_z):
        #    m.next = "put_z"
        #with m.Else():
        result = self.process(None)
        m.d.sync += self.out.eq(result)
        m.next = "align"
227
228
class FPAddAlignMultiMod(FPState):
    """ Combinatorial exponent-alignment step.

        Compares the exponents of in_a and in_b.  The operand with the
        smaller exponent is passed through shift_down() into the matching
        output; the other output is a straight copy.  exp_eq is raised
        only when neither exponent is greater than the other.
    """

    def __init__(self, width):
        # NOTE(review): FPState.__init__ is not called here
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)  # 1 when exponents match

    def elaborate(self, platform):
        # NOTE(review): a comment here used to claim this variant does
        # the shift "in one go" (single-cycle).  That contradicts both the
        # NOTE below and the code, which applies shift_down() once and
        # reports exp_eq=0 until the exponents match.

        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults (overridden in the If/Elif/Else below): not yet equal,
        # outputs are copies of the inputs
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)
        agtb = Signal(reset_less=True)  # a's exponent greater than b's
        altb = Signal(reset_less=True)  # a's exponent less than b's
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
269
270
class FPAddAlignMulti(FPState):
    """ FSM "align" state wrapping FPAddAlignMultiMod.

        Stays in the state until the module reports that the exponents
        are equal, then moves on to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        aligner = self.mod
        m.submodules.align = aligner
        # combinatorial: feed the operands in, expose the "equal" flag
        m.d.comb += [
            aligner.in_a.eq(in_a),
            aligner.in_b.eq(in_b),
            self.exp_eq.eq(aligner.exp_eq),
        ]
        # registered: capture the (possibly shifted) operands
        m.d.sync += [
            self.out_a.eq(aligner.out_a),
            self.out_b.eq(aligner.out_b),
        ]

    def action(self, m):
        """ FSM action: advance only once the exponents match
        """
        with m.If(self.exp_eq):
            m.next = "add_0"
293
294
class FPNumIn2Ops:
    """ Data bundle: two decoded operands (a, b) plus the early-out
        result fields (z, out_do_z, oz) and the id (mid).
    """

    def __init__(self, width, id_wid):
        # operands
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        # early-out result, its "valid" flag, and the result as raw bits
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # id carried alongside the data
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
308
309
class FPAddAlignSingleMod:
    """ Single-cycle exponent alignment of two operands.

        Input is FPSCData, output is FPNumIn2Ops.  The early-out fields
        (z, out_do_z, oz) and mid are forwarded unchanged.
    """

    def __init__(self, width, id_wid):
        # width:  operand width, forwarded to the data classes
        # id_wid: width of the "mid" field
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input-data object (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output-data object (FPNumIn2Ops)
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object; the argument is not used
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.i.a.e), True)  # signed, same width as exponent
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)   # a.e - b.e
        ediffr = Signal(espec, reset_less=True)  # b.e - a.e
        tdiff = Signal(espec, reset_less=True)   # shift amount selected
        elz = Signal(reset_less=True)            # a.e < b.e
        egz = Signal(reset_less=True)            # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
        m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
        m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
        m.d.comb += egz.eq(self.i.a.e > self.i.b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        # (these must stay ahead of the overrides in the If/Elif below)
        m.d.comb += self.o.a.eq(self.i.a)
        m.d.comb += self.o.b.eq(self.i.b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # alignment is skipped when an early-out result is already present
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(self.i.b),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(self.i.b.s), # whoops forgot sign
                            ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(self.i.a),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(self.i.a.s), # whoops forgot sign
                            ]

        # pass-through fields
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
403
404
class FPAddAlignSingle(FPState):
    """ FSM "align" state wrapping the single-cycle aligner
        (FPAddAlignSingleMod); always proceeds to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its aligned operands as o.a / o.b
        # (it has no out_a / out_b attributes)
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """ FSM action: alignment takes one cycle, go straight to add
        """
        m.next = "add_0"
424
425
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Align, add stage 0 and add stage 1 chained into a single stage,
        used both as the FSM "align" state and as its own pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        # width/id_wid must be stored before the pipeline base class is
        # initialised: this object is handed to it as the stage
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: that of add stage 1
        """
        return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        stages = [
            FPAddAlignSingleMod(self.width, self.id_wid),
            FPAddStage0Mod(self.width, self.id_wid),
            FPAddStage1Mod(self.width, self.id_wid),
        ]
        StageChain(stages).setup(m, i)

        # output of the last module in the chain is the stage output
        self.o = stages[-1].o

    def process(self, i):
        """ returns the chained output; the argument is not used
        """
        return self.o

    def action(self, m):
        """ FSM action: latch the chain output, move to "normalise_1"
        """
        result = self.process(None)
        m.d.sync += self.a1o.eq(result)
        m.next = "normalise_1"
461
462
class FPAddStage0Data:
    """ Output bundle of add stage 0: result number z, the raw mantissa
        total (tot), the early-out fields (out_do_z, oz) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # total is 4 bits wider than z's mantissa
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
475
476
class FPAddStage0Mod:
    """ Add stage 0 (combinatorial): adds or subtracts the two aligned
        mantissas into o.tot and selects the result sign and exponent.
        oz, out_do_z and mid are forwarded unchanged.
    """

    def __init__(self, width, id_wid):
        # width:  operand width, forwarded to the data classes
        # id_wid: width of the "mid" field
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input-data object (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output-data object (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object; the argument is not used
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the mantissa add/subtract logic
        """
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs equal
        mge = Signal(reset_less=True)  # a's mantissa >= b's mantissa
        # one bit wider than the mantissa: Cat(m, 0) adds a zero MSB
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
                     mge.eq(self.i.a.m >= self.i.b.m),
                     am0.eq(Cat(self.i.a.m, 0)),
                     bm0.eq(Cat(self.i.b.m, 0))
                    ]
        # nothing is computed when an early-out result is already present
        with m.If(~self.i.out_do_z):
            # result exponent is taken from a
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [
                    self.o.tot.eq(am0 + bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [
                    self.o.tot.eq(am0 - bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [
                    self.o.tot.eq(bm0 - am0),
                    self.o.z.s.eq(self.i.b.s)
                ]

        # pass-through fields
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        return m
541
542
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM "add_0" state wrapping FPAddStage0Mod; registers the module
        output into self.o and always proceeds to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # the module requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """ FSM action: unconditionally move to "add_1"
        """
        m.next = "add_1"
564
565
class FPAddStage1Data:
    """ Output bundle of add stage 1: result number z, its overflow
        bits (of), the early-out fields (out_do_z, oz) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
578
579
580
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)

        The code tests tot[-1], the MSB of tot; the "27" above presumably
        refers to the single-precision width -- TODO confirm.
        oz, out_do_z and mid are forwarded unchanged.
    """

    def __init__(self, width, id_wid):
        # NOTE(review): FPState.__init__ is not called here
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input-data object (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output-data object (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object; the argument is not used
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        # the output overflow object is registered on the parent module
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ splits tot into mantissa plus m0/guard/round/sticky bits
        """
        m = Module()
        # default: copy z through (mantissa/exponent overridden below)
        m.d.comb += self.o.z.eq(self.i.z)
        # tot[-1] (MSB) gets set when the sum overflows. shift result down
        with m.If(~self.i.out_do_z):
            with m.If(self.i.tot[-1]):
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[4:]),
                    self.o.of.m0.eq(self.i.tot[4]),
                    self.o.of.guard.eq(self.i.tot[3]),
                    self.o.of.round_bit.eq(self.i.tot[2]),
                    # the two lowest bits are OR-ed into sticky
                    self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
                    self.o.z.e.eq(self.i.z.e + 1)
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[3:]),
                    self.o.of.m0.eq(self.i.tot[3]),
                    self.o.of.guard.eq(self.i.tot[2]),
                    self.o.of.round_bit.eq(self.i.tot[1]),
                    self.o.of.sticky.eq(self.i.tot[0])
                ]

        # pass-through fields
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
638
639
class FPAddStage1(FPState):
    """ FSM "add_1" state wrapping FPAddStage1Mod.

        Registers the module's z and overflow outputs and raises norm_stb
        while in this state, then proceeds to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # the module requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module exposes its results as o.of / o.z
        # (it has no out_of / out_z attributes)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)

    def action(self, m):
        """ FSM action: strobe the normaliser, move to "normalise_1"
        """
        # norm_stb is raised here rather than in setup(): an unconditional
        # eq(1) placed after the eq(0) default always wins, which left the
        # strobe stuck at 1.  FPNorm1Multi uses the same default-in-setup /
        # override-in-action pattern for its ack signal.
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
662
663
class FPNormaliseModSingle:
    """ Single-cycle normalisation (exponent decrease only).

        When the MSB of the input mantissa is zero, counts the leading
        zeros with a PriorityEncoder, shifts the mantissa (with the round
        and guard bits appended) up by that count and decreases the
        exponent by the same amount.  Otherwise out_z/out_of are copies
        of in_z/in_of.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow bits accompanying in_z / out_z.  elaborate() reads
        # in_of and drives out_of, but neither was previously created.
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ returns a fresh input number (FPNumBase)
        """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ returns a fresh output number (FPNumBase)
        """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input attribute of this class is in_z (there is no self.i)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        """ builds the count-leading-zeros / shift-up logic
        """
        m = Module()

        # mantissa plus the guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): this shifter is instantiated but never connected
        # in this module; kept so the generated hierarchy is unchanged
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
730
731
class FPNorm1Data:
    """ Output bundle of normalisation: result number z, the "round
        required" flag (roundz), the early-out fields (out_do_z, oz)
        and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
744
745
class FPNorm1ModSingle:
    """ Single-cycle normalisation (combinatorial).

        decrease: mantissa MSB is zero and exp_gt_n126 is set.  The
                  mantissa is shifted up by the leading-zero count,
                  limited to exp_sub_n126, and the exponent reduced by
                  the same amount.
        increase: exp_lt_n126 is set.  The mantissa plus
                  guard/round/sticky is shifted down through the
                  MultiShiftRMerge by N126 - e and the exponent raised by
                  that amount.

        o.roundz is taken from the resulting overflow bits.  mid,
        out_do_z and oz are forwarded unchanged.
    """

    def __init__(self, width, id_wid):
        # width:  operand width, forwarded to the data classes
        # id_wid: width of the "mid" field
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input-data object (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output-data object (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output object; the argument is not used
        """
        return self.o

    def elaborate(self, platform):
        """ builds the normalisation logic
        """
        m = Module()

        # mantissa plus the guard and round bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        # local overflow object: only its roundz reaches the output
        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # internal copy of the input
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # nothing is adjusted when an early-out result is already present
        with m.If(~self.i.out_do_z):
            # decrease exponent
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come from the shifter output: the
                    # decrease branch's temp_s is not driven on this path,
                    # so reading it here yielded zeros
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # pass-through fields
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
852
853
class FPNorm1ModMulti:
    """ Multi-cycle normalisation step (combinatorial): adjusts the
        exponent by one and shifts the mantissa by one bit per
        evaluation.

        in_select chooses between the fresh input (in_z/in_of) and the
        fed-back intermediate (temp_z/temp_of).  out_norm is high while
        a further step is still required.
    """

    def __init__(self, width, single_cycle=True):
        # single_cycle is accepted for interface compatibility; it is
        # not used by this class
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven in elaborate(); previously never created
        self.out_norm = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb,
              in_accept, temp_z, temp_of, out_z, out_norm):
        """ links module to inputs and outputs

            Argument order matches the call in FPNorm1Multi.setup.
            norm_stb is accepted for that reason but not used here.
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.eq(in_z)
        m.d.comb += self.in_of.eq(in_of)

        m.d.comb += self.in_select.eq(in_accept)
        m.d.comb += self.temp_z.eq(temp_z)
        m.d.comb += self.temp_of.eq(temp_of)

        m.d.comb += out_z.eq(self.out_z)
        m.d.comb += out_norm.eq(self.out_norm)

    def elaborate(self, platform):
        """ builds the one-step normalisation logic
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
919
920
class FPNorm1Single(FPState):
    """ FSM "normalise_1" state wrapping the single-cycle normaliser
        (FPNorm1ModSingle); always proceeds to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # single_cycle is accepted for interface compatibility; it is
        # not used by this class
        FPState.__init__(self, "normalise_1")
        # the module requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: unconditionally move to "round"
        """
        m.next = "round"
943
944
class FPNorm1Multi(FPState):
    """ FSM "normalise_1" state driving the multi-cycle normaliser
        (FPNorm1ModMulti).

        Remains in the state, feeding the module's output back through
        temp_z/temp_of, for as long as out_norm is high; then latches
        roundz and moves on to "round".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        # handshake
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        # fed-back intermediate value
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        # results
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        self.mod.setup(
            m, in_z, in_of, norm_stb,
            self.in_accept, self.temp_z, self.temp_of,
            self.out_z, self.out_norm,
        )

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ FSM action: iterate while normalisation is still required
        """
        # accept a fresh input only when strobed and not yet acknowledged
        m.d.comb += self.in_accept.eq(~self.ack & self.stb)
        # feed the module output back as the next intermediate value
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
985
986
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalisation, rounding, corrections and packing chained into a
        single stage, used both as the FSM "normalise_1" state and as
        its own pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        # id_wid/width must be stored before the pipeline base class is
        # initialised: this object is handed to it as the stage
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        """ input format: that of the normalisation module
        """
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        """ output format: that of the pack module
        """
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stages = [
            FPNorm1ModSingle(self.width, self.id_wid),
            FPRoundMod(self.width, self.id_wid),
            FPCorrectionsMod(self.width, self.id_wid),
            FPPackMod(self.width, self.id_wid),
        ]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        # register that action() latches the packed result into
        self.out_z = packer.ospec()

        self.o = packer.o

    def process(self, i):
        """ returns the chained output; the argument is not used
        """
        return self.o

    def action(self, m):
        """ FSM action: latch the packed result, move to "pack_put_z"
        """
        result = self.process(None)
        m.d.sync += self.out_z.eq(result)
        m.next = "pack_put_z"
1022
1023
class FPRoundData:
    """ Data bundle used by the rounding and corrections stages: result
        number z, the early-out fields (out_do_z, oz) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1035
1036
class FPRoundMod:
    """ Rounding stage (combinatorial): when roundz is set, increments
        the mantissa, and also the exponent if the mantissa was all 1s.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a fresh input-data object (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output-data object (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object; the argument is not used
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the rounding logic
        """
        m = Module()
        src = self.i
        dst = self.out_z
        # default: straight copy (copies mid, z, out_do_z)
        m.d.comb += dst.eq(src)
        # rounding is skipped when an early-out result is already present
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # mantissa up
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                # all 1s: the increment wraps, so exponent up
                with m.If(src.z.m == src.z.m1s):
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1068
1069
class FPRound(FPState):
    """ FSM "round" state wrapping FPRoundMod; registers the module
        output and always proceeds to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # the module requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in this class; presumably
        # inherited from FPState (or an FPID mix-in) -- TODO confirm
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its output as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally move to "corrections"
        """
        m.next = "corrections"
1094
1095
class FPCorrectionsMod:
    """ combinatorial corrections stage: FPRoundData in, FPRoundData out
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ creates a new input data object
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ creates a new output data object
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the stage output (self.out_z); i is not used
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        i_z = self.i.z
        o_z = self.out_z.z
        m.submodules.corr_in_z = i_z
        m.submodules.corr_out_z = o_z

        # default: pass everything through (copies mid, z, out_do_z)
        m.d.comb += self.out_z.eq(self.i)

        # when not bypassed, a denormalised input gets its exponent
        # replaced by the input number's N127 value
        with m.If(~self.i.out_do_z):
            with m.If(i_z.is_denormalised):
                m.d.comb += o_z.e.eq(i_z.N127)
        return m
1128
1129
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod, registers output
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ creates a new input data object (same spec as the wrapped mod)
        """
        return self.mod.ispec()

    def ospec(self):
        """ creates a new output data object (same spec as the wrapped mod)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPRoundData.eq copies z, out_do_z, oz *and* mid, so no separate
        # mid assignment is needed (FPCorrectionsMod has no "o" attribute)
        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        """ FSM action: unconditionally moves on to "pack"
        """
        m.next = "pack"
1153
1154
class FPPackData:
    """ output of the pack stage: a result value z plus the identifier mid
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the signals of this object as a list: z, then mid
        """
        return list((self.z, self.mid))
1166
1167
class FPPackMod:
    """ combinatorial pack stage: FPRoundData in, FPPackData out
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ creates a new input data object
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ creates a new output data object
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the stage output (self.o); i is not used
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        src = self.i.z
        m.submodules.pack_in_z = src
        m.submodules.pack_out_z = z

        # the identifier is passed straight through
        m.d.comb += self.o.mid.eq(self.i.mid)

        with m.If(~self.i.out_do_z):
            # normal path: build the result from sign/exponent/mantissa,
            # or an infinity of the same sign if the input overflowed
            with m.If(src.is_overflowed):
                m.d.comb += z.inf(src.s)
            with m.Else():
                m.d.comb += z.create(src.s, src.e, src.m)
        with m.Else():
            # bypass path: use the value already supplied in oz
            m.d.comb += z.v.eq(self.i.oz)

        m.d.comb += self.o.z.eq(z.v)
        return m
1206
1207
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its output
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ creates a new input data object (same spec as the wrapped mod)
        """
        return self.mod.ispec()

    def ospec(self):
        """ creates a new output data object (same spec as the wrapped mod)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod's output is "o" (an FPPackData holding z and mid);
        # FPPackData.eq copies both fields into the registered copy
        m.d.sync += self.out_z.eq(self.mod.o)

    def action(self, m):
        """ FSM action: unconditionally moves on to "pack_put_z"
        """
        m.next = "pack_put_z"
1231
1232
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z.z using stb/ack
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value copied into out_z.z.v
            * out_z: object whose z attribute has v, stb and ack
            * in_mid, out_mid: identifier source and destination
              (the copy is skipped when in_mid is None)
            * to_state: state to move to after the handshake
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy id and value, then run the stb/ack handshake
        """
        port = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [port.v.eq(self.in_z)]

        with m.If(port.stb & port.ack):
            # strobe and ack both high: drop the strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # otherwise keep the strobe raised
            m.d.sync += port.stb.eq(1)
1256
1257
class FPPutZIdx(FPState):
    """ FSM state that presents a result on out_zs[in_mid] using stb/ack
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: object whose v is copied to the selected output
            * out_zs: indexable collection of outputs (each with v/stb/ack)
            * in_mid: index selecting which entry of out_zs is driven
            * to_state: state to move to after the handshake
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy the value, then run the stb/ack handshake
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        target = self.out_zs[self.in_mid]

        # combinatorial copies of the selected output's handshake signals
        m.d.comb += [outz_stb.eq(target.stb),
                     outz_ack.eq(target.ack)]
        m.d.sync += [target.v.eq(self.in_z.v)]

        with m.If(outz_stb & outz_ack):
            # strobe and ack both high: drop the strobe and move on
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # otherwise keep the strobe raised
            m.d.sync += target.stb.eq(1)
1283
1284
class FPOpData:
    """ output data of the FSM adder: an FPOp z plus the identifier mid
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the members of this object as a list: z, then mid
        """
        return list((self.z, self.mid))
1295
1296
class FPADDBaseMod:
    """ FP adder built as one FSM: stages are collected in self.states and
        each contributes one FSM state in get_fragment
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ creates a new input data object
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ creates a new output data object
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with either the compact or the long chain
        build = (self.get_compact_fragment if self.compact
                 else self.get_longer_fragment)
        build(m, platform)

        # one FSM state per collected stage, named by its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ sets up the long (one state per operation) version of the adder

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__ of this
            class - confirm it is still expected to work.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: both variants are set up with the same arguments
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant also takes add1.norm_stb
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from pack, one from special-cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ sets up the compact version of the adder: four chained stages
            followed by a "pack_put_z" output state
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        stages = [get, sc, alm, n1]
        StageChain(stages, specallocate=True).setup(m, self.i)

        # every chained stage is also a state of the FSM
        for stage in stages:
            self.add_state(stage)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))
1419
1420
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod (itself an FSM) and
        bridges its input trigger and output handshake
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ creates a new input data object (same spec as the wrapped mod)
        """
        return self.mod.ispec()

    def ospec(self):
        """ creates a new output data object (same spec as the wrapped mod)
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * m: the module to add the connections and submodule to
            * i: input data, copied into self.i and on to the wrapped mod
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: accept an operand pair, wait for the wrapped adder
            to signal completion, then move on to "put_z"
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
        # the statements that used to follow an unconditional "return" here
        # could never run (and read attributes this class does not set),
        # so they have been removed
1510
1511
class FPADDBasePipe(ControlBase):
    """ three connected pipeline stages: special-cases/denorm,
        align/add, and normalise-to-pack
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements, added to the module in elaborate()
        pipechain = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(pipechain)

    def elaborate(self, platform):
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1528
1529
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe with num_rows inputs, passing FPADDBaseData
        through unchanged
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # spec shared by the pass-through stage's input and output
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
1536
1537
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe with num_rows outputs, passing FPPackData
        through unchanged
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # spec shared by the pass-through stage's input and output
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
1544
1545
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's own in/out are simply those of the two mux pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder -> fan-out
        to_adder = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += to_adder
        to_fanout = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += to_fanout

        return m

    def ports(self):
        """ returns the port list gathered from the two mux pipes
        """
        return self._ports
1578
1579
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of results
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs, one per row, named by row number
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # results, one per row, named by row number
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is read by the get stages
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(geta.out_op),
                     abd.b.eq(getb.out_op),
                     abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        # result goes to self.res, indexed by the adder's output mid
        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        # one FSM state per collected stage, named by its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1669
1670
if __name__ == "__main__":
    # hand-edited switch: True builds FPADD, False builds FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports() +
                 alu.rs[0][1].ports() +
                 alu.res[0].ports() +
                 [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=ports)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b] +
                 alu.in_t.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
        main(alu, ports=ports)
1684
1685
1686 # works... but don't use, just do "python fname.py convert -t v"
1687 #print (verilog.convert(alu, ports=[
1688 # ports=alu.in_a.ports() + \
1689 # alu.in_b.ports() + \
1690 # alu.out_z.ports())