split out prenormalisation to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
20
21
class FPAddSpecialCasesMod:
    """ Combinatorial special-case detection for the adder.

        Decodes both raw operands and resolves, without performing any
        addition, the cases listed below (first match wins):

        * either operand NaN            -> NaN
        * a is inf                      -> inf(a.s), or NaN when b has
                                           exp_128 set and the signs differ
        * b is inf                      -> inf(b.s)
        * a and b both zero             -> zero, sign = a.s & b.s
        * a is zero                     -> b
        * b is zero                     -> a
        * a == -b (sign differs, exponent and mantissa equal) -> +zero

        When one of these applies o.out_do_z is raised and the answer is
        placed in o.z; otherwise out_do_z is low and the decoded operands
        are handed on through o.a / o.b.  o.oz always carries o.z.v and
        o.mid always carries i.mid.

        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        XXX an earlier variant additionally special-cased (+/-zero, NaN)
        operand pairs, returning a sign/mantissa-adjusted copy of the NaN
        operand.  It is disabled pending review of FP16 non-canonical NaN
        handling.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format: the two raw operands (plus mid)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: a, b, z, oz, out_do_z and mid
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module in m and drives its input from i
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ the result of this stage is always the module's own output
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o

        m.submodules.sc_out_z = out.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(inp.a), b1.decode(inp.b)]

        # intermediate comparisons shared by the tests below
        s_nomatch = Signal()
        m_match = Signal()
        m.d.comb += [s_nomatch.eq(a1.s != b1.s),
                     m_match.eq(a1.m == b1.m)]

        # NaN in, NaN out
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [out.out_do_z.eq(1), out.z.nan(0)]

        # a is inf: result is inf with a's sign...
        with m.Elif(a1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(a1.s)]
            # ... unless b has exp_128 and the opposite sign: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += out.z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(b1.s)]

        # both zero: zero, negative only if both inputs are negative
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(b1.s, b1.e, b1.m[3:-1])]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s, a1.e, a1.m[3:-1])]

        # a equal to -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [out.out_do_z.eq(1), out.z.zero(0)]

        # nothing special: denormalised-number checks come next,
        # so pass the decoded a/b through
        with m.Else():
            m.d.comb += [out.out_do_z.eq(0),
                         out.a.eq(a1),
                         out.b.eq(b1)]

        m.d.comb += [out.oz.eq(out.z.v),
                     out.mid.eq(inp.mid)]

        return m
140
141
class FPID:
    """ Optional in/out identifier ("mid") signals.

        When id_wid is non-zero, in_mid and out_mid are created as
        id_wid-wide signals; otherwise (None or 0) both are None and
        idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid on the sync domain of m.

            Uses the same truthiness test as __init__: the signals only
            exist when id_wid is non-zero, so a width of 0 must be
            skipped here as well (it would otherwise dereference None).
        """
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
155
156
class FPAddSpecialCases(FPState):
    """ FSM state wrapper around FPAddSpecialCasesMod.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        The next state is "put_z" when the module flags a special case
        (out_do_z) and "denormalise" otherwise.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # the module constructor requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPAddSpecialCasesMod.setup takes (m, i) only; the do-z flag is
        # picked up from the module's output record instead.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # the module exposes its result as "o" (there is no mod.out_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid)

    def action(self, m):
        # NOTE(review): idsync() is not defined in this class; presumably
        # it is provided by FPState or a mixin - confirm.
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
182
183
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ Special-cases and de-normalisation chained into a single stage.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        The object is both an FSM state ("special_cases") and an
        UnbufferedPipeline that uses itself as its stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # width/id_wid must be in place before the pipeline is set up,
        # as this object is handed to it as the stage
        self.width, self.id_wid = width, id_wid
        UnbufferedPipeline.__init__(self, self) # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        """ input format: same as the special-cases module input
        """
        return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec

    def ospec(self):
        """ output format: same as the de-normalisation module output
        """
        return FPSCData(self.width, self.id_wid) # DeNorm ospec

    def setup(self, m, i):
        """ builds the special-cases -> denorm chain and links it to i
        """
        special = FPAddSpecialCasesMod(self.width, self.id_wid)
        denorm = FPAddDeNormMod(self.width, self.id_wid)

        StageChain([special, denorm]).setup(m, i)

        # a break-out (early-out) variant would additionally need
        # special.o.out_do_z here; it is not used at present.

        self.o = denorm.o

    def process(self, i):
        """ the stage result is the output of the last chained module
        """
        return self.o

    def action(self, m):
        # no break-out (early-out) to "put_z": always register the
        # chain's result and move on to alignment
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
227
228
class FPAddAlignMultiMod(FPState):
    """ One step of the multi-cycle exponent aligner.

        Compares the exponents of in_a and in_b.  The operand with the
        smaller exponent is replaced by the result of shift_down(); the
        other one is copied straight through.  When the exponents are
        equal both operands are copied unchanged and exp_eq is raised.

        NOTE: this does *not* do single-cycle multi-shifting: the FSM
        is expected to *STAY* in the align state until exponents match.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # defaults: exponents not (yet) equal, operands passed through.
        # the branches below override these as required.
        m.d.comb += [self.exp_eq.eq(0),
                     self.out_a.eq(self.in_a),
                     self.out_b.eq(self.in_b)]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [agtb.eq(self.in_a.e > self.in_b.e),
                     altb.eq(self.in_a.e < self.in_b.e)]

        with m.If(agtb):
            # exponent of a greater than b: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # exponent of b greater than a: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: move to next stage.
            m.d.comb += self.exp_eq.eq(1)

        return m
269
270
class FPAddAlignMulti(FPState):
    """ FSM state ("align") wrapping FPAddAlignMultiMod.

        Stays in this state until the module reports that the exponents
        match, then moves on to "add_0".  id_wid is accepted but not
        used here.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        # inputs and the "exponents equal" flag are combinatorial...
        m.d.comb += [self.mod.in_a.eq(in_a),
                     self.mod.in_b.eq(in_b),
                     self.exp_eq.eq(self.mod.exp_eq)]
        # ... whereas the (partially) aligned operands are registered
        m.d.sync += [self.out_a.eq(self.mod.out_a),
                     self.out_b.eq(self.mod.out_b)]

    def action(self, m):
        # only leave the align state once the exponents are equal
        with m.If(self.exp_eq):
            m.next = "add_0"
293
294
class FPNumIn2Ops:
    """ Data record: two decoded operands (a, b) plus the pass-through
        fields z, out_do_z, oz and mid.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
308
309
class FPAddAlignSingleMod:
    """ Single-cycle exponent alignment of the two operands.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ the result of this stage is always the module's own output
        """
        return self.o

    def setup(self, m, i):
        """ registers this module in m and drives its input from i
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates,
            so there is only one of them: A or B is muxed in (and out)
            through temporaries, as only one of them needs to be aligned
            against the other.  Nothing is shifted when out_do_z is set.
        """
        m = Module()
        inp, out = self.i, self.o

        m.submodules.align_in_a = inp.a
        m.submodules.align_in_b = inp.b
        m.submodules.align_out_a = out.a
        m.submodules.align_out_b = out.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(inp.a.e), True)
        msr = MultiShiftRMerge(inp.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        m.d.comb += [
            # connect multi-shifter to t_inp/out mantissa (and tdiff)
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
            # exponent differences (both directions) and comparisons
            ediff.eq(inp.a.e - inp.b.e),
            ediffr.eq(inp.b.e - inp.a.e),
            elz.eq(inp.a.e < inp.b.e),
            egz.eq(inp.a.e > inp.b.e),
            # default: A-exp == B-exp, A and B untouched (fall through)
            out.a.eq(inp.a),
            out.b.eq(inp.b),
        ]

        # only one shifter (muxed): route whichever operand has the
        # smaller exponent through it
        with m.If(~inp.out_do_z):
            with m.If(egz):
                # exponent of a greater than b: shift b down
                m.d.comb += [t_inp.eq(inp.b),
                             tdiff.eq(ediff),
                             out.b.eq(t_out),
                             out.b.s.eq(inp.b.s), # sign taken from input
                            ]
            with m.Elif(elz):
                # exponent of b greater than a: shift a down
                m.d.comb += [t_inp.eq(inp.a),
                             tdiff.eq(ediffr),
                             out.a.eq(t_out),
                             out.a.s.eq(inp.a.s), # sign taken from input
                            ]

        # pass-through fields
        m.d.comb += [out.mid.eq(inp.mid),
                     out.z.eq(inp.z),
                     out.out_do_z.eq(inp.out_do_z),
                     out.oz.eq(inp.oz)]

        return m
403
404
class FPAddAlignSingle(FPState):
    """ FSM state ("align") wrapping the single-cycle aligner.

        Registers the aligned operands produced by FPAddAlignSingleMod
        and always moves on to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module publishes its aligned operands in its output
        # record "o" (it has no out_a / out_b attributes).
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
424
425
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Alignment, add stage 0 and add stage 1 chained into one stage.

        The object is both an FSM state ("align") and an
        UnbufferedPipeline that uses itself as its stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        # width/id_wid must be in place before the pipeline is set up,
        # as this object is handed to it as the stage
        self.width, self.id_wid = width, id_wid
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: same as the add stage 1 module output
        """
        return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec

    def setup(self, m, i):
        """ builds the align -> add0 -> add1 chain and links it to i
        """
        align = FPAddAlignSingleMod(self.width, self.id_wid)
        add0 = FPAddStage0Mod(self.width, self.id_wid)
        add1 = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([align, add0, add1]).setup(m, i)

        self.o = add1.o

    def process(self, i):
        """ the stage result is the output of the last chained module
        """
        return self.o

    def action(self, m):
        # register the chain's result, then go and normalise it
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
461
462
class FPAddStage0Data:
    """ Data record produced by add stage 0: the partial result z, the
        mantissa total "tot" (4 bits wider than z's mantissa) and the
        pass-through fields out_do_z, oz and mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
475
476
class FPAddStage0Mod:
    """ First stage of add, as a combinatorial module.

        Adds the mantissas when the signs agree, otherwise subtracts the
        smaller mantissa from the larger one and takes the sign of the
        larger.  The result exponent is a's exponent.  Nothing is
        computed when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ the result of this stage is always the module's own output
        """
        return self.o

    def setup(self, m, i):
        """ registers this module in m and drives its input from i
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o

        m.submodules.add0_in_a = inp.a
        m.submodules.add0_in_b = inp.b
        m.submodules.add0_out_z = out.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(inp.a.m)+1, reset_less=True)
        bm0 = Signal(len(inp.b.m)+1, reset_less=True)
        m.d.comb += [
            seq.eq(inp.a.s == inp.b.s),     # signs equal
            mge.eq(inp.a.m >= inp.b.m),     # a mantissa >= b mantissa
            am0.eq(Cat(inp.a.m, 0)),
            bm0.eq(Cat(inp.b.m, 0)),
        ]

        with m.If(~inp.out_do_z):
            m.d.comb += out.z.e.eq(inp.a.e)
            with m.If(seq):
                # same-sign (both negative or both positive):
                # add mantissas
                m.d.comb += [out.tot.eq(am0 + bm0),
                             out.z.s.eq(inp.a.s)]
            with m.Elif(mge):
                # a mantissa greater than b, use a
                m.d.comb += [out.tot.eq(am0 - bm0),
                             out.z.s.eq(inp.a.s)]
            with m.Else():
                # b mantissa greater than a, use b
                m.d.comb += [out.tot.eq(bm0 - am0),
                             out.z.s.eq(inp.b.s)]

        # pass-through fields
        m.d.comb += [out.oz.eq(inp.oz),
                     out.out_do_z.eq(inp.out_do_z),
                     out.mid.eq(inp.mid)]
        return m
541
542
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM state ("add_0") wrapping FPAddStage0Mod: registers the
        module's output and always moves on to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # the module constructor requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
564
565
class FPAddStage1Data:
    """ Data record produced by add stage 1: the result z, its Overflow
        bits "of" and the pass-through fields out_do_z, oz and mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
578
579
580
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.

        Detects when the mantissa total is too big (its MSB acts as a
        carry bit).  In that case the total is taken one bit further up
        and the exponent is incremented; either way the low bits of the
        total are split off into the m0 / guard / round / sticky flags.
        Nothing is computed when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format: the output of add stage 0
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ the result of this stage is always the module's own output
        """
        return self.o

    def setup(self, m, i):
        """ registers this module (and its overflow output) in m and
            drives its input from i
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o
        tot = inp.tot

        # start from a copy of the input; m and e are overridden below
        m.d.comb += out.z.eq(inp.z)

        with m.If(~inp.out_do_z):
            with m.If(tot[-1]):
                # tot[-1] (MSB) gets set when the sum overflows.
                # shift result down and bump the exponent
                m.d.comb += [
                    out.z.m.eq(tot[4:]),
                    out.of.m0.eq(tot[4]),
                    out.of.guard.eq(tot[3]),
                    out.of.round_bit.eq(tot[2]),
                    out.of.sticky.eq(tot[1] | tot[0]),
                    out.z.e.eq(inp.z.e + 1),
                ]
            with m.Else():
                # tot[-1] (MSB) zero case
                m.d.comb += [
                    out.z.m.eq(tot[3:]),
                    out.of.m0.eq(tot[3]),
                    out.of.guard.eq(tot[2]),
                    out.of.round_bit.eq(tot[1]),
                    out.of.sticky.eq(tot[0]),
                ]

        # pass-through fields
        m.d.comb += [out.out_do_z.eq(inp.out_do_z),
                     out.oz.eq(inp.oz),
                     out.mid.eq(inp.mid)]

        return m
638
639
class FPAddStage1(FPState):
    """ FSM state ("add_1") wrapping FPAddStage1Mod.

        Registers the module's z and overflow outputs, raises norm_stb
        and always moves on to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # the module constructor requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): the original comment says this "sets to zero
        # when not in add1 state", but the unconditional eq(1) further
        # down in this same method overrides it.  Presumably the eq(1)
        # was meant to live in action() - confirm before relying on it.
        m.d.sync += self.norm_stb.eq(0)

        # the module publishes its results in its output record "o"
        # (it has no out_of / out_z attributes)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
662
663
class FPNorm1Data:
    """ Data record produced by normalisation: the normalised z, the
        "roundz" flag and the pass-through fields out_do_z, oz and mid.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
676
677
class FPNorm1ModSingle:
    """ Single-cycle normalisation, as a combinatorial module.

        Two mutually exclusive corrections are applied (neither when
        out_do_z is set):

        * decrease: the mantissa MSB is zero and the exponent is above
          the minimum.  Leading zeros are counted with a PriorityEncoder
          and the mantissa is shifted up / the exponent decreased by
          that count, limited so that the exponent does not go below
          the minimum.
        * increase: the exponent is below the minimum.  The mantissa
          (with guard, round and sticky bits attached) is shifted down
          through a MultiShiftRMerge and the exponent is raised by the
          same amount.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format: the output of add stage 1
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ the result of this stage is always the module's own output
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        # local overflow record: only its roundz flag leaves the module
        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # working copy of the input
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the shifter is mwid wide but is fed the
        # (mwid+1)-wide temp_m in the increase branch - confirm the
        # intended width against MultiShiftRMerge.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits must come from the shifter output:
                    # temp_s belongs to the decrease branch and is not
                    # driven here, so reading it would always give zero.
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # pass-through fields
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
784
785
class FPNorm1ModMulti:
    """ One step of multi-cycle normalisation, as a combinatorial module.

        Works on either the fresh input (in_z / in_of, when in_select is
        high) or the intermediate value from the previous step (temp_z /
        temp_of).  Each step moves the exponent by one and shifts the
        mantissa by one bit in the matching direction.  out_norm is high
        while a further step is needed.

        single_cycle is accepted but not used here.

        NOTE(review): FPNorm1Multi calls a setup() method on this class
        that is not defined here - confirm where it is meant to come from.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven in elaborate(); it has to exist before then
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
851
852
class FPNorm1Single(FPState):
    """ FSM state ("normalise_1") wrapping the single-cycle normaliser.

        Always moves on to "round".  single_cycle is accepted but not
        used here.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # the module constructor requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"
875
876
class FPNorm1Multi(FPState):
    """ FSM state ("normalise_1") driving the multi-cycle normaliser.

        Remains in this state while out_norm is high and moves on to
        "round" once it drops, latching the roundz flag on the way out.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as defined in this file, has no
        # setup() method - confirm where this call is meant to resolve.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default; overridden by the assignments made in action()
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        # fresh input is accepted when strobed and not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # keep the intermediate result for the next step
        m.d.sync += [self.temp_of.eq(self.mod.out_of),
                     self.temp_z.eq(self.out_z)]
        with m.If(self.out_norm):
            # another normalisation step is required: stay here
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [self.ack.eq(1),
                         self.out_roundz.eq(self.mod.out_of.roundz)]
917
918
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalisation, rounding, corrections and packing chained into
        a single stage.

        The object is both an FSM state ("normalise_1") and an
        UnbufferedPipeline that uses itself as its stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        # id_wid/width must be in place before the pipeline is set up,
        # as this object is handed to it as the stage
        self.id_wid, self.width = id_wid, width
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        """ input format: same as the normalisation module input
        """
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        """ output format: same as the pack module output
        """
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ builds the norm -> round -> corrections -> pack chain and
            links it to i
        """
        norm = FPNorm1ModSingle(self.width, self.id_wid)
        rnd = FPRoundMod(self.width, self.id_wid)
        corr = FPCorrectionsMod(self.width, self.id_wid)
        pack = FPPackMod(self.width, self.id_wid)

        StageChain([norm, rnd, corr, pack]).setup(m, i)

        # register that action() copies the chain's result into
        self.out_z = pack.ospec()

        self.o = pack.o

    def process(self, i):
        """ the stage result is the output of the last chained module
        """
        return self.o

    def action(self, m):
        # register the packed result, then go and output it
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
954
955
class FPRoundData:
    """ Data record used from rounding onwards: the result z and the
        pass-through fields out_do_z, oz and mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
967
968
class FPRoundMod:
    """ Rounding, as a combinatorial module.

        When the input's roundz flag is set (and out_do_z is not) the
        mantissa is incremented; if the mantissa was all 1s beforehand
        the exponent is incremented as well.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: the output of normalisation
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ the result of this stage is always the module's own output
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module in m and drives its input from i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.out_z

        # start from a copy of the input (copies mid, z, out_do_z);
        # m and e are overridden below when rounding up
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.roundz):
                m.d.comb += out.z.m.eq(inp.z.m + 1)         # mantissa up
                with m.If(inp.z.m == inp.z.m1s):            # all 1s
                    m.d.comb += out.z.e.eq(inp.z.e + 1)     # exponent up

        return m
1000
1001
class FPRound(FPState):
    """ FSM state ("round") wrapping FPRoundMod.

        Registers the module's output and always moves on to
        "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # the module constructor requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync() is not defined in this class; presumably
        # it is provided by FPState or a mixin - confirm.
        self.idsync(m)
        # FPRoundMod publishes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1026
1027
class FPCorrectionsMod:
    """ Post-rounding corrections, as a combinatorial module.

        When the input is flagged as denormalised (and out_do_z is not
        set) the output exponent is replaced by the N127 constant of
        the number; everything else is copied through.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: the output of rounding
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: same record type as the input
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ the result of this stage is always the module's own output
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module in m and drives its input from i
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.out_z

        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = out.z

        # start from a copy of the input (copies mid, z, out_do_z)
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                m.d.comb += out.z.e.eq(inp.z.N127)
        return m
1060
1061
class FPCorrections(FPState):
    """ FSM state ("corrections") wrapping FPCorrectionsMod.

        Registers the module's output and always moves on to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # the module constructor requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPCorrectionsMod publishes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
1085
1086
class FPPackData:
    """ Data record holding the final result: the width-bit value z
        and the id mid.
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns the record's signals as a list: z first, then mid
        """
        return [getattr(self, name) for name in ("z", "mid")]
1098
1099
class FPPackMod:
    """ combinatorial "pack" stage: FPRoundData in, FPPackData out.

        with out_do_z clear, the output value is built from the input
        number (inf if it is flagged as overflowed, otherwise created from
        its s, e and m); with out_do_z set, the input's oz is used as-is.
        mid is passed straight through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input spec: an FPRoundData built from this stage's widths
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output spec: an FPPackData built from this stage's widths
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object (the argument itself is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        num = self.i.z
        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            # normal path: pack the number, substituting inf on overflow
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            # bypass path: the input already carries the value in oz
            m.d.comb += z.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1138
1139
class FPPack(FPState):
    """ FSM state "pack": wraps an FPPackMod, registers its output in the
        sync domain, then moves on to the "pack_put_z" state.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPPackMod
            * id_wid: passed to FPPackMod
        """
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ takes both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod's output is "o" (an FPPackData), whose value field is
        # "z": neither object has a "v" attribute, nor the module an "out_z"
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: unconditionally move to "pack_put_z"
        """
        m.next = "pack_put_z"
1163
1164
class FPPutZ(FPState):
    """ FSM state that writes a result value (and optionally an id) to an
        output and performs the stb/ack handshake on that output.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value copied to out_z.z.v
            * out_z: object whose "z" has v, stb and ack
            * in_mid: id to copy to out_mid (None: no id is copied)
            * out_mid: destination for in_mid
            * to_state: state entered after the handshake ("get_ops" if None)
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ copies the value (and id), raising stb until stb and ack are
            both set, at which point stb is dropped and the FSM moves on
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        out_op = self.out_z.z
        m.d.sync += out_op.v.eq(self.in_z)
        with m.If(out_op.stb & out_op.ack):
            m.d.sync += out_op.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += out_op.stb.eq(1)
1188
1189
class FPPutZIdx(FPState):
    """ FSM state that writes a result value to one of several outputs,
        selected by in_mid, and performs the stb/ack handshake on it.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: object whose "v" is copied to the selected output
            * out_zs: indexable collection of outputs (each: v, stb, ack)
            * in_mid: index into out_zs
            * to_state: state entered after the handshake ("get_ops" if None)
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ copies the value to the selected output, raising its stb until
            stb and ack are both set; then stb is dropped and the FSM moves on
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # the output selected by the id
        target = self.out_zs[self.in_mid]
        m.d.comb += outz_stb.eq(target.stb)
        m.d.comb += outz_ack.eq(target.ack)
        m.d.sync += target.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += target.stb.eq(1)
1215
1216
class FPOpData:
    """ data record: z (an FPOp of the given width) and mid (id_wid bits)
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying i.z and i.mid into this record
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z first, then mid
        """
        return [self.z, self.mid]
1227
1228
class FPADDBaseMod:
    """ FSM-based FP adder: builds a list of states (objects providing
        state_from and action) and emits them as a single nmigen FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # filled in by get_compact_fragment / get_longer_fragment
        self.states = []

    def ispec(self):
        """ input spec: an FPADDBaseData built from this module's widths
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output spec: an FPOpData built from this module's widths
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states (and wire the stages into m)
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            # one FSM state per registered state object
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers one state per individual stage (used when compact
            is False).

            NOTE(review): this path looks out of step with the rest of the
            class - see the notes below - confirm before relying on it.
        """

        # NOTE(review): get_compact_fragment passes self.id_wid as a fourth
        # argument to FPGet2Op; here it is omitted - confirm which is right
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        # NOTE(review): self.in_mid is not assigned by __init__ (which only
        # creates in_t, i and o)
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        # NOTE(review): FPCorrections.setup and FPPack.setup in this file
        # are declared as setup(m, in_z); the two calls below pass one
        # argument more than that
        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # NOTE(review): self.out_z and self.out_mid are not assigned by
        # __init__ either (the output record is self.o)
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                   pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced set of states (used when compact is
            True): get-ops, special-cases+denorm, align+add, norm-to-pack,
            followed by a final FPPutZ writing the result to self.o
        """


        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        # the stages are linked together as a chain, fed from self.i
        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also an FSM state (note: rebinds "sc";
        # the value is not used afterwards)
        for mod in chainlist:
            sc = self.add_state(mod)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                        sc.o.mid, self.o.mid))
1351
1352
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod (which contains its own
        FSM) and handshakes operands into it and the result out of it.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * m: the module being built
            * i: input data, copied to self.i and on to the wrapped module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted for interface compatibility; not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: while the wrapped module is not done, accept an
            incoming operand pair (or else acknowledge its output); once
            done, move to "put_z".
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
        # (the statements that used to follow an early "return" here were
        # unreachable, and referred to attributes this class does not
        # define: they have been removed)
1442
1443
class FPADDBasePipe(ControlBase):
    """ three pipeline stages connected in sequence: special-cases+denorm
        (pipe1), align+add (pipe2), norm-to-pack (pipe3)
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # the connection statements are kept until elaboration
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages as submodules and adds the stored
            connection statements to the comb domain
        """
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
1460
1461
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe over num_rows inputs, each an FPADDBaseData,
        using a pass-through stage
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # same data spec is used for input and output of the stage
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
1468
1469
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe over num_rows outputs, each an FPPackData, using a
        pass-through stage
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # same data spec is used for input and output of the stage
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
1476
1477
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to all three pipes
            * num_rows: number of fan-in inputs / fan-out outputs
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's own in/out are those of the first and last pipe
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes as submodules and connects them in
            sequence: inpipe -> fpadd -> outpipe
        """
        m = Module()
        fan_in, adder, fan_out = self.inpipe, self.fpadd, self.outpipe
        m.submodules.inpipe = fan_in
        m.submodules.fpadd = adder
        m.submodules.outpipe = fan_out

        m.d.comb += fan_in.n.connect_to_next(adder.p)
        m.d.comb += adder.connect_to_next(fan_out)

        return m

    def ports(self):
        """ returns the port list assembled in the constructor
        """
        return self._ports
1510
1511
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs, and of out_z
              results, that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs, one per row
        operand_pairs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # results, one per row
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up here
        first_pair = self.rs[0]
        in_a = first_pair[0]
        in_b = first_pair[1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # final state: write the result to the output selected by o.mid
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1601
1602
if __name__ == "__main__":
    # command-line entry point: hand an FPADD to nmigen's main(), exposing
    # the first operand pair, the first result, and the id signals.
    # (an alternative branch that built an FPADDBase was unreachable - it
    # sat behind "if True: ... else:" - and read attributes FPADDBase does
    # not define, such as in_a, in_b and out_z: it has been dropped)
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    ports = (alu.rs[0][0].ports() +
             alu.rs[0][1].ports() +
             alu.res[0].ports() +
             [alu.ids.in_mid, alu.ids.out_mid])
    main(alu, ports=ports)
1616
1617
1618 # works... but don't use, just do "python fname.py convert -t v"
1619 #print (verilog.convert(alu, ports=[
1620 # ports=alu.in_a.ports() + \
1621 # alu.in_b.ports() + \
1622 # alu.out_z.ports())