start splitting out common code from nmigen_add_experiment.py
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19
20
class FPSCData:
    """ Bundle of signals handed on by the special-cases stage.

        a, b     : the two operands
        z        : the result number
        oz       : plain width-bit result value
        out_do_z : single-bit flag travelling with the result
        mid      : id tag, id_wid bits wide
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field of i
            into the matching field of this object
        """
        names = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in names]
34
35
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros and exact cancellation (a == -b)

        When one of the cases matches, o.out_do_z is set and the result
        is created in o.z.  Otherwise o.out_do_z is cleared and the
        decoded operands are handed on in o.a / o.b.  o.oz always follows
        o.z.v and o.mid always follows i.mid.

        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module in m and feeds it from i
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Module()
        o = self.o

        m.submodules.sc_out_z = o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a), b1.decode(self.i.b)]

        # intermediate tests shared by the cases below
        s_nomatch = Signal()
        m_match = Signal()
        m.d.comb += [s_nomatch.eq(a1.s != b1.s),
                     m_match.eq(a1.m == b1.m)]

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [o.out_do_z.eq(1), o.z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling: under review.
        # (four variants returning -a / -b for "zero plus NaN" operands
        #  used to sit here, commented out.)

        # a is inf: result is inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(a1.s)]
            # signs differ and b has exponent 128 as well: NaN instead.
            # must come after the inf() above so that it takes priority
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += o.z.nan(0)

        # b is inf: result is inf
        with m.Elif(b1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(b1.s)]

        # both zero: result is zero, sign is a.s AND b.s
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         o.z.create(b1.s, b1.e, b1.m[3:-1])]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         o.z.create(a1.s, a1.e, a1.m[3:-1])]

        # a equal to -b: result is zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [o.out_do_z.eq(1), o.z.zero(0)]

        # no special case.  Denormalised Number checks come next, so
        # pass the decoded a/b data through
        with m.Else():
            m.d.comb += [o.out_do_z.eq(0),
                         o.a.eq(a1),
                         o.b.eq(b1)]

        m.d.comb += [o.oz.eq(o.z.v),
                     o.mid.eq(self.i.mid)]

        return m
154
155
class FPID:
    """ Optional id ("mid") in/out pair.

        When id_wid is falsy (None or 0) no signals are created:
        in_mid and out_mid are both None and idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ adds a sync assignment copying in_mid to out_mid (if present)
        """
        # same test as __init__: with "is not None" an id_wid of 0 got
        # past the guard although in_mid/out_mid are None in that case
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
169
170
class FPAddSpecialCases(FPState):
    """ FSM state wrapper around FPAddSpecialCasesMod.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod takes (width, id_wid)
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # mod.setup takes (m, i) only, so the early-out flag is picked
        # up from the module's output bundle instead
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # only take the output (and mid).  the module exposes its result
        # value as o.oz (which it drives from o.z.v)
        m.d.sync += self.out_z.oz.eq(self.mod.o.oz)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        # NOTE(review): idsync is not defined in this class; presumably
        # it comes from FPState - confirm
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
196
197
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases (NaNs, infs, zeros) chained straight into
        de-normalisation, usable both as an FSM state and as a pipeline
        stage.

        NOTE: some of the cases are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # set before the pipeline base class is initialised: presumably
        # it calls ispec()/ospec(), which read these - confirm
        self.width, self.id_wid = width, id_wid
        UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        # same input spec as the SpecialCases module
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        # same output spec as the DeNorm module
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the special-cases -> denorm chain and connects it to i
        """
        smod = FPAddSpecialCasesMod(self.width, self.id_wid)
        dmod = FPAddDeNormMod(self.width, self.id_wid)

        StageChain([smod, dmod]).setup(m, i)

        # only needed for break-out (early-out)
        # self.out_do_z = smod.o.out_do_z

        # the chain's result is the output of its last module
        self.o = dmod.o

    def process(self, i):
        return self.o

    def action(self, m):
        # for break-out (early-out) this used to be conditional:
        #   with m.If(self.out_do_z): m.next = "put_z"
        #   with m.Else(): ...
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
241
242
class FPAddDeNormMod(FPState):
    """ De-normalisation of both operands.

        Unless i.out_do_z is set, each operand is copied across and then
        either has its exponent replaced by N126 (when exp_n127 is set)
        or has its top mantissa bit set.  mid, z, out_do_z and oz are
        always passed through.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ registers this module in m and feeds it from i
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # identical treatment for a, then for b
            for dst, src in ((self.o.a, self.i.a), (self.o.b, self.i.b)):
                m.d.comb += dst.eq(src)
                with m.If(src.exp_n127):
                    m.d.comb += dst.e.eq(src.N126)  # limit exponent
                with m.Else():
                    m.d.comb += dst.m[-1].eq(1)  # set top mantissa bit

        # unconditional pass-through of everything else
        m.d.comb += [self.o.mid.eq(self.i.mid),
                     self.o.z.eq(self.i.z),
                     self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.oz.eq(self.i.oz)]

        return m
293
294
class FPAddDeNorm(FPState):
    """ FSM state wrapper around FPAddDeNormMod: registers the
        de-normalised operands and moves on to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod takes (width, id_wid)
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's outputs live in its "o" bundle (fields a and b)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
314
315
class FPAddAlignMultiMod(FPState):
    """ Exponent alignment, one shift_down step at a time.

        Compares the two input exponents and calls shift_down on the
        operand with the smaller one.  exp_eq is set only when the
        exponents are already equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults (overridden in the branches below)
        m.d.comb += [self.exp_eq.eq(0),
                     self.out_a.eq(self.in_a),
                     self.out_b.eq(self.in_b)]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [agtb.eq(self.in_a.e > self.in_b.e),
                     altb.eq(self.in_a.e < self.in_b.e)]

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)

        return m
356
357
class FPAddAlignMulti(FPState):
    """ FSM state wrapper around FPAddAlignMultiMod: leaves the "align"
        state only once the module reports equal exponents.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ registers the module, feeds it from in_a / in_b and latches
            its outputs
        """
        mod = self.mod
        m.submodules.align = mod
        m.d.comb += [mod.in_a.eq(in_a),
                     mod.in_b.eq(in_b),
                     self.exp_eq.eq(mod.exp_eq)]
        m.d.sync += [self.out_a.eq(mod.out_a),
                     self.out_b.eq(mod.out_b)]

    def action(self, m):
        # no Else: without exp_eq the state is not changed
        with m.If(self.exp_eq):
            m.next = "add_0"
380
381
class FPNumIn2Ops:
    """ Bundle with two FPNumIn operands (a, b) plus the result number
        (z), result value (oz), out_do_z flag and id tag (mid).
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field of i
            into the matching field of this object
        """
        names = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in names]
395
396
class FPAddAlignSingleMod:
    """ Single-cycle exponent alignment (see elaborate).
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ registers this module in m and feeds it from i
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        i, o = self.i, self.o

        m.submodules.align_in_a = i.a
        m.submodules.align_in_b = i.b
        m.submodules.align_out_a = o.a
        m.submodules.align_out_b = o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(i.a.e), True)
        msr = MultiShiftRMerge(i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [msr.inp.eq(t_inp.m),
                     msr.diff.eq(tdiff),
                     t_out.m.eq(msr.m),
                     t_out.e.eq(t_inp.e + tdiff),
                     t_out.s.eq(t_inp.s)]

        # exponent differences (both directions) and comparisons
        m.d.comb += [ediff.eq(i.a.e - i.b.e),
                     ediffr.eq(i.b.e - i.a.e),
                     elz.eq(i.a.e < i.b.e),
                     egz.eq(i.a.e > i.b.e)]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [o.a.eq(i.a),
                     o.b.eq(i.b)]

        # only one shifter (muxed)
        with m.If(~i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(i.b),
                             tdiff.eq(ediff),
                             o.b.eq(t_out),
                             o.b.s.eq(i.b.s),  # sign taken from the input
                             ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(i.a),
                             tdiff.eq(ediffr),
                             o.a.eq(t_out),
                             o.a.s.eq(i.a.s),  # sign taken from the input
                             ]

        # unconditional pass-through of everything else
        m.d.comb += [o.mid.eq(i.mid),
                     o.z.eq(i.z),
                     o.out_do_z.eq(i.out_do_z),
                     o.oz.eq(i.oz)]

        return m
490
491
class FPAddAlignSingle(FPState):
    """ FSM state wrapper around FPAddAlignSingleMod: registers the
        aligned operands and moves on to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module's outputs live in its "o" bundle (fields a and b)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
511
512
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Align, add stage 0 and add stage 1 chained into one stage,
        usable both as an FSM state and as a pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        # set before the pipeline base class is initialised: presumably
        # it calls ispec()/ospec(), which read these - confirm
        self.width, self.id_wid = width, id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        # same output spec as AddStage1
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the align -> add0 -> add1 chain and connects it to i
        """
        mod = FPAddAlignSingleMod(self.width, self.id_wid)
        a0mod = FPAddStage0Mod(self.width, self.id_wid)
        a1mod = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([mod, a0mod, a1mod]).setup(m, i)

        # the chain's result is the output of its last module
        self.o = a1mod.o

    def process(self, i):
        return self.o

    def action(self, m):
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
548
549
class FPAddStage0Data:
    """ Output bundle of add stage 0: the result number (z), the raw
        mantissa total (tot, four bits wider than z's mantissa) and the
        passed-through out_do_z / oz / mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field of i
            into the matching field of this object
        """
        names = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in names]
562
563
class FPAddStage0Mod:
    """ First stage of add: adds the mantissas when the signs match,
        otherwise subtracts the smaller mantissa from the larger one.
        Skipped (outputs left at default) when i.out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ registers this module in m and feeds it from i
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        i, o = self.i, self.o

        m.submodules.add0_in_a = i.a
        m.submodules.add0_in_b = i.b
        m.submodules.add0_out_z = o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(i.a.m)+1, reset_less=True)
        bm0 = Signal(len(i.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(i.a.s == i.b.s),
                     mge.eq(i.a.m >= i.b.m),
                     am0.eq(Cat(i.a.m, 0)),
                     bm0.eq(Cat(i.b.m, 0))]

        with m.If(~i.out_do_z):
            m.d.comb += o.z.e.eq(i.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [o.tot.eq(am0 + bm0),
                             o.z.s.eq(i.a.s)]
            # a mantissa greater than (or equal to) b, use a
            with m.Elif(mge):
                m.d.comb += [o.tot.eq(am0 - bm0),
                             o.z.s.eq(i.a.s)]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [o.tot.eq(bm0 - am0),
                             o.z.s.eq(i.b.s)]

        # unconditional pass-through
        m.d.comb += [o.oz.eq(i.oz),
                     o.out_do_z.eq(i.out_do_z),
                     o.mid.eq(i.mid)]
        return m
628
629
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM state wrapper around FPAddStage0Mod: registers the module's
        output bundle and moves on to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod takes (width, id_wid)
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
651
652
class FPAddStage1Data:
    """ Output bundle of add stage 1: the result number (z), its
        Overflow companion (of) and the passed-through
        out_do_z / oz / mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field of i
            into the matching field of this object
        """
        names = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in names]
665
666
667
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
        and splits tot into mantissa plus m0/guard/round/sticky bits.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ registers this module (and its output overflow) in m and
            feeds it from i
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        i, o = self.i, self.o
        tot = i.tot

        # default: z passed through (mantissa/exponent overridden below)
        m.d.comb += o.z.eq(i.z)

        with m.If(~i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows. shift
            # result down by one extra bit and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += [o.z.m.eq(tot[4:]),
                             o.of.m0.eq(tot[4]),
                             o.of.guard.eq(tot[3]),
                             o.of.round_bit.eq(tot[2]),
                             o.of.sticky.eq(tot[1] | tot[0]),
                             o.z.e.eq(i.z.e + 1)]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [o.z.m.eq(tot[3:]),
                             o.of.m0.eq(tot[3]),
                             o.of.guard.eq(tot[2]),
                             o.of.round_bit.eq(tot[1]),
                             o.of.sticky.eq(tot[0])]

        # unconditional pass-through
        m.d.comb += [o.out_do_z.eq(i.out_do_z),
                     o.oz.eq(i.oz),
                     o.mid.eq(i.mid)]

        return m
725
726
class FPAddStage1(FPState):
    """ FSM state wrapper around FPAddStage1Mod: registers the module's
        z and overflow outputs and moves on to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod takes (width, id_wid)
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # the module's outputs live in its "o" bundle (fields of and z)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): this unconditional assignment comes after the
        # eq(0) above in the same domain, so it is the one that takes
        # effect.  presumably it was meant to sit in action() - confirm
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
749
750
class FPNormaliseModSingle:
    """ Single-cycle "decrease only" normalisation: when the input
        mantissa MSB is zero, shifts the mantissa (with guard and round
        bits appended) up by its leading-zero count and reduces the
        exponent by the same amount.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow bundles used by elaborate().  these were referenced
        # there but never created
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        return FPNumBase(self.width, False)

    def ospec(self):
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input of this module is in_z (there is no "i" attribute)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        # registered under the same names FPNorm1ModMulti uses
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # NOTE(review): this shifter is instantiated but never connected
        # in this module - confirm whether it is still wanted
        espec = (len(in_z.e), True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
817
818
class FPNorm1Data:
    """ Output bundle of normalisation: the result number (z), the
        single-bit roundz flag and the passed-through
        out_do_z / oz / mid.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field of i
            into the matching field of this object
        """
        names = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in names]
831
832
class FPNorm1ModSingle:
    """ Single-cycle normalisation.

        "decrease": mantissa MSB is zero and the exponent is above the
        minimum, so the mantissa is shifted up by its leading-zero count
        (limited so the exponent does not go below the minimum).
        "increase": the exponent is below the minimum, so the mantissa
        is shifted down through the multi-shifter until the exponent
        reaches N126.
        Neither is applied when i.out_do_z is set.  mid, out_do_z and oz
        are always passed through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input bundle
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the value fed to msr.inp below is mwid+1 bits
        # wide; confirm that a shifter created with mwid is wide enough
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come out of the shifter as well.
                    # they used to be read from temp_s, which is only
                    # driven in the "decrease" branch and is therefore
                    # zero whenever this branch is active
                    of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),      # guard
                    of.round_bit.eq(msr.m[1]),  # round
                    of.sticky.eq(msr.m[0]),     # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
939
940
class FPNorm1ModMulti:
    """ Multi-cycle normalisation: each evaluation moves the number one
        bit position (exponent +1 or -1).  in_select chooses between the
        fresh input (in_z / in_of) and the fed-back intermediate
        (temp_z / temp_of).  out_norm is set while a further step is
        still required.

        NOTE(review): single_cycle is accepted but not used here.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven in elaborate(); was referenced there but never created
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),    # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),   # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),    # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),    # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),   # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1006
1007
class FPNorm1Single(FPState):
    """ FSM state wrapper around FPNorm1ModSingle; moves on to "round".

        NOTE(review): single_cycle is accepted but not used here.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle takes (width, id_wid)
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"
1030
1031
class FPNorm1Multi(FPState):
    """ FSM state wrapper around FPNorm1ModMulti: stays in
        "normalise_1", feeding the module's output back in through
        temp_z / temp_of, until out_norm drops; then moves to "round".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): no setup() is visible on FPNorm1ModMulti in this
        # file - confirm where this call is expected to resolve
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0)  # sets to zero when not in normalise_1 state

    def action(self, m):
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed this step's result back for the next one
        m.d.sync += [self.temp_of.eq(self.mod.out_of),
                     self.temp_z.eq(self.out_z)]
        with m.If(self.out_norm):
            # still normalising: ack follows in_accept (1 when the input
            # was accepted in this cycle, 0 otherwise)
            m.d.sync += self.ack.eq(self.in_accept)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [self.ack.eq(1),
                         self.out_roundz.eq(self.mod.out_of.roundz)]
1072
1073
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Combined normalise / round / corrections / pack stage.

        Acts both as FSM state "normalise_1" and as an UnbufferedPipeline
        whose stage is the object itself.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to every chained module
            * id_wid: id field width handed to every chained module
        """
        FPState.__init__(self, "normalise_1")
        # width/id_wid must be stored before the pipeline is initialised
        # with this object as its own stage
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        """ input spec (same as Norm1ModSingle ispec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output spec (same as FPPackMod ospec)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding, Corrections, Pack - in a chain
        stages = [FPNorm1ModSingle(self.width, self.id_wid),
                  FPRoundMod(self.width, self.id_wid),
                  FPCorrectionsMod(self.width, self.id_wid),
                  FPPackMod(self.width, self.id_wid)]
        StageChain(stages).setup(m, i)

        # the pack module is last in the chain: its output is ours
        packer = stages[-1]
        self.out_z = packer.ospec()
        self.o = packer.o

    def process(self, i):
        """ stage API: the result is the pack module's output
        """
        return self.o

    def action(self, m):
        """ FSM action: register the packed result, go to "pack_put_z"
        """
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
1109
1110
class FPRoundData:
    """ Data record with four fields: z, out_do_z, oz and mid.
    """

    # field names, in the order eq() emits their assignments
    _FIELDS = ("z", "out_do_z", "oz", "mid")

    def __init__(self, width, id_wid):
        """ * width: bit-width of z and of oz
            * id_wid: bit-width of mid
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in self._FIELDS]
1122
1123
class FPRoundMod:
    """ Combinatorial rounding module.

        Copies its input to out_z, then (unless out_do_z is set) rounds
        the mantissa up when the input's roundz is set.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output specs
            * id_wid: id field width handed to the input/output specs
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output spec
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is out_z
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as submodule "roundz" and wires its input
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the rounding logic
        """
        m = Module()
        inp = self.i
        res = self.out_z

        # default: straight copy (mid, z, out_do_z, oz) ...
        m.d.comb += res.eq(inp)
        # ... overridden below when rounding applies
        with m.If(~inp.out_do_z):
            with m.If(inp.roundz):
                m.d.comb += res.z.m.eq(inp.z.m + 1)  # mantissa up
                with m.If(inp.z.m == inp.z.m1s):  # all 1s
                    m.d.comb += res.z.e.eq(inp.z.e + 1)  # exponent up

        return m
1155
1156
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its result.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPRoundMod
            * id_wid: id field width handed to FPRoundMod
        """
        FPState.__init__(self, "round")
        # FPRoundMod.__init__ requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in this class; presumably
        # inherited - confirm against FPState.
        self.idsync(m)
        # FPRoundMod exposes its result as "out_z" (it has no "o");
        # FPRoundData.eq copies z, out_do_z, oz and mid in one go.
        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        """ FSM action: unconditionally proceed to "corrections"
        """
        m.next = "corrections"
1181
1182
class FPCorrectionsMod:
    """ Combinatorial corrections module.

        Copies its input to out_z, then (unless out_do_z is set) replaces
        the exponent with z.N127 when the input z is flagged denormalised.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output specs
            * id_wid: id field width handed to the input/output specs
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output spec (same record type as the input)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is out_z
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the corrections logic
        """
        m = Module()
        inp = self.i
        res = self.out_z

        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = res.z

        # default: straight copy (mid, z, out_do_z, oz) ...
        m.d.comb += res.eq(inp)
        # ... with the exponent overridden for denormalised numbers
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                m.d.comb += res.z.e.eq(inp.z.N127)
        return m
1215
1216
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its result.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPCorrectionsMod
            * id_wid: id field width handed to FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPCorrectionsMod exposes its result as "out_z" (it has no "o");
        # FPRoundData.eq copies z, out_do_z, oz and mid in one go.
        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        """ FSM action: unconditionally proceed to "pack"
        """
        m.next = "pack"
1240
1241
class FPPackData:
    """ Data record holding a packed result z and its id mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z
            * id_wid: bit-width of mid
        """
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z, then mid
        """
        return [self.z, self.mid]
1253
1254
class FPPackMod:
    """ Combinatorial pack module: turns an FPRoundData record into an
        FPPackData record (packed value plus id).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the specs and to FPNumOut
            * id_wid: id field width handed to the specs
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input spec
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output spec
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is o
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ creates the packing logic
        """
        m = Module()
        inp = self.i
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = inp.z
        m.submodules.pack_out_z = z

        # the id passes straight through
        m.d.comb += self.o.mid.eq(inp.mid)

        with m.If(~inp.out_do_z):
            # normal path: build the value from the number's fields,
            # or an infinity of the same sign on overflow
            with m.If(inp.z.is_overflowed):
                m.d.comb += z.inf(inp.z.s)
            with m.Else():
                m.d.comb += z.create(inp.z.s, inp.z.e, inp.z.m)
        with m.Else():
            # bypass path: use the pre-computed value oz as-is
            m.d.comb += z.v.eq(inp.oz)

        m.d.comb += self.o.z.eq(z.v)
        return m
1293
1294
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its result.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPPackMod
            * id_wid: id field width handed to FPPackMod
        """
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o", an FPPackData whose members
        # are z and mid (there is no "out_z" on the module and no "v" on
        # the record); FPPackData.eq copies both members.
        m.d.sync += self.out_z.eq(self.mod.o)

    def action(self, m):
        """ FSM action: unconditionally proceed to "pack_put_z"
        """
        m.next = "pack_put_z"
1318
1319
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z.z and waits for it to
        be acknowledged before moving on.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value to output
            * out_z: object whose "z" member carries v / stb / ack
            * in_mid, out_mid: id to copy (skipped when in_mid is None)
            * to_state: state to go to afterwards (default "get_ops")
        """
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: drive the value, raise stb until stb and ack are
            both high, then drop stb and leave for to_state
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        port = self.out_z.z
        m.d.sync += port.v.eq(self.in_z)
        with m.If(port.stb & port.ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1343
1344
class FPPutZIdx(FPState):
    """ FSM state that presents a result on one of several outputs,
        selected by in_mid, and waits for it to be acknowledged.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: object whose "v" member is the value to output
            * out_zs: indexable collection of outputs (v / stb / ack)
            * in_mid: index into out_zs
            * to_state: state to go to afterwards (default "get_ops")
        """
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: drive the selected output, raise its stb until stb
            and ack are both high, then drop stb and leave for to_state
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        selected = self.out_zs[self.in_mid]

        m.d.comb += [outz_stb.eq(selected.stb),
                     outz_ack.eq(selected.ack)]
        m.d.sync += selected.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            m.d.sync += selected.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += selected.stb.eq(1)
1370
1371
class FPOpData:
    """ Data record holding an FPOp (z) and its id (mid).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPOp
            * id_wid: bit-width of mid
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z, then mid
        """
        return [self.z, self.mid]
1382
1383
class FPADDBaseMod:
    """ FSM-based IEEE754 FP adder, assembled from FPState objects.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FSM states, in registration order
        self.states = []

    def ispec(self):
        """ input spec
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output spec
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ registers an FSM state and hands it back to the caller
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states using the selected builder
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        # one FSM state per registered object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the long (one state per step) version of the FSM

            NOTE(review): this method reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__ - confirm
            whether this path is still expected to work.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single- or multi-cycle, identical wiring either way
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle version also takes the strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from pack, one from special-cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the compact (reduced number of stages) version of the FSM
        """
        width, id_wid = self.width, self.id_wid

        get = FPGet2Op("get_ops", "special_cases", width, id_wid)
        sc = FPAddSpecialCasesDeNorm(width, id_wid)
        alm = FPAddAlignSingleAdd(width, id_wid)
        n1 = FPNormToPack(width, id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        # wire the four stages together, fed from this module's input
        chainlist = [get, sc, alm, n1]
        StageChain(chainlist, specallocate=True).setup(m, self.i)

        # every chained stage is also an FSM state
        for stage in chainlist:
            self.add_state(stage)

        # final state: put the packed result on the output.  (no separate
        # "put_z" state is registered in compact mode.)
        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))
1506
1507
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod (itself an FSM) and
        bridges the outer stb/ack handshake to the inner module.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * i: input data, copied to self.i and on to the inner module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                     ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: accept an operand pair, start the inner adder, and
            move to "put_z" once the inner module signals z_done
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                             ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                         ]
            m.next = "put_z"
        # (statements that used to follow an unconditional "return" here
        # could never execute and have been removed)
1597
1598
class FPADDBasePipe(ControlBase):
    """ Adder pipeline built from three connected stages:
        special-cases/denorm, align/add, and normalise-to-pack.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to each stage
            * id_wid: id field width handed to each stage
        """
        ControlBase.__init__(self)
        stages = [FPAddSpecialCasesDeNorm(width, id_wid),
                  FPAddAlignSingleAdd(width, id_wid),
                  FPNormToPack(width, id_wid)]
        self.pipe1, self.pipe2, self.pipe3 = stages

        # statements returned by connect(); added to comb in elaborate()
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages as submodules and applies the
            connection statements
        """
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
1615
1616
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in pipe with num_rows inputs, carrying FPADDBaseData through
        a PassThroughStage.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to FPADDBaseData
            * num_rows: number of inputs (p_len)
        """
        self.num_rows = num_rows

        def iospec():
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
1623
1624
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out pipe with num_rows outputs, carrying FPPackData through
        a PassThroughStage.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to FPPackData
            * num_rows: number of outputs (n_len)
        """
        self.num_rows = num_rows

        def iospec():
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
1631
1632
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to all three sub-pipes
            * num_rows: number of fan-in inputs / fan-out outputs
        """
        self.num_rows = num_rows
        fan_in = FPADDInMuxPipe(width, id_wid, num_rows)
        adder = FPADDBasePipe(width, id_wid)
        fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)
        self.inpipe = fan_in     # fan-in
        self.fpadd = adder       # add stage
        self.outpipe = fan_out   # fan-out

        # this class's own in/out are those of the first and last pipes
        self.p = fan_in.p
        self.n = fan_out.n
        self._ports = fan_in.ports() + fan_out.ports()

    def elaborate(self, platform):
        """ registers the three pipes and connects them in sequence
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        to_adder = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += to_adder
        from_adder = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += from_adder

        return m

    def ports(self):
        """ returns the port list computed at construction time
        """
        return self._ports
1665
1666
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) input pairs and of outputs
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # input operand pairs, one per row
        operand_pairs = []
        for row in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % row
            in_b.name = "in_b_%d" % row
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # result outputs, one per row
        results = []
        for row in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % row
            results.append(out_z)
        self.res = Array(results)

        # FSM states, in registration order
        self.states = []

    def add_state(self, state):
        """ registers an FSM state and hands it back to the caller
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first row's operands are wired up here
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a),
                     abd.b.eq(b),
                     abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # output state: routes the result by id, then loops to "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        # one FSM state per registered object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1756
1757
if __name__ == "__main__":
    # hard-wired selector: the first branch builds the full FPADD, the
    # other one the bare FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports() +
                 alu.rs[0][1].ports() +
                 alu.res[0].ports() +
                 [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=ports)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b] +
                 alu.in_t.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
        main(alu, ports=ports)
1771
1772
1773 # works... but don't use, just do "python fname.py convert -t v"
1774 #print (verilog.convert(alu, ports=[
1775 # ports=alu.in_a.ports() + \
1776 # alu.in_b.ports() + \
1777 # alu.out_z.ports())