use StageChain in FPADDBaseMod compact mode
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 #from fpbase import FPNumShiftMultiRight
18
19
class FPState(FPBase):
    """ holds the name of a state (state_from) and lets named inputs and
        outputs be attached to the object as plain attributes
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the inputs dict, then mirrors every entry as an attribute
        """
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """ stores the outputs dict, then mirrors every entry as an attribute
        """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
33
34
class FPGetOpMod:
    """ combinatorial operand capture: out_decode is set when both ack and
        stb of in_op are set, and out_op is then driven from in_op.v
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op

        handshake = (self.in_op.ack) & (self.in_op.stb)
        m.d.comb += self.out_decode.eq(handshake)

        # out_op is only driven while the handshake is active
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
51
52
class FPGetOp(FPState):
    """ gets operand: wraps FPGetOpMod and latches its output
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the module (under this state's name) and wires it up
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += self.mod.in_op.eq(in_op)
        m.d.comb += self.out_decode.eq(self.mod.out_decode)

    def action(self, m):
        """ once decoded: drop ack, latch the operand, move to out_state.
            otherwise keep raising ack.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
81
82
class FPNumBase2Ops:
    """ two FPNumBase operands (a, b) plus a "mid" signal of width id_wid
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        pairs = ((self.a, i.a), (self.b, i.b), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]

    def ports(self):
        members = [self.a, self.b]
        members.append(self.mid)
        return members
95
96
class FPADDBaseData:
    """ raw adder input: two width-bit signals (a, b) and a "mid" signal
        of width id_wid
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        pairs = ((self.a, i.a), (self.b, i.b), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]

    def ports(self):
        members = [self.a, self.b]
        members.append(self.mid)
        return members
111
112
class FPGet2OpMod(Trigger):
    """ passes the input data (i) through to the output (o),
        combinatorially, while the inherited trigger signal is set
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (same as the input format) """
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        # the argument is ignored: the stage's own output is returned
        return self.o

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
137
138
class FPGet2Op(FPState):
    """ gets operands: wraps FPGet2OpMod and latches its output
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ registers the module as "get_ops" and connects data, strobe,
            ack and trigger
        """
        mod = self.mod
        m.submodules.get_ops = mod
        m.d.comb += mod.i.eq(i)
        m.d.comb += [
            mod.stb.eq(in_stb),
            self.out_ack.eq(mod.ack),
            self.out_decode.eq(mod.trigger),
            in_ack.eq(mod.ack),
        ]

    def action(self, m):
        """ once triggered: drop ack, latch the data, move to out_state.
            otherwise keep raising ack.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
171
172
class FPSCData:
    """ special-cases output data: operands a and b, result z, the raw
        result oz, the out_do_z flag and the "mid" signal
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.a, i.a),
            (self.b, i.b),
            (self.mid, i.mid),
        )
        return [dst.eq(src) for dst, src in pairs]
186
187
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        combinatorial: decodes i.a and i.b, then either produces a result
        in o.z with o.out_do_z set, or (no special case matched) clears
        o.out_do_z and passes the decoded operands on in o.a / o.b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        # the argument is ignored: the stage's own output is returned
        return self.o

    def elaborate(self, platform):
        m = Module()
        o = self.o

        m.submodules.sc_out_z = o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a), b1.decode(self.i.b)]

        # signs differ / mantissas identical
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling is under
        # review: signed-zero-plus-NaN cases are not treated specially.

        # if a is NaN or b is NaN return NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [o.out_do_z.eq(1), o.z.nan(0)]

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(a1.s)]
            # if a is inf and signs don't match return NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += o.z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(b1.s)]

        # if a is zero and b zero return signed-a/b
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         o.z.create(b1.s, b1.e, b1.m[3:-1])]

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         o.z.create(a1.s, a1.e, a1.m[3:-1])]

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [o.out_do_z.eq(1), o.z.zero(0)]

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += [o.out_do_z.eq(0), o.a.eq(a1), o.b.eq(b1)]

        # always: expose the packed result value and pass mid along
        m.d.comb += [o.oz.eq(o.z.v), o.mid.eq(self.i.mid)]

        return m
306
307
class FPID:
    """ optional in/out "mid" signal pair.

        when id_wid is falsy (None or 0) no signals are created: in_mid
        and out_mid are None and idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ adds the sync copy of in_mid to out_mid (if the signals exist)
        """
        # same test as __init__: a width of 0 leaves the signals as None,
        # so testing "is not None" here would call None.eq()
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
321
322
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper around FPAddSpecialCasesMod: goes to "put_z" when the
        module flags a special-case result, otherwise to "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup takes (m, i) only; out_do_z is picked up
        # from the module's output data instead
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        # NOTE(review): idsync is not defined by FPState in this file;
        # presumably it is expected from FPBase or an FPID mixin - confirm
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
348
349
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        chains the special-cases module and the denormalise module
        together as a single stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.width = width
        self.id_wid = id_wid
        # the pipeline is handed itself as its stage
        UnbufferedPipeline.__init__(self, self)
        self.out = self.ospec()

    def ispec(self):
        """ input format: that of the special-cases module """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: that of the denormalise module """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        special = FPAddSpecialCasesMod(self.width, self.id_wid)
        denorm = FPAddDeNormMod(self.width, self.id_wid)

        StageChain([special, denorm]).setup(m, i)

        # an early-out (break-out) variant would also need to keep
        # special.o.out_do_z; it is not used at present

        # the output of the chain is the output of its last module
        self.o = denorm.o

    def process(self, i):
        # the argument is ignored: the chain output is returned
        return self.o

    def action(self, m):
        # no early-out to "put_z": always latch the result and go to align
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
393
394
class FPAddDeNormMod(FPState):
    """ denormalise stage (combinatorial).  unless out_do_z is set, each
        operand is copied through and then either has its exponent set to
        N126 (when exp_n127 is set) or has its top mantissa bit set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (same as the input format) """
        return FPSCData(self.width, self.id_wid)

    def process(self, i):
        # the argument is ignored: the stage's own output is returned
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # identical treatment for a, then b
            for src, dst in ((self.i.a, self.o.a), (self.i.b, self.o.b)):
                m.d.comb += dst.eq(src)
                with m.If(src.exp_n127):
                    m.d.comb += dst.e.eq(src.N126)  # limit exponent
                with m.Else():
                    m.d.comb += dst.m[-1].eq(1)  # set top mantissa bit

        # these are passed through unconditionally
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
445
446
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: latches the denormalised
        operands into out_a / out_b and always moves on to "align"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's results live in its output data (mod.o)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
466
467
class FPAddAlignMultiMod(FPState):
    """ one alignment step: shifts down whichever operand has the smaller
        exponent (using shift_down), or sets exp_eq when the exponents
        are already equal
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        # it *STAYS* in the align state until exponents match

        # defaults: exponents not (yet) equal, operands passed through
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [agtb.eq(self.in_a.e > self.in_b.e),
                     altb.eq(self.in_a.e < self.in_b.e)]

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
508
509
class FPAddAlignMulti(FPState):
    """ FSM wrapper around FPAddAlignMultiMod: latches the module's
        outputs and moves to "add_0" once the exponents are equal
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        mod = self.mod
        m.submodules.align = mod
        # inputs and the exponents-equal flag: combinatorial
        m.d.comb += mod.in_a.eq(in_a)
        m.d.comb += mod.in_b.eq(in_b)
        m.d.comb += self.exp_eq.eq(mod.exp_eq)
        # outputs: latched
        m.d.sync += self.out_a.eq(mod.out_a)
        m.d.sync += self.out_b.eq(mod.out_b)

    def action(self, m):
        # no transition is requested until the exponents match
        with m.If(self.exp_eq):
            m.next = "add_0"
532
533
class FPNumIn2Ops:
    """ align-stage output data: FPNumIn operands a and b, result z,
        the out_do_z flag, the raw result oz and the "mid" signal
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.a, i.a),
            (self.b, i.b),
            (self.mid, i.mid),
        )
        return [dst.eq(src) for dst, src in pairs]
547
548
class FPAddAlignSingleMod:
    """ single-cycle alignment stage (see elaborate)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        # the argument is ignored: the stage's own output is returned
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        i, o = self.i, self.o

        m.submodules.align_in_a = i.a
        m.submodules.align_in_b = i.b
        m.submodules.align_out_a = o.a
        m.submodules.align_out_b = o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(i.a.e), True)
        msr = MultiShiftRMerge(i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences (both directions) and comparisons
        m.d.comb += [
            ediff.eq(i.a.e - i.b.e),
            ediffr.eq(i.b.e - i.a.e),
            elz.eq(i.a.e < i.b.e),
            egz.eq(i.a.e > i.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += o.a.eq(i.a)
        m.d.comb += o.b.eq(i.b)

        # only one shifter (muxed)
        with m.If(~i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [
                    t_inp.eq(i.b),
                    tdiff.eq(ediff),
                    o.b.eq(t_out),
                    o.b.s.eq(i.b.s),  # sign is taken from the input
                ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [
                    t_inp.eq(i.a),
                    tdiff.eq(ediffr),
                    o.a.eq(t_out),
                    o.a.s.eq(i.a.s),  # sign is taken from the input
                ]

        # these are passed through unconditionally
        m.d.comb += [
            o.mid.eq(i.mid),
            o.z.eq(i.z),
            o.out_do_z.eq(i.out_do_z),
            o.oz.eq(i.oz),
        ]

        return m
642
643
class FPAddAlignSingle(FPState):
    """ FSM wrapper around FPAddAlignSingleMod: latches the aligned
        operands into out_a / out_b and always moves on to "add_0"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's results live in its output data (mod.o)
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
663
664
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ chains the single-cycle align module and both add modules
        (stage 0 and stage 1) together as a single stage
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        # the pipeline is handed itself as its stage
        UnbufferedPipeline.__init__(self, self)
        self.a1o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: that of the add stage-1 module """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        align = FPAddAlignSingleMod(self.width, self.id_wid)
        add0 = FPAddStage0Mod(self.width, self.id_wid)
        add1 = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([align, add0, add1]).setup(m, i)

        # the output of the chain is the output of its last module
        self.o = add1.o

    def process(self, i):
        # the argument is ignored: the chain output is returned
        return self.o

    def action(self, m):
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
700
701
class FPAddStage0Data:
    """ add stage-0 output data: result z, the out_do_z flag, the raw
        result oz, the total (tot, 4 bits wider than z's mantissa) and
        the "mid" signal
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.tot, i.tot),
            (self.mid, i.mid),
        )
        return [dst.eq(src) for dst, src in pairs]
714
715
class FPAddStage0Mod:
    """ first stage of add (combinatorial): adds the mantissas when the
        signs match, otherwise subtracts the smaller from the larger
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        # the argument is ignored: the stage's own output is returned
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        a, b = self.i.a, self.i.b
        o = self.o

        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += seq.eq(a.s == b.s)
        m.d.comb += mge.eq(a.m >= b.m)
        m.d.comb += am0.eq(Cat(a.m, 0))
        m.d.comb += bm0.eq(Cat(b.m, 0))

        with m.If(~self.i.out_do_z):
            # the result exponent is taken from a
            m.d.comb += o.z.e.eq(a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [o.tot.eq(am0 + bm0), o.z.s.eq(a.s)]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [o.tot.eq(am0 - bm0), o.z.s.eq(a.s)]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [o.tot.eq(bm0 - am0), o.z.s.eq(b.s)]

        # these are passed through unconditionally
        m.d.comb += [
            o.oz.eq(self.i.oz),
            o.out_do_z.eq(self.i.out_do_z),
            o.mid.eq(self.i.mid),
        ]
        return m
780
781
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper around FPAddStage0Mod: latches the module's output
        and always moves on to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
803
804
class FPAddStage1Data:
    """ add stage-1 output data: result z, the out_do_z flag, the raw
        result oz, the overflow bits (of) and the "mid" signal
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.of, i.of),
            (self.mid, i.mid),
        )
        return [dst.eq(src) for dst, src in pairs]
817
818
819
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the top bit of tot acts as a
        carry bit) and splits tot into mantissa and overflow bits
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        # the argument is ignored: the stage's own output is returned
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z, of = self.o.z, self.o.of

        # default: z copied through (mantissa/exponent overridden below)
        m.d.comb += z.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows.  shift
            # result down: one more low bit goes into sticky and the
            # exponent goes up by one
            with m.If(tot[-1]):
                m.d.comb += z.m.eq(tot[4:])
                m.d.comb += of.m0.eq(tot[4])
                m.d.comb += of.guard.eq(tot[3])
                m.d.comb += of.round_bit.eq(tot[2])
                m.d.comb += of.sticky.eq(tot[1] | tot[0])
                m.d.comb += z.e.eq(self.i.z.e + 1)
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += z.m.eq(tot[3:])
                m.d.comb += of.m0.eq(tot[3])
                m.d.comb += of.guard.eq(tot[2])
                m.d.comb += of.round_bit.eq(tot[1])
                m.d.comb += of.sticky.eq(tot[0])

        # these are passed through unconditionally
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
877
878
class FPAddStage1(FPState):
    """ FSM wrapper around FPAddStage1Mod: latches the module's z and
        overflow outputs and always moves on to "normalise_1"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): norm_stb is assigned 0 here and 1 further down in
        # the same (sync) domain, so as written the later assignment is
        # the effective one.  presumably the "1" was once set only while
        # in the add_1 state - confirm
        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # the module's results live in its output data (mod.o)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
901
902
class FPNormaliseModSingle:
    """ single-cycle normalisation (decrease only): when the mantissa's
        top bit is zero, the mantissa is shifted up by its leading-zero
        count and the exponent is reduced by the same amount
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow (guard/round) bits: elaborate() reads in_of and
        # drives out_of, so both have to exist
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input data format """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output data format """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input number of this module is in_z (there is no self.i)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        # registered for consistency with FPNorm1ModMulti
        m.submodules.norm1_in_overflow = self.in_of
        m.submodules.norm1_out_overflow = self.out_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # NOTE(review): the multi-shifter is instantiated but nothing in
        # this module connects to it - confirm whether it is still needed
        espec = (len(in_z.e), True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
969
970
class FPNorm1Data:
    """ normalise stage output data: the roundz flag, result z, the
        out_do_z flag, the raw result oz and the "mid" signal
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.roundz, i.roundz),
            (self.mid, i.mid),
        )
        return [dst.eq(src) for dst, src in pairs]
983
984
class FPNorm1ModSingle:
    """ single-cycle normalisation.  unless out_do_z is set:
        * "decrease": mantissa MSB is zero and the exponent is above the
          minimum: shift the mantissa up (by the leading-zero count,
          limited by exp_sub_n126) and reduce the exponent to match
        * "increase": exponent is below the minimum: shift the mantissa
          down with the multi-shifter and raise the exponent to N126
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        # the argument is ignored: the stage's own output is returned
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        # local overflow bits: only their roundz goes to the output
        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # internal copy of the input data
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        with m.If(~self.i.out_do_z):
            # decrease exponent
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),       # inverted
                    clz.eq(limclz),              # count zeros from MSB down
                    temp_s.eq(temp_m << clz),    # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),   # exclude bits 0&1
                    of.m0.eq(temp_s[2]),         # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),      # guard
                    of.round_bit.eq(temp_s[0]),  # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come from the shifter output:
                    # temp_s belongs to the "decrease" branch and is not
                    # driven when this branch is active
                    of.m0.eq(msr.m[3]),          # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),       # guard
                    of.round_bit.eq(msr.m[1]),   # round
                    of.sticky.eq(msr.m[0]),      # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # these are passed through unconditionally
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1091
1092
class FPNorm1ModMulti:
    """ one normalisation step: moves the exponent by one (up or down)
        and shifts the mantissa by one bit the other way.  in_select
        chooses between the in_z/in_of and temp_z/temp_of inputs, and
        out_norm is set whenever a step (decrease or increase) is taken.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        # set when this step changed the number (decrease or increase)
        self.out_norm = Signal(reset_less=True)
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),   # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),   # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),   # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1158
1159
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ creates the wrapped module and this state's output records

            * width: number width, passed to the module and FPNumBase
            * id_wid: identifier width, forwarded to the module
            * single_cycle: accepted for signature compatibility; not
              used by this class
        """
        FPState.__init__(self, "normalise_1")
        # forward id_wid: the same module is constructed with
        # (width, id_wid) by FPNormToPack
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input spec: whatever the wrapped module declares
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module declares
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: always move on to the "round" state
        """
        m.next = "round"
1182
1183
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModMulti, with a stb/ack
        handshake and registered temp_z/temp_of feedback records.
    """

    def __init__(self, width, id_wid):
        """ creates the wrapped module plus handshake and feedback signals

            * width: number width, passed to the module and FPNumBase
            * id_wid: accepted but not used in this constructor
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti as defined in this file has no
        # setup() method - confirm where this call is expected to resolve
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ FSM action for "normalise_1": stay while out_norm is high,
            otherwise go to "round"
        """
        # accept when strobe is high and ack is still low
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # register this step's results into the temp records
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1224
1225
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalise, Round, Corrections and Pack chained into a single unit.

        Registered as FSM state "normalise_1"; also initialised as an
        UnbufferedPipeline with itself as the stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        # stored before the pipeline initialiser runs, as in the original
        self.id_wid, self.width = id_wid, width
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        """ input record (same as Norm1ModSingle's ispec)
        """
        spec = FPAddStage1Data(self.width, self.id_wid)
        return spec

    def ospec(self):
        """ output record (same as FPPackMod's ospec)
        """
        spec = FPPackData(self.width, self.id_wid)
        return spec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        w, idw = self.width, self.id_wid

        # Normalisation, Rounding Corrections, Pack - in a chain
        stages = [
            FPNorm1ModSingle(w, idw),
            FPRoundMod(w, idw),
            FPCorrectionsMod(w, idw),
            FPPackMod(w, idw),
        ]
        StageChain(stages).setup(m, i)

        # the packer is last in the chain: expose its output
        packer = stages[-1]
        self.out_z = packer.ospec()
        self.o = packer.o

    def process(self, i):
        """ stage API: the result is always the packer's output record
        """
        return self.o

    def action(self, m):
        """ FSM action: register the packed result, go to "pack_put_z"
        """
        packed = self.process(None)
        m.d.sync += self.out_z.eq(packed)
        m.next = "pack_put_z"
1261
1262
class FPRoundData:
    """ Record carrying z, out_do_z, oz and mid between stages.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of field-by-field assignments from i
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1274
1275
class FPRoundMod:
    """ Combinatorial rounding stage: FPNorm1Data in, FPRoundData out.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record for this stage
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record for this stage
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's out_z
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as submodule "roundz" and feeds it from i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the rounding logic and returns the Module
        """
        m = Module()
        src = self.i
        dst = self.out_z

        # default: pass the input through (copies mid, z, out_do_z)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # mantissa up
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                # all 1s
                with m.If(src.z.m == src.z.m1s):
                    # exponent up
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1307
1308
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its result.
    """

    def __init__(self, width, id_wid):
        """ creates the wrapped module and the registered output record

            * width: number width, forwarded to FPRoundMod
            * id_wid: identifier width, forwarded to FPRoundMod
        """
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: whatever the wrapped module declares
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module declares
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined on FPState in this file;
        # presumably inherited from FPBase - confirm
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: always move on to the "corrections" state
        """
        m.next = "corrections"
1333
1334
class FPCorrectionsMod:
    """ Combinatorial corrections stage: FPRoundData in, FPRoundData out.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record for this stage
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record for this stage
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's out_z
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the corrections logic and returns the Module
        """
        m = Module()
        src = self.i
        dst = self.out_z

        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z

        # default: pass the input through (copies mid, z, out_do_z)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                # denormalised result: exponent is replaced by N127
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1367
1368
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its result.
    """

    def __init__(self, width, id_wid):
        """ creates the wrapped module and the registered output record

            * width: number width, forwarded to FPCorrectionsMod
            * id_wid: identifier width, forwarded to FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: whatever the wrapped module declares
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module declares
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: always move on to the "pack" state
        """
        m.next = "pack"
1392
1393
class FPPackData:
    """ Record holding a packed result value z and its identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid from i
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.mid.eq(i.mid))
        return assigns

    def ports(self):
        """ returns [z, mid]
        """
        return list((self.z, self.mid))
1405
1406
class FPPackMod:
    """ Combinatorial pack stage: FPRoundData in, FPPackData out.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record for this stage
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record for this stage
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's o
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ builds the packing logic and returns the Module
        """
        m = Module()
        z = FPNumOut(self.width, False)
        src = self.i
        num = src.z

        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z

        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            # regular result: infinity on overflow, else assemble s/e/m
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            # out_do_z set: take the value supplied in oz
            m.d.comb += z.v.eq(src.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1445
1446
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its result.
    """

    def __init__(self, width, id_wid):
        """ creates the wrapped module and the registered output record

            * width: number width, forwarded to FPPackMod
            * id_wid: identifier width, forwarded to FPPackMod
        """
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: whatever the wrapped module declares
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module declares
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # out_z and mod.o are both FPPackData: the value field is "z",
        # and FPPackMod exposes its result as "o"
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: always move on to the "pack_put_z" state
        """
        m.next = "pack_put_z"
1470
1471
class FPPutZ(FPState):
    """ FSM state that writes in_z to out_z.z and handshakes via stb/ack.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        # default destination once the output has been acknowledged
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy id and value, raise stb until stb & ack,
            then drop stb and go to to_state
        """
        dest = self.out_z.z

        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [dest.v.eq(self.in_z)]

        with m.If(dest.stb & dest.ack):
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1495
1496
class FPPutZIdx(FPState):
    """ FSM state that writes in_z.v to the out_zs entry selected by
        in_mid, handshaking on that entry's stb/ack.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        # default destination once the output has been acknowledged
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy the value, raise the selected stb until
            stb & ack, then drop stb and go to to_state
        """
        # signal names are kept: they become the generated net names
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)

        # the output entry addressed by the incoming id
        chosen = self.out_zs[self.in_mid]

        m.d.comb += [
            outz_stb.eq(chosen.stb),
            outz_ack.eq(chosen.ack),
        ]
        m.d.sync += [chosen.v.eq(self.in_z.v)]

        with m.If(outz_stb & outz_ack):
            m.d.sync += chosen.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += chosen.stb.eq(1)
1522
1523
class FPOpData:
    """ Record holding an FPOp z and its identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid from i
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.mid.eq(i.mid))
        return assigns

    def ports(self):
        """ returns [z, mid]
        """
        return list((self.z, self.mid))
1534
1535
class FPADDBaseMod:
    """ FSM-based FP adder core: collects a list of states, lets each
        one wire itself into the Module, then emits one FSM state per
        entry.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # filled by add_state(); consumed by get_fragment()
        self.states = []

    def ispec(self):
        """ input record: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record: FPOpData
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the FSM state list and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states (and wire the stages) for the chosen layout
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            # one FSM state per registered stage, keyed by its state_from
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers and wires one FSM state per individual stage

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__, and
            several setup() calls below pass more positional arguments
            than the setup(m, i) / setup(m, in_z) methods of
            FPNorm1Single, FPRound, FPCorrections and FPPack accept -
            confirm whether this path is still expected to work.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single- or multi-cycle variant
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single- or multi-cycle variant
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed by the packer, one by special-cases
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced set of states: get_ops, three chained
            combined stages, and the pack_put_z output state
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        # chain the three stages together, fed from get.o
        chainlist = [sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, get.o)

        # each chained stage also becomes an FSM state
        # (note: this rebinds sc to the last stage added)
        for mod in chainlist:
            sc = self.add_state(mod)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1657
1658
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod, connecting its trigger
        and output ports to this state's own in_t and o records.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec: whatever the wrapped module declares
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module declares
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links the wrapped module to this state's inputs and outputs

            * m: Module being built
            * i: input record, copied into self.i and on into the module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: start the add on an accepted strobe, then wait
            for z_done and move to "put_z"
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1748
1749
class FPADDBasePipe(ControlBase):
    """ Three pipeline stages connected back to back:
        special-cases/denorm, align/add, normalise-to-pack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages and applies the connection equations
        """
        m = Module()
        named = (
            ("scnorm", self.pipe1),
            ("addalign", self.pipe2),
            ("normpack", self.pipe3),
        )
        for label, pipe in named:
            setattr(m.submodules, label, pipe)
        m.d.comb += self._eqs
        return m
1766
1767
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in pipe with num_rows inputs, passing FPADDBaseData through.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # every port of the pass-through stage uses this record
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
1774
1775
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out pipe with num_rows outputs, passing FPPackData through.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # every port of the pass-through stage uses this record
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
1782
1783
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        fan_in = FPADDInMuxPipe(width, id_wid, num_rows)
        adder = FPADDBasePipe(width, id_wid)
        fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)
        self.inpipe = fan_in    # fan-in
        self.fpadd = adder      # add stage
        self.outpipe = fan_out  # fan-out

        # this class's own in/out are simply those of the outer pipes
        self.p = fan_in.p
        self.n = fan_out.n
        self._ports = fan_in.ports() + fan_out.ports()

    def elaborate(self, platform):
        """ registers the three pipes and links them in sequence
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder, then adder -> fan-out
        in_to_add = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += in_to_add
        add_to_out = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += add_to_out

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
1816
1817
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of out_z
              results created in self.rs / self.res
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        # NOTE(review): the class derives from FPID but keeps a separate
        # FPID instance here rather than calling FPID.__init__ - confirm
        self.ids = FPID(id_wid)

        # input operand pairs, named in_a_<n> / in_b_<n>
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # output results, named out_z_<n>
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # filled by add_state(); consumed by get_fragment()
        self.states = []

    def add_state(self, state):
        """ appends state to the FSM state list and returns it unchanged
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only operand pair 0 is wired into the FSM below
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        # register both operands and the id into the adder's input record
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # output state: writes to the self.res entry selected by o.mid,
        # then returns to "get_a"
        pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                      o.mid, "get_a"))

        with m.FSM() as fsm:

            # one FSM state per registered stage, keyed by its state_from
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1907
1908
if __name__ == "__main__":
    # the constant below selects which of the two top-levels gets built
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports()
                         + alu.rs[0][1].ports()
                         + alu.res[0].ports()
                         + [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b]
                         + alu.in_t.ports()
                         + alu.out_z.ports()
                         + [alu.in_mid, alu.out_mid]))
1922
1923
1924 # works... but don't use, just do "python fname.py convert -t v"
1925 #print (verilog.convert(alu, ports=[
1926 # ports=alu.in_a.ports() + \
1927 # alu.in_b.ports() + \
1928 # alu.out_z.ports())