move inputgroup to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 #from fpbase import FPNumShiftMultiRight
18
19
class FPState(FPBase):
    """ common base for the states of the floating-point FSM.

        state_from is the name this state is known by; subclasses in this
        file pass the same strings that are assigned to m.next.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ records the inputs mapping and also exposes every entry
            as an attribute of the same name
        """
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """ records the outputs mapping and also exposes every entry
            as an attribute of the same name
        """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
33
34
class FPGetOpMod:
    """ combinatorial part of operand fetch: copies the operand value
        to out_op while both ack and stb of in_op are asserted.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op

        # the operand is only taken when both handshake signals are high
        handshake = self.in_op.ack & self.in_op.stb
        m.d.comb += self.out_decode.eq(handshake)

        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)

        return m
51
52
class FPGetOp(FPState):
    """ FSM state which fetches a single operand.

        waits (raising ack) until the FPGetOpMod submodule reports a
        completed handshake, then latches the operand and moves on
        to out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the submodule (under this state's name) and wires
            the operand in and the decode flag out
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ FSM behaviour for this state
        """
        with m.If(self.out_decode):
            # handshake complete: drop ack, capture operand, next state
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            # still waiting: keep ack raised
            m.d.sync += self.in_op.ack.eq(1)
81
82
class FPNumBase2Ops:
    """ data record: two FPNumBase operands (a, b) plus an id (mid)
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("a", "b", "mid")]

    def ports(self):
        """ returns the members of the record, in declaration order
        """
        return [getattr(self, field) for field in ("a", "b", "mid")]
95
96
class FPADDBaseData:
    """ data record for the raw adder inputs: two width-bit operands
        (a, b) and an id (mid) of id_wid bits
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        assigns = [self.a.eq(i.a)]
        assigns.append(self.b.eq(i.b))
        assigns.append(self.mid.eq(i.mid))
        return assigns

    def ports(self):
        """ returns the signals of the record, in declaration order
        """
        members = (self.a, self.b, self.mid)
        return list(members)
111
112
class FPGet2OpMod(Trigger):
    """ passes both operands (and the id) from input record to output
        record, combinatorially, whenever the inherited trigger is set
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record specification """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record specification (same layout as the input) """
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always the output record """
        return self.o

    def elaborate(self, platform):
        # start from the module built by Trigger, then add the copy
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
137
138
class FPGet2Op(FPState):
    """ FSM state which fetches both operands in one go.

        waits (raising the submodule's ack) until the submodule's trigger
        fires, then latches its output record and moves to out_state.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ registers the submodule and wires data and handshake signals
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.i.eq(i),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        """ FSM behaviour for this state
        """
        with m.If(self.out_decode):
            # operands accepted: drop ack, capture them, next state
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            # still waiting: keep ack raised
            m.d.sync += self.mod.ack.eq(1)
171
172
class FPSCData:
    """ data record produced by the special-cases stage: the two decoded
        operands (a, b), an early result (z, with its packed value oz),
        the out_do_z flag and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.out_do_z.eq(i.out_do_z))
        assigns.append(self.oz.eq(i.oz))
        assigns.append(self.a.eq(i.a))
        assigns.append(self.b.eq(i.b))
        assigns.append(self.mid.eq(i.mid))
        return assigns
186
187
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Input record is FPADDBaseData (raw a, b, mid); output record is
        FPSCData.  When one of the special cases matches, out_do_z is set
        and the result is placed in o.z (and, packed, in o.oz); otherwise
        out_do_z is cleared and the decoded operands are passed on in
        o.a / o.b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record specification """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record specification """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the result is always the output record """
        return self.o

    def elaborate(self, platform):
        m = Module()

        m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a),
                     b1.decode(self.i.b),
                    ]

        # set when the operand signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # set when the operand mantissas are identical
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # The If/Elif chain below is a priority chain: only the first
        # matching case takes effect, so the order of the cases matters.

        # if a is NaN or b is NaN return NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(a1.s)
            # if a is inf and signs don't match return NaN
            # (this later assignment overrides the inf() one above)
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(b1.s)

        # if a is zero and b zero return signed-a/b
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)
            m.d.comb += self.o.a.eq(a1)
            m.d.comb += self.o.b.eq(b1)

        # always (outside the chain): packed copy of z, and the id
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
306
307
class FPID:
    """ optional id ("mid") carried alongside an operation.

        id_wid is the width of the id in bits.  When it is falsy (None
        or 0) no id signals are created: in_mid and out_mid are None and
        idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ adds a sync-domain copy of in_mid to out_mid to module m,
            if (and only if) the id signals exist
        """
        # must use the same test as __init__: checking "is not None" here
        # would try to call .eq on None when id_wid is 0
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
321
322
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper around FPAddSpecialCasesMod: latches the early result
        and goes to "put_z" when a special case was detected, otherwise
        continues to "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # the module takes (width, id_wid)
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup takes only (m, i): its results, including
        # the out_do_z flag, are all in its output record self.mod.o
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        """ FSM behaviour for this state
        """
        # the id is already carried by out_z.mid (see setup), so there is
        # no separate id synchronisation step here
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
348
349
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        combines the special-cases module and the denormalise module into
        one pipeline stage.  there is no early-out here: the result of the
        special-cases module (including out_do_z) simply travels through
        the denormalise module and on to the next stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        UnbufferedPipeline.__init__(self, self) # pipe is its own stage
        self.o = self.ospec()

    def ispec(self):
        """ input is whatever the special-cases module takes """
        return self.smod.ispec()

    def ospec(self):
        """ output is whatever the denormalise module produces """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain: i -> special cases -> denormalise
        for stage, source in ((self.smod, i), (self.dmod, self.smod.o)):
            stage.setup(m, source)

        # expose the end of the chain as this stage's output
        m.d.comb += self.o.eq(self.dmod.o)

    def process(self, i):
        """ stage API: the result is always the output record """
        return self.o

    def action(self, m):
        # unconditional (no break-out to "put_z" from this combined stage)
        m.next = "align"
394
395
class FPAddDeNormMod(FPState):
    """ denormalisation stage: FPSCData in, FPSCData out.

        Unless out_do_z is set on the input, each operand is copied
        through and then either has its exponent replaced by N126 (when
        its exp_n127 flag is set) or has the top bit of its mantissa set.
        mid, z, out_do_z and oz are always passed through unchanged.
    """

    def __init__(self, width, id_wid):
        # NOTE(review): FPState.__init__ is not called, so state_from
        # is not set on instances of this class
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record specification """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output record specification (same layout as the input) """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        # operands are only processed when no early result was produced
        with m.If(~self.i.out_do_z):
            # XXX hmmm, don't like repeating identical code
            m.d.comb += self.o.a.eq(self.i.a)
            with m.If(self.i.a.exp_n127):
                m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
            with m.Else():
                m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit

            m.d.comb += self.o.b.eq(self.i.b)
            with m.If(self.i.b.exp_n127):
                m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit b exponent
            with m.Else():
                m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit

        # pass-through of everything else
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
443
444
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: latches the denormalised
        operands into out_a / out_b and always continues to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # the module takes (width, id_wid)
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's results live in its output record (mod.o)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
464
465
class FPAddAlignMultiMod(FPState):
    """ one step of multi-cycle operand alignment.

        compares the exponents of in_a and in_b: the operand with the
        smaller exponent is passed through shift_down() on its way to the
        output, the other is copied unchanged.  exp_eq is set when the
        exponents are equal (in which case both are copied unchanged).
    """

    def __init__(self, width):
        # NOTE(review): FPState.__init__ is not called, so state_from
        # is not set on instances of this class
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        # NOTE(review): an earlier comment here claimed the shift is done
        # "in one go" (single-cycle); that contradicts both the NOTE below
        # and the use of exp_eq by the caller to stay in the align state.

        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through unchanged
        # (overridden by the later assignments in the branches below)
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
506
507
class FPAddAlignMulti(FPState):
    """ FSM wrapper around FPAddAlignMultiMod (multi-cycle alignment):
        latches the module's operands every cycle and only leaves the
        "align" state, for "add_0", once the module reports exp_eq.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod

        # operands in, exponents-equal flag out: combinatorial
        m.d.comb += [
            self.mod.in_a.eq(in_a),
            self.mod.in_b.eq(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        # (partially) aligned operands out: registered
        m.d.sync += [
            self.out_a.eq(self.mod.out_a),
            self.out_b.eq(self.mod.out_b),
        ]

    def action(self, m):
        # no Else: without exp_eq the FSM stays in this state
        with m.If(self.exp_eq):
            m.next = "add_0"
532
533
class FPNumIn2Ops:
    """ data record: two FPNumIn operands (a, b), an early result
        (z, with its packed value oz), the out_do_z flag and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        copy_order = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in copy_order]
547
548
class FPAddAlignSingleMod:
    """ single-cycle operand alignment: FPSCData in, FPNumIn2Ops out.

        mid, z, out_do_z and oz are always passed through unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record specification """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output record specification """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always the output record """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        # signed, same width as the operand exponent
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)   # a.e - b.e
        ediffr = Signal(espec, reset_less=True)  # b.e - a.e
        tdiff = Signal(espec, reset_less=True)   # the difference in use
        elz = Signal(reset_less=True)            # a.e < b.e
        egz = Signal(reset_less=True)            # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
        m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
        m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
        m.d.comb += egz.eq(self.i.a.e > self.i.b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.o.a.eq(self.i.a)
        m.d.comb += self.o.b.eq(self.i.b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # alignment is skipped when an early result was already produced
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(self.i.b),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(self.i.b.s), # whoops forgot sign
                            ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(self.i.a),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(self.i.a.s), # whoops forgot sign
                            ]

        # pass-through of everything else
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
642
643
class FPAddAlignSingle(FPState):
    """ FSM wrapper around FPAddAlignSingleMod: latches the aligned
        operands into out_a / out_b and always continues to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's results live in its output record (mod.o)
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
663
664
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ one pipeline stage made of three chained modules:
        single-cycle align, add stage 0 and add stage 1.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input record specification (start of the chain) """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output record specification (end of the chain: AddStage1) """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        stages = [
            FPAddAlignSingleMod(self.width, self.id_wid),
            FPAddStage0Mod(self.width, self.id_wid),
            FPAddStage1Mod(self.width, self.id_wid),
        ]
        StageChain(stages).setup(m, i)

        # the last module in the chain provides this stage's output
        m.d.comb += self.a1o.eq(stages[-1].o)

    def process(self, i):
        """ stage API: the result is always the output record """
        return self.a1o

    def action(self, m):
        m.next = "normalise_1"
700
701
class FPAddStage0Data:
    """ data record produced by add stage 0: partial result z, the
        mantissa total (tot, 4 bits wider than z's mantissa), the early
        result (oz) with its out_do_z flag, and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.out_do_z.eq(i.out_do_z))
        assigns.append(self.oz.eq(i.oz))
        assigns.append(self.tot.eq(i.tot))
        assigns.append(self.mid.eq(i.mid))
        return assigns
714
715
class FPAddStage0Mod:
    """ first stage of add: produces the mantissa total and the sign
        and exponent of the result.  FPSCData in, FPAddStage0Data out.

        oz, out_do_z and mid are always passed through unchanged; the
        arithmetic is skipped when out_do_z is set on the input.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record specification """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output record specification """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always the output record """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs equal
        mge = Signal(reset_less=True)  # a mantissa >= b mantissa
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
                     mge.eq(self.i.a.m >= self.i.b.m),
                     am0.eq(Cat(self.i.a.m, 0)),
                     bm0.eq(Cat(self.i.b.m, 0))
                    ]
        # skipped when an early result was already produced
        with m.If(~self.i.out_do_z):
            # result exponent is taken from a
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [
                    self.o.tot.eq(am0 + bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [
                    self.o.tot.eq(am0 - bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [
                    self.o.tot.eq(bm0 - am0),
                    self.o.z.s.eq(self.i.b.s)
                ]

        # pass-through of everything else
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        return m
780
781
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper around FPAddStage0Mod: latches the module's output
        record and always continues to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # the module takes (width, id_wid)
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
803
804
class FPAddStage1Data:
    """ data record produced by add stage 1: result z, its Overflow
        record (of), the early result (oz) with its out_do_z flag,
        and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        copy_order = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in copy_order]
817
818
819
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)

        FPAddStage0Data in, FPAddStage1Data out.  splits tot into the
        result mantissa and the m0/guard/round/sticky bits of the
        Overflow record.  out_do_z, oz and mid are always passed through.
    """

    def __init__(self, width, id_wid):
        # NOTE(review): FPState.__init__ is not called, so state_from
        # is not set on instances of this class
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record specification """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output record specification """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always the output record """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        # the output Overflow record is registered on the parent module
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        # default: z copied through (m and e are overridden below)
        m.d.comb += self.o.z.eq(self.i.z)
        # tot[-1] (MSB) gets set when the sum overflows. shift result down
        with m.If(~self.i.out_do_z):
            with m.If(self.i.tot[-1]):
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[4:]),
                    self.o.of.m0.eq(self.i.tot[4]),
                    self.o.of.guard.eq(self.i.tot[3]),
                    self.o.of.round_bit.eq(self.i.tot[2]),
                    # the two bits shifted out both count towards sticky
                    self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
                    self.o.z.e.eq(self.i.z.e + 1)
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[3:]),
                    self.o.of.m0.eq(self.i.tot[3]),
                    self.o.of.guard.eq(self.i.tot[2]),
                    self.o.of.round_bit.eq(self.i.tot[1]),
                    self.o.of.sticky.eq(self.i.tot[0])
                ]

        # pass-through of everything else
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
881
882
class FPAddStage1(FPState):
    """ FSM wrapper around FPAddStage1Mod: latches the module's result
        (z and overflow) and always continues to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # the module takes (width, id_wid)
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module's results live in its output record (mod.o)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): this later assignment overrides the eq(0) above
        # wherever both are active - confirm against how setup is called
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
905
906
class FPNormaliseModSingle:
    """ single-cycle normalisation (exponent decrease only).

        when the top bit of the input mantissa is zero, counts the
        leading zeros, shifts the mantissa (with round and guard bits
        appended below it) up by that amount and decreases the exponent
        to match.  otherwise the input is copied through unchanged.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow records read (in_of) and driven (out_of) by elaborate
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input number specification """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output number specification """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input of this module is in_z (there is no self.i here)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of
        m.submodules.norm1_out_overflow = self.out_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # NOTE(review): this shifter is instantiated but never connected
        espec = (len(in_z.e), True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
973
class FPNorm1Data:
    """ data record produced by normalisation: result z, the roundz
        flag, the early result (oz) with its out_do_z flag, and the
        id (mid)
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.out_do_z.eq(i.out_do_z))
        assigns.append(self.oz.eq(i.oz))
        assigns.append(self.roundz.eq(i.roundz))
        assigns.append(self.mid.eq(i.mid))
        return assigns
986
987
class FPNorm1ModSingle:
    """ single-cycle normalisation: FPAddStage1Data in, FPNorm1Data out.

        decrease: when the top mantissa bit is zero and the exponent is
        above the minimum, the mantissa is shifted up (by the number of
        leading zeros, limited so the exponent does not go below the
        minimum) and the exponent decreased to match.

        increase: when the exponent is below the minimum, the mantissa
        is shifted down through the multi-shifter and the exponent
        increased to match.

        mid, out_do_z and oz are always passed through unchanged; both
        adjustments are skipped when out_do_z is set on the input.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record specification """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record specification """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the result is always the output record """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        # working Overflow record: only its roundz flag is output
        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input record
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the shifter is created mwid wide but is fed the
        # (mwid+1)-bit temp_m in the increase branch - confirm the width
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # skipped when an early result was already produced
        with m.If(~self.i.out_do_z):
            # decrease exponent
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the shifted result is msr.m: temp_s is only driven
                    # in the decrease branch and reads as zero here
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # pass-through of everything else
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1094
1095
class FPNorm1ModMulti:
    """ One-bit-per-pass normalisation module.

        Each evaluation shifts the mantissa by at most one position and
        adjusts the exponent by one.  out_norm is raised whenever a shift
        was applied on this pass.

        * in_select: when high, in_z/in_of are used as the source; when
          low, temp_z/temp_of are used instead
        * out_z, out_of: the (possibly shifted) result of this pass
    """

    def __init__(self, width, single_cycle=True):
        # NOTE: single_cycle is accepted but not used by this class
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven in elaborate(): the attribute must exist before
        # elaboration, otherwise "self.out_norm" raises AttributeError
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # internal copies of whichever source was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                # must follow the full-mantissa assignment: overrides bit 0
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                # bits shifted out at the bottom accumulate into sticky
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1161
1162
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModSingle.

        After this state the FSM always moves on to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # NOTE: single_cycle is accepted but not used by this class
        FPState.__init__(self, "normalise_1")
        # id_wid is passed through, matching how FPNormToPack constructs
        # the same module (FPNorm1ModSingle(width, id_wid))
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data format: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"
1185
1186
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModMulti.

        Stays in this state while out_norm is raised, using an
        stb/ack pair to accept the incoming value, and moves to
        "round" once out_norm is low.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as visible in this file, does not
        # define setup() - confirm where this is expected to come from.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default: ack is zero whenever the state's action does not set it
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        # accept when the strobe is high and we have not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # latch this pass's results for use as the next pass's source
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1227
1228
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Combined normalise / round / corrections / pack stage.

        Acts both as the FSM state "normalise_1" and as an
        UnbufferedPipeline whose stage is this object itself.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        # same input format as Norm1ModSingle
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        # same output format as FPPackMod
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain.
        # Every module takes the same (width, id_wid) constructor args.
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        pack = stages[-1]
        self.out_z = pack.ospec()
        m.d.comb += [
            self.out_z.mid.eq(pack.o.mid),
            self.out_z.z.eq(pack.o.z),  # outputs packed result
        ]

    def process(self, i):
        # result is whatever setup() wired into out_z
        return self.out_z

    def action(self, m):
        m.next = "pack_put_z"
1264
1265
class FPRoundData:
    """ Data record holding z, out_do_z, oz and mid.

        Used in this file as the output format of FPRoundMod and
        FPCorrectionsMod, and as the input format of FPCorrectionsMod
        and FPPackMod.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i's fields into self
        """
        assignments = []
        for field in ("z", "out_do_z", "oz", "mid"):
            assignments.append(getattr(self, field).eq(getattr(i, field)))
        return assignments
1277
1278
class FPRoundMod:
    """ Rounding module.

        Copies the input to the output; when out_do_z is low and roundz
        is high, the mantissa is incremented, and if the mantissa was
        equal to m1s the exponent is incremented too.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """ registers this module as a submodule and connects its input
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z
        m.d.comb += dst.eq(src)  # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                m.d.comb += dst.z.m.eq(src.z.m + 1)  # mantissa up
                with m.If(src.z.m == src.z.m1s):  # all 1s
                    m.d.comb += dst.z.e.eq(src.z.e + 1)  # exponent up

        return m
1310
1311
class FPRound(FPState):
    """ FSM state "round" wrapping FPRoundMod.

        The module's combinatorial result is registered into out_z;
        the FSM then moves on to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in the visible part of this
        # file; presumably inherited via FPState - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1336
1337
class FPCorrectionsMod:
    """ Exponent correction module.

        Copies the input to the output; when out_do_z is low and the
        input number reports is_denormalised, the output exponent is
        replaced with the number's N127 value.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        m.d.comb += dst.eq(src)  # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1370
1371
class FPCorrections(FPState):
    """ FSM state "corrections" wrapping FPCorrectionsMod.

        The module's combinatorial result is registered into out_z;
        the FSM then moves on to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
1395
1396
class FPPackData:
    """ Data record holding a width-bit value z and an identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i's z and mid into self
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's fields as a list: z, then mid
        """
        return list((self.z, self.mid))
1408
1409
class FPPackMod:
    """ Packing module: produces the final width-bit result.

        When out_do_z is high, the pre-computed oz value is passed
        through.  Otherwise the result is built from the input number:
        inf() is used if the number reports is_overflowed, else create()
        with its sign, exponent and mantissa.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        src = self.i
        num = src.z
        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            # result was already decided earlier: pass it straight through
            m.d.comb += z.v.eq(src.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1448
1449
class FPPack(FPState):
    """ FSM state "pack" wrapping FPPackMod.

        The module's combinatorial result is registered into out_z
        (an FPPackData); the FSM then moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o" (an FPPackData), whose
        # value field is "z": there is no "out_z" or "v" to read here
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
1473
1474
class FPPutZ(FPState):
    """ FSM state that writes a result out through out_z.z.

        The value (and, if in_mid is given, the id) is registered every
        cycle; stb is held high until stb and ack are both high, at
        which point stb is cleared and the FSM moves to to_state
        ("get_ops" if none was given).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        port = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z)
        with m.If(port.stb & port.ack):
            # handshake complete
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1498
1499
class FPPutZIdx(FPState):
    """ FSM state that writes a result to one entry of an array of outputs.

        The entry of out_zs is selected by in_mid.  stb on that entry is
        held high until its stb and ack are both high, at which point
        stb is cleared and the FSM moves to to_state ("get_ops" if none
        was given).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += outz_stb.eq(self.out_zs[self.in_mid].stb)
        m.d.comb += outz_ack.eq(self.out_zs[self.in_mid].ack)
        m.d.sync += self.out_zs[self.in_mid].v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # handshake complete on the selected entry
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
1525
class FPOpData:
    """ Data record holding an FPOp z and an identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i's z and mid into self
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's fields as a list: z, then mid
        """
        return list((self.z, self.mid))
1536
1537
class FPADDBaseMod:
    """ FSM-based adder: builds a list of states, then one FSM from them.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ records state for inclusion in the FSM, and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states using the selected stage layout
        if self.compact:
            build_states = self.get_compact_fragment
        else:
            build_states = self.get_longer_fragment
        build_states(m, platform)

        # one FSM state per registered stage, keyed on its state_from name
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the one-state-per-step layout (compact=False)
        """
        # NOTE(review): this method reads self.in_mid, self.out_z and
        # self.out_mid, none of which are assigned by this class - it
        # looks out of date relative to the compact layout; confirm
        # before using compact=False.
        w, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", w))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(w, idw))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(w, idw))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: same wiring either way, only the class differs
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(w, idw))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(w, idw))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(w, idw))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(w, idw))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(w, idw))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(w, idw))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(w, idw))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(w, idw))
        pa.setup(m, cor.out_z, rn.in_mid)

        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced layout (compact=True): get operands,
            special-cases/denorm, align/add, normalise-to-pack, put result
        """
        w, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", w, idw))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(w, idw))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(w, idw))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(w, idw))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        # no separate "put_z" state is registered in this layout:
        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                        sc.o.mid, self.o.mid))
1657
1658
class FPADDBase(FPState):
    """ FSM state "fpadd" that wraps a complete FPADDBaseMod.

        It is both a state and a container of states: it hands an
        operand pair to the inner module through in_t (stb/ack) and
        waits for the inner module's output trigger before moving on
        to "put_z".
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links the wrapped module to this state's inputs and outputs

            * i: input data, copied into self.i and on into the module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
        # (the unreachable statements that used to follow an early
        # "return" here have been removed)
1748
1749
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: special-cases/denorm, align/add,
        normalise-to-pack, connected in that order.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are created once here, added in elaborate
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()
        subs = m.submodules
        subs.scnorm = self.pipe1
        subs.addalign = self.pipe2
        subs.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
1766
1767
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in pipe with num_rows inputs, passing FPADDBaseData through.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # the pass-through stage builds its data from this factory
        stage = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
1774
1775
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out pipe with num_rows outputs, passing FPPackData through.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # the pass-through stage builds its data from this factory
        stage = PassThroughStage(lambda: FPPackData(width, id_wid))
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
1782
1783
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # this class's in/out are simply the fan-in's p and fan-out's n
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        subs = m.submodules
        subs.inpipe = self.inpipe
        subs.fpadd = self.fpadd
        subs.outpipe = self.outpipe

        # fan-in -> adder -> fan-out
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list computed at construction time
        """
        return self._ports
1816
1817
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) input pairs and of out_z
              outputs to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # input operand pairs, one pair per row
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # result outputs, one per row
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        self.states = []

    def add_state(self, state):
        """ records state for inclusion in the FSM, and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first row's operands are wired up here
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # result goes to the entry of self.res selected by o.mid,
        # then the FSM returns to "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1907
1908
if __name__ == "__main__":
    # script entry point: hands the adder and its port list to the
    # nmigen command-line main().  The else-branch is currently disabled.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports()
                         + alu.rs[0][1].ports()
                         + alu.res[0].ports()
                         + [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b]
                         + alu.in_t.ports()
                         + alu.out_z.ports()
                         + [alu.in_mid, alu.out_mid]))


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())