cleanup
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 #from fpbase import FPNumShiftMultiRight
18
19
class FPState(FPBase):
    """ Base class for the FSM states in this file.

        Remembers the name of the state it represents and can mirror the
        entries of an inputs/outputs dictionary as instance attributes.
    """

    def __init__(self, state_from):
        # name of the FSM state this object implements
        self.state_from = state_from

    def _mirror(self, mapping):
        """ exposes every key of mapping as an attribute of this state
        """
        for name, value in mapping.items():
            setattr(self, name, value)

    def set_inputs(self, inputs):
        """ records the inputs dict; each entry also becomes an attribute
        """
        self.inputs = inputs
        self._mirror(inputs)

    def set_outputs(self, outputs):
        """ records the outputs dict; each entry also becomes an attribute
        """
        self.outputs = outputs
        self._mirror(outputs)
33
34
class FPGetOpMod:
    """ Combinatorial operand capture.

        out_decode is asserted when both ack and stb of the incoming
        FPOp are set; while it is asserted, out_op follows the operand
        value (in_op.v).
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        op = self.in_op
        m.submodules.get_op_in = op

        # handshake complete: both sides agree
        m.d.comb += self.out_decode.eq((op.ack) & (op.stb))

        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(op.v)

        return m
51
52
class FPGetOp(FPState):
    """ FSM state that acquires a single operand.

        Wraps an FPGetOpMod; once the module reports a completed
        handshake the operand is latched and the FSM moves to out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the module (under this state's name) and wires
            its input operand and decode flag
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ FSM behaviour: wait (ack high) until the operand is decoded,
            then drop ack, latch the operand and advance
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
81
82
class FPNumBase2Ops:
    """ Data bundle: two FPNumBase operands (a, b) plus a mid tag.
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        assignments = []
        assignments.append(self.a.eq(i.a))
        assignments.append(self.b.eq(i.b))
        assignments.append(self.mid.eq(i.mid))
        return assignments

    def ports(self):
        """ returns the members of the bundle as a list: a, b, mid
        """
        return list((self.a, self.b, self.mid))
95
96
class FPADDBaseData:
    """ Raw adder input bundle: operands a and b as plain width-bit
        Signals, plus a mid tag of id_wid bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        assignments = []
        assignments.append(self.a.eq(i.a))
        assignments.append(self.b.eq(i.b))
        assignments.append(self.mid.eq(i.mid))
        return assignments

    def ports(self):
        """ returns the members of the bundle as a list: a, b, mid
        """
        return list((self.a, self.b, self.mid))
111
112
class FPGet2OpMod(Trigger):
    """ Two-operand capture built on Trigger: while self.trigger is
        asserted the input bundle is copied combinatorially to the
        output bundle.
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's output bundle
        """
        return self.o

    def elaborate(self, platform):
        # start from whatever Trigger builds, then add the gated copy
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
137
138
class FPGet2Op(FPState):
    """ FSM state that acquires both operands (and mid) in one go,
        using an FPGet2OpMod for the stb/ack handshake.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ registers the module and wires up data plus handshake:
            stb goes in, the module's ack is mirrored to out_ack and
            to the caller-supplied in_ack
        """
        mod = self.mod
        m.submodules.get_ops = mod
        m.d.comb += [
            mod.i.eq(i),
            mod.stb.eq(in_stb),
            self.out_ack.eq(mod.ack),
            self.out_decode.eq(mod.trigger),
            in_ack.eq(mod.ack),
        ]

    def action(self, m):
        """ FSM behaviour: hold ack high until triggered, then drop ack,
            latch the operands and advance to out_state
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
171
172
class FPSCData:
    """ Data bundle produced by the special-cases stage: decoded
        operands a and b, a candidate result z with its packed value oz,
        the out_do_z flag and the mid tag.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        assignments = [
            self.z.eq(i.z),
            self.out_do_z.eq(i.out_do_z),
            self.oz.eq(i.oz),
        ]
        assignments += [
            self.a.eq(i.a),
            self.b.eq(i.b),
            self.mid.eq(i.mid),
        ]
        return assignments
186
187
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Decodes the raw operands and checks them against a priority
        list of special cases.  When one matches, out_do_z is set and
        the result is placed in o.z (packed copy in o.oz); otherwise
        out_do_z is cleared and the decoded operands are passed on in
        o.a / o.b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: raw operands (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module in m and connects its input to i
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the result is always this module's output bundle
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        o = self.o
        z = o.z

        m.submodules.sc_out_z = z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a),
                     b1.decode(self.i.b),
                    ]

        # signs differ / mantissas identical
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling is under
        # review: earlier experiments special-cased (+/-)zero combined
        # with NaN ahead of the generic NaN test below.

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.nan(0)

        # a is inf: result is inf with a's sign...
        with m.Elif(a1.is_inf):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.inf(a1.s)
            # ...unless b has the max exponent and the opposite sign: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.inf(b1.s)

        # both zero: zero, negative only if both operands are negative
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.create(a1.s & b1.s, b1.e, b1.m[3:-1])

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.create(b1.s, b1.e, b1.m[3:-1])

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.create(a1.s, a1.e, a1.m[3:-1])

        # a == -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.zero(0)

        # no special case: denormalised number checks come next, so
        # pass the decoded a/b through
        with m.Else():
            m.d.comb += o.out_do_z.eq(0)
            m.d.comb += o.a.eq(a1)
            m.d.comb += o.b.eq(b1)

        # packed result and id are always forwarded
        m.d.comb += o.oz.eq(z.v)
        m.d.comb += o.mid.eq(self.i.mid)

        return m
306
307
class FPID:
    """ Optional id ("mid") tracking.

        When id_wid is truthy, in_mid/out_mid are id_wid-bit Signals;
        otherwise both are None and idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid on the sync domain (if present)
        """
        # test the signal itself rather than "id_wid is not None": an
        # id_wid of 0 creates no signals and must not be dereferenced
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
321
322
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper around FPAddSpecialCasesMod: goes to "put_z" when
        the module flags a special-case result, else to "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # the module requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)
        # the module exposes its results through its "o" bundle
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        # mid is already registered by setup(); no separate id sync is
        # performed here (this class has no in_mid/out_mid to sync)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
348
349
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Combines the special-cases module and the denormalise module
        into a single pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # sub-modules must exist before the pipeline is initialised:
        # ispec/ospec below delegate to them
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
        self.o = self.ospec()

    def ispec(self):
        """ input format is that of the special-cases module
        """
        return self.smod.ispec()

    def ospec(self):
        """ output format is that of the denormalise module
        """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ chains special-cases into denormalise and exposes the
            denormalise output as this stage's output
        """
        self.smod.setup(m, i)
        self.dmod.setup(m, self.smod.o)

        # NOTE: an early-out (split pipeline) variant would additionally
        # capture smod.o.out_do_z, smod.o.z.v and smod.o.mid here; it is
        # not implemented, so out_do_z simply travels inside the bundle.

        m.d.comb += self.o.eq(self.dmod.o)

    def process(self, i):
        """ stage API: the result is always this stage's output bundle
        """
        return self.o

    def action(self, m):
        # no early-out to "put_z": always continue to alignment
        m.next = "align"
394
395
class FPAddDeNormMod(FPState):
    """ Denormalised-number handling for both operands.

        Unless out_do_z is set, each operand is copied through and then
        adjusted: when exp_n127 is set the exponent is replaced by N126,
        otherwise the top mantissa bit is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module in m and connects its input to i
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # identical treatment for a, then b
            for src, dst in ((self.i.a, self.o.a), (self.i.b, self.o.b)):
                m.d.comb += dst.eq(src)
                with m.If(src.exp_n127):
                    m.d.comb += dst.e.eq(src.N126)  # limit exponent
                with m.Else():
                    m.d.comb += dst.m[-1].eq(1)  # set top mantissa bit

        # everything else passes straight through
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
443
444
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: registers the adjusted
        operands into out_a / out_b and moves on to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # the module requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its results through its "o" bundle
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks done: continue to alignment
        m.next = "align"
464
465
class FPAddAlignMultiMod(FPState):
    """ One step of multi-cycle exponent alignment.

        Compares the exponents of in_a and in_b and applies shift_down
        to the operand with the smaller exponent; exp_eq is asserted
        when the exponents are already equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        # the FSM *STAYS* in the align state until exponents match

        # defaults: not aligned yet, operands passed through unchanged
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: signal that the next stage may proceed
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)

        return m
506
507
class FPAddAlignMulti(FPState):
    """ FSM wrapper around FPAddAlignMultiMod: remains in "align" until
        the module reports equal exponents, then moves to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ registers the module, feeds it the operands and registers
            its (possibly shifted) outputs
        """
        mod = self.mod
        m.submodules.align = mod
        m.d.comb += [
            mod.in_a.eq(in_a),
            mod.in_b.eq(in_b),
            self.exp_eq.eq(mod.exp_eq),
        ]
        m.d.sync += [
            self.out_a.eq(mod.out_a),
            self.out_b.eq(mod.out_b),
        ]

    def action(self, m):
        # only advance once exponents match; otherwise stay in this state
        with m.If(self.exp_eq):
            m.next = "add_0"
530
531
class FPNumIn2Ops:
    """ Data bundle produced by alignment: operands a and b as FPNumIn,
        the candidate result z with packed value oz, the out_do_z flag
        and the mid tag.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        assignments = [
            self.z.eq(i.z),
            self.out_do_z.eq(i.out_do_z),
            self.oz.eq(i.oz),
        ]
        assignments += [
            self.a.eq(i.a),
            self.b.eq(i.b),
            self.mid.eq(i.mid),
        ]
        return assignments
545
546
class FPAddAlignSingleMod:
    """ Single-cycle exponent alignment of the two operands.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPNumIn2Ops
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's output bundle
        """
        return self.o

    def setup(self, m, i):
        """ registers this module in m and connects its input to i
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        a, b = self.i.a, self.i.b

        m.submodules.align_in_a = a
        m.submodules.align_in_b = b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(a.e), True)
        msr = MultiShiftRMerge(a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences in both directions, and their ordering
        m.d.comb += [
            ediff.eq(a.e - b.e),
            ediffr.eq(b.e - a.e),
            elz.eq(a.e < b.e),
            egz.eq(a.e > b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.o.a.eq(a)
        m.d.comb += self.o.b.eq(b)

        # only one shifter (muxed) between the two operands
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(b),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(b.s),  # sign must be kept
                            ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(a),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(a.s),  # sign must be kept
                            ]

        # everything else passes straight through
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
640
641
class FPAddAlignSingle(FPState):
    """ FSM wrapper around FPAddAlignSingleMod: registers the aligned
        operands into out_a / out_b and moves on to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module exposes its results through its "o" bundle
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
661
662
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Pipeline stage chaining single-cycle alignment with both add
        stages (align -> add0 -> add1).
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: that of add stage 1 (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the three modules, chains them from input i and
            exposes the last module's output as this stage's output
        """
        stages = [
            FPAddAlignSingleMod(self.width, self.id_wid),
            FPAddStage0Mod(self.width, self.id_wid),
            FPAddStage1Mod(self.width, self.id_wid),
        ]
        StageChain(stages).setup(m, i)

        m.d.comb += self.a1o.eq(stages[-1].o)

    def process(self, i):
        """ stage API: the result is always this stage's output bundle
        """
        return self.a1o

    def action(self, m):
        m.next = "normalise_1"
697
698
class FPAddStage0Data:
    """ Data bundle produced by add stage 0: partial result z, the
        mantissa total tot (z.m_width + 4 bits), the out_do_z flag with
        packed value oz, and the mid tag.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        assignments = [
            self.z.eq(i.z),
            self.out_do_z.eq(i.out_do_z),
            self.oz.eq(i.oz),
        ]
        assignments += [
            self.tot.eq(i.tot),
            self.mid.eq(i.mid),
        ]
        return assignments
711
712
class FPAddStage0Mod:
    """ First stage of add: adds the mantissas when the signs agree,
        otherwise subtracts the smaller mantissa from the larger.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPAddStage0Data
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's output bundle
        """
        return self.o

    def setup(self, m, i):
        """ registers this module in m and connects its input to i
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        a, b = self.i.a, self.i.b
        z = self.o.z

        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(a.m)+1, reset_less=True)
        bm0 = Signal(len(b.m)+1, reset_less=True)
        m.d.comb += seq.eq(a.s == b.s)
        m.d.comb += mge.eq(a.m >= b.m)
        m.d.comb += am0.eq(Cat(a.m, 0))
        m.d.comb += bm0.eq(Cat(b.m, 0))

        with m.If(~self.i.out_do_z):
            m.d.comb += z.e.eq(a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += self.o.tot.eq(am0 + bm0)
                m.d.comb += z.s.eq(a.s)
            # a mantissa greater than (or equal to) b, use a
            with m.Elif(mge):
                m.d.comb += self.o.tot.eq(am0 - bm0)
                m.d.comb += z.s.eq(a.s)
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += self.o.tot.eq(bm0 - am0)
                m.d.comb += z.s.eq(b.s)

        # everything else passes straight through
        m.d.comb += [
            self.o.oz.eq(self.i.oz),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.mid.eq(self.i.mid),
        ]
        return m
777
778
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper: registers the module output, then goes to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # the module requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
800
801
class FPAddStage1Data:
    """ Data bundle produced by add stage 1: result z, the Overflow
        bits "of", the out_do_z flag with packed value oz, and the
        mid tag.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        assignments = [
            self.z.eq(i.z),
            self.out_do_z.eq(i.out_do_z),
            self.oz.eq(i.oz),
        ]
        assignments += [
            self.of.eq(i.of),
            self.mid.eq(i.mid),
        ]
        return assignments
814
815
816
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot acts as a
        carry bit) and splits tot into mantissa and overflow bits
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPAddStage0Data
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's output bundle
        """
        return self.o

    def setup(self, m, i):
        """ registers this module (and its overflow output) in m and
            connects its input to i
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z, of = self.o.z, self.o.of

        # start from the incoming z; mantissa/exponent overridden below
        m.d.comb += z.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows.
            # shift result down by one and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += z.m.eq(tot[4:])
                m.d.comb += of.m0.eq(tot[4])
                m.d.comb += of.guard.eq(tot[3])
                m.d.comb += of.round_bit.eq(tot[2])
                m.d.comb += of.sticky.eq(tot[1] | tot[0])
                m.d.comb += z.e.eq(self.i.z.e + 1)
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += z.m.eq(tot[3:])
                m.d.comb += of.m0.eq(tot[3])
                m.d.comb += of.guard.eq(tot[2])
                m.d.comb += of.round_bit.eq(tot[1])
                m.d.comb += of.sticky.eq(tot[0])

        # everything else passes straight through
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
874
875
class FPAddStage1(FPState):
    """ FSM wrapper around FPAddStage1Mod: registers the module's z and
        overflow outputs, raises norm_stb while in the "add_1" state and
        moves on to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # the module requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # the module exposes its results through its "o" bundle
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)

    def action(self, m):
        # raised here (inside the state) rather than in setup(), where
        # it would unconditionally override the default of zero above
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
898
899
class FPNormaliseModSingle:
    """ Single-cycle normalisation (exponent-decrease direction only).

        When the mantissa MSB of the input is zero, leading zeros are
        counted with a PriorityEncoder, the mantissa (with guard and
        round bits appended) is shifted up by that count and the
        exponent is decreased by the same amount.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow (guard/round/sticky) bits travelling with z;
        # elaborate() reads in_of and drives out_of
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input format: FPNumBase
        """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output format: FPNumBase
        """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input number is held in in_z
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # NOTE(review): the shifter is instantiated but never connected
        # in this module - confirm whether it can be dropped
        espec = (len(in_z.e), True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
966
967
class FPNorm1Data:
    """ Data bundle produced by normalisation: result z, the roundz
        flag, the out_do_z flag with packed value oz, and the mid tag.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        assignments = [
            self.z.eq(i.z),
            self.out_do_z.eq(i.out_do_z),
            self.oz.eq(i.oz),
        ]
        assignments += [
            self.roundz.eq(i.roundz),
            self.mid.eq(i.mid),
        ]
        return assignments
980
981
class FPNorm1ModSingle:
    """ Single-cycle normalisation.

        Unless out_do_z is set, either shifts the mantissa up and
        decreases the exponent (mantissa MSB zero and exponent above
        the minimum), or shifts it down and increases the exponent
        (exponent below the minimum).  Also outputs roundz, taken from
        the resulting overflow bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the result is always this module's output bundle
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come from the shifter output:
                    # temp_s is only driven in the "decrease" branch and
                    # would read as zero here
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1088
1089
class FPNorm1ModMulti:
    """ One step of multi-cycle normalisation.

        Works on either in_z/in_of (in_select set) or temp_z/temp_of
        (in_select clear) and moves the exponent by one in whichever
        direction is needed, shifting the mantissa and overflow bits to
        match.  out_norm is asserted while an adjustment is being made.
    """

    def __init__(self, width, single_cycle=True):
        # NOTE: single_cycle is accepted but not used by this class
        self.width = width
        # driven in elaborate(): high while an adjustment is performed
        self.out_norm = Signal(reset_less=True)
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),   # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),   # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),   # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1155
1156
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width handed to the normalisation module
            * id_wid: identifier width handed to the normalisation module
            * single_cycle: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle is constructed with (width, id_wid) elsewhere in
        # this file (see FPNormToPack.setup); id_wid was being dropped here
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: always proceeds to the "round" state
        """
        m.next = "round"
1179
1180
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1": iterates FPNorm1ModMulti until out_norm
        drops, registering the intermediate result on every clock.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the module and to FPNumBase
            * id_wid: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs

            NOTE(review): FPNorm1ModMulti, as defined in this file, has no
            setup() method - confirm where this call is meant to resolve.
        """
        loopback = (self.in_accept, self.temp_z, self.temp_of,
                    self.out_z, self.out_norm)
        self.mod.setup(m, in_z, in_of, norm_stb, *loopback)

        m.d.comb += self.stb.eq(norm_stb)
        # default: ack is cleared whenever action() does not override it
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: keep looping while out_norm is set, then move on
            to "round"
        """
        m.d.comb += self.in_accept.eq(~self.ack & self.stb)
        # register this step's result for use as the next step's input
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
            m.next = "round"
1221
1222
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Combined normalise / round / corrections / pack stage.

        Acts both as the FSM state "normalise_1" and as an
        UnbufferedPipeline that uses itself as its stage.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to every chained module
            * id_wid: identifier width handed to every chained module
        """
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        # the pipeline is its own stage (ispec/ospec/setup/process below)
        UnbufferedPipeline.__init__(self, self)

    def ispec(self):
        """ input data format (same as the Norm1ModSingle ispec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format (same as the FPPackMod ospec)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()
        m.d.comb += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z),  # outputs packed result
        ]

    def process(self, i):
        """ returns the output object created by setup()
        """
        return self.out_z

    def action(self, m):
        """ FSM action: always proceeds to "pack_put_z"
        """
        m.next = "pack_put_z"
1258
1259
class FPRoundData:
    """ Data bundle used on both sides of rounding / corrections / pack-in:
        the number z, the out_do_z flag with its oz value, and the id mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z and oz
            * id_wid: bit-width of mid
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z, out_do_z, oz and mid
            from i (in that order)
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1271
1272
class FPRoundMod:
    """ Rounding module: increments the mantissa when the input's roundz
        flag is set, bumping the exponent if the mantissa was all 1s.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output data
            * id_wid: identifier width handed to the input/output data
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns this module's output object
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as "roundz" and connects its input
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the combinatorial rounding logic
        """
        m = Module()
        src, dst = self.i, self.out_z
        # default: pass-through (copies mid, z, out_do_z); overridden below
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                m.d.comb += dst.z.m.eq(src.z.m + 1)  # mantissa up
                with m.If(src.z.m == src.z.m1s):  # all 1s
                    m.d.comb += dst.z.e.eq(src.z.e + 1)  # exponent up

        return m
1304
1305
class FPRound(FPState):
    """ FSM state "round": registers the output of FPRoundMod.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPRoundMod
            * id_wid: identifier width handed to FPRoundMod
        """
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in this class; presumably it
        # is inherited - confirm it exists on FPState/FPBase.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: always proceeds to "corrections"
        """
        m.next = "corrections"
1330
1331
class FPCorrectionsMod:
    """ Corrections module: replaces the exponent with N127 when the
        input number reports is_denormalised.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output data
            * id_wid: identifier width handed to the input/output data
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns this module's output object
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the combinatorial corrections logic
        """
        m = Module()
        src, dst = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        # default: pass-through (copies mid, z, out_do_z); overridden below
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1364
1365
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of FPCorrectionsMod.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPCorrectionsMod
            * id_wid: identifier width handed to FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: always proceeds to "pack"
        """
        m.next = "pack"
1389
1390
class FPPackData:
    """ Output bundle of the pack stage: result value z plus identifier mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z
            * id_wid: bit-width of mid
        """
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns [z, mid]
        """
        return list((self.z, self.mid))
1402
1403
class FPPackMod:
    """ Pack module: turns the (s, e, m) number into its final value, or
        passes oz straight through when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output data
            * id_wid: identifier width handed to the input/output data
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns this module's output object
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ builds the combinatorial pack logic
        """
        m = Module()
        src = self.i
        packed = FPNumOut(self.width, False)
        m.submodules.pack_in_z = src.z
        m.submodules.pack_out_z = packed
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            # regular result: infinity on overflow, otherwise assemble it
            with m.If(src.z.is_overflowed):
                m.d.comb += packed.inf(src.z.s)
            with m.Else():
                m.d.comb += packed.create(src.z.s, src.z.e, src.z.m)
        with m.Else():
            # an earlier stage already supplied the final value in oz
            m.d.comb += packed.v.eq(src.oz)
        m.d.comb += self.o.z.eq(packed.v)
        return m
1442
1443
class FPPack(FPState):
    """ FSM state "pack": registers the output of FPPackMod.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPPackMod
            * id_wid: identifier width handed to FPPackMod
        """
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o", and FPPackData carries the
        # value in "z" (there is no out_z on the module, nor v on the data)
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: always proceeds to "pack_put_z"
        """
        m.next = "pack_put_z"
1467
1468
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and waits for the stb/ack
        handshake before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value written to out_z.z.v
            * out_z: object whose z attribute has v, stb and ack
            * in_mid: identifier source (may be None: no id copy is made)
            * out_mid: identifier destination
            * to_state: next state; defaults to "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: drive the output and run the handshake
        """
        port = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z)
        with m.If(port.stb & port.ack):
            # receiver acknowledged: drop strobe and leave this state
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1492
1493
class FPPutZIdx(FPState):
    """ As FPPutZ, but the destination is out_zs[in_mid]: the output is
        chosen by indexing out_zs with the identifier.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: object whose v attribute is written to the output
            * out_zs: indexable collection of outputs (each has v/stb/ack)
            * in_mid: index into out_zs
            * to_state: next state; defaults to "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: drive the selected output and run the handshake
        """
        selected = self.out_zs[self.in_mid]
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [
            outz_stb.eq(selected.stb),
            outz_ack.eq(selected.ack),
        ]
        m.d.sync += selected.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # receiver acknowledged: drop strobe and leave this state
            m.d.sync += selected.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += selected.stb.eq(1)
1519
class FPOpData:
    """ Output bundle of FPADDBaseMod: an FPOp named z plus identifier mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPOp
            * id_wid: bit-width of mid
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns [z, mid]
        """
        return list((self.z, self.mid))
1530
1531
class FPADDBaseMod:
    """ FSM-based FP adder: a list of FPState objects, each contributing
        one state to a single FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states (and wire the stages together)
        if self.compact:
            build_stages = self.get_compact_fragment
        else:
            build_stages = self.get_longer_fragment
        build_stages(m, platform)

        # one FSM state per registered stage, named by its state_from
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-step version of the adder

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which __init__ assigns, and the setup()
            argument lists differ from the single-argument form used by
            the compact path - it looks stale; confirm before relying on it.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single- or multi-cycle variant, wired identically
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(width, id_wid))
        else:
            alm = self.add_state(FPAddAlignMulti(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant also takes a strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one after pack, one for the special cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced-stage version of the adder: get operands,
            special-cases+denorm, align+add, normalise-to-pack, put result
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      width, id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(width, id_wid))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1651
1652
class FPADDBase(FPState):
    """ FSM state "fpadd" that wraps a complete FPADDBaseMod: it is both a
        state in an outer FSM and the owner of an inner FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires the wrapped adder to this state's input/output objects
            and registers it as the "fpadd" submodule

            * i: input data copied (combinatorially) into the adder
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: start the inner adder when an operand pair is
            accepted, then move to "put_z" once the result is flagged done

            The statements that used to follow a bare "return" here were
            unreachable (and referenced attributes this class never
            creates); they have been removed.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1742
1743
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: special-cases+denorm, align+add,
        normalise-to-pack.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to each stage
            * id_wid: identifier width handed to each stage
        """
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are created now, added in elaborate()
        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])

    def elaborate(self, platform):
        """ registers the three stages and adds their connections
        """
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1760
1761
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Input mux pipe (num_rows inputs) carrying FPADDBaseData through a
        pass-through stage.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to FPADDBaseData
            * num_rows: number of inputs (p_len)
        """
        self.num_rows = num_rows

        def iospec():
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=num_rows)
1768
1769
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Output mux pipe (num_rows outputs) carrying FPPackData through a
        pass-through stage.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to FPPackData
            * num_rows: number of outputs (n_len)
        """
        self.num_rows = num_rows

        def iospec():
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=num_rows)
1776
1777
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to all three pipes
            * num_rows: number of fan-in inputs and fan-out outputs
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # this class's own in/out are simply the outer ends of the pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes and chains them in -> add -> out
        """
        m = Module()
        for name in ("inpipe", "fpadd", "outpipe"):
            setattr(m.submodules, name, getattr(self, name))

        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list computed in __init__
        """
        return self._ports
1810
1811
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of results
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        operand_pairs = []
        for idx in range(rs_sz):
            pair = (FPOp(width), FPOp(width))
            for op, label in zip(pair, ("in_a_%d", "in_b_%d")):
                op.name = label % idx
            operand_pairs.append(pair)
        self.rs = Array(operand_pairs)

        # results, named out_z_<n>
        results = []
        for idx in range(rs_sz):
            result = FPOp(width)
            result.name = "out_z_%d" % idx
            results.append(result)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected here
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [
            abd.a.eq(geta.out_op),
            abd.b.eq(getb.out_op),
            abd.mid.eq(self.ids.in_mid),
        ]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        # result goes to the output selected by the identifier
        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        # one FSM state per registered stage, named by its state_from
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1901
1902
if __name__ == "__main__":
    # hard-wired developer switch: True builds the full FPADD, the other
    # branch builds FPADDBase on its own
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports() +
                 alu.rs[0][1].ports() +
                 alu.res[0].ports() +
                 [alu.ids.in_mid, alu.ids.out_mid])
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b] +
                 alu.in_t.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
    main(alu, ports=ports)
1916
1917
1918 # works... but don't use, just do "python fname.py convert -t v"
1919 #print (verilog.convert(alu, ports=[
1920 # ports=alu.in_a.ports() + \
1921 # alu.in_b.ports() + \
1922 # alu.out_z.ports())