ac2f726e61b125a266be016ab74dbc31e0e3f33f
[ieee754fpu.git] / src / ieee754 / fcvt / pipeline.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 import sys
6 import functools
7
8 from nmigen import Module, Signal, Cat, Const, Mux, Elaboratable
9 from nmigen.cli import main, verilog
10
11 from nmutil.singlepipe import ControlBase
12 from nmutil.concurrentunit import ReservationStations, num_bits
13
14 from ieee754.fpcommon.fpbase import Overflow
15 from ieee754.fpcommon.getop import FPADDBaseData
16 from ieee754.fpcommon.pack import FPPackData
17 from ieee754.fpcommon.normtopack import FPNormToPack
18 from ieee754.fpcommon.postcalc import FPAddStage1Data
19 from ieee754.fpcommon.msbhigh import FPMSBHigh
20 from ieee754.fpcommon.exphigh import FPEXPHigh
21
22
23 from nmigen import Module, Signal, Elaboratable
24 from math import log
25
26 from ieee754.fpcommon.fpbase import FPNumIn, FPNumOut, FPNumBaseRecord
27 from ieee754.fpcommon.fpbase import FPState, FPNumBase
28 from ieee754.fpcommon.getop import FPPipeContext
29
30 from ieee754.fpcommon.fpbase import FPNumDecode, FPNumBaseRecord
31 from nmutil.singlepipe import SimpleHandshake, StageChain
32
33 from ieee754.fpcommon.fpbase import FPState
34 from ieee754.pipeline import PipelineSpec
35
36 from ieee754.fcvt.float2int import FPCVTFloatToIntMod
37
38
class SignedOp:
    """ Small operand holder carrying a single "signed" flag signal.
    """

    def __init__(self):
        # flag signal, declared reset-less
        self.signed = Signal(reset_less=True)

    def eq(self, i):
        """ returns a one-element list containing the assignment of
            i to the signed flag
        """
        assignment = self.signed.eq(i)
        return [assignment]
45
46
class FPCVTIntToFloatMod(Elaboratable):
    """ FP integer conversion: copes with 16/32/64 int to 16/32/64 fp.

        self.i.ctx.op & 0x1 == 0x1 : SIGNED int
        self.i.ctx.op & 0x1 == 0x0 : UNSIGNED int
    """
    def __init__(self, in_pspec, out_pspec):
        """ in_pspec:  pipeline spec used to build the input record
            out_pspec: pipeline spec used to build the output record
        """
        self.in_pspec = in_pspec
        self.out_pspec = out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format: FPADDBaseData built from in_pspec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format: FPAddStage1Data built from out_pspec,
            with e_extra enabled
        """
        return FPAddStage1Data(self.out_pspec, e_extra=True)

    def setup(self, m, i):
        """ links module to inputs and outputs

            m: the parent Module this stage is added to (as "intconvert")
            i: the incoming data, assigned combinatorially to self.i
        """
        m.submodules.intconvert = self
        # there is no local "comb" in this method: a bare "comb += ..."
        # raises UnboundLocalError, so go through the module's comb domain.
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        """ builds the combinatorial int-to-float conversion logic and
            returns the resulting Module
        """
        m = Module()
        comb = m.d.comb

        #m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        print("a1", self.in_pspec.width)
        z1 = self.o.z
        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)

        me = self.in_pspec.width    # width of the integer input
        mz = self.o.z.rmw           # rmw of the output number
        ms = mz - me                # negative when the int is wider
        print("ms-me", ms, me, mz)

        # 3 extra bits for guard/round/sticky
        msb = FPMSBHigh(me+3, z1.e_width)
        m.submodules.norm_msb = msb

        # signed or unsigned, use operator context (bit 0 of op)
        signed = Signal(reset_less=True)
        comb += signed.eq(self.i.ctx.op[0])

        # copy of the input, negated when signed and -ve (see below)
        mantissa = Signal(me, reset_less=True)

        # detect signed/unsigned.  key case: -ve numbers need negating
        # to +ve because the FP sign says if it's -ve or not.
        with m.If(signed):
            comb += z1.s.eq(self.i.a[-1])       # sign in top bit of a
            with m.If(z1.s):
                comb += mantissa.eq(-self.i.a)  # negate input if sign -ve
            with m.Else():
                comb += mantissa.eq(self.i.a)   # leave as-is
        with m.Else():
            comb += mantissa.eq(self.i.a)       # unsigned, use full a
            comb += z1.s.eq(0)

        # set input from full INT
        comb += msb.m_in.eq(Cat(0, 0, 0, mantissa))  # g/r/s + input
        comb += msb.e_in.eq(me)                      # exp = int width

        # to do with FP16... not yet resolved why
        # (python-level decision, made at elaboration time)
        alternative = ms < 0

        if alternative:
            comb += z1.e.eq(msb.e_out-1)
            mmsb = msb.m_out[-mz-1:]
            if mz == 16:
                # larger int to smaller FP (uint32/64 -> fp16 most likely)
                comb += z1.m[ms-1:].eq(mmsb)
            else:  # 32? XXX weirdness...
                comb += z1.m.eq(mmsb)
        else:
            # smaller int to larger FP
            comb += z1.e.eq(msb.e_out)
            comb += z1.m[ms:].eq(msb.m_out[3:])
        comb += z1.create(z1.s, z1.e, z1.m)  # ... here

        # note: post-normalisation actually appears to be capable of
        # detecting overflow to infinity (FPPackMod).  so it's ok to
        # drop the bits into the mantissa (with a fixed exponent),
        # do some rounding (which might result in exceeding the
        # range of the target FP by re-increasing the exponent),
        # and basically *not* have to do any kind of range-checking
        # here: just set up guard/round/sticky, drop the INT into the
        # mantissa, and away we go.  XXX TODO: see if FPNormaliseMod
        # is even necessary.  it probably isn't

        # initialise rounding (but only activate if needed)
        if alternative:
            # larger int to smaller FP (uint32/64 -> fp16 most likely)
            comb += self.o.of.guard.eq(msb.m_out[-mz-2])
            comb += self.o.of.round_bit.eq(msb.m_out[-mz-3])
            comb += self.o.of.sticky.eq(msb.m_out[:-mz-3].bool())
            comb += self.o.of.m0.eq(msb.m_out[-mz-1])
        else:
            # smaller int to larger FP
            comb += self.o.of.guard.eq(msb.m_out[2])
            comb += self.o.of.round_bit.eq(msb.m_out[1])
            comb += self.o.of.sticky.eq(msb.m_out[:1].bool())
            comb += self.o.of.m0.eq(msb.m_out[3])

        # special cases active by default
        comb += self.o.out_do_z.eq(1)

        # detect zero
        with m.If(~self.i.a.bool()):
            comb += self.o.z.zero(0)
        with m.Else():
            comb += self.o.out_do_z.eq(0)  # activate normalisation

        # copy the context (muxid, operator)
        comb += self.o.oz.eq(self.o.z.v)
        comb += self.o.ctx.eq(self.i.ctx)

        return m
174
175
class FPCVTUpConvertMod(Elaboratable):
    """ FP up-conversion (lower to higher bitwidth)
    """
    def __init__(self, in_pspec, out_pspec):
        """ in_pspec:  pipeline spec used to build the input record
            out_pspec: pipeline spec used to build the output record
        """
        self.in_pspec = in_pspec
        self.out_pspec = out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format: FPADDBaseData built from in_pspec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format: FPAddStage1Data built from out_pspec,
            with e_extra disabled
        """
        return FPAddStage1Data(self.out_pspec, e_extra=False)

    def setup(self, m, i):
        """ links module to inputs and outputs

            m: the parent Module this stage is added to (as "upconvert")
            i: the incoming data, assigned combinatorially to self.i
        """
        m.submodules.upconvert = self
        # there is no local "comb" in this method: a bare "comb += ..."
        # raises UnboundLocalError, so go through the module's comb domain.
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        """ builds the combinatorial up-conversion logic and returns
            the resulting Module
        """
        m = Module()
        comb = m.d.comb

        #m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        # the record is wrapped in a decoder submodule; "a1" is the
        # decoder from here on
        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
        comb += a1.v.eq(self.i.a)
        z1 = self.o.z
        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)

        me = a1.rmw
        ms = self.o.z.rmw - a1.rmw  # extra low-order bits in the output
        print("ms-me", ms, me, self.o.z.rmw, a1.rmw)

        # conversion can mostly be done manually...
        comb += self.o.z.s.eq(a1.s)
        comb += self.o.z.e.eq(a1.e)
        comb += self.o.z.m[ms:].eq(a1.m)
        comb += self.o.z.create(a1.s, a1.e, self.o.z.m)  # ... here

        # initialise rounding to all zeros (deactivate)
        comb += self.o.of.guard.eq(0)
        comb += self.o.of.round_bit.eq(0)
        comb += self.o.of.sticky.eq(0)
        comb += self.o.of.m0.eq(a1.m[0])

        # most special cases active (except tiny-number normalisation, below)
        comb += self.o.out_do_z.eq(1)

        # detect NaN/Inf first
        with m.If(a1.exp_128):
            with m.If(~a1.m_zero):
                comb += self.o.z.nan(0)  # RISC-V wants normalised NaN
            with m.Else():
                comb += self.o.z.inf(a1.s)  # RISC-V wants signed INF
        with m.Else():
            with m.If(a1.exp_n127):
                with m.If(~a1.m_zero):
                    comb += self.o.z.m[ms:].eq(Cat(0, a1.m))
                    comb += self.o.out_do_z.eq(0)  # activate normalisation
                with m.Else():
                    # RISC-V zero needs actual zero
                    comb += self.o.z.zero(a1.s)

        # copy the context (muxid, operator)
        comb += self.o.oz.eq(self.o.z.v)
        comb += self.o.ctx.eq(self.i.ctx)

        return m
255
256
class FPCVTDownConvertMod(Elaboratable):
    """ FP down-conversion (higher to lower bitwidth)
    """
    def __init__(self, in_pspec, out_pspec):
        """ in_pspec:  pipeline spec used to build the input record
            out_pspec: pipeline spec used to build the output record
        """
        self.in_pspec = in_pspec
        self.out_pspec = out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format: FPADDBaseData built from in_pspec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format: FPAddStage1Data built from out_pspec,
            with e_extra enabled
        """
        return FPAddStage1Data(self.out_pspec, e_extra=True)

    def setup(self, m, i):
        """ links module to inputs and outputs

            m: the parent Module this stage is added to (as "downconvert")
            i: the incoming data, assigned combinatorially to self.i
        """
        m.submodules.downconvert = self
        # there is no local "comb" in this method: a bare "comb += ..."
        # raises UnboundLocalError, so go through the module's comb domain.
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        """ builds the combinatorial down-conversion logic and returns
            the resulting Module
        """
        m = Module()
        comb = m.d.comb

        #m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        # the record is wrapped in a decoder submodule; "a1" is the
        # decoder from here on
        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
        comb += a1.v.eq(self.i.a)
        z1 = self.o.z
        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)

        me = a1.rmw
        ms = a1.rmw - self.o.z.rmw  # number of low-order bits dropped
        print("ms-me", ms, me)

        # intermediaries
        exp_sub_n126 = Signal((a1.e_width, True), reset_less=True)
        exp_gt127 = Signal(reset_less=True)
        # constants from z1, at the bit-width of a1.
        N126 = Const(z1.fp.N126.value, (a1.e_width, True))
        P127 = Const(z1.fp.P127.value, (a1.e_width, True))
        comb += exp_sub_n126.eq(a1.e - N126)
        comb += exp_gt127.eq(a1.e > P127)

        # if a zero, return zero (signed)
        with m.If(a1.exp_n127):
            comb += self.o.z.zero(a1.s)
            comb += self.o.out_do_z.eq(1)

        # if a range outside z's min range (-126)
        # (out_do_z is not set in this branch)
        with m.Elif(exp_sub_n126 < 0):
            comb += self.o.of.guard.eq(a1.m[ms-1])
            comb += self.o.of.round_bit.eq(a1.m[ms-2])
            comb += self.o.of.sticky.eq(a1.m[:ms-2].bool())
            comb += self.o.of.m0.eq(a1.m[ms])  # bit of a1

            comb += self.o.z.s.eq(a1.s)
            comb += self.o.z.e.eq(a1.e)
            comb += self.o.z.m.eq(a1.m[-self.o.z.rmw-1:])
            comb += self.o.z.m[-1].eq(1)

        # if a is inf return inf
        with m.Elif(a1.is_inf):
            comb += self.o.z.inf(a1.s)
            comb += self.o.out_do_z.eq(1)

        # if a is NaN return NaN
        with m.Elif(a1.is_nan):
            comb += self.o.z.nan(0)
            comb += self.o.out_do_z.eq(1)

        # if a's exponent is greater than z's P127, return inf
        with m.Elif(exp_gt127):
            print("inf", self.o.z.inf(a1.s))
            comb += self.o.z.inf(a1.s)
            comb += self.o.out_do_z.eq(1)

        # ok after all that, anything else should fit fine (whew)
        with m.Else():
            comb += self.o.of.guard.eq(a1.m[ms-1])
            comb += self.o.of.round_bit.eq(a1.m[ms-2])
            comb += self.o.of.sticky.eq(a1.m[:ms-2].bool())
            comb += self.o.of.m0.eq(a1.m[ms])  # bit of a1

            # XXX TODO: this is basically duplicating FPRoundMod.  hmmm...
            print("alen", a1.e_start, z1.fp.N126, N126)
            print("m1", self.o.z.rmw, a1.m[-self.o.z.rmw-1:])
            mo = Signal(self.o.z.m_width-1)
            comb += mo.eq(a1.m[ms:me])
            with m.If(self.o.of.roundz):
                with m.If((~mo == 0)):  # all 1s
                    comb += self.o.z.create(a1.s, a1.e+1, mo+1)
                with m.Else():
                    comb += self.o.z.create(a1.s, a1.e, mo+1)
            with m.Else():
                comb += self.o.z.create(a1.s, a1.e, a1.m[-self.o.z.rmw-1:])
            comb += self.o.out_do_z.eq(1)

        # copy the context (muxid, operator)
        comb += self.o.oz.eq(self.o.z.v)
        comb += self.o.ctx.eq(self.i.ctx)

        return m
369
370
class FPCVTConvertDeNorm(FPState, SimpleHandshake):
    """ Handshaked pipeline stage wrapping a conversion module.

        modkls is called as modkls(in_pspec, out_pspec) and the
        resulting object is passed to SimpleHandshake as its stage.
    """

    def __init__(self, modkls, in_pspec, out_pspec):
        # state name is fixed to "cvt"
        FPState.__init__(self, "cvt")
        # build the conversion stage, then wrap it in the handshake
        stage = modkls(in_pspec, out_pspec)
        SimpleHandshake.__init__(self, stage)
        self.out = self.ospec(None)
380
381
class FPCVTFtoIntBasePipe(ControlBase):
    """ Single-stage conversion pipeline: only the conversion stage
        (pipe1) is instantiated and connected.

        e_extra is accepted so that the constructor signature matches
        FPCVTBasePipe, but it is not used here: no FPNormToPack stage
        is created.
    """

    def __init__(self, modkls, e_extra, in_pspec, out_pspec):
        ControlBase.__init__(self)
        self.pipe1 = FPCVTConvertDeNorm(modkls, in_pspec, out_pspec)

        # connect the (one-entry) chain of stages
        stages = [self.pipe1]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ adds pipe1 as submodule "down" and applies the connection
            assignments combinatorially
        """
        m = ControlBase.elaborate(self, platform)
        m.submodules.down = self.pipe1
        m.d.comb += self._eqs
        return m
397
398
class FPCVTBasePipe(ControlBase):
    """ Two-stage conversion pipeline: a conversion stage (pipe1)
        followed by FPNormToPack (pipe2).
    """

    def __init__(self, modkls, e_extra, in_pspec, out_pspec):
        ControlBase.__init__(self)
        self.pipe1 = FPCVTConvertDeNorm(modkls, in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=e_extra)

        # connect conversion -> norm-to-pack, keep the assignments
        # for use in elaborate
        stages = [self.pipe1, self.pipe2]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ adds the two stages as submodules "down" and "normpack" and
            applies the connection assignments combinatorially
        """
        m = ControlBase.elaborate(self, platform)
        m.submodules.down = self.pipe1
        m.submodules.normpack = self.pipe2
        m.d.comb += self._eqs
        return m
413
414
class FPCVTMuxInOutBase(ReservationStations):
    """ Reservation-Station version of FPCVT pipeline.

        * inputs are in FPADDBaseData format (built from in_pspec)
        * the conversion pipeline itself is created by pkls and
          stored as self.alu
        * outputs are in FPPackData format (built from out_pspec)

        modkls and e_extra are passed straight through to pkls.
    """

    def __init__(self, modkls, e_extra, in_width, out_width,
                 num_rows, op_wid=0, pkls=FPCVTBasePipe):
        self.op_wid = op_wid

        # id widths are derived from the data widths
        in_id_wid = num_bits(in_width)
        out_id_wid = num_bits(out_width)
        self.id_wid = in_id_wid
        self.out_id_wid = out_id_wid

        # separate pipeline specs for the input and output sides
        self.in_pspec = PipelineSpec(in_width, in_id_wid, op_wid)
        self.out_pspec = PipelineSpec(out_width, out_id_wid, op_wid)

        # the pipeline must exist before the base class is initialised
        self.alu = pkls(modkls, e_extra, self.in_pspec, self.out_pspec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input data format
        """
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        """ output data format
        """
        return FPPackData(self.out_pspec)
442
443
def getkls(*args, **kwargs):
    """ prints the arguments received, then builds and returns a
        FPCVTMuxInOutBase from them
    """
    print ("getkls", args, kwargs)
    station = FPCVTMuxInOutBase(*args, **kwargs)
    return station
447
448
# factory table: each entry is (exported name, conversion module class,
# e_extra).  the loop below publishes, under each exported name, a
# partial of getkls with the module class and e_extra already bound, so
# the exported names behave like near-identical classes differing only
# in those two arguments.  at some point it would be good to merge
# these into a single dynamic "thing" that takes an operator.  however,
# the difference(s) in the bitwidths makes that a little less
# straightforward.
muxfactoryinput = [
    ("FPCVTDownMuxInOut", FPCVTDownConvertMod, True),
    ("FPCVTUpMuxInOut", FPCVTUpConvertMod, False),
    ("FPCVTIntMuxInOut", FPCVTIntToFloatMod, True),
]

for (name, kls, e_extra) in muxfactoryinput:
    fn = functools.partial(getkls, kls, e_extra)
    # at module level globals() is this module's namespace
    globals()[name] = fn
462
463
class FPCVTF2IntMuxInOut(FPCVTMuxInOutBase):
    """ Reservation-Station version of the float-to-int FPCVT pipeline.

        Specialises FPCVTMuxInOutBase with FPCVTFloatToIntMod as the
        conversion module, e_extra set to False, and
        FPCVTFtoIntBasePipe as the pipeline class.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        FPCVTMuxInOutBase.__init__(
            self, FPCVTFloatToIntMod, False,
            in_width, out_width, num_rows,
            op_wid=op_wid, pkls=FPCVTFtoIntBasePipe)
479