add first version fcvt int to fp16/32/64
[ieee754fpu.git] / src / ieee754 / fcvt / pipeline.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Const, Elaboratable
6 from nmigen.cli import main, verilog
7
8 from nmutil.singlepipe import ControlBase
9 from nmutil.concurrentunit import ReservationStations, num_bits
10
11 from ieee754.fpcommon.getop import FPADDBaseData
12 from ieee754.fpcommon.pack import FPPackData
13 from ieee754.fpcommon.normtopack import FPNormToPack
14 from ieee754.fpcommon.postcalc import FPAddStage1Data
15 from ieee754.fpcommon.msbhigh import FPMSBHigh
16
17
18 from nmigen import Module, Signal, Elaboratable
19 from math import log
20
21 from ieee754.fpcommon.fpbase import FPNumIn, FPNumOut, FPNumBaseRecord
22 from ieee754.fpcommon.fpbase import FPState, FPNumBase
23 from ieee754.fpcommon.getop import FPPipeContext
24
25 from ieee754.fpcommon.fpbase import FPNumDecode, FPNumBaseRecord
26 from nmutil.singlepipe import SimpleHandshake, StageChain
27
28 from ieee754.fpcommon.fpbase import FPState
29 from ieee754.pipeline import PipelineSpec
30
31
class FPCVTIntToFloatMod(Elaboratable):
    """ FP integer conversion: integer operand in, un-rounded FP number out.

        The integer (i.a) is extended with three low-order guard/round/sticky
        bits and passed through FPMSBHigh together with an initial exponent
        equal to the integer width; the resulting mantissa/exponent are
        placed in the output record.  The sign is currently forced to zero
        (unsigned only).  A zero input is emitted directly as FP zero with
        out_do_z left set; any other input clears out_do_z ("activate
        normalisation").

        TODO: dynamic selection of signed/unsigned
    """
    def __init__(self, in_pspec, out_pspec):
        # in_pspec / out_pspec: pipeline specs; only their .width is read
        # directly here, the rest is consumed by ispec() / ospec().
        self.in_pspec = in_pspec
        self.out_pspec = out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format (built from the input pipeline spec)
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format (built from the output pipeline spec)
        """
        return FPAddStage1Data(self.out_pspec, e_extra=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # NOTE(review): submodule name "upconvert" looks copy-pasted from
        # FPCVTUpConvertMod; left unchanged as it affects hierarchy naming.
        m.submodules.upconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (i is ignored)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        #m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        print("a1", self.in_pspec.width)
        z1 = self.o.z
        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)

        me = self.in_pspec.width     # width of the integer input
        ms = self.o.z.rmw - me       # offset of the integer within z.m
        print("ms-me", ms, me, self.o.z.rmw)

        # 3 extra bits for guard/round/sticky
        msb = FPMSBHigh(me+3, z1.e_width)
        m.submodules.norm_msb = msb

        # set input from full INT
        m.d.comb += msb.m_in.eq(Cat(0, 0, 0, self.i.a)) # g/r/s + input
        m.d.comb += msb.e_in.eq(me)                     # exp = int width

        # conversion can mostly be done manually...
        zo = self.o.z
        m.d.comb += zo.s.eq(0) # unsigned for now
        m.d.comb += zo.e.eq(msb.e_out)
        m.d.comb += zo.m[ms:].eq(msb.m_out[3:])
        m.d.comb += zo.create(zo.s, zo.e, zo.m) # ... here

        # initialise rounding (but only activate if needed).
        # bit layout of m_out follows m_in: [0]=sticky, [1]=round,
        # [2]=guard, [3]=LSB of the integer.
        m.d.comb += self.o.of.guard.eq(msb.m_out[2])
        m.d.comb += self.o.of.round_bit.eq(msb.m_out[1])
        m.d.comb += self.o.of.sticky.eq(msb.m_out[0]) # was [1]: round bit
        m.d.comb += self.o.of.m0.eq(msb.m_out[3])

        # special cases active by default
        m.d.comb += self.o.out_do_z.eq(1)

        # detect zero
        with m.If(~self.i.a.bool()):
            m.d.comb += self.o.z.zero(0)
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0) # activate normalisation

        # copy the context (muxid, operator)
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.ctx.eq(self.i.ctx)

        return m
109
110
class FPCVTUpConvertMod(Elaboratable):
    """ Widening FP conversion (input format narrower than output format).

        Rounding information is always zeroed.  out_do_z defaults to 1 and
        is only cleared when the input has exp_n127 set together with a
        non-zero mantissa, so that the result gets normalised.
    """
    def __init__(self, in_pspec, out_pspec):
        self.in_pspec, self.out_pspec = in_pspec, out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format (built from the input pipeline spec)
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format (built from the output pipeline spec)
        """
        return FPAddStage1Data(self.out_pspec, e_extra=False)

    def setup(self, m, i):
        """ registers this module as a submodule of m and wires up input i
        """
        m.submodules.upconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (i is ignored)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        out = self.o
        zout = out.z

        # decode the incoming number: XXX really should be its own stage
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        a1 = FPNumDecode(None, a1)
        m.submodules.sc_decode_a = a1
        comb += a1.v.eq(self.i.a)
        print("z1", zout.width, zout.rmw, zout.e_width,
              zout.e_start, zout.e_end)

        in_mw = a1.rmw
        pad = zout.rmw - in_mw   # input mantissa lands at z.m[pad:]
        print("ms-me", pad, in_mw, zout.rmw, a1.rmw)

        # straight copy of sign/exponent, mantissa moved up by "pad" bits
        comb += [zout.s.eq(a1.s),
                 zout.e.eq(a1.e),
                 zout.m[pad:].eq(a1.m)]
        comb += zout.create(a1.s, a1.e, zout.m)

        # no rounding: guard/round/sticky all held at zero
        comb += [out.of.guard.eq(0),
                 out.of.round_bit.eq(0),
                 out.of.sticky.eq(0),
                 out.of.m0.eq(a1.m[0])]

        # default: treat as special case (overridden below where needed)
        comb += out.out_do_z.eq(1)

        # NaN/Inf take priority, then the exp_n127 cases
        with m.If(a1.exp_128):
            with m.If(~a1.m_zero):
                comb += zout.nan(0)      # RISC-V wants normalised NaN
            with m.Else():
                comb += zout.inf(a1.s)   # RISC-V wants signed INF
        with m.Elif(a1.exp_n127):
            with m.If(~a1.m_zero):
                comb += zout.m[pad:].eq(Cat(0, a1.m))
                comb += out.out_do_z.eq(0)   # activate normalisation
            with m.Else():
                comb += zout.zero(a1.s)  # RISC-V zero needs actual zero

        # pass the context (muxid, operator) through
        comb += out.oz.eq(zout.v)
        comb += out.ctx.eq(self.i.ctx)

        return m
189
190
class FPCVTDownConvertMod(Elaboratable):
    """ FP down-conversion (higher to lower bitwidth)

        Decodes the input operand (i.a) in the input format and produces a
        number in the output format.  Zero, Inf, NaN, out-of-range-high and
        in-range values are completed here (out_do_z=1); values whose
        exponent is below the output format's N126 constant are passed on
        with guard/round/sticky information and out_do_z left unset.
    """
    def __init__(self, in_pspec, out_pspec):
        # in_pspec / out_pspec: pipeline specs for the input / output formats
        self.in_pspec = in_pspec
        self.out_pspec = out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format (built from the input pipeline spec)
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format (built from the output pipeline spec)
        """
        return FPAddStage1Data(self.out_pspec, e_extra=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.downconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (i is ignored)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        #m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
        m.d.comb += a1.v.eq(self.i.a)
        z1 = self.o.z
        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)

        # me: a1.rmw; ms: difference between input and output rmw, used
        # below as the index in a1.m where the retained bits start
        # (presumably rmw is the mantissa width - TODO confirm)
        me = a1.rmw
        ms = a1.rmw - self.o.z.rmw
        print("ms-me", ms, me)

        # intermediaries
        exp_sub_n126 = Signal((a1.e_width, True), reset_less=True)
        exp_gt127 = Signal(reset_less=True)
        # constants from z1, at the bit-width of a1.
        N126 = Const(z1.fp.N126.value, (a1.e_width, True))
        P127 = Const(z1.fp.P127.value, (a1.e_width, True))
        m.d.comb += exp_sub_n126.eq(a1.e - N126)
        m.d.comb += exp_gt127.eq(a1.e > P127)

        # if a zero, return zero (signed)
        # NOTE(review): the test is on exp_n127 alone (mantissa not checked)
        with m.If(a1.exp_n127):
            m.d.comb += self.o.z.zero(a1.s)
            m.d.comb += self.o.out_do_z.eq(1)

        # if a range outside z's min range (-126)
        # (out_do_z is not assigned in this branch)
        with m.Elif(exp_sub_n126 < 0):
            m.d.comb += self.o.of.guard.eq(a1.m[ms-1])
            m.d.comb += self.o.of.round_bit.eq(a1.m[ms-2])
            m.d.comb += self.o.of.sticky.eq(a1.m[:ms-2].bool())
            m.d.comb += self.o.of.m0.eq(a1.m[ms]) # bit of a1

            m.d.comb += self.o.z.s.eq(a1.s)
            m.d.comb += self.o.z.e.eq(a1.e)
            # top (z.rmw+1) bits of a1.m, then force the top bit of z.m to 1
            m.d.comb += self.o.z.m.eq(a1.m[-self.o.z.rmw-1:])
            m.d.comb += self.o.z.m[-1].eq(1)

        # if a is inf return inf
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.z.inf(a1.s)
            m.d.comb += self.o.out_do_z.eq(1)

        # if a is NaN return NaN
        with m.Elif(a1.is_nan):
            m.d.comb += self.o.z.nan(0)
            m.d.comb += self.o.out_do_z.eq(1)

        # if a's exponent is greater than z's P127 constant, return inf
        with m.Elif(exp_gt127):
            print("inf", self.o.z.inf(a1.s))
            m.d.comb += self.o.z.inf(a1.s)
            m.d.comb += self.o.out_do_z.eq(1)

        # ok after all that, anything else should fit fine (whew)
        with m.Else():
            m.d.comb += self.o.of.guard.eq(a1.m[ms-1])
            m.d.comb += self.o.of.round_bit.eq(a1.m[ms-2])
            m.d.comb += self.o.of.sticky.eq(a1.m[:ms-2].bool())
            m.d.comb += self.o.of.m0.eq(a1.m[ms]) # bit of a1

            # XXX TODO: this is basically duplicating FPRoundMod. hmmm...
            print("alen", a1.e_start, z1.fp.N126, N126)
            print("m1", self.o.z.rmw, a1.m[-self.o.z.rmw-1:])
            # mo: the bits of a1.m that are kept (a1.m[ms:me])
            mo = Signal(self.o.z.m_width-1)
            m.d.comb += mo.eq(a1.m[ms:me])
            with m.If(self.o.of.roundz):
                # round up: when mo is all 1s the exponent is bumped too
                with m.If((~mo == 0)): # all 1s
                    m.d.comb += self.o.z.create(a1.s, a1.e+1, mo+1)
                with m.Else():
                    m.d.comb += self.o.z.create(a1.s, a1.e, mo+1)
            with m.Else():
                m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[-self.o.z.rmw-1:])
            # NOTE(review): nesting reconstructed - assumed to apply to the
            # whole of this branch (rounded and un-rounded); confirm.
            m.d.comb += self.o.out_do_z.eq(1)

        # copy the context (muxid, operator)
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.ctx.eq(self.i.ctx)

        return m
302
303
class FPCVTIntToFloat(FPState):
    """ FSM state wrapping FPCVTIntToFloatMod (integer to FP conversion).

        NOTE(review): in_width / out_width are handed unchanged to
        FPCVTIntToFloatMod, which reads ".width" from them, so they are
        presumably pipeline specs rather than plain integers - confirm
        against callers.  id_wid is accepted but not used here.
    """

    def __init__(self, in_width, out_width, id_wid):
        FPState.__init__(self, "inttofloat")
        self.mod = FPCVTIntToFloatMod(in_width, out_width)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup() only accepts (m, i), and it exposes its
        # results through ".o" (there is no ".out_z" on the module), so
        # out_do_z and the result are taken from the output record.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx) # (and context)

    def action(self, m):
        """ selects the next FSM state: straight to "put_z" when the module
            flags the result as complete, otherwise on to "denormalise"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
327
328
class FPCVTUpConvert(FPState):
    """ FSM state wrapping FPCVTUpConvertMod (FP up-conversion).

        NOTE(review): in_width / out_width are handed unchanged to
        FPCVTUpConvertMod, which reads ".width" from them, so they are
        presumably pipeline specs rather than plain integers - confirm
        against callers.  id_wid is accepted but not used here.
    """

    def __init__(self, in_width, out_width, id_wid):
        FPState.__init__(self, "upconvert")
        self.mod = FPCVTUpConvertMod(in_width, out_width)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup() only accepts (m, i), and it exposes its
        # results through ".o" (there is no ".out_z" on the module), so
        # out_do_z and the result are taken from the output record.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx) # (and context)

    def action(self, m):
        """ selects the next FSM state: straight to "put_z" when the module
            flags the result as complete, otherwise on to "denormalise"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
352
353
class FPCVTDownConvert(FPState):
    """ FSM state wrapping FPCVTDownConvertMod (FP down-conversion),
        which also deals with the special cases: NaNs, infs, zeros.

        NOTE(review): in_width / out_width are handed unchanged to
        FPCVTDownConvertMod, which reads ".width" from them, so they are
        presumably pipeline specs rather than plain integers - confirm
        against callers.  id_wid is accepted but not used here.
    """

    def __init__(self, in_width, out_width, id_wid):
        FPState.__init__(self, "special_cases")
        self.mod = FPCVTDownConvertMod(in_width, out_width)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup() only accepts (m, i), and it exposes its
        # results through ".o" (there is no ".out_z" on the module), so
        # out_do_z and the result are taken from the output record.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx) # (and context)

    def action(self, m):
        """ selects the next FSM state: straight to "put_z" when the module
            flags the result as complete, otherwise on to "denormalise"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
377
378
class FPCVTIntToFloatDeNorm(FPState, SimpleHandshake):
    """ SimpleHandshake pipeline stage built around FPCVTIntToFloatMod
    """

    def __init__(self, in_pspec, out_pspec):
        FPState.__init__(self, "inttofloat")
        # the conversion module acts as the stage for the handshake logic
        SimpleHandshake.__init__(self,
                                 FPCVTIntToFloatMod(in_pspec, out_pspec))
        self.out = self.ospec(None)
388
389
class FPCVTUpConvertDeNorm(FPState, SimpleHandshake):
    """ SimpleHandshake pipeline stage built around FPCVTUpConvertMod
    """

    def __init__(self, in_pspec, out_pspec):
        FPState.__init__(self, "upconvert")
        # the conversion module acts as the stage for the handshake logic
        SimpleHandshake.__init__(self,
                                 FPCVTUpConvertMod(in_pspec, out_pspec))
        self.out = self.ospec(None)
399
400
class FPCVTDownConvertDeNorm(FPState, SimpleHandshake):
    """ SimpleHandshake pipeline stage built around FPCVTDownConvertMod
    """

    def __init__(self, in_pspec, out_pspec):
        FPState.__init__(self, "downconvert")
        # the conversion module acts as the stage for the handshake logic
        SimpleHandshake.__init__(self,
                                 FPCVTDownConvertMod(in_pspec, out_pspec))
        self.out = self.ospec(None)
410
411
class FPCVTIntBasePipe(ControlBase):
    """ Two-stage int-to-float pipeline: the conversion stage (pipe1)
        feeding FPNormToPack (pipe2).
    """
    def __init__(self, in_pspec, out_pspec):
        super().__init__()
        self.pipe1 = FPCVTIntToFloatDeNorm(in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=True)

        # connection statements are created now, applied in elaborate()
        chain = [self.pipe1, self.pipe2]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        named_stages = (("toint", self.pipe1), ("normpack", self.pipe2))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
426
427
class FPCVTUpBasePipe(ControlBase):
    """ Two-stage FP up-conversion pipeline: the conversion stage (pipe1)
        feeding FPNormToPack (pipe2).
    """
    def __init__(self, in_pspec, out_pspec):
        super().__init__()
        self.pipe1 = FPCVTUpConvertDeNorm(in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=False)

        # connection statements are created now, applied in elaborate()
        chain = [self.pipe1, self.pipe2]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        named_stages = (("up", self.pipe1), ("normpack", self.pipe2))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
442
443
class FPCVTDownBasePipe(ControlBase):
    """ Two-stage FP down-conversion pipeline: the conversion stage (pipe1)
        feeding FPNormToPack (pipe2).
    """
    def __init__(self, in_pspec, out_pspec):
        super().__init__()
        self.pipe1 = FPCVTDownConvertDeNorm(in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=True)

        # connection statements are created now, applied in elaborate()
        chain = [self.pipe1, self.pipe2]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        named_stages = (("down", self.pipe1), ("normpack", self.pipe2))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
458
459
class FPCVTIntMuxInOut(ReservationStations):
    """ Reservation-Station version of FPCVT int-to-float pipeline.

        * fan-in on inputs (an array of FPADDBaseData)
        * 2-stage conversion pipeline (FPCVTIntBasePipe) as the ALU
        * fan-out on outputs (an array of FPPackData)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        self.op_wid = op_wid
        # separate id widths (and pipeline specs) for input and output side
        self.id_wid = num_bits(in_width)
        self.out_id_wid = num_bits(out_width)
        in_spec = PipelineSpec(in_width, self.id_wid, op_wid)
        out_spec = PipelineSpec(out_width, self.out_id_wid, op_wid)
        self.in_pspec, self.out_pspec = in_spec, out_spec

        self.alu = FPCVTIntBasePipe(in_spec, out_spec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input data format: FPADDBaseData for the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        """ output data format: FPPackData for the output pipeline spec
        """
        return FPPackData(self.out_pspec)
486
487
class FPCVTUpMuxInOut(ReservationStations):
    """ Reservation-Station version of FPCVT up pipeline.

        * fan-in on inputs (an array of FPADDBaseData)
        * 2-stage conversion pipeline (FPCVTUpBasePipe) as the ALU
        * fan-out on outputs (an array of FPPackData)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        self.op_wid = op_wid
        # separate id widths (and pipeline specs) for input and output side
        self.id_wid = num_bits(in_width)
        self.out_id_wid = num_bits(out_width)
        in_spec = PipelineSpec(in_width, self.id_wid, op_wid)
        out_spec = PipelineSpec(out_width, self.out_id_wid, op_wid)
        self.in_pspec, self.out_pspec = in_spec, out_spec

        self.alu = FPCVTUpBasePipe(in_spec, out_spec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input data format: FPADDBaseData for the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        """ output data format: FPPackData for the output pipeline spec
        """
        return FPPackData(self.out_pspec)
514
515
class FPCVTDownMuxInOut(ReservationStations):
    """ Reservation-Station version of FPCVT down pipeline.

        * fan-in on inputs (an array of FPADDBaseData)
        * 2-stage conversion pipeline (FPCVTDownBasePipe) as the ALU
        * fan-out on outputs (an array of FPPackData)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        self.op_wid = op_wid
        # separate id widths (and pipeline specs) for input and output side
        self.id_wid = num_bits(in_width)
        self.out_id_wid = num_bits(out_width)
        in_spec = PipelineSpec(in_width, self.id_wid, op_wid)
        out_spec = PipelineSpec(out_width, self.out_id_wid, op_wid)
        self.in_pspec, self.out_pspec = in_spec, out_spec

        self.alu = FPCVTDownBasePipe(in_spec, out_spec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """ input data format: FPADDBaseData for the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        """ output data format: FPPackData for the output pipeline spec
        """
        return FPPackData(self.out_pspec)