src/ieee754/fcvt/pipeline.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 import sys
   6 import functools
   7
   8 from nmigen import Module, Signal, Cat, Const, Mux, Elaboratable
   9 from nmigen.cli import main, verilog
  10
  11 from nmutil.singlepipe import ControlBase
  12 from nmutil.concurrentunit import ReservationStations, num_bits
  13
  14 from ieee754.fpcommon.fpbase import Overflow
  15 from ieee754.fpcommon.getop import FPADDBaseData
  16 from ieee754.fpcommon.pack import FPPackData
  17 from ieee754.fpcommon.normtopack import FPNormToPack
  18 from ieee754.fpcommon.postcalc import FPAddStage1Data
  19 from ieee754.fpcommon.msbhigh import FPMSBHigh
  20 from ieee754.fpcommon.exphigh import FPEXPHigh
  21
  22
  23 from nmigen import Module, Signal, Elaboratable
  24 from math import log
  25
  26 from ieee754.fpcommon.fpbase import FPNumIn, FPNumOut, FPNumBaseRecord
  27 from ieee754.fpcommon.fpbase import FPState, FPNumBase
  28 from ieee754.fpcommon.getop import FPPipeContext
  29
  30 from ieee754.fpcommon.fpbase import FPNumDecode, FPNumBaseRecord
  31 from nmutil.singlepipe import SimpleHandshake, StageChain
  32
  33 from ieee754.fpcommon.fpbase import FPState
  34 from ieee754.pipeline import PipelineSpec
  35
  36 from ieee754.fcvt.float2int import FPCVTFloatToIntMod
  37
  38
  39 class SignedOp:
  40     def __init__(self):
  41         self.signed = Signal(reset_less=True)
  42
  43     def eq(self, i):
  44         return [self.signed.eq(i)]
  45
  46
  47 class FPCVTIntToFloatMod(Elaboratable):
  48     """ FP integer conversion: copes with 16/32/64 int to 16/32/64 fp.
  49
  50         self.ctx.i.op & 0x1 == 0x1 : SIGNED int
  51         self.ctx.i.op & 0x1 == 0x0 : UNSIGNED int
  52     """
  53     def __init__(self, in_pspec, out_pspec):
  54         self.in_pspec = in_pspec
  55         self.out_pspec = out_pspec
  56         self.i = self.ispec()
  57         self.o = self.ospec()
  58
  59     def ispec(self):
  60         return FPADDBaseData(self.in_pspec)
  61
  62     def ospec(self):
  63         return FPAddStage1Data(self.out_pspec, e_extra=True)
  64
  65     def setup(self, m, i):
  66         """ links module to inputs and outputs
  67         """
  68         m.submodules.intconvert = self
  69         comb += self.i.eq(i)
  70
  71     def process(self, i):
  72         return self.o
  73
  74     def elaborate(self, platform):
  75         m = Module()
  76         comb = m.d.comb
  77
  78         #m.submodules.sc_out_z = self.o.z
  79
  80         # decode: XXX really should move to separate stage
  81         print("in_width out", self.in_pspec.width,
  82               self.out_pspec.width)
  83         print("a1", self.in_pspec.width)
  84         z1 = self.o.z
  85         print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)
  86
  87         me = self.in_pspec.width
  88         mz = self.o.z.rmw
  89         ms = mz - me
  90         print("ms-me", ms, me, mz)
  91
  92         # 3 extra bits for guard/round/sticky
  93         msb = FPMSBHigh(me+3, z1.e_width)
  94         m.submodules.norm_msb = msb
  95
  96         # signed or unsigned, use operator context
  97         signed = Signal(reset_less=True)
  98         comb += signed.eq(self.i.ctx.op[0])
  99
 100         # copy of mantissa (one less bit if signed)
 101         mantissa = Signal(me, reset_less=True)
 102
 103         # detect signed/unsigned.  key case: -ve numbers need inversion
 104         # to +ve because the FP sign says if it's -ve or not.
 105         with m.If(signed):
 106             comb += z1.s.eq(self.i.a[-1])      # sign in top bit of a
 107             with m.If(z1.s):
 108                 comb += mantissa.eq(-self.i.a) # invert input if sign -ve
 109             with m.Else():
 110                 comb += mantissa.eq(self.i.a)  # leave as-is
 111         with m.Else():
 112             comb += mantissa.eq(self.i.a)      # unsigned, use full a
 113             comb += z1.s.eq(0)
 114
 115         # set input from full INT
 116         comb += msb.m_in.eq(Cat(0, 0, 0, mantissa)) # g/r/s + input
 117         comb += msb.e_in.eq(me)                     # exp = int width
 118
 119         # to do with FP16... not yet resolved why
 120         alternative = ms < 0
 121
 122         if alternative:
 123             comb += z1.e.eq(msb.e_out-1)
 124             mmsb = msb.m_out[-mz-1:]
 125             if mz == 16:
 126                 # larger int to smaller FP (uint32/64 -> fp16 most likely)
 127                 comb += z1.m[ms-1:].eq(mmsb)
 128             else: # 32? XXX weirdness...
 129                 comb += z1.m.eq(mmsb)
 130         else:
 131             # smaller int to larger FP
 132             comb += z1.e.eq(msb.e_out)
 133             comb += z1.m[ms:].eq(msb.m_out[3:])
 134         comb += z1.create(z1.s, z1.e, z1.m) # ... here
 135
 136         # note: post-normalisation actually appears to be capable of
 137         # detecting overflow to infinity (FPPackMod).  so it's ok to
 138         # drop the bits into the mantissa (with a fixed exponent),
 139         # do some rounding (which might result in exceeding the
 140         # range of the target FP by re-increasing the exponent),
 141         # and basically *not* have to do any kind of range-checking
 142         # here: just set up guard/round/sticky, drop the INT into the
 143         # mantissa, and away we go.  XXX TODO: see if FPNormaliseMod
 144         # is even necessary.  it probably isn't
 145
 146         # initialise rounding (but only activate if needed)
 147         if alternative:
 148             # larger int to smaller FP (uint32/64 -> fp16 most likely)
 149             comb += self.o.of.guard.eq(msb.m_out[-mz-2])
 150             comb += self.o.of.round_bit.eq(msb.m_out[-mz-3])
 151             comb += self.o.of.sticky.eq(msb.m_out[:-mz-3].bool())
 152             comb += self.o.of.m0.eq(msb.m_out[-mz-1])
 153         else:
 154             # smaller int to larger FP
 155             comb += self.o.of.guard.eq(msb.m_out[2])
 156             comb += self.o.of.round_bit.eq(msb.m_out[1])
 157             comb += self.o.of.sticky.eq(msb.m_out[:1].bool())
 158             comb += self.o.of.m0.eq(msb.m_out[3])
 159
 160         # special cases active by default
 161         comb += self.o.out_do_z.eq(1)
 162
 163         # detect zero
 164         with m.If(~self.i.a.bool()):
 165             comb += self.o.z.zero(0)
 166         with m.Else():
 167             comb += self.o.out_do_z.eq(0) # activate normalisation
 168
 169         # copy the context (muxid, operator)
 170         comb += self.o.oz.eq(self.o.z.v)
 171         comb += self.o.ctx.eq(self.i.ctx)
 172
 173         return m
 174
 175
 176 class FPCVTUpConvertMod(Elaboratable):
 177     """ FP up-conversion (lower to higher bitwidth)
 178     """
 179     def __init__(self, in_pspec, out_pspec):
 180         self.in_pspec = in_pspec
 181         self.out_pspec = out_pspec
 182         self.i = self.ispec()
 183         self.o = self.ospec()
 184
 185     def ispec(self):
 186         return FPADDBaseData(self.in_pspec)
 187
 188     def ospec(self):
 189         return FPAddStage1Data(self.out_pspec, e_extra=False)
 190
 191     def setup(self, m, i):
 192         """ links module to inputs and outputs
 193         """
 194         m.submodules.upconvert = self
 195         comb += self.i.eq(i)
 196
 197     def process(self, i):
 198         return self.o
 199
 200     def elaborate(self, platform):
 201         m = Module()
 202         comb = m.d.comb
 203
 204         #m.submodules.sc_out_z = self.o.z
 205
 206         # decode: XXX really should move to separate stage
 207         print("in_width out", self.in_pspec.width,
 208               self.out_pspec.width)
 209         a1 = FPNumBaseRecord(self.in_pspec.width, False)
 210         print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
 211         m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
 212         comb += a1.v.eq(self.i.a)
 213         z1 = self.o.z
 214         print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)
 215
 216         me = a1.rmw
 217         ms = self.o.z.rmw - a1.rmw
 218         print("ms-me", ms, me, self.o.z.rmw, a1.rmw)
 219
 220         # conversion can mostly be done manually...
 221         comb += self.o.z.s.eq(a1.s)
 222         comb += self.o.z.e.eq(a1.e)
 223         comb += self.o.z.m[ms:].eq(a1.m)
 224         comb += self.o.z.create(a1.s, a1.e, self.o.z.m) # ... here
 225
 226         # initialise rounding to all zeros (deactivate)
 227         comb += self.o.of.guard.eq(0)
 228         comb += self.o.of.round_bit.eq(0)
 229         comb += self.o.of.sticky.eq(0)
 230         comb += self.o.of.m0.eq(a1.m[0])
 231
 232         # most special cases active (except tiny-number normalisation, below)
 233         comb += self.o.out_do_z.eq(1)
 234
 235         # detect NaN/Inf first
 236         with m.If(a1.exp_128):
 237             with m.If(~a1.m_zero):
 238                 comb += self.o.z.nan(0) # RISC-V wants normalised NaN
 239             with m.Else():
 240                 comb += self.o.z.inf(a1.s) # RISC-V wants signed INF
 241         with m.Else():
 242             with m.If(a1.exp_n127):
 243                 with m.If(~a1.m_zero):
 244                     comb += self.o.z.m[ms:].eq(Cat(0, a1.m))
 245                     comb += self.o.out_do_z.eq(0) # activate normalisation
 246                 with m.Else():
 247                     # RISC-V zero needs actual zero
 248                     comb += self.o.z.zero(a1.s)
 249
 250         # copy the context (muxid, operator)
 251         comb += self.o.oz.eq(self.o.z.v)
 252         comb += self.o.ctx.eq(self.i.ctx)
 253
 254         return m
 255
 256
 257 class FPCVTDownConvertMod(Elaboratable):
 258     """ FP down-conversion (higher to lower bitwidth)
 259     """
 260     def __init__(self, in_pspec, out_pspec):
 261         self.in_pspec = in_pspec
 262         self.out_pspec = out_pspec
 263         self.i = self.ispec()
 264         self.o = self.ospec()
 265
 266     def ispec(self):
 267         return FPADDBaseData(self.in_pspec)
 268
 269     def ospec(self):
 270         return FPAddStage1Data(self.out_pspec, e_extra=True)
 271
 272     def setup(self, m, i):
 273         """ links module to inputs and outputs
 274         """
 275         m.submodules.downconvert = self
 276         comb += self.i.eq(i)
 277
 278     def process(self, i):
 279         return self.o
 280
 281     def elaborate(self, platform):
 282         m = Module()
 283         comb = m.d.comb
 284
 285         #m.submodules.sc_out_z = self.o.z
 286
 287         # decode: XXX really should move to separate stage
 288         print("in_width out", self.in_pspec.width,
 289               self.out_pspec.width)
 290         a1 = FPNumBaseRecord(self.in_pspec.width, False)
 291         print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
 292         m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
 293         comb += a1.v.eq(self.i.a)
 294         z1 = self.o.z
 295         print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)
 296
 297         me = a1.rmw
 298         ms = a1.rmw - self.o.z.rmw
 299         print("ms-me", ms, me)
 300
 301         # intermediaries
 302         exp_sub_n126 = Signal((a1.e_width, True), reset_less=True)
 303         exp_gt127 = Signal(reset_less=True)
 304         # constants from z1, at the bit-width of a1.
 305         N126 = Const(z1.fp.N126.value, (a1.e_width, True))
 306         P127 = Const(z1.fp.P127.value, (a1.e_width, True))
 307         comb += exp_sub_n126.eq(a1.e - N126)
 308         comb += exp_gt127.eq(a1.e > P127)
 309
 310         # if a zero, return zero (signed)
 311         with m.If(a1.exp_n127):
 312             comb += self.o.z.zero(a1.s)
 313             comb += self.o.out_do_z.eq(1)
 314
 315         # if a range outside z's min range (-126)
 316         with m.Elif(exp_sub_n126 < 0):
 317             comb += self.o.of.guard.eq(a1.m[ms-1])
 318             comb += self.o.of.round_bit.eq(a1.m[ms-2])
 319             comb += self.o.of.sticky.eq(a1.m[:ms-2].bool())
 320             comb += self.o.of.m0.eq(a1.m[ms])  # bit of a1
 321
 322             comb += self.o.z.s.eq(a1.s)
 323             comb += self.o.z.e.eq(a1.e)
 324             comb += self.o.z.m.eq(a1.m[-self.o.z.rmw-1:])
 325             comb += self.o.z.m[-1].eq(1)
 326
 327         # if a is inf return inf
 328         with m.Elif(a1.is_inf):
 329             comb += self.o.z.inf(a1.s)
 330             comb += self.o.out_do_z.eq(1)
 331
 332         # if a is NaN return NaN
 333         with m.Elif(a1.is_nan):
 334             comb += self.o.z.nan(0)
 335             comb += self.o.out_do_z.eq(1)
 336
 337         # if a mantissa greater than 127, return inf
 338         with m.Elif(exp_gt127):
 339             print("inf", self.o.z.inf(a1.s))
 340             comb += self.o.z.inf(a1.s)
 341             comb += self.o.out_do_z.eq(1)
 342
 343         # ok after all that, anything else should fit fine (whew)
 344         with m.Else():
 345             comb += self.o.of.guard.eq(a1.m[ms-1])
 346             comb += self.o.of.round_bit.eq(a1.m[ms-2])
 347             comb += self.o.of.sticky.eq(a1.m[:ms-2].bool())
 348             comb += self.o.of.m0.eq(a1.m[ms])  # bit of a1
 349
 350             # XXX TODO: this is basically duplicating FPRoundMod. hmmm...
 351             print("alen", a1.e_start, z1.fp.N126, N126)
 352             print("m1", self.o.z.rmw, a1.m[-self.o.z.rmw-1:])
 353             mo = Signal(self.o.z.m_width-1)
 354             comb += mo.eq(a1.m[ms:me])
 355             with m.If(self.o.of.roundz):
 356                 with m.If((~mo == 0)):  # all 1s
 357                     comb += self.o.z.create(a1.s, a1.e+1, mo+1)
 358                 with m.Else():
 359                     comb += self.o.z.create(a1.s, a1.e, mo+1)
 360             with m.Else():
 361                 comb += self.o.z.create(a1.s, a1.e, a1.m[-self.o.z.rmw-1:])
 362             comb += self.o.out_do_z.eq(1)
 363
 364         # copy the context (muxid, operator)
 365         comb += self.o.oz.eq(self.o.z.v)
 366         comb += self.o.ctx.eq(self.i.ctx)
 367
 368         return m
 369
 370
 371 class FPCVTConvertDeNorm(FPState, SimpleHandshake):
 372     """ FPConversion and De-norm
 373     """
 374
 375     def __init__(self, modkls, in_pspec, out_pspec):
 376         FPState.__init__(self, "cvt")
 377         sc = modkls(in_pspec, out_pspec)
 378         SimpleHandshake.__init__(self, sc)
 379         self.out = self.ospec(None)
 380
 381
 382 class FPCVTFtoIntBasePipe(ControlBase):
 383     def __init__(self, modkls, e_extra, in_pspec, out_pspec):
 384         ControlBase.__init__(self)
 385         self.pipe1 = FPCVTConvertDeNorm(modkls, in_pspec, out_pspec)
 386         #self.pipe2 = FPNormToPack(out_pspec, e_extra=e_extra)
 387
 388         #self._eqs = self.connect([self.pipe1, self.pipe2])
 389         self._eqs = self.connect([self.pipe1, ])
 390
 391     def elaborate(self, platform):
 392         m = ControlBase.elaborate(self, platform)
 393         m.submodules.down = self.pipe1
 394         #m.submodules.normpack = self.pipe2
 395         m.d.comb += self._eqs
 396         return m
 397
 398
 399 class FPCVTBasePipe(ControlBase):
 400     def __init__(self, modkls, e_extra, in_pspec, out_pspec):
 401         ControlBase.__init__(self)
 402         self.pipe1 = FPCVTConvertDeNorm(modkls, in_pspec, out_pspec)
 403         self.pipe2 = FPNormToPack(out_pspec, e_extra=e_extra)
 404
 405         self._eqs = self.connect([self.pipe1, self.pipe2])
 406
 407     def elaborate(self, platform):
 408         m = ControlBase.elaborate(self, platform)
 409         m.submodules.down = self.pipe1
 410         m.submodules.normpack = self.pipe2
 411         m.d.comb += self._eqs
 412         return m
 413
 414
 415 class FPCVTMuxInOutBase(ReservationStations):
 416     """ Reservation-Station version of FPCVT pipeline.
 417
 418         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 419         * 2-stage multiplier pipeline
 420         * fan-out on outputs (an array of FPPackData: z,mid)
 421
 422         Fan-in and Fan-out are combinatorial.
 423     """
 424
 425     def __init__(self, modkls, e_extra, in_width, out_width,
 426                        num_rows, op_wid=0, pkls=FPCVTBasePipe):
 427         self.op_wid = op_wid
 428         self.id_wid = num_bits(in_width)
 429         self.out_id_wid = num_bits(out_width)
 430
 431         self.in_pspec = PipelineSpec(in_width, self.id_wid, self.op_wid)
 432         self.out_pspec = PipelineSpec(out_width, self.out_id_wid, op_wid)
 433
 434         self.alu = pkls(modkls, e_extra, self.in_pspec, self.out_pspec)
 435         ReservationStations.__init__(self, num_rows)
 436
 437     def i_specfn(self):
 438         return FPADDBaseData(self.in_pspec)
 439
 440     def o_specfn(self):
 441         return FPPackData(self.out_pspec)
 442
 443
 444 def getkls(*args, **kwargs):
 445     print ("getkls", args, kwargs)
 446     return FPCVTMuxInOutBase(*args, **kwargs)
 447
 448
 449 # factory which creates near-identical class structures that differ by
 450 # the module and the e_extra argument.  at some point it would be good
 451 # to merge these into a single dynamic "thing" that takes an operator.
 452 # however, the difference(s) in the bitwidths makes that a little less
 453 # straightforward.
 454 muxfactoryinput = [("FPCVTDownMuxInOut", FPCVTDownConvertMod, True, ),
 455                    ("FPCVTUpMuxInOut",   FPCVTUpConvertMod,   False, ),
 456                    ("FPCVTIntMuxInOut",   FPCVTIntToFloatMod,   True, ),
 457                   ]
 458
 459 for (name, kls, e_extra) in muxfactoryinput:
 460     fn = functools.partial(getkls, kls, e_extra)
 461     setattr(sys.modules[__name__], name, fn)
 462
 463
 464 class FPCVTF2IntMuxInOut(FPCVTMuxInOutBase):
 465     """ Reservation-Station version of FPCVT pipeline.
 466
 467         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 468         * 2-stage multiplier pipeline
 469         * fan-out on outputs (an array of FPPackData: z,mid)
 470
 471         Fan-in and Fan-out are combinatorial.
 472     """
 473
 474     def __init__(self, in_width, out_width, num_rows, op_wid=0):
 475         FPCVTMuxInOutBase.__init__(self, FPCVTFloatToIntMod, False,
 476                                          in_width, out_width,
 477                                          num_rows, op_wid,
 478                                          pkls=FPCVTFtoIntBasePipe)
 479