src/ieee754/fcvt/pipeline.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Const, Elaboratable
   6 from nmigen.cli import main, verilog
   7
   8 from nmutil.singlepipe import ControlBase
   9 from nmutil.concurrentunit import ReservationStations, num_bits
  10
  11 from ieee754.fpcommon.getop import FPADDBaseData
  12 from ieee754.fpcommon.pack import FPPackData
  13 from ieee754.fpcommon.normtopack import FPNormToPack
  14 from ieee754.fpcommon.postcalc import FPAddStage1Data
  15 from ieee754.fpcommon.msbhigh import FPMSBHigh
  16
  17
  18 from nmigen import Module, Signal, Elaboratable
  19 from math import log
  20
  21 from ieee754.fpcommon.fpbase import FPNumIn, FPNumOut, FPNumBaseRecord
  22 from ieee754.fpcommon.fpbase import FPState, FPNumBase
  23 from ieee754.fpcommon.getop import FPPipeContext
  24
  25 from ieee754.fpcommon.fpbase import FPNumDecode, FPNumBaseRecord
  26 from nmutil.singlepipe import SimpleHandshake, StageChain
  27
  28 from ieee754.fpcommon.fpbase import FPState
  29 from ieee754.pipeline import PipelineSpec
  30
  31
  32 class FPCVTIntToFloatMod(Elaboratable):
  33     """ FP integer conversion.
  34
  35         TODO: dynamic selection of signed/unsigned
  36     """
  37     def __init__(self, in_pspec, out_pspec):
  38         self.in_pspec = in_pspec
  39         self.out_pspec = out_pspec
  40         self.i = self.ispec()
  41         self.o = self.ospec()
  42
  43     def ispec(self):
  44         return FPADDBaseData(self.in_pspec)
  45
  46     def ospec(self):
  47         return FPAddStage1Data(self.out_pspec, e_extra=True)
  48
  49     def setup(self, m, i):
  50         """ links module to inputs and outputs
  51         """
  52         m.submodules.upconvert = self
  53         m.d.comb += self.i.eq(i)
  54
  55     def process(self, i):
  56         return self.o
  57
  58     def elaborate(self, platform):
  59         m = Module()
  60
  61         #m.submodules.sc_out_z = self.o.z
  62
  63         # decode: XXX really should move to separate stage
  64         print("in_width out", self.in_pspec.width,
  65               self.out_pspec.width)
  66         print("a1", self.in_pspec.width)
  67         z1 = self.o.z
  68         print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)
  69
  70         me = self.in_pspec.width
  71         ms = self.o.z.rmw - me
  72         print("ms-me", ms, me, self.o.z.rmw)
  73
  74         # 3 extra bits for guard/round/sticky
  75         msb = FPMSBHigh(me+3, z1.e_width)
  76         m.submodules.norm_msb = msb
  77
  78         # set input from full INT
  79         m.d.comb += msb.m_in.eq(Cat(0, 0, 0, self.i.a)) # g/r/s + input
  80         m.d.comb += msb.e_in.eq(me)                     # exp = int width
  81
  82         # conversion can mostly be done manually...
  83         zo = self.o.z
  84         m.d.comb += zo.s.eq(0)  # unsigned for now
  85         m.d.comb += zo.e.eq(msb.e_out)
  86         m.d.comb += zo.m[ms:].eq(msb.m_out[3:])
  87         m.d.comb += zo.create(zo.s, zo.e, zo.m) # ... here
  88
  89         # initialise rounding (but only activate if needed)
  90         m.d.comb += self.o.of.guard.eq(msb.m_out[2])
  91         m.d.comb += self.o.of.round_bit.eq(msb.m_out[1])
  92         m.d.comb += self.o.of.sticky.eq(msb.m_out[1])
  93         m.d.comb += self.o.of.m0.eq(msb.m_out[3])
  94
  95         # special cases active by default
  96         m.d.comb += self.o.out_do_z.eq(1)
  97
  98         # detect zero
  99         with m.If(~self.i.a.bool()):
 100             m.d.comb += self.o.z.zero(0)
 101         with m.Else():
 102             m.d.comb += self.o.out_do_z.eq(0) # activate normalisation
 103
 104         # copy the context (muxid, operator)
 105         m.d.comb += self.o.oz.eq(self.o.z.v)
 106         m.d.comb += self.o.ctx.eq(self.i.ctx)
 107
 108         return m
 109
 110
class FPCVTUpConvertMod(Elaboratable):
    """ FP up-conversion (lower to higher bitwidth)

        Decodes the operand ``i.a`` at the input width and re-expresses it
        in the output record ``o.z``.  ``o.out_do_z`` is left at 1 for every
        case except a non-zero-mantissa input whose exponent matches
        ``exp_n127``, where it is cleared to request normalisation.
    """
    def __init__(self, in_pspec, out_pspec):
        # both specs must provide a ``.width`` attribute (read in elaborate)
        self.in_pspec = in_pspec
        self.out_pspec = out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format, built from the output pipeline spec
        """
        return FPAddStage1Data(self.out_pspec, e_extra=False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.upconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        #m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        # (the print calls below are elaboration-time debug output)
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        # a1 is rebound from the plain record to the decoder wrapping it
        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
        m.d.comb += a1.v.eq(self.i.a)
        z1 = self.o.z
        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)

        # ms: difference between output and input "rmw" (presumably the
        # mantissa widths - confirm against FPNumBaseRecord).  it is the
        # bit offset at which the input mantissa is placed in z.m
        me = a1.rmw
        ms = self.o.z.rmw - a1.rmw
        print("ms-me", ms, me, self.o.z.rmw, a1.rmw)

        # conversion can mostly be done manually...
        # sign and exponent are copied; the mantissa goes into the top
        # bits of z.m (from bit ms upwards)
        m.d.comb += self.o.z.s.eq(a1.s)
        m.d.comb += self.o.z.e.eq(a1.e)
        m.d.comb += self.o.z.m[ms:].eq(a1.m)
        m.d.comb += self.o.z.create(a1.s, a1.e, self.o.z.m) # ... here

        # initialise rounding to all zeros (deactivate)
        m.d.comb += self.o.of.guard.eq(0)
        m.d.comb += self.o.of.round_bit.eq(0)
        m.d.comb += self.o.of.sticky.eq(0)
        m.d.comb += self.o.of.m0.eq(a1.m[0])

        # most special cases active (except tiny-number normalisation, below)
        m.d.comb += self.o.out_do_z.eq(1)

        # the branches below come after the defaults above, so their
        # assignments take priority when the condition holds

        # detect NaN/Inf first
        with m.If(a1.exp_128):
            with m.If(~a1.m_zero):
                m.d.comb += self.o.z.nan(0) # RISC-V wants normalised NaN
            with m.Else():
                m.d.comb += self.o.z.inf(a1.s) # RISC-V wants signed INF
        with m.Else():
            with m.If(a1.exp_n127):
                with m.If(~a1.m_zero):
                    # non-zero mantissa: re-load the mantissa with one extra
                    # zero bit below it and hand over to normalisation
                    m.d.comb += self.o.z.m[ms:].eq(Cat(0, a1.m))
                    m.d.comb += self.o.out_do_z.eq(0) # activate normalisation
                with m.Else():
                    # RISC-V zero needs actual zero
                    m.d.comb += self.o.z.zero(a1.s)

        # copy the context (muxid, operator)
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.ctx.eq(self.i.ctx)

        return m
 189
 190
class FPCVTDownConvertMod(Elaboratable):
    """ FP down-conversion (higher to lower bitwidth)

        Decodes the operand ``i.a`` at the input width and produces the
        narrower result in ``o.z``.  Zero, Inf, NaN, out-of-range and
        in-range inputs set ``o.out_do_z`` to 1; inputs whose exponent is
        below the output's minimum leave it unassigned and load the
        rounding bits in ``o.of`` instead.
    """
    def __init__(self, in_pspec, out_pspec):
        # both specs must provide a ``.width`` attribute (read in elaborate)
        self.in_pspec = in_pspec
        self.out_pspec = out_pspec
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format, built from the input pipeline spec
        """
        return FPADDBaseData(self.in_pspec)

    def ospec(self):
        """ output data format, built from the output pipeline spec
        """
        return FPAddStage1Data(self.out_pspec, e_extra=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.downconvert = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        #m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        # (the print calls below are elaboration-time debug output)
        print("in_width out", self.in_pspec.width,
              self.out_pspec.width)
        a1 = FPNumBaseRecord(self.in_pspec.width, False)
        print("a1", a1.width, a1.rmw, a1.e_width, a1.e_start, a1.e_end)
        # a1 is rebound from the plain record to the decoder wrapping it
        m.submodules.sc_decode_a = a1 = FPNumDecode(None, a1)
        m.d.comb += a1.v.eq(self.i.a)
        z1 = self.o.z
        print("z1", z1.width, z1.rmw, z1.e_width, z1.e_start, z1.e_end)

        # ms: difference between input and output "rmw" (presumably the
        # mantissa widths - confirm against FPNumBaseRecord), i.e. the
        # index in a1.m of the lowest bit that is kept
        me = a1.rmw
        ms = a1.rmw - self.o.z.rmw
        print("ms-me", ms, me)

        # intermediaries
        exp_sub_n126 = Signal((a1.e_width, True), reset_less=True)
        exp_gt127 = Signal(reset_less=True)
        # constants from z1, at the bit-width of a1.
        N126 = Const(z1.fp.N126.value, (a1.e_width, True))
        P127 = Const(z1.fp.P127.value, (a1.e_width, True))
        m.d.comb += exp_sub_n126.eq(a1.e - N126)
        m.d.comb += exp_gt127.eq(a1.e > P127)

        # if a zero, return zero (signed)
        with m.If(a1.exp_n127):
            m.d.comb += self.o.z.zero(a1.s)
            m.d.comb += self.o.out_do_z.eq(1)

        # if a range outside z's min range (-126)
        # (out_do_z is not assigned in this branch)
        with m.Elif(exp_sub_n126 < 0):
            # rounding bits are the bits of a1.m just below the kept part
            m.d.comb += self.o.of.guard.eq(a1.m[ms-1])
            m.d.comb += self.o.of.round_bit.eq(a1.m[ms-2])
            m.d.comb += self.o.of.sticky.eq(a1.m[:ms-2].bool())
            m.d.comb += self.o.of.m0.eq(a1.m[ms])  # bit of a1

            m.d.comb += self.o.z.s.eq(a1.s)
            m.d.comb += self.o.z.e.eq(a1.e)
            m.d.comb += self.o.z.m.eq(a1.m[-self.o.z.rmw-1:])
            # force the top mantissa bit to 1
            m.d.comb += self.o.z.m[-1].eq(1)

        # if a is inf return inf
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.z.inf(a1.s)
            m.d.comb += self.o.out_do_z.eq(1)

        # if a is NaN return NaN
        with m.Elif(a1.is_nan):
            m.d.comb += self.o.z.nan(0)
            m.d.comb += self.o.out_do_z.eq(1)

        # if a's exponent is greater than z's max (P127), return inf
        with m.Elif(exp_gt127):
            print("inf", self.o.z.inf(a1.s))
            m.d.comb += self.o.z.inf(a1.s)
            m.d.comb += self.o.out_do_z.eq(1)

        # ok after all that, anything else should fit fine (whew)
        with m.Else():
            m.d.comb += self.o.of.guard.eq(a1.m[ms-1])
            m.d.comb += self.o.of.round_bit.eq(a1.m[ms-2])
            m.d.comb += self.o.of.sticky.eq(a1.m[:ms-2].bool())
            m.d.comb += self.o.of.m0.eq(a1.m[ms])  # bit of a1

            # XXX TODO: this is basically duplicating FPRoundMod. hmmm...
            print("alen", a1.e_start, z1.fp.N126, N126)
            print("m1", self.o.z.rmw, a1.m[-self.o.z.rmw-1:])
            # mo: the slice a1.m[ms:me] of the input mantissa
            mo = Signal(self.o.z.m_width-1)
            m.d.comb += mo.eq(a1.m[ms:me])
            with m.If(self.o.of.roundz):
                # rounding: add one to mo; when mo is all 1s the
                # exponent is bumped by one as well
                with m.If((~mo == 0)):  # all 1s
                    m.d.comb += self.o.z.create(a1.s, a1.e+1, mo+1)
                with m.Else():
                    m.d.comb += self.o.z.create(a1.s, a1.e, mo+1)
            with m.Else():
                m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[-self.o.z.rmw-1:])
            m.d.comb += self.o.out_do_z.eq(1)

        # copy the context (muxid, operator)
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.ctx.eq(self.i.ctx)

        return m
 302
 303
 304 class FPCVTIntToFloat(FPState):
 305     """ Up-conversion
 306     """
 307
 308     def __init__(self, in_width, out_width, id_wid):
 309         FPState.__init__(self, "inttofloat")
 310         self.mod = FPCVTIntToFloatMod(in_width, out_width)
 311         self.out_z = self.mod.ospec()
 312         self.out_do_z = Signal(reset_less=True)
 313
 314     def setup(self, m, i):
 315         """ links module to inputs and outputs
 316         """
 317         self.mod.setup(m, i, self.out_do_z)
 318         m.d.sync += self.out_z.v.eq(self.mod.out_z.v)  # only take the output
 319         m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx)  # (and context)
 320
 321     def action(self, m):
 322         self.idsync(m)
 323         with m.If(self.out_do_z):
 324             m.next = "put_z"
 325         with m.Else():
 326             m.next = "denormalise"
 327
 328
 329 class FPCVTUpConvert(FPState):
 330     """ Up-conversion
 331     """
 332
 333     def __init__(self, in_width, out_width, id_wid):
 334         FPState.__init__(self, "upconvert")
 335         self.mod = FPCVTUpConvertMod(in_width, out_width)
 336         self.out_z = self.mod.ospec()
 337         self.out_do_z = Signal(reset_less=True)
 338
 339     def setup(self, m, i):
 340         """ links module to inputs and outputs
 341         """
 342         self.mod.setup(m, i, self.out_do_z)
 343         m.d.sync += self.out_z.v.eq(self.mod.out_z.v)  # only take the output
 344         m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx)  # (and context)
 345
 346     def action(self, m):
 347         self.idsync(m)
 348         with m.If(self.out_do_z):
 349             m.next = "put_z"
 350         with m.Else():
 351             m.next = "denormalise"
 352
 353
 354 class FPCVTDownConvert(FPState):
 355     """ special cases: NaNs, infs, zeros, denormalised
 356     """
 357
 358     def __init__(self, in_width, out_width, id_wid):
 359         FPState.__init__(self, "special_cases")
 360         self.mod = FPCVTDownConvertMod(in_width, out_width)
 361         self.out_z = self.mod.ospec()
 362         self.out_do_z = Signal(reset_less=True)
 363
 364     def setup(self, m, i):
 365         """ links module to inputs and outputs
 366         """
 367         self.mod.setup(m, i, self.out_do_z)
 368         m.d.sync += self.out_z.v.eq(self.mod.out_z.v)  # only take the output
 369         m.d.sync += self.out_z.ctx.eq(self.mod.o.ctx)  # (and context)
 370
 371     def action(self, m):
 372         self.idsync(m)
 373         with m.If(self.out_do_z):
 374             m.next = "put_z"
 375         with m.Else():
 376             m.next = "denormalise"
 377
 378
 379 class FPCVTIntToFloatDeNorm(FPState, SimpleHandshake):
 380     """ Upconvert
 381     """
 382
 383     def __init__(self, in_pspec, out_pspec):
 384         FPState.__init__(self, "inttofloat")
 385         sc = FPCVTIntToFloatMod(in_pspec, out_pspec)
 386         SimpleHandshake.__init__(self, sc)
 387         self.out = self.ospec(None)
 388
 389
 390 class FPCVTUpConvertDeNorm(FPState, SimpleHandshake):
 391     """ Upconvert
 392     """
 393
 394     def __init__(self, in_pspec, out_pspec):
 395         FPState.__init__(self, "upconvert")
 396         sc = FPCVTUpConvertMod(in_pspec, out_pspec)
 397         SimpleHandshake.__init__(self, sc)
 398         self.out = self.ospec(None)
 399
 400
 401 class FPCVTDownConvertDeNorm(FPState, SimpleHandshake):
 402     """ downconvert
 403     """
 404
 405     def __init__(self, in_pspec, out_pspec):
 406         FPState.__init__(self, "downconvert")
 407         sc = FPCVTDownConvertMod(in_pspec, out_pspec)
 408         SimpleHandshake.__init__(self, sc)
 409         self.out = self.ospec(None)
 410
 411
 412 class FPCVTIntBasePipe(ControlBase):
 413     def __init__(self, in_pspec, out_pspec):
 414         ControlBase.__init__(self)
 415         self.pipe1 = FPCVTIntToFloatDeNorm(in_pspec, out_pspec)
 416         self.pipe2 = FPNormToPack(out_pspec, e_extra=True)
 417
 418         self._eqs = self.connect([self.pipe1, self.pipe2])
 419
 420     def elaborate(self, platform):
 421         m = ControlBase.elaborate(self, platform)
 422         m.submodules.toint = self.pipe1
 423         m.submodules.normpack = self.pipe2
 424         m.d.comb += self._eqs
 425         return m
 426
 427
 428 class FPCVTUpBasePipe(ControlBase):
 429     def __init__(self, in_pspec, out_pspec):
 430         ControlBase.__init__(self)
 431         self.pipe1 = FPCVTUpConvertDeNorm(in_pspec, out_pspec)
 432         self.pipe2 = FPNormToPack(out_pspec, e_extra=False)
 433
 434         self._eqs = self.connect([self.pipe1, self.pipe2])
 435
 436     def elaborate(self, platform):
 437         m = ControlBase.elaborate(self, platform)
 438         m.submodules.up = self.pipe1
 439         m.submodules.normpack = self.pipe2
 440         m.d.comb += self._eqs
 441         return m
 442
 443
 444 class FPCVTDownBasePipe(ControlBase):
 445     def __init__(self, in_pspec, out_pspec):
 446         ControlBase.__init__(self)
 447         self.pipe1 = FPCVTDownConvertDeNorm(in_pspec, out_pspec)
 448         self.pipe2 = FPNormToPack(out_pspec, e_extra=True)
 449
 450         self._eqs = self.connect([self.pipe1, self.pipe2])
 451
 452     def elaborate(self, platform):
 453         m = ControlBase.elaborate(self, platform)
 454         m.submodules.down = self.pipe1
 455         m.submodules.normpack = self.pipe2
 456         m.d.comb += self._eqs
 457         return m
 458
 459
 460 class FPCVTIntMuxInOut(ReservationStations):
 461     """ Reservation-Station version of FPCVT int-to-float pipeline.
 462
 463         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 464         * 2-stage multiplier pipeline
 465         * fan-out on outputs (an array of FPPackData: z,mid)
 466
 467         Fan-in and Fan-out are combinatorial.
 468     """
 469
 470     def __init__(self, in_width, out_width, num_rows, op_wid=0):
 471         self.op_wid = op_wid
 472         self.id_wid = num_bits(in_width)
 473         self.out_id_wid = num_bits(out_width)
 474
 475         self.in_pspec = PipelineSpec(in_width, self.id_wid, self.op_wid)
 476         self.out_pspec = PipelineSpec(out_width, self.out_id_wid, op_wid)
 477
 478         self.alu = FPCVTIntBasePipe(self.in_pspec, self.out_pspec)
 479         ReservationStations.__init__(self, num_rows)
 480
 481     def i_specfn(self):
 482         return FPADDBaseData(self.in_pspec)
 483
 484     def o_specfn(self):
 485         return FPPackData(self.out_pspec)
 486
 487
 488 class FPCVTUpMuxInOut(ReservationStations):
 489     """ Reservation-Station version of FPCVT up pipeline.
 490
 491         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 492         * 2-stage multiplier pipeline
 493         * fan-out on outputs (an array of FPPackData: z,mid)
 494
 495         Fan-in and Fan-out are combinatorial.
 496     """
 497
 498     def __init__(self, in_width, out_width, num_rows, op_wid=0):
 499         self.op_wid = op_wid
 500         self.id_wid = num_bits(in_width)
 501         self.out_id_wid = num_bits(out_width)
 502
 503         self.in_pspec = PipelineSpec(in_width, self.id_wid, self.op_wid)
 504         self.out_pspec = PipelineSpec(out_width, self.out_id_wid, op_wid)
 505
 506         self.alu = FPCVTUpBasePipe(self.in_pspec, self.out_pspec)
 507         ReservationStations.__init__(self, num_rows)
 508
 509     def i_specfn(self):
 510         return FPADDBaseData(self.in_pspec)
 511
 512     def o_specfn(self):
 513         return FPPackData(self.out_pspec)
 514
 515
 516 class FPCVTDownMuxInOut(ReservationStations):
 517     """ Reservation-Station version of FPCVT pipeline.
 518
 519         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 520         * 2-stage multiplier pipeline
 521         * fan-out on outputs (an array of FPPackData: z,mid)
 522
 523         Fan-in and Fan-out are combinatorial.
 524     """
 525
 526     def __init__(self, in_width, out_width, num_rows, op_wid=0):
 527         self.op_wid = op_wid
 528         self.id_wid = num_bits(in_width)
 529         self.out_id_wid = num_bits(out_width)
 530
 531         self.in_pspec = PipelineSpec(in_width, self.id_wid, self.op_wid)
 532         self.out_pspec = PipelineSpec(out_width, self.out_id_wid, op_wid)
 533
 534         self.alu = FPCVTDownBasePipe(self.in_pspec, self.out_pspec)
 535         ReservationStations.__init__(self, num_rows)
 536
 537     def i_specfn(self):
 538         return FPADDBaseData(self.in_pspec)
 539
 540     def o_specfn(self):
 541         return FPPackData(self.out_pspec)