src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
  19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  20 from fpcommon.postcalc import FPAddStage1Data
  21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
  22                             FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
  23
  24
  25 class FPAddSpecialCasesMod:
  26     """ special cases: NaNs, infs, zeros, denormalised
  27         NOTE: some of these are unique to add.  see "Special Operations"
  28         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
  29     """
  30
  31     def __init__(self, width, id_wid):
  32         self.width = width
  33         self.id_wid = id_wid
  34         self.i = self.ispec()
  35         self.o = self.ospec()
  36
  37     def ispec(self):
  38         return FPADDBaseData(self.width, self.id_wid)
  39
  40     def ospec(self):
  41         return FPSCData(self.width, self.id_wid)
  42
  43     def setup(self, m, i):
  44         """ links module to inputs and outputs
  45         """
  46         m.submodules.specialcases = self
  47         m.d.comb += self.i.eq(i)
  48
  49     def process(self, i):
  50         return self.o
  51
  52     def elaborate(self, platform):
  53         m = Module()
  54
  55         m.submodules.sc_out_z = self.o.z
  56
  57         # decode: XXX really should move to separate stage
  58         a1 = FPNumIn(None, self.width)
  59         b1 = FPNumIn(None, self.width)
  60         m.submodules.sc_decode_a = a1
  61         m.submodules.sc_decode_b = b1
  62         m.d.comb += [a1.decode(self.i.a),
  63                      b1.decode(self.i.b),
  64                     ]
  65
  66         s_nomatch = Signal()
  67         m.d.comb += s_nomatch.eq(a1.s != b1.s)
  68
  69         m_match = Signal()
  70         m.d.comb += m_match.eq(a1.m == b1.m)
  71
  72         # if a is NaN or b is NaN return NaN
  73         with m.If(a1.is_nan | b1.is_nan):
  74             m.d.comb += self.o.out_do_z.eq(1)
  75             m.d.comb += self.o.z.nan(0)
  76
  77         # XXX WEIRDNESS for FP16 non-canonical NaN handling
  78         # under review
  79
  80         ## if a is zero and b is NaN return -b
  81         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
  82         #    m.d.comb += self.o.out_do_z.eq(1)
  83         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
  84
  85         ## if b is zero and a is NaN return -a
  86         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
  87         #    m.d.comb += self.o.out_do_z.eq(1)
  88         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
  89
  90         ## if a is -zero and b is NaN return -b
  91         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
  92         #    m.d.comb += self.o.out_do_z.eq(1)
  93         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
  94
  95         ## if b is -zero and a is NaN return -a
  96         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
  97         #    m.d.comb += self.o.out_do_z.eq(1)
  98         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
  99
 100         # if a is inf return inf (or NaN)
 101         with m.Elif(a1.is_inf):
 102             m.d.comb += self.o.out_do_z.eq(1)
 103             m.d.comb += self.o.z.inf(a1.s)
 104             # if a is inf and signs don't match return NaN
 105             with m.If(b1.exp_128 & s_nomatch):
 106                 m.d.comb += self.o.z.nan(0)
 107
 108         # if b is inf return inf
 109         with m.Elif(b1.is_inf):
 110             m.d.comb += self.o.out_do_z.eq(1)
 111             m.d.comb += self.o.z.inf(b1.s)
 112
 113         # if a is zero and b zero return signed-a/b
 114         with m.Elif(a1.is_zero & b1.is_zero):
 115             m.d.comb += self.o.out_do_z.eq(1)
 116             m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
 117
 118         # if a is zero return b
 119         with m.Elif(a1.is_zero):
 120             m.d.comb += self.o.out_do_z.eq(1)
 121             m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
 122
 123         # if b is zero return a
 124         with m.Elif(b1.is_zero):
 125             m.d.comb += self.o.out_do_z.eq(1)
 126             m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
 127
 128         # if a equal to -b return zero (+ve zero)
 129         with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
 130             m.d.comb += self.o.out_do_z.eq(1)
 131             m.d.comb += self.o.z.zero(0)
 132
 133         # Denormalised Number checks next, so pass a/b data through
 134         with m.Else():
 135             m.d.comb += self.o.out_do_z.eq(0)
 136             m.d.comb += self.o.a.eq(a1)
 137             m.d.comb += self.o.b.eq(b1)
 138
 139         m.d.comb += self.o.oz.eq(self.o.z.v)
 140         m.d.comb += self.o.mid.eq(self.i.mid)
 141
 142         return m
 143
 144
 145 class FPID:
 146     def __init__(self, id_wid):
 147         self.id_wid = id_wid
 148         if self.id_wid:
 149             self.in_mid = Signal(id_wid, reset_less=True)
 150             self.out_mid = Signal(id_wid, reset_less=True)
 151         else:
 152             self.in_mid = None
 153             self.out_mid = None
 154
 155     def idsync(self, m):
 156         if self.id_wid is not None:
 157             m.d.sync += self.out_mid.eq(self.in_mid)
 158
 159
 160 class FPAddSpecialCases(FPState):
 161     """ special cases: NaNs, infs, zeros, denormalised
 162         NOTE: some of these are unique to add.  see "Special Operations"
 163         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 164     """
 165
 166     def __init__(self, width, id_wid):
 167         FPState.__init__(self, "special_cases")
 168         self.mod = FPAddSpecialCasesMod(width)
 169         self.out_z = self.mod.ospec()
 170         self.out_do_z = Signal(reset_less=True)
 171
 172     def setup(self, m, i):
 173         """ links module to inputs and outputs
 174         """
 175         self.mod.setup(m, i, self.out_do_z)
 176         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 177         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 178
 179     def action(self, m):
 180         self.idsync(m)
 181         with m.If(self.out_do_z):
 182             m.next = "put_z"
 183         with m.Else():
 184             m.next = "denormalise"
 185
 186
 187 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 188     """ special cases: NaNs, infs, zeros, denormalised
 189         NOTE: some of these are unique to add.  see "Special Operations"
 190         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 191     """
 192
 193     def __init__(self, width, id_wid):
 194         FPState.__init__(self, "special_cases")
 195         self.width = width
 196         self.id_wid = id_wid
 197         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 198         self.out = self.ospec()
 199
 200     def ispec(self):
 201         return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
 202
 203     def ospec(self):
 204         return FPSCData(self.width, self.id_wid) # DeNorm ospec
 205
 206     def setup(self, m, i):
 207         """ links module to inputs and outputs
 208         """
 209         smod = FPAddSpecialCasesMod(self.width, self.id_wid)
 210         dmod = FPAddDeNormMod(self.width, self.id_wid)
 211
 212         chain = StageChain([smod, dmod])
 213         chain.setup(m, i)
 214
 215         # only needed for break-out (early-out)
 216         # self.out_do_z = smod.o.out_do_z
 217
 218         self.o = dmod.o
 219
 220     def process(self, i):
 221         return self.o
 222
 223     def action(self, m):
 224         # for break-out (early-out)
 225         #with m.If(self.out_do_z):
 226         #    m.next = "put_z"
 227         #with m.Else():
 228             m.d.sync += self.out.eq(self.process(None))
 229             m.next = "align"
 230
 231
 232 class FPAddAlignMultiMod(FPState):
 233
 234     def __init__(self, width):
 235         self.in_a = FPNumBase(width)
 236         self.in_b = FPNumBase(width)
 237         self.out_a = FPNumIn(None, width)
 238         self.out_b = FPNumIn(None, width)
 239         self.exp_eq = Signal(reset_less=True)
 240
 241     def elaborate(self, platform):
 242         # This one however (single-cycle) will do the shift
 243         # in one go.
 244
 245         m = Module()
 246
 247         m.submodules.align_in_a = self.in_a
 248         m.submodules.align_in_b = self.in_b
 249         m.submodules.align_out_a = self.out_a
 250         m.submodules.align_out_b = self.out_b
 251
 252         # NOTE: this does *not* do single-cycle multi-shifting,
 253         #       it *STAYS* in the align state until exponents match
 254
 255         # exponent of a greater than b: shift b down
 256         m.d.comb += self.exp_eq.eq(0)
 257         m.d.comb += self.out_a.eq(self.in_a)
 258         m.d.comb += self.out_b.eq(self.in_b)
 259         agtb = Signal(reset_less=True)
 260         altb = Signal(reset_less=True)
 261         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 262         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 263         with m.If(agtb):
 264             m.d.comb += self.out_b.shift_down(self.in_b)
 265         # exponent of b greater than a: shift a down
 266         with m.Elif(altb):
 267             m.d.comb += self.out_a.shift_down(self.in_a)
 268         # exponents equal: move to next stage.
 269         with m.Else():
 270             m.d.comb += self.exp_eq.eq(1)
 271         return m
 272
 273
 274 class FPAddAlignMulti(FPState):
 275
 276     def __init__(self, width, id_wid):
 277         FPState.__init__(self, "align")
 278         self.mod = FPAddAlignMultiMod(width)
 279         self.out_a = FPNumIn(None, width)
 280         self.out_b = FPNumIn(None, width)
 281         self.exp_eq = Signal(reset_less=True)
 282
 283     def setup(self, m, in_a, in_b):
 284         """ links module to inputs and outputs
 285         """
 286         m.submodules.align = self.mod
 287         m.d.comb += self.mod.in_a.eq(in_a)
 288         m.d.comb += self.mod.in_b.eq(in_b)
 289         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 290         m.d.sync += self.out_a.eq(self.mod.out_a)
 291         m.d.sync += self.out_b.eq(self.mod.out_b)
 292
 293     def action(self, m):
 294         with m.If(self.exp_eq):
 295             m.next = "add_0"
 296
 297
 298 class FPNumIn2Ops:
 299
 300     def __init__(self, width, id_wid):
 301         self.a = FPNumIn(None, width)
 302         self.b = FPNumIn(None, width)
 303         self.z = FPNumOut(width, False)
 304         self.out_do_z = Signal(reset_less=True)
 305         self.oz = Signal(width, reset_less=True)
 306         self.mid = Signal(id_wid, reset_less=True)
 307
 308     def eq(self, i):
 309         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 310                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 311
 312
 313 class FPAddAlignSingleMod:
 314
 315     def __init__(self, width, id_wid):
 316         self.width = width
 317         self.id_wid = id_wid
 318         self.i = self.ispec()
 319         self.o = self.ospec()
 320
 321     def ispec(self):
 322         return FPSCData(self.width, self.id_wid)
 323
 324     def ospec(self):
 325         return FPNumIn2Ops(self.width, self.id_wid)
 326
 327     def process(self, i):
 328         return self.o
 329
 330     def setup(self, m, i):
 331         """ links module to inputs and outputs
 332         """
 333         m.submodules.align = self
 334         m.d.comb += self.i.eq(i)
 335
 336     def elaborate(self, platform):
 337         """ Aligns A against B or B against A, depending on which has the
 338             greater exponent.  This is done in a *single* cycle using
 339             variable-width bit-shift
 340
 341             the shifter used here is quite expensive in terms of gates.
 342             Mux A or B in (and out) into temporaries, as only one of them
 343             needs to be aligned against the other
 344         """
 345         m = Module()
 346
 347         m.submodules.align_in_a = self.i.a
 348         m.submodules.align_in_b = self.i.b
 349         m.submodules.align_out_a = self.o.a
 350         m.submodules.align_out_b = self.o.b
 351
 352         # temporary (muxed) input and output to be shifted
 353         t_inp = FPNumBase(self.width)
 354         t_out = FPNumIn(None, self.width)
 355         espec = (len(self.i.a.e), True)
 356         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 357         m.submodules.align_t_in = t_inp
 358         m.submodules.align_t_out = t_out
 359         m.submodules.multishift_r = msr
 360
 361         ediff = Signal(espec, reset_less=True)
 362         ediffr = Signal(espec, reset_less=True)
 363         tdiff = Signal(espec, reset_less=True)
 364         elz = Signal(reset_less=True)
 365         egz = Signal(reset_less=True)
 366
 367         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 368         m.d.comb += msr.inp.eq(t_inp.m)
 369         m.d.comb += msr.diff.eq(tdiff)
 370         m.d.comb += t_out.m.eq(msr.m)
 371         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 372         m.d.comb += t_out.s.eq(t_inp.s)
 373
 374         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 375         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 376         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 377         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 378
 379         # default: A-exp == B-exp, A and B untouched (fall through)
 380         m.d.comb += self.o.a.eq(self.i.a)
 381         m.d.comb += self.o.b.eq(self.i.b)
 382         # only one shifter (muxed)
 383         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 384         # exponent of a greater than b: shift b down
 385         with m.If(~self.i.out_do_z):
 386             with m.If(egz):
 387                 m.d.comb += [t_inp.eq(self.i.b),
 388                              tdiff.eq(ediff),
 389                              self.o.b.eq(t_out),
 390                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 391                             ]
 392             # exponent of b greater than a: shift a down
 393             with m.Elif(elz):
 394                 m.d.comb += [t_inp.eq(self.i.a),
 395                              tdiff.eq(ediffr),
 396                              self.o.a.eq(t_out),
 397                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 398                             ]
 399
 400         m.d.comb += self.o.mid.eq(self.i.mid)
 401         m.d.comb += self.o.z.eq(self.i.z)
 402         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 403         m.d.comb += self.o.oz.eq(self.i.oz)
 404
 405         return m
 406
 407
 408 class FPAddAlignSingle(FPState):
 409
 410     def __init__(self, width, id_wid):
 411         FPState.__init__(self, "align")
 412         self.mod = FPAddAlignSingleMod(width, id_wid)
 413         self.out_a = FPNumIn(None, width)
 414         self.out_b = FPNumIn(None, width)
 415
 416     def setup(self, m, i):
 417         """ links module to inputs and outputs
 418         """
 419         self.mod.setup(m, i)
 420
 421         # NOTE: could be done as comb
 422         m.d.sync += self.out_a.eq(self.mod.out_a)
 423         m.d.sync += self.out_b.eq(self.mod.out_b)
 424
 425     def action(self, m):
 426         m.next = "add_0"
 427
 428
 429 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 430
 431     def __init__(self, width, id_wid):
 432         FPState.__init__(self, "align")
 433         self.width = width
 434         self.id_wid = id_wid
 435         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 436         self.a1o = self.ospec()
 437
 438     def ispec(self):
 439         return FPSCData(self.width, self.id_wid)
 440
 441     def ospec(self):
 442         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 443
 444     def setup(self, m, i):
 445         """ links module to inputs and outputs
 446         """
 447
 448         # chain AddAlignSingle, AddStage0 and AddStage1
 449         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 450         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 451         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 452
 453         chain = StageChain([mod, a0mod, a1mod])
 454         chain.setup(m, i)
 455
 456         self.o = a1mod.o
 457
 458     def process(self, i):
 459         return self.o
 460
 461     def action(self, m):
 462         m.d.sync += self.a1o.eq(self.process(None))
 463         m.next = "normalise_1"
 464
 465
 466 class FPAddStage0Data:
 467
 468     def __init__(self, width, id_wid):
 469         self.z = FPNumBase(width, False)
 470         self.out_do_z = Signal(reset_less=True)
 471         self.oz = Signal(width, reset_less=True)
 472         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 473         self.mid = Signal(id_wid, reset_less=True)
 474
 475     def eq(self, i):
 476         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 477                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 478
 479
 480 class FPAddStage0Mod:
 481
 482     def __init__(self, width, id_wid):
 483         self.width = width
 484         self.id_wid = id_wid
 485         self.i = self.ispec()
 486         self.o = self.ospec()
 487
 488     def ispec(self):
 489         return FPSCData(self.width, self.id_wid)
 490
 491     def ospec(self):
 492         return FPAddStage0Data(self.width, self.id_wid)
 493
 494     def process(self, i):
 495         return self.o
 496
 497     def setup(self, m, i):
 498         """ links module to inputs and outputs
 499         """
 500         m.submodules.add0 = self
 501         m.d.comb += self.i.eq(i)
 502
 503     def elaborate(self, platform):
 504         m = Module()
 505         m.submodules.add0_in_a = self.i.a
 506         m.submodules.add0_in_b = self.i.b
 507         m.submodules.add0_out_z = self.o.z
 508
 509         # store intermediate tests (and zero-extended mantissas)
 510         seq = Signal(reset_less=True)
 511         mge = Signal(reset_less=True)
 512         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 513         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 514         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 515                      mge.eq(self.i.a.m >= self.i.b.m),
 516                      am0.eq(Cat(self.i.a.m, 0)),
 517                      bm0.eq(Cat(self.i.b.m, 0))
 518                     ]
 519         # same-sign (both negative or both positive) add mantissas
 520         with m.If(~self.i.out_do_z):
 521             m.d.comb += self.o.z.e.eq(self.i.a.e)
 522             with m.If(seq):
 523                 m.d.comb += [
 524                     self.o.tot.eq(am0 + bm0),
 525                     self.o.z.s.eq(self.i.a.s)
 526                 ]
 527             # a mantissa greater than b, use a
 528             with m.Elif(mge):
 529                 m.d.comb += [
 530                     self.o.tot.eq(am0 - bm0),
 531                     self.o.z.s.eq(self.i.a.s)
 532                 ]
 533             # b mantissa greater than a, use b
 534             with m.Else():
 535                 m.d.comb += [
 536                     self.o.tot.eq(bm0 - am0),
 537                     self.o.z.s.eq(self.i.b.s)
 538             ]
 539
 540         m.d.comb += self.o.oz.eq(self.i.oz)
 541         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 542         m.d.comb += self.o.mid.eq(self.i.mid)
 543         return m
 544
 545
 546 class FPAddStage0(FPState):
 547     """ First stage of add.  covers same-sign (add) and subtract
 548         special-casing when mantissas are greater or equal, to
 549         give greatest accuracy.
 550     """
 551
 552     def __init__(self, width, id_wid):
 553         FPState.__init__(self, "add_0")
 554         self.mod = FPAddStage0Mod(width)
 555         self.o = self.mod.ospec()
 556
 557     def setup(self, m, i):
 558         """ links module to inputs and outputs
 559         """
 560         self.mod.setup(m, i)
 561
 562         # NOTE: these could be done as combinatorial (merge add0+add1)
 563         m.d.sync += self.o.eq(self.mod.o)
 564
 565     def action(self, m):
 566         m.next = "add_1"
 567
 568
 569 class FPAddStage1Mod(FPState):
 570     """ Second stage of add: preparation for normalisation.
 571         detects when tot sum is too big (tot[27] is kinda a carry bit)
 572     """
 573
 574     def __init__(self, width, id_wid):
 575         self.width = width
 576         self.id_wid = id_wid
 577         self.i = self.ispec()
 578         self.o = self.ospec()
 579
 580     def ispec(self):
 581         return FPAddStage0Data(self.width, self.id_wid)
 582
 583     def ospec(self):
 584         return FPAddStage1Data(self.width, self.id_wid)
 585
 586     def process(self, i):
 587         return self.o
 588
 589     def setup(self, m, i):
 590         """ links module to inputs and outputs
 591         """
 592         m.submodules.add1 = self
 593         m.submodules.add1_out_overflow = self.o.of
 594
 595         m.d.comb += self.i.eq(i)
 596
 597     def elaborate(self, platform):
 598         m = Module()
 599         m.d.comb += self.o.z.eq(self.i.z)
 600         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 601         with m.If(~self.i.out_do_z):
 602             with m.If(self.i.tot[-1]):
 603                 m.d.comb += [
 604                     self.o.z.m.eq(self.i.tot[4:]),
 605                     self.o.of.m0.eq(self.i.tot[4]),
 606                     self.o.of.guard.eq(self.i.tot[3]),
 607                     self.o.of.round_bit.eq(self.i.tot[2]),
 608                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 609                     self.o.z.e.eq(self.i.z.e + 1)
 610             ]
 611             # tot[-1] (MSB) zero case
 612             with m.Else():
 613                 m.d.comb += [
 614                     self.o.z.m.eq(self.i.tot[3:]),
 615                     self.o.of.m0.eq(self.i.tot[3]),
 616                     self.o.of.guard.eq(self.i.tot[2]),
 617                     self.o.of.round_bit.eq(self.i.tot[1]),
 618                     self.o.of.sticky.eq(self.i.tot[0])
 619             ]
 620
 621         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 622         m.d.comb += self.o.oz.eq(self.i.oz)
 623         m.d.comb += self.o.mid.eq(self.i.mid)
 624
 625         return m
 626
 627
 628 class FPAddStage1(FPState):
 629
 630     def __init__(self, width, id_wid):
 631         FPState.__init__(self, "add_1")
 632         self.mod = FPAddStage1Mod(width)
 633         self.out_z = FPNumBase(width, False)
 634         self.out_of = Overflow()
 635         self.norm_stb = Signal()
 636
 637     def setup(self, m, i):
 638         """ links module to inputs and outputs
 639         """
 640         self.mod.setup(m, i)
 641
 642         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 643
 644         m.d.sync += self.out_of.eq(self.mod.out_of)
 645         m.d.sync += self.out_z.eq(self.mod.out_z)
 646         m.d.sync += self.norm_stb.eq(1)
 647
 648     def action(self, m):
 649         m.next = "normalise_1"
 650
 651
 652
 653
 654     def __init__(self, width, single_cycle=True):
 655         self.width = width
 656         self.in_select = Signal(reset_less=True)
 657         self.in_z = FPNumBase(width, False)
 658         self.in_of = Overflow()
 659         self.temp_z = FPNumBase(width, False)
 660         self.temp_of = Overflow()
 661         self.out_z = FPNumBase(width, False)
 662         self.out_of = Overflow()
 663
 664     def elaborate(self, platform):
 665         m = Module()
 666
 667         m.submodules.norm1_out_z = self.out_z
 668         m.submodules.norm1_out_overflow = self.out_of
 669         m.submodules.norm1_temp_z = self.temp_z
 670         m.submodules.norm1_temp_of = self.temp_of
 671         m.submodules.norm1_in_z = self.in_z
 672         m.submodules.norm1_in_overflow = self.in_of
 673
 674         in_z = FPNumBase(self.width, False)
 675         in_of = Overflow()
 676         m.submodules.norm1_insel_z = in_z
 677         m.submodules.norm1_insel_overflow = in_of
 678
 679         # select which of temp or in z/of to use
 680         with m.If(self.in_select):
 681             m.d.comb += in_z.eq(self.in_z)
 682             m.d.comb += in_of.eq(self.in_of)
 683         with m.Else():
 684             m.d.comb += in_z.eq(self.temp_z)
 685             m.d.comb += in_of.eq(self.temp_of)
 686         # initialise out from in (overridden below)
 687         m.d.comb += self.out_z.eq(in_z)
 688         m.d.comb += self.out_of.eq(in_of)
 689         # normalisation increase/decrease conditions
 690         decrease = Signal(reset_less=True)
 691         increase = Signal(reset_less=True)
 692         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 693         m.d.comb += increase.eq(in_z.exp_lt_n126)
 694         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
 695         # decrease exponent
 696         with m.If(decrease):
 697             m.d.comb += [
 698                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
 699                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
 700                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
 701                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
 702                 self.out_of.round_bit.eq(0),        # reset round bit
 703                 self.out_of.m0.eq(in_of.guard),
 704             ]
 705         # increase exponent
 706         with m.Elif(increase):
 707             m.d.comb += [
 708                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
 709                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
 710                 self.out_of.guard.eq(in_z.m[0]),
 711                 self.out_of.m0.eq(in_z.m[1]),
 712                 self.out_of.round_bit.eq(in_of.guard),
 713                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
 714             ]
 715
 716         return m
 717
 718
 719 class FPNormToPack(FPState, UnbufferedPipeline):
 720
 721     def __init__(self, width, id_wid):
 722         FPState.__init__(self, "normalise_1")
 723         self.id_wid = id_wid
 724         self.width = width
 725         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 726
 727     def ispec(self):
 728         return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec
 729
 730     def ospec(self):
 731         return FPPackData(self.width, self.id_wid) # FPPackMod ospec
 732
 733     def setup(self, m, i):
 734         """ links module to inputs and outputs
 735         """
 736
 737         # Normalisation, Rounding Corrections, Pack - in a chain
 738         nmod = FPNorm1ModSingle(self.width, self.id_wid)
 739         rmod = FPRoundMod(self.width, self.id_wid)
 740         cmod = FPCorrectionsMod(self.width, self.id_wid)
 741         pmod = FPPackMod(self.width, self.id_wid)
 742         chain = StageChain([nmod, rmod, cmod, pmod])
 743         chain.setup(m, i)
 744         self.out_z = pmod.ospec()
 745
 746         self.o = pmod.o
 747
 748     def process(self, i):
 749         return self.o
 750
 751     def action(self, m):
 752         m.d.sync += self.out_z.eq(self.process(None))
 753         m.next = "pack_put_z"
 754
 755
 756 class FPRoundData:
 757
 758     def __init__(self, width, id_wid):
 759         self.z = FPNumBase(width, False)
 760         self.out_do_z = Signal(reset_less=True)
 761         self.oz = Signal(width, reset_less=True)
 762         self.mid = Signal(id_wid, reset_less=True)
 763
 764     def eq(self, i):
 765         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 766                 self.mid.eq(i.mid)]
 767
 768
 769 class FPRoundMod:
 770
 771     def __init__(self, width, id_wid):
 772         self.width = width
 773         self.id_wid = id_wid
 774         self.i = self.ispec()
 775         self.out_z = self.ospec()
 776
 777     def ispec(self):
 778         return FPNorm1Data(self.width, self.id_wid)
 779
 780     def ospec(self):
 781         return FPRoundData(self.width, self.id_wid)
 782
 783     def process(self, i):
 784         return self.out_z
 785
 786     def setup(self, m, i):
 787         m.submodules.roundz = self
 788         m.d.comb += self.i.eq(i)
 789
 790     def elaborate(self, platform):
 791         m = Module()
 792         m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
 793         with m.If(~self.i.out_do_z):
 794             with m.If(self.i.roundz):
 795                 m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa up
 796                 with m.If(self.i.z.m == self.i.z.m1s): # all 1s
 797                     m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
 798
 799         return m
 800
 801
 802 class FPRound(FPState):
 803
 804     def __init__(self, width, id_wid):
 805         FPState.__init__(self, "round")
 806         self.mod = FPRoundMod(width)
 807         self.out_z = self.ospec()
 808
 809     def ispec(self):
 810         return self.mod.ispec()
 811
 812     def ospec(self):
 813         return self.mod.ospec()
 814
 815     def setup(self, m, i):
 816         """ links module to inputs and outputs
 817         """
 818         self.mod.setup(m, i)
 819
 820         self.idsync(m)
 821         m.d.sync += self.out_z.eq(self.mod.out_z)
 822         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
 823
 824     def action(self, m):
 825         m.next = "corrections"
 826
 827
 828 class FPCorrectionsMod:
 829
 830     def __init__(self, width, id_wid):
 831         self.width = width
 832         self.id_wid = id_wid
 833         self.i = self.ispec()
 834         self.out_z = self.ospec()
 835
 836     def ispec(self):
 837         return FPRoundData(self.width, self.id_wid)
 838
 839     def ospec(self):
 840         return FPRoundData(self.width, self.id_wid)
 841
 842     def process(self, i):
 843         return self.out_z
 844
 845     def setup(self, m, i):
 846         """ links module to inputs and outputs
 847         """
 848         m.submodules.corrections = self
 849         m.d.comb += self.i.eq(i)
 850
 851     def elaborate(self, platform):
 852         m = Module()
 853         m.submodules.corr_in_z = self.i.z
 854         m.submodules.corr_out_z = self.out_z.z
 855         m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
 856         with m.If(~self.i.out_do_z):
 857             with m.If(self.i.z.is_denormalised):
 858                 m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
 859         return m
 860
 861
 862 class FPCorrections(FPState):
 863
 864     def __init__(self, width, id_wid):
 865         FPState.__init__(self, "corrections")
 866         self.mod = FPCorrectionsMod(width)
 867         self.out_z = self.ospec()
 868
 869     def ispec(self):
 870         return self.mod.ispec()
 871
 872     def ospec(self):
 873         return self.mod.ospec()
 874
 875     def setup(self, m, in_z):
 876         """ links module to inputs and outputs
 877         """
 878         self.mod.setup(m, in_z)
 879
 880         m.d.sync += self.out_z.eq(self.mod.out_z)
 881         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
 882
 883     def action(self, m):
 884         m.next = "pack"
 885
 886
 887 class FPPackData:
 888
 889     def __init__(self, width, id_wid):
 890         self.z = Signal(width, reset_less=True)
 891         self.mid = Signal(id_wid, reset_less=True)
 892
 893     def eq(self, i):
 894         return [self.z.eq(i.z), self.mid.eq(i.mid)]
 895
 896     def ports(self):
 897         return [self.z, self.mid]
 898
 899
 900 class FPPackMod:
 901
 902     def __init__(self, width, id_wid):
 903         self.width = width
 904         self.id_wid = id_wid
 905         self.i = self.ispec()
 906         self.o = self.ospec()
 907
 908     def ispec(self):
 909         return FPRoundData(self.width, self.id_wid)
 910
 911     def ospec(self):
 912         return FPPackData(self.width, self.id_wid)
 913
 914     def process(self, i):
 915         return self.o
 916
 917     def setup(self, m, in_z):
 918         """ links module to inputs and outputs
 919         """
 920         m.submodules.pack = self
 921         m.d.comb += self.i.eq(in_z)
 922
 923     def elaborate(self, platform):
 924         m = Module()
 925         z = FPNumOut(self.width, False)
 926         m.submodules.pack_in_z = self.i.z
 927         m.submodules.pack_out_z = z
 928         m.d.comb += self.o.mid.eq(self.i.mid)
 929         with m.If(~self.i.out_do_z):
 930             with m.If(self.i.z.is_overflowed):
 931                 m.d.comb += z.inf(self.i.z.s)
 932             with m.Else():
 933                 m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
 934         with m.Else():
 935             m.d.comb += z.v.eq(self.i.oz)
 936         m.d.comb += self.o.z.eq(z.v)
 937         return m
 938
 939
 940 class FPPack(FPState):
 941
 942     def __init__(self, width, id_wid):
 943         FPState.__init__(self, "pack")
 944         self.mod = FPPackMod(width)
 945         self.out_z = self.ospec()
 946
 947     def ispec(self):
 948         return self.mod.ispec()
 949
 950     def ospec(self):
 951         return self.mod.ospec()
 952
 953     def setup(self, m, in_z):
 954         """ links module to inputs and outputs
 955         """
 956         self.mod.setup(m, in_z)
 957
 958         m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
 959         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
 960
 961     def action(self, m):
 962         m.next = "pack_put_z"
 963
 964
 965 class FPPutZ(FPState):
 966
 967     def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
 968         FPState.__init__(self, state)
 969         if to_state is None:
 970             to_state = "get_ops"
 971         self.to_state = to_state
 972         self.in_z = in_z
 973         self.out_z = out_z
 974         self.in_mid = in_mid
 975         self.out_mid = out_mid
 976
 977     def action(self, m):
 978         if self.in_mid is not None:
 979             m.d.sync += self.out_mid.eq(self.in_mid)
 980         m.d.sync += [
 981           self.out_z.z.v.eq(self.in_z)
 982         ]
 983         with m.If(self.out_z.z.stb & self.out_z.z.ack):
 984             m.d.sync += self.out_z.z.stb.eq(0)
 985             m.next = self.to_state
 986         with m.Else():
 987             m.d.sync += self.out_z.z.stb.eq(1)
 988
 989
 990 class FPPutZIdx(FPState):
 991
 992     def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
 993         FPState.__init__(self, state)
 994         if to_state is None:
 995             to_state = "get_ops"
 996         self.to_state = to_state
 997         self.in_z = in_z
 998         self.out_zs = out_zs
 999         self.in_mid = in_mid
1000
1001     def action(self, m):
1002         outz_stb = Signal(reset_less=True)
1003         outz_ack = Signal(reset_less=True)
1004         m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
1005                      outz_ack.eq(self.out_zs[self.in_mid].ack),
1006                     ]
1007         m.d.sync += [
1008           self.out_zs[self.in_mid].v.eq(self.in_z.v)
1009         ]
1010         with m.If(outz_stb & outz_ack):
1011             m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
1012             m.next = self.to_state
1013         with m.Else():
1014             m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
1015
1016
class FPOpData:
    """ Output record of the FSM adder: an FPOp "z" and its id "mid".
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def ports(self):
        """ list of the record's members, z first
        """
        return [self.z, self.mid]

    def eq(self, i):
        """ list of assignments copying i.z and i.mid into this record
        """
        pairs = ((self.z, i.z), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
1027
1028
class FPADDBaseMod:
    """ FSM-driven FP adder: collects a list of stage objects and runs each
        one's action() inside the FSM state named by its state_from.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends a stage to the FSM state list and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with either the short or the long pipeline
        build = (self.get_compact_fragment if self.compact
                 else self.get_longer_fragment)
        build(m, platform)

        # one FSM state per registered stage, in registration order
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the full one-stage-per-state pipeline

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which __init__ assigns; it looks stale
            compared with the compact path - confirm before using
            compact=False.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: same wiring either way, only the stage class differs
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant takes an extra strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output states: normal result, then special-case result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced pipeline: get-ops, special-cases+denorm,
            align+add, normalise-to-pack, then the output state
        """
        width, id_wid = self.width, self.id_wid

        get = FPGet2Op("get_ops", "special_cases", width, id_wid)
        sc = FPAddSpecialCasesDeNorm(width, id_wid)
        alm = FPAddAlignSingleAdd(width, id_wid)
        n1 = FPNormToPack(width, id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        # chain the stages together, fed from this module's input
        stages = [get, sc, alm, n1]
        StageChain(stages, specallocate=True).setup(m, self.i)

        for stage in stages:
            self.add_state(stage)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        # a separate "put_z" output state is deliberately not registered here
1151
1152
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod and bridges the outer
        strobe/ack handshake to the inner module's trigger and output.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input format: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * i: input record, copied into self.i and on into the module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ handshake logic executed while the FSM is in the "fpadd" state
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge and hand over to the output state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1242
1243
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: special-cases/denorm, align/add,
        normalise-to-pack, connected in that order.
    """

    def __init__(self, width, id_wid):
        super().__init__()
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are kept and applied in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()
        named_stages = (
            ("scnorm", self.pipe1),
            ("addalign", self.pipe2),
            ("normpack", self.pipe3),
        )
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1260
1261
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in mux: num_rows inputs of FPADDBaseData, passed through as-is.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # pass-through stage whose in/out spec is a fresh FPADDBaseData
        stage = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        super().__init__(stage, p_len=num_rows)
1268
1269
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out mux: num_rows outputs of FPPackData, passed through as-is.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # pass-through stage whose in/out spec is a fresh FPPackData
        stage = PassThroughStage(lambda: FPPackData(width, id_wid))
        super().__init__(stage, n_len=num_rows)
1276
1277
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's in/out are simply the mux pipes' in/out
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def ports(self):
        """ input-mux ports followed by output-mux ports
        """
        return self._ports

    def elaborate(self, platform):
        m = Module()
        fan_in, adder, fan_out = self.inpipe, self.fpadd, self.outpipe
        m.submodules.inpipe = fan_in
        m.submodules.fpadd = adder
        m.submodules.outpipe = fan_out

        # fan-in -> adder -> fan-out
        m.d.comb += fan_in.n.connect_to_next(adder.p)
        m.d.comb += adder.connect_to_next(fan_out)

        return m
1310
1311
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) input pairs and of outputs
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # input operand pairs, named in_a_<n> / in_b_<n>
        operand_pairs = []
        for idx in range(rs_sz):
            pair = (FPOp(width), FPOp(width))
            for op, label in zip(pair, ("in_a_%d", "in_b_%d")):
                op.name = label % idx
            operand_pairs.append(pair)
        self.rs = Array(operand_pairs)

        # result operands, named out_z_<n>
        results = []
        for idx in range(rs_sz):
            op = FPOp(width)
            op.name = "out_z_%d" % idx
            results.append(op)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends a stage to the FSM state list and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first input pair is wired into the FSM
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [
            abd.a.eq(a),
            abd.b.eq(b),
            abd.mid.eq(self.ids.in_mid),
        ]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # result goes to the output selected by mid, then back to "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        # one FSM state per registered stage, in registration order
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1401
1402
if __name__ == "__main__":
    # Build the top-level FPADD and hand it to the nmigen command-line
    # driver, exposing the first operand pair, the first result and the
    # id signals as ports.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    ports = (alu.rs[0][0].ports() +
             alu.rs[0][1].ports() +
             alu.res[0].ports() +
             [alu.ids.in_mid, alu.ids.out_mid])
    main(alu, ports=ports)
1416
1417
1418     # works... but don't use, just do "python fname.py convert -t v"
1419     #print (verilog.convert(alu, ports=[
1420     #                        ports=alu.in_a.ports() + \
1421     #                              alu.in_b.ports() + \
1422     #                              alu.out_z.ports())