src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState, FPID
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
  19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  20 from fpcommon.postcalc import FPAddStage1Data
  21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
  22                             FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
  23
  24
  class FPAddSpecialCasesMod:
      """ combinatorial special-case handling for add: NaNs, infs, zeros

          decodes both operands; where the answer is already known,
          out_do_z is raised and the result is placed in z (copied to oz).
          otherwise the decoded operands are passed through on o.a / o.b.

          NOTE: some of these are unique to add.  see "Special Operations"
          https://steve.hollasch.net/cgindex/coding/ieeefloat.html
      """

      def __init__(self, width, id_wid):
          self.width, self.id_wid = width, id_wid
          self.i = self.ispec()
          self.o = self.ospec()

      def ispec(self):
          """ input data format (a, b and mid are read from it)
          """
          return FPADDBaseData(self.width, self.id_wid)

      def ospec(self):
          """ output data format
          """
          return FPSCData(self.width, self.id_wid)

      def process(self, i):
          """ the result always appears on self.o; the argument is unused
          """
          return self.o

      def setup(self, m, i):
          """ registers this module as a submodule of m and wires i to self.i
          """
          m.submodules.specialcases = self
          m.d.comb += self.i.eq(i)

      def elaborate(self, platform):
          m = Module()
          o = self.o

          m.submodules.sc_out_z = o.z

          # decode: XXX really should move to separate stage
          a1 = FPNumIn(None, self.width)
          b1 = FPNumIn(None, self.width)
          m.submodules.sc_decode_a = a1
          m.submodules.sc_decode_b = b1
          m.d.comb += a1.decode(self.i.a)
          m.d.comb += b1.decode(self.i.b)

          # signs differ / mantissas identical
          s_nomatch = Signal()
          m_match = Signal()
          m.d.comb += [s_nomatch.eq(a1.s != b1.s),
                       m_match.eq(a1.m == b1.m)]

          # the chain below is a priority list: first match wins

          # either operand NaN: result is NaN
          with m.If(a1.is_nan | b1.is_nan):
              m.d.comb += [o.out_do_z.eq(1), o.z.nan(0)]

          # XXX WEIRDNESS for FP16 non-canonical NaN handling: the
          # (+/-)zero-with-NaN variants that returned -a / -b are
          # disabled and under review.

          # a is inf: result is inf with a's sign...
          with m.Elif(a1.is_inf):
              m.d.comb += [o.out_do_z.eq(1), o.z.inf(a1.s)]
              # ... unless b has exp_128 set and opposite sign: NaN instead
              with m.If(b1.exp_128 & s_nomatch):
                  m.d.comb += o.z.nan(0)

          # b is inf: result is inf with b's sign
          with m.Elif(b1.is_inf):
              m.d.comb += [o.out_do_z.eq(1), o.z.inf(b1.s)]

          # both zero: sign is the AND of both signs
          with m.Elif(a1.is_zero & b1.is_zero):
              m.d.comb += [o.out_do_z.eq(1),
                           o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

          # only a is zero: result is b
          with m.Elif(a1.is_zero):
              m.d.comb += [o.out_do_z.eq(1),
                           o.z.create(b1.s, b1.e, b1.m[3:-1])]

          # only b is zero: result is a
          with m.Elif(b1.is_zero):
              m.d.comb += [o.out_do_z.eq(1),
                           o.z.create(a1.s, a1.e, a1.m[3:-1])]

          # a == -b: result is +ve zero
          with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
              m.d.comb += [o.out_do_z.eq(1), o.z.zero(0)]

          # nothing special: denormalised checks come next, so hand the
          # decoded operands on
          with m.Else():
              m.d.comb += [o.out_do_z.eq(0),
                           o.a.eq(a1),
                           o.b.eq(b1)]

          m.d.comb += [o.oz.eq(o.z.v),
                       o.mid.eq(self.i.mid)]

          return m
 143
 144
 class FPAddSpecialCases(FPState):
     """ FSM "special_cases" state: NaNs, infs, zeros, denormalised

         wraps FPAddSpecialCasesMod, registering its result and routing
         the state machine to "put_z" (answer already known) or
         "denormalise" (carry on with the add).

         NOTE: some of these are unique to add.  see "Special Operations"
         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "special_cases")
         # the module requires both width and id_wid
         self.mod = FPAddSpecialCasesMod(width, id_wid)
         self.out_z = self.mod.ospec()
         self.out_do_z = Signal(reset_less=True)

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         # the module's setup takes (m, i) only; the early-out flag is
         # picked up from its output record instead of being passed in
         self.mod.setup(m, i)
         m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
         # only take the output value (and mid); the module exposes its
         # result as .o, and out_z shares that format (same ospec)
         m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

     def action(self, m):
         self.idsync(m)
         with m.If(self.out_do_z):
             m.next = "put_z"
         with m.Else():
             m.next = "denormalise"
 170
 171
 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
     """ special cases followed by denormalisation, chained as one stage

         doubles as the "special_cases" FSM state and as an
         UnbufferedPipeline that is its own stage.

         NOTE: some of these are unique to add.  see "Special Operations"
         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "special_cases")
         self.width, self.id_wid = width, id_wid
         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
         self.out = self.ospec()

     def ispec(self):
         """ same input format as FPAddSpecialCasesMod
         """
         return FPADDBaseData(self.width, self.id_wid)

     def ospec(self):
         """ same output format as the DeNorm module
         """
         return FPSCData(self.width, self.id_wid)

     def process(self, i):
         """ result is whatever the last module in the chain outputs
         """
         return self.o

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         smod = FPAddSpecialCasesMod(self.width, self.id_wid)
         dmod = FPAddDeNormMod(self.width, self.id_wid)

         # special-cases feeds de-normalisation, combinatorially
         StageChain([smod, dmod]).setup(m, i)

         # smod.o.out_do_z would only be needed for break-out (early-out),
         # which is currently disabled

         self.o = dmod.o

     def action(self, m):
         # break-out (early-out to "put_z" on out_do_z) is disabled:
         # always latch the chain output and move on to alignment
         m.d.sync += self.out.eq(self.process(None))
         m.next = "align"
 215
 216
 class FPAddAlignMultiMod(FPState):
     """ one step of exponent alignment

         whichever operand has the smaller exponent is passed through
         shift_down; exp_eq is raised once both exponents are equal.
     """

     def __init__(self, width):
         self.in_a = FPNumBase(width)
         self.in_b = FPNumBase(width)
         self.out_a = FPNumIn(None, width)
         self.out_b = FPNumIn(None, width)
         self.exp_eq = Signal(reset_less=True)

     def elaborate(self, platform):
         # NOTE: this does *not* do single-cycle multi-shifting,
         #       it *STAYS* in the align state until exponents match
         m = Module()

         m.submodules.align_in_a = self.in_a
         m.submodules.align_in_b = self.in_b
         m.submodules.align_out_a = self.out_a
         m.submodules.align_out_b = self.out_b

         # defaults: not yet equal, operands passed through untouched
         m.d.comb += [self.exp_eq.eq(0),
                      self.out_a.eq(self.in_a),
                      self.out_b.eq(self.in_b)]

         # exponent comparisons
         agtb = Signal(reset_less=True)
         altb = Signal(reset_less=True)
         m.d.comb += [agtb.eq(self.in_a.e > self.in_b.e),
                      altb.eq(self.in_a.e < self.in_b.e)]

         with m.If(agtb):
             # a's exponent is the larger: shift b down
             m.d.comb += self.out_b.shift_down(self.in_b)
         with m.Elif(altb):
             # b's exponent is the larger: shift a down
             m.d.comb += self.out_a.shift_down(self.in_a)
         with m.Else():
             # exponents equal: move to next stage.
             m.d.comb += self.exp_eq.eq(1)

         return m
 257
 258
 class FPAddAlignMulti(FPState):
     """ FSM "align" state built on FPAddAlignMultiMod

         the state only advances to "add_0" when the module reports
         that the exponents are equal.
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "align")
         self.mod = FPAddAlignMultiMod(width)
         self.out_a = FPNumIn(None, width)
         self.out_b = FPNumIn(None, width)
         self.exp_eq = Signal(reset_less=True)

     def setup(self, m, in_a, in_b):
         """ links module to inputs and outputs
         """
         mod = self.mod
         m.submodules.align = mod
         # operands in, equality flag out: combinatorial
         m.d.comb += [mod.in_a.eq(in_a),
                      mod.in_b.eq(in_b),
                      self.exp_eq.eq(mod.exp_eq)]
         # operands out: registered
         m.d.sync += [self.out_a.eq(mod.out_a),
                      self.out_b.eq(mod.out_b)]

     def action(self, m):
         with m.If(self.exp_eq):
             m.next = "add_0"
 281
 282
 class FPNumIn2Ops:
     """ data record: two FPNumIn operands (a, b) plus z, out_do_z, oz, mid
     """

     # fields copied by eq(), in the order the assignments are emitted
     _FIELDS = ("z", "out_do_z", "oz", "a", "b", "mid")

     def __init__(self, width, id_wid):
         self.a = FPNumIn(None, width)
         self.b = FPNumIn(None, width)
         self.z = FPNumOut(width, False)
         self.out_do_z = Signal(reset_less=True)
         self.oz = Signal(width, reset_less=True)
         self.mid = Signal(id_wid, reset_less=True)

     def eq(self, i):
         """ returns the list of assignments copying every field from i
         """
         return [getattr(self, name).eq(getattr(i, name))
                 for name in self._FIELDS]
 296
 297
 class FPAddAlignSingleMod:
     """ single-cycle exponent alignment (combinatorial)
     """

     def __init__(self, width, id_wid):
         self.width, self.id_wid = width, id_wid
         self.i = self.ispec()
         self.o = self.ospec()

     def ispec(self):
         """ input data format
         """
         return FPSCData(self.width, self.id_wid)

     def ospec(self):
         """ output data format
         """
         return FPNumIn2Ops(self.width, self.id_wid)

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         m.submodules.align = self
         m.d.comb += self.i.eq(i)

     def process(self, i):
         """ the result always appears on self.o; the argument is unused
         """
         return self.o

     def elaborate(self, platform):
         """ Aligns A against B or B against A, depending on which has the
             greater exponent.  This is done in a *single* cycle using
             variable-width bit-shift

             the shifter used here is quite expensive in terms of gates.
             Mux A or B in (and out) into temporaries, as only one of them
             needs to be aligned against the other
         """
         m = Module()
         i, o = self.i, self.o

         m.submodules.align_in_a = i.a
         m.submodules.align_in_b = i.b
         m.submodules.align_out_a = o.a
         m.submodules.align_out_b = o.b

         # temporary (muxed) input and output to be shifted
         t_inp = FPNumBase(self.width)
         t_out = FPNumIn(None, self.width)
         espec = (len(i.a.e), True)
         msr = MultiShiftRMerge(i.a.m_width, espec)
         m.submodules.align_t_in = t_inp
         m.submodules.align_t_out = t_out
         m.submodules.multishift_r = msr

         ediff = Signal(espec, reset_less=True)   # a.e - b.e
         ediffr = Signal(espec, reset_less=True)  # b.e - a.e
         tdiff = Signal(espec, reset_less=True)   # amount fed to shifter
         elz = Signal(reset_less=True)            # a.e < b.e
         egz = Signal(reset_less=True)            # a.e > b.e

         # connect multi-shifter to t_inp/out mantissa (and tdiff)
         m.d.comb += [msr.inp.eq(t_inp.m),
                      msr.diff.eq(tdiff),
                      t_out.m.eq(msr.m),
                      t_out.e.eq(t_inp.e + tdiff),
                      t_out.s.eq(t_inp.s)]

         m.d.comb += [ediff.eq(i.a.e - i.b.e),
                      ediffr.eq(i.b.e - i.a.e),
                      elz.eq(i.a.e < i.b.e),
                      egz.eq(i.a.e > i.b.e)]

         # default: A-exp == B-exp, A and B untouched (fall through)
         m.d.comb += [o.a.eq(i.a),
                      o.b.eq(i.b)]

         # only one shifter (muxed); skipped when the result is already
         # known (out_do_z)
         with m.If(~i.out_do_z):
             # exponent of a greater than b: shift b down
             with m.If(egz):
                 m.d.comb += t_inp.eq(i.b)
                 m.d.comb += tdiff.eq(ediff)
                 m.d.comb += o.b.eq(t_out)
                 m.d.comb += o.b.s.eq(i.b.s) # whoops forgot sign
             # exponent of b greater than a: shift a down
             with m.Elif(elz):
                 m.d.comb += t_inp.eq(i.a)
                 m.d.comb += tdiff.eq(ediffr)
                 m.d.comb += o.a.eq(t_out)
                 m.d.comb += o.a.s.eq(i.a.s) # whoops forgot sign

         # everything else is passed straight through
         m.d.comb += [o.mid.eq(i.mid),
                      o.z.eq(i.z),
                      o.out_do_z.eq(i.out_do_z),
                      o.oz.eq(i.oz)]

         return m
 391
 392
 class FPAddAlignSingle(FPState):
     """ FSM "align" state using the single-cycle aligner

         registers the aligned operands produced by FPAddAlignSingleMod
         and always advances to "add_0".
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "align")
         self.mod = FPAddAlignSingleMod(width, id_wid)
         self.out_a = FPNumIn(None, width)
         self.out_b = FPNumIn(None, width)

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         self.mod.setup(m, i)

         # the module publishes its aligned operands on its output
         # record (mod.o.a / mod.o.b); it has no out_a / out_b attributes
         # NOTE: could be done as comb
         m.d.sync += self.out_a.eq(self.mod.o.a)
         m.d.sync += self.out_b.eq(self.mod.o.b)

     def action(self, m):
         m.next = "add_0"
 412
 413
 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
     """ align, add stage 0 and add stage 1 chained into a single stage

         doubles as the "align" FSM state and as an UnbufferedPipeline
         that is its own stage.
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "align")
         self.width, self.id_wid = width, id_wid
         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
         self.a1o = self.ospec()

     def ispec(self):
         """ input data format
         """
         return FPSCData(self.width, self.id_wid)

     def ospec(self):
         """ output data format: same as AddStage1
         """
         return FPAddStage1Data(self.width, self.id_wid)

     def process(self, i):
         """ result is whatever the last module in the chain outputs
         """
         return self.o

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         # chain AddAlignSingle, AddStage0 and AddStage1
         stages = [FPAddAlignSingleMod(self.width, self.id_wid),
                   FPAddStage0Mod(self.width, self.id_wid),
                   FPAddStage1Mod(self.width, self.id_wid)]
         StageChain(stages).setup(m, i)

         # output of the final (AddStage1) module
         self.o = stages[-1].o

     def action(self, m):
         m.d.sync += self.a1o.eq(self.process(None))
         m.next = "normalise_1"
 449
 450
 class FPAddStage0Data:
     """ data record produced by add stage 0: z, out_do_z, oz, tot, mid

         tot is 4 bits wider than z's mantissa width.
     """

     # fields copied by eq(), in the order the assignments are emitted
     _FIELDS = ("z", "out_do_z", "oz", "tot", "mid")

     def __init__(self, width, id_wid):
         self.z = FPNumBase(width, False)
         self.out_do_z = Signal(reset_less=True)
         self.oz = Signal(width, reset_less=True)
         self.tot = Signal(self.z.m_width + 4, reset_less=True)
         self.mid = Signal(id_wid, reset_less=True)

     def eq(self, i):
         """ returns the list of assignments copying every field from i
         """
         return [getattr(self, name).eq(getattr(i, name))
                 for name in self._FIELDS]
 463
 464
 class FPAddStage0Mod:
     """ first stage of add (combinatorial): adds or subtracts mantissas

         same-sign operands are added; otherwise the smaller mantissa is
         subtracted from the larger and the larger one's sign is used.
     """

     def __init__(self, width, id_wid):
         self.width, self.id_wid = width, id_wid
         self.i = self.ispec()
         self.o = self.ospec()

     def ispec(self):
         """ input data format
         """
         return FPSCData(self.width, self.id_wid)

     def ospec(self):
         """ output data format
         """
         return FPAddStage0Data(self.width, self.id_wid)

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         m.submodules.add0 = self
         m.d.comb += self.i.eq(i)

     def process(self, i):
         """ the result always appears on self.o; the argument is unused
         """
         return self.o

     def elaborate(self, platform):
         m = Module()
         a, b = self.i.a, self.i.b
         o = self.o

         m.submodules.add0_in_a = a
         m.submodules.add0_in_b = b
         m.submodules.add0_out_z = o.z

         # store intermediate tests (and zero-extended mantissas)
         seq = Signal(reset_less=True)               # signs equal
         mge = Signal(reset_less=True)               # a.m >= b.m
         am0 = Signal(len(a.m)+1, reset_less=True)   # a.m plus a zero MSB
         bm0 = Signal(len(b.m)+1, reset_less=True)   # b.m plus a zero MSB
         m.d.comb += seq.eq(a.s == b.s)
         m.d.comb += mge.eq(a.m >= b.m)
         m.d.comb += am0.eq(Cat(a.m, 0))
         m.d.comb += bm0.eq(Cat(b.m, 0))

         # nothing to compute when the result is already known
         with m.If(~self.i.out_do_z):
             m.d.comb += o.z.e.eq(a.e)
             # same-sign (both negative or both positive) add mantissas
             with m.If(seq):
                 m.d.comb += o.tot.eq(am0 + bm0)
                 m.d.comb += o.z.s.eq(a.s)
             # a mantissa greater than b, use a
             with m.Elif(mge):
                 m.d.comb += o.tot.eq(am0 - bm0)
                 m.d.comb += o.z.s.eq(a.s)
             # b mantissa greater than a, use b
             with m.Else():
                 m.d.comb += o.tot.eq(bm0 - am0)
                 m.d.comb += o.z.s.eq(b.s)

         # pass-through fields
         m.d.comb += [o.oz.eq(self.i.oz),
                      o.out_do_z.eq(self.i.out_do_z),
                      o.mid.eq(self.i.mid)]
         return m
 529
 530
 class FPAddStage0(FPState):
     """ First stage of add.  covers same-sign (add) and subtract
         special-casing when mantissas are greater or equal, to
         give greatest accuracy.

         FSM "add_0" state: registers the output of FPAddStage0Mod and
         always advances to "add_1".
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "add_0")
         # the module requires both width and id_wid
         self.mod = FPAddStage0Mod(width, id_wid)
         self.o = self.mod.ospec()

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         self.mod.setup(m, i)

         # NOTE: these could be done as combinatorial (merge add0+add1)
         m.d.sync += self.o.eq(self.mod.o)

     def action(self, m):
         m.next = "add_1"
 552
 553
 class FPAddStage1Mod(FPState):
     """ Second stage of add: preparation for normalisation.
         detects when tot sum is too big (tot[27] is kinda a carry bit)
     """

     def __init__(self, width, id_wid):
         self.width, self.id_wid = width, id_wid
         self.i = self.ispec()
         self.o = self.ospec()

     def ispec(self):
         """ input data format: output of add stage 0
         """
         return FPAddStage0Data(self.width, self.id_wid)

     def ospec(self):
         """ output data format
         """
         return FPAddStage1Data(self.width, self.id_wid)

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         m.submodules.add1 = self
         m.submodules.add1_out_overflow = self.o.of

         m.d.comb += self.i.eq(i)

     def process(self, i):
         """ the result always appears on self.o; the argument is unused
         """
         return self.o

     def elaborate(self, platform):
         m = Module()
         tot = self.i.tot
         z, of = self.o.z, self.o.of

         # start from the incoming z; m (and possibly e) overridden below
         m.d.comb += z.eq(self.i.z)

         with m.If(~self.i.out_do_z):
             # tot[-1] (MSB) gets set when the sum overflows. shift result
             # down by one extra bit and bump the exponent
             with m.If(tot[-1]):
                 m.d.comb += z.m.eq(tot[4:])
                 m.d.comb += of.m0.eq(tot[4])
                 m.d.comb += of.guard.eq(tot[3])
                 m.d.comb += of.round_bit.eq(tot[2])
                 m.d.comb += of.sticky.eq(tot[1] | tot[0])
                 m.d.comb += z.e.eq(self.i.z.e + 1)
             # tot[-1] (MSB) zero case
             with m.Else():
                 m.d.comb += z.m.eq(tot[3:])
                 m.d.comb += of.m0.eq(tot[3])
                 m.d.comb += of.guard.eq(tot[2])
                 m.d.comb += of.round_bit.eq(tot[1])
                 m.d.comb += of.sticky.eq(tot[0])

         # pass-through fields
         m.d.comb += [self.o.out_do_z.eq(self.i.out_do_z),
                      self.o.oz.eq(self.i.oz),
                      self.o.mid.eq(self.i.mid)]

         return m
 611
 612
 class FPAddStage1(FPState):
     """ FSM "add_1" state: registers the z and overflow outputs of
         FPAddStage1Mod, then always advances to "normalise_1".
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "add_1")
         # the module requires both width and id_wid
         self.mod = FPAddStage1Mod(width, id_wid)
         self.out_z = FPNumBase(width, False)
         self.out_of = Overflow()
         self.norm_stb = Signal()

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         self.mod.setup(m, i)

         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

         # the module publishes its results on its output record
         # (mod.o.of / mod.o.z); it has no out_of / out_z attributes
         m.d.sync += self.out_of.eq(self.mod.o.of)
         m.d.sync += self.out_z.eq(self.mod.o.z)
         # NOTE(review): both norm_stb assignments are unconditional here,
         # so this later one presumably takes precedence - confirm intent
         m.d.sync += self.norm_stb.eq(1)

     def action(self, m):
         m.next = "normalise_1"

 # NOTE: a stray __init__/elaborate pair (a header-less copy of a
 # multi-cycle normalisation module) used to follow here.  It landed
 # inside FPAddStage1, replacing the constructor above, and read an
 # attribute (out_norm) that was never created.  FPNorm1ModMulti is
 # imported from fpcommon.postnormalise at the top of this file.
 702
 703
 class FPNormToPack(FPState, UnbufferedPipeline):
     """ normalise, round, correct and pack chained into a single stage

         doubles as the "normalise_1" FSM state and as an
         UnbufferedPipeline that is its own stage.
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "normalise_1")
         self.id_wid, self.width = id_wid, width
         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

     def ispec(self):
         """ input data format: same as Norm1ModSingle
         """
         return FPAddStage1Data(self.width, self.id_wid)

     def ospec(self):
         """ output data format: same as FPPackMod
         """
         return FPPackData(self.width, self.id_wid)

     def process(self, i):
         """ result is whatever the last module in the chain outputs
         """
         return self.o

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         # Normalisation, Rounding Corrections, Pack - in a chain
         stages = [FPNorm1ModSingle(self.width, self.id_wid),
                   FPRoundMod(self.width, self.id_wid),
                   FPCorrectionsMod(self.width, self.id_wid),
                   FPPackMod(self.width, self.id_wid)]
         StageChain(stages).setup(m, i)

         pmod = stages[-1]
         # register for the packed result, latched in action()
         self.out_z = pmod.ospec()

         self.o = pmod.o

     def action(self, m):
         m.d.sync += self.out_z.eq(self.process(None))
         m.next = "pack_put_z"
 739
 740
 class FPRoundData:
     """ data record: z, out_do_z, oz and mid
     """

     # fields copied by eq(), in the order the assignments are emitted
     _FIELDS = ("z", "out_do_z", "oz", "mid")

     def __init__(self, width, id_wid):
         self.z = FPNumBase(width, False)
         self.out_do_z = Signal(reset_less=True)
         self.oz = Signal(width, reset_less=True)
         self.mid = Signal(id_wid, reset_less=True)

     def eq(self, i):
         """ returns the list of assignments copying every field from i
         """
         return [getattr(self, name).eq(getattr(i, name))
                 for name in self._FIELDS]
 752
 753
 class FPRoundMod:
     """ rounding (combinatorial): increments the mantissa when the
         input's roundz flag is set
     """

     def __init__(self, width, id_wid):
         self.width, self.id_wid = width, id_wid
         self.i = self.ispec()
         self.out_z = self.ospec()

     def ispec(self):
         """ input data format
         """
         return FPNorm1Data(self.width, self.id_wid)

     def ospec(self):
         """ output data format
         """
         return FPRoundData(self.width, self.id_wid)

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         m.submodules.roundz = self
         m.d.comb += self.i.eq(i)

     def process(self, i):
         """ the result always appears on self.out_z; the argument is unused
         """
         return self.out_z

     def elaborate(self, platform):
         m = Module()
         inp, out = self.i, self.out_z

         # default: straight copy of the input
         m.d.comb += out.eq(inp)

         # no rounding when the result is already known
         with m.If(~inp.out_do_z):
             with m.If(inp.roundz):
                 # mantissa up
                 m.d.comb += out.z.m.eq(inp.z.m + 1)
                 # mantissa was all 1s: exponent goes up as well
                 with m.If(inp.z.m == inp.z.m1s):
                     m.d.comb += out.z.e.eq(inp.z.e + 1)

         return m
 785
 786
 class FPRound(FPState):
     """ FSM "round" state: registers the output of FPRoundMod and
         always advances to "corrections".
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "round")
         # the module requires both width and id_wid
         self.mod = FPRoundMod(width, id_wid)
         self.out_z = self.ospec()

     def ispec(self):
         """ input data format (same as the module's)
         """
         return self.mod.ispec()

     def ospec(self):
         """ output data format (same as the module's)
         """
         return self.mod.ospec()

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         self.mod.setup(m, i)

         self.idsync(m)
         m.d.sync += self.out_z.eq(self.mod.out_z)
         # the module's output record is named out_z (it has no .o)
         m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

     def action(self, m):
         m.next = "corrections"
 811
 812
 class FPCorrectionsMod:
     """ exponent correction (combinatorial): when z is flagged as
         denormalised its exponent is replaced with z.N127
     """

     def __init__(self, width, id_wid):
         self.width, self.id_wid = width, id_wid
         self.i = self.ispec()
         self.out_z = self.ospec()

     def ispec(self):
         """ input data format
         """
         return FPRoundData(self.width, self.id_wid)

     def ospec(self):
         """ output data format (identical to the input format)
         """
         return FPRoundData(self.width, self.id_wid)

     def setup(self, m, i):
         """ links module to inputs and outputs
         """
         m.submodules.corrections = self
         m.d.comb += self.i.eq(i)

     def process(self, i):
         """ the result always appears on self.out_z; the argument is unused
         """
         return self.out_z

     def elaborate(self, platform):
         m = Module()
         inp, out = self.i, self.out_z

         m.submodules.corr_in_z = inp.z
         m.submodules.corr_out_z = out.z

         # default: straight copy of the input
         m.d.comb += out.eq(inp)

         # no correction when the result is already known
         with m.If(~inp.out_do_z):
             with m.If(inp.z.is_denormalised):
                 m.d.comb += out.z.e.eq(inp.z.N127)
         return m
 845
 846
 class FPCorrections(FPState):
     """ FSM "corrections" state: registers the output of
         FPCorrectionsMod and always advances to "pack".
     """

     def __init__(self, width, id_wid):
         FPState.__init__(self, "corrections")
         # the module requires both width and id_wid
         self.mod = FPCorrectionsMod(width, id_wid)
         self.out_z = self.ospec()

     def ispec(self):
         """ input data format (same as the module's)
         """
         return self.mod.ispec()

     def ospec(self):
         """ output data format (same as the module's)
         """
         return self.mod.ospec()

     def setup(self, m, in_z):
         """ links module to inputs and outputs
         """
         self.mod.setup(m, in_z)

         m.d.sync += self.out_z.eq(self.mod.out_z)
         # the module's output record is named out_z (it has no .o)
         m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

     def action(self, m):
         m.next = "pack"
 870
 871
 872 class FPPackData:
 873
 874     def __init__(self, width, id_wid):
 875         self.z = Signal(width, reset_less=True)
 876         self.mid = Signal(id_wid, reset_less=True)
 877
 878     def eq(self, i):
 879         return [self.z.eq(i.z), self.mid.eq(i.mid)]
 880
 881     def ports(self):
 882         return [self.z, self.mid]
 883
 884
 885 class FPPackMod:
 886
 887     def __init__(self, width, id_wid):
 888         self.width = width
 889         self.id_wid = id_wid
 890         self.i = self.ispec()
 891         self.o = self.ospec()
 892
 893     def ispec(self):
 894         return FPRoundData(self.width, self.id_wid)
 895
 896     def ospec(self):
 897         return FPPackData(self.width, self.id_wid)
 898
 899     def process(self, i):
 900         return self.o
 901
 902     def setup(self, m, in_z):
 903         """ links module to inputs and outputs
 904         """
 905         m.submodules.pack = self
 906         m.d.comb += self.i.eq(in_z)
 907
 908     def elaborate(self, platform):
 909         m = Module()
 910         z = FPNumOut(self.width, False)
 911         m.submodules.pack_in_z = self.i.z
 912         m.submodules.pack_out_z = z
 913         m.d.comb += self.o.mid.eq(self.i.mid)
 914         with m.If(~self.i.out_do_z):
 915             with m.If(self.i.z.is_overflowed):
 916                 m.d.comb += z.inf(self.i.z.s)
 917             with m.Else():
 918                 m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
 919         with m.Else():
 920             m.d.comb += z.v.eq(self.i.oz)
 921         m.d.comb += self.o.z.eq(z.v)
 922         return m
 923
 924
 925 class FPPack(FPState):
 926
 927     def __init__(self, width, id_wid):
 928         FPState.__init__(self, "pack")
 929         self.mod = FPPackMod(width)
 930         self.out_z = self.ospec()
 931
 932     def ispec(self):
 933         return self.mod.ispec()
 934
 935     def ospec(self):
 936         return self.mod.ospec()
 937
 938     def setup(self, m, in_z):
 939         """ links module to inputs and outputs
 940         """
 941         self.mod.setup(m, in_z)
 942
 943         m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
 944         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
 945
 946     def action(self, m):
 947         m.next = "pack_put_z"
 948
 949
 950 class FPPutZ(FPState):
 951
 952     def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
 953         FPState.__init__(self, state)
 954         if to_state is None:
 955             to_state = "get_ops"
 956         self.to_state = to_state
 957         self.in_z = in_z
 958         self.out_z = out_z
 959         self.in_mid = in_mid
 960         self.out_mid = out_mid
 961
 962     def action(self, m):
 963         if self.in_mid is not None:
 964             m.d.sync += self.out_mid.eq(self.in_mid)
 965         m.d.sync += [
 966           self.out_z.z.v.eq(self.in_z)
 967         ]
 968         with m.If(self.out_z.z.stb & self.out_z.z.ack):
 969             m.d.sync += self.out_z.z.stb.eq(0)
 970             m.next = self.to_state
 971         with m.Else():
 972             m.d.sync += self.out_z.z.stb.eq(1)
 973
 974
 975 class FPPutZIdx(FPState):
 976
 977     def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
 978         FPState.__init__(self, state)
 979         if to_state is None:
 980             to_state = "get_ops"
 981         self.to_state = to_state
 982         self.in_z = in_z
 983         self.out_zs = out_zs
 984         self.in_mid = in_mid
 985
 986     def action(self, m):
 987         outz_stb = Signal(reset_less=True)
 988         outz_ack = Signal(reset_less=True)
 989         m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
 990                      outz_ack.eq(self.out_zs[self.in_mid].ack),
 991                     ]
 992         m.d.sync += [
 993           self.out_zs[self.in_mid].v.eq(self.in_z.v)
 994         ]
 995         with m.If(outz_stb & outz_ack):
 996             m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
 997             m.next = self.to_state
 998         with m.Else():
 999             m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
1000
1001
class FPOpData:
    """ record holding an FPOp (z) together with an identifier mid
        (id_wid bits, reset-less).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid into
            this record
        """
        assign_z = self.z.eq(i.z)
        assign_mid = self.mid.eq(i.mid)
        return [assign_z, assign_mid]

    def ports(self):
        """ returns the record's members as a list: z first, then mid
        """
        members = [self.z]
        members.append(self.mid)
        return members
1012
1013
class FPADDBaseMod:
    """ FSM-based FP adder: collects a list of states (each providing
        state_from and action) and emits one FSM state per entry.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ creates a fresh input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ creates a fresh output record (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states using whichever stage layout was requested
        if self.compact:
            build_states = self.get_compact_fragment
        else:
            build_states = self.get_longer_fragment
        build_states(m, platform)

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ sets up the long (one state per step) chain of stages.

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__ above;
            it looks stale - confirm before relying on compact=False.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant, same wiring
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant takes an extra strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from pack, one from special-cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ sets up the compact chain: get-ops, special-cases+denorm,
            align+add, norm-to-pack, followed by a "pack_put_z" output state
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also an FSM state
        for mod in chainlist:
            self.add_state(mod)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        # a separate "put_z" state is currently disabled:
        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1136
1137
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod (registered as submodule
        "fpadd"), starts it via its in_t trigger and, once the module's
        output trigger fires, moves to the "put_z" state.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input record format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * m: the Module being built
            * i: input record, copied combinatorially into self.i
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action for the "fpadd" state.

            while the wrapped module is not done: accept an incoming
            operand pair (ack it and strobe the module), otherwise hold
            the module's output ack high.  once done: acknowledge and
            move to "put_z".
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and hand over to the output state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1227
1228
class FPADDBasePipe(ControlBase):
    """ three pipeline stages connected in sequence:
        special-cases/denorm, align/add, normalise-to-pack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are created now, added in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages as submodules and adds the
            connections between them
        """
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1245
1246
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ input-side mux pipe with num_rows inputs, built around a
        pass-through stage carrying FPADDBaseData
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # each call produces a fresh data record
            return FPADDBaseData(width, id_wid)

        passthru = PassThroughStage(iospec)
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=self.num_rows)
1253
1254
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ output-side mux pipe with num_rows outputs, built around a
        pass-through stage carrying FPPackData
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # each call produces a fresh data record
            return FPPackData(width, id_wid)

        passthru = PassThroughStage(iospec)
        CombMuxOutPipe.__init__(self, passthru, n_len=self.num_rows)
1261
1262
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        # the three sections, in data-flow order
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's own in/out are those of the first and last pipe
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes as submodules and connects them
            in sequence: inpipe -> fpadd -> outpipe
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        fan_in_link = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += fan_in_link
        fan_out_link = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += fan_out_link

        return m

    def ports(self):
        """ returns the port list gathered at construction time
            (input pipe's ports followed by output pipe's ports)
        """
        return self._ports
1295
1296
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of out_z
              results to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        operand_pairs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # results, named out_z_<n>
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up here
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # output state: writes to res[o.mid], then returns to "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                 o.mid, "get_a"))

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1386
1387
if __name__ == "__main__":
    # command-line entry point: hands an FPADD instance to nmigen's main(),
    # exposing the first operand pair, the first result and the id signals.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    ports = (alu.rs[0][0].ports() +
             alu.rs[0][1].ports() +
             alu.res[0].ports() +
             [alu.ids.in_mid, alu.ids.out_mid])
    main(alu, ports=ports)

    # calling verilog.convert(alu, ports=...) directly works... but don't
    # use it, just do "python fname.py convert -t v"