src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState, FPID
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
  19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  20 from fpcommon.postcalc import FPAddStage1Data
  21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
  22                             FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
  23 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
  24 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
  25 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
  26 from fpcommon.normtopack import FPNormToPack
  27 from fpcommon.putz import (FPPutZ, FPPutZIdx)
  28
  29
  30 class FPAddSpecialCasesMod:
  31     """ special cases: NaNs, infs, zeros, denormalised
  32         NOTE: some of these are unique to add.  see "Special Operations"
  33         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
  34     """
  35
  36     def __init__(self, width, id_wid):
  37         self.width = width
  38         self.id_wid = id_wid
  39         self.i = self.ispec()
  40         self.o = self.ospec()
  41
  42     def ispec(self):
  43         return FPADDBaseData(self.width, self.id_wid)
  44
  45     def ospec(self):
  46         return FPSCData(self.width, self.id_wid)
  47
  48     def setup(self, m, i):
  49         """ links module to inputs and outputs
  50         """
  51         m.submodules.specialcases = self
  52         m.d.comb += self.i.eq(i)
  53
  54     def process(self, i):
  55         return self.o
  56
  57     def elaborate(self, platform):
  58         m = Module()
  59
  60         m.submodules.sc_out_z = self.o.z
  61
  62         # decode: XXX really should move to separate stage
  63         a1 = FPNumIn(None, self.width)
  64         b1 = FPNumIn(None, self.width)
  65         m.submodules.sc_decode_a = a1
  66         m.submodules.sc_decode_b = b1
  67         m.d.comb += [a1.decode(self.i.a),
  68                      b1.decode(self.i.b),
  69                     ]
  70
  71         s_nomatch = Signal()
  72         m.d.comb += s_nomatch.eq(a1.s != b1.s)
  73
  74         m_match = Signal()
  75         m.d.comb += m_match.eq(a1.m == b1.m)
  76
  77         # if a is NaN or b is NaN return NaN
  78         with m.If(a1.is_nan | b1.is_nan):
  79             m.d.comb += self.o.out_do_z.eq(1)
  80             m.d.comb += self.o.z.nan(0)
  81
  82         # XXX WEIRDNESS for FP16 non-canonical NaN handling
  83         # under review
  84
  85         ## if a is zero and b is NaN return -b
  86         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
  87         #    m.d.comb += self.o.out_do_z.eq(1)
  88         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
  89
  90         ## if b is zero and a is NaN return -a
  91         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
  92         #    m.d.comb += self.o.out_do_z.eq(1)
  93         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
  94
  95         ## if a is -zero and b is NaN return -b
  96         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
  97         #    m.d.comb += self.o.out_do_z.eq(1)
  98         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
  99
 100         ## if b is -zero and a is NaN return -a
 101         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 102         #    m.d.comb += self.o.out_do_z.eq(1)
 103         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 104
 105         # if a is inf return inf (or NaN)
 106         with m.Elif(a1.is_inf):
 107             m.d.comb += self.o.out_do_z.eq(1)
 108             m.d.comb += self.o.z.inf(a1.s)
 109             # if a is inf and signs don't match return NaN
 110             with m.If(b1.exp_128 & s_nomatch):
 111                 m.d.comb += self.o.z.nan(0)
 112
 113         # if b is inf return inf
 114         with m.Elif(b1.is_inf):
 115             m.d.comb += self.o.out_do_z.eq(1)
 116             m.d.comb += self.o.z.inf(b1.s)
 117
 118         # if a is zero and b zero return signed-a/b
 119         with m.Elif(a1.is_zero & b1.is_zero):
 120             m.d.comb += self.o.out_do_z.eq(1)
 121             m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
 122
 123         # if a is zero return b
 124         with m.Elif(a1.is_zero):
 125             m.d.comb += self.o.out_do_z.eq(1)
 126             m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
 127
 128         # if b is zero return a
 129         with m.Elif(b1.is_zero):
 130             m.d.comb += self.o.out_do_z.eq(1)
 131             m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
 132
 133         # if a equal to -b return zero (+ve zero)
 134         with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
 135             m.d.comb += self.o.out_do_z.eq(1)
 136             m.d.comb += self.o.z.zero(0)
 137
 138         # Denormalised Number checks next, so pass a/b data through
 139         with m.Else():
 140             m.d.comb += self.o.out_do_z.eq(0)
 141             m.d.comb += self.o.a.eq(a1)
 142             m.d.comb += self.o.b.eq(b1)
 143
 144         m.d.comb += self.o.oz.eq(self.o.z.v)
 145         m.d.comb += self.o.mid.eq(self.i.mid)
 146
 147         return m
 148
 149
 150 class FPAddSpecialCases(FPState):
 151     """ special cases: NaNs, infs, zeros, denormalised
 152         NOTE: some of these are unique to add.  see "Special Operations"
 153         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 154     """
 155
 156     def __init__(self, width, id_wid):
 157         FPState.__init__(self, "special_cases")
 158         self.mod = FPAddSpecialCasesMod(width)
 159         self.out_z = self.mod.ospec()
 160         self.out_do_z = Signal(reset_less=True)
 161
 162     def setup(self, m, i):
 163         """ links module to inputs and outputs
 164         """
 165         self.mod.setup(m, i, self.out_do_z)
 166         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 167         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 168
 169     def action(self, m):
 170         self.idsync(m)
 171         with m.If(self.out_do_z):
 172             m.next = "put_z"
 173         with m.Else():
 174             m.next = "denormalise"
 175
 176
 177 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 178     """ special cases: NaNs, infs, zeros, denormalised
 179         NOTE: some of these are unique to add.  see "Special Operations"
 180         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 181     """
 182
 183     def __init__(self, width, id_wid):
 184         FPState.__init__(self, "special_cases")
 185         self.width = width
 186         self.id_wid = id_wid
 187         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 188         self.out = self.ospec()
 189
 190     def ispec(self):
 191         return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
 192
 193     def ospec(self):
 194         return FPSCData(self.width, self.id_wid) # DeNorm ospec
 195
 196     def setup(self, m, i):
 197         """ links module to inputs and outputs
 198         """
 199         smod = FPAddSpecialCasesMod(self.width, self.id_wid)
 200         dmod = FPAddDeNormMod(self.width, self.id_wid)
 201
 202         chain = StageChain([smod, dmod])
 203         chain.setup(m, i)
 204
 205         # only needed for break-out (early-out)
 206         # self.out_do_z = smod.o.out_do_z
 207
 208         self.o = dmod.o
 209
 210     def process(self, i):
 211         return self.o
 212
 213     def action(self, m):
 214         # for break-out (early-out)
 215         #with m.If(self.out_do_z):
 216         #    m.next = "put_z"
 217         #with m.Else():
 218             m.d.sync += self.out.eq(self.process(None))
 219             m.next = "align"
 220
 221
 222 class FPAddAlignMultiMod(FPState):
 223
 224     def __init__(self, width):
 225         self.in_a = FPNumBase(width)
 226         self.in_b = FPNumBase(width)
 227         self.out_a = FPNumIn(None, width)
 228         self.out_b = FPNumIn(None, width)
 229         self.exp_eq = Signal(reset_less=True)
 230
 231     def elaborate(self, platform):
 232         # This one however (single-cycle) will do the shift
 233         # in one go.
 234
 235         m = Module()
 236
 237         m.submodules.align_in_a = self.in_a
 238         m.submodules.align_in_b = self.in_b
 239         m.submodules.align_out_a = self.out_a
 240         m.submodules.align_out_b = self.out_b
 241
 242         # NOTE: this does *not* do single-cycle multi-shifting,
 243         #       it *STAYS* in the align state until exponents match
 244
 245         # exponent of a greater than b: shift b down
 246         m.d.comb += self.exp_eq.eq(0)
 247         m.d.comb += self.out_a.eq(self.in_a)
 248         m.d.comb += self.out_b.eq(self.in_b)
 249         agtb = Signal(reset_less=True)
 250         altb = Signal(reset_less=True)
 251         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 252         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 253         with m.If(agtb):
 254             m.d.comb += self.out_b.shift_down(self.in_b)
 255         # exponent of b greater than a: shift a down
 256         with m.Elif(altb):
 257             m.d.comb += self.out_a.shift_down(self.in_a)
 258         # exponents equal: move to next stage.
 259         with m.Else():
 260             m.d.comb += self.exp_eq.eq(1)
 261         return m
 262
 263
 264 class FPAddAlignMulti(FPState):
 265
 266     def __init__(self, width, id_wid):
 267         FPState.__init__(self, "align")
 268         self.mod = FPAddAlignMultiMod(width)
 269         self.out_a = FPNumIn(None, width)
 270         self.out_b = FPNumIn(None, width)
 271         self.exp_eq = Signal(reset_less=True)
 272
 273     def setup(self, m, in_a, in_b):
 274         """ links module to inputs and outputs
 275         """
 276         m.submodules.align = self.mod
 277         m.d.comb += self.mod.in_a.eq(in_a)
 278         m.d.comb += self.mod.in_b.eq(in_b)
 279         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 280         m.d.sync += self.out_a.eq(self.mod.out_a)
 281         m.d.sync += self.out_b.eq(self.mod.out_b)
 282
 283     def action(self, m):
 284         with m.If(self.exp_eq):
 285             m.next = "add_0"
 286
 287
 288 class FPNumIn2Ops:
 289
 290     def __init__(self, width, id_wid):
 291         self.a = FPNumIn(None, width)
 292         self.b = FPNumIn(None, width)
 293         self.z = FPNumOut(width, False)
 294         self.out_do_z = Signal(reset_less=True)
 295         self.oz = Signal(width, reset_less=True)
 296         self.mid = Signal(id_wid, reset_less=True)
 297
 298     def eq(self, i):
 299         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 300                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 301
 302
 303 class FPAddAlignSingleMod:
 304
 305     def __init__(self, width, id_wid):
 306         self.width = width
 307         self.id_wid = id_wid
 308         self.i = self.ispec()
 309         self.o = self.ospec()
 310
 311     def ispec(self):
 312         return FPSCData(self.width, self.id_wid)
 313
 314     def ospec(self):
 315         return FPNumIn2Ops(self.width, self.id_wid)
 316
 317     def process(self, i):
 318         return self.o
 319
 320     def setup(self, m, i):
 321         """ links module to inputs and outputs
 322         """
 323         m.submodules.align = self
 324         m.d.comb += self.i.eq(i)
 325
 326     def elaborate(self, platform):
 327         """ Aligns A against B or B against A, depending on which has the
 328             greater exponent.  This is done in a *single* cycle using
 329             variable-width bit-shift
 330
 331             the shifter used here is quite expensive in terms of gates.
 332             Mux A or B in (and out) into temporaries, as only one of them
 333             needs to be aligned against the other
 334         """
 335         m = Module()
 336
 337         m.submodules.align_in_a = self.i.a
 338         m.submodules.align_in_b = self.i.b
 339         m.submodules.align_out_a = self.o.a
 340         m.submodules.align_out_b = self.o.b
 341
 342         # temporary (muxed) input and output to be shifted
 343         t_inp = FPNumBase(self.width)
 344         t_out = FPNumIn(None, self.width)
 345         espec = (len(self.i.a.e), True)
 346         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 347         m.submodules.align_t_in = t_inp
 348         m.submodules.align_t_out = t_out
 349         m.submodules.multishift_r = msr
 350
 351         ediff = Signal(espec, reset_less=True)
 352         ediffr = Signal(espec, reset_less=True)
 353         tdiff = Signal(espec, reset_less=True)
 354         elz = Signal(reset_less=True)
 355         egz = Signal(reset_less=True)
 356
 357         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 358         m.d.comb += msr.inp.eq(t_inp.m)
 359         m.d.comb += msr.diff.eq(tdiff)
 360         m.d.comb += t_out.m.eq(msr.m)
 361         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 362         m.d.comb += t_out.s.eq(t_inp.s)
 363
 364         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 365         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 366         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 367         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 368
 369         # default: A-exp == B-exp, A and B untouched (fall through)
 370         m.d.comb += self.o.a.eq(self.i.a)
 371         m.d.comb += self.o.b.eq(self.i.b)
 372         # only one shifter (muxed)
 373         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 374         # exponent of a greater than b: shift b down
 375         with m.If(~self.i.out_do_z):
 376             with m.If(egz):
 377                 m.d.comb += [t_inp.eq(self.i.b),
 378                              tdiff.eq(ediff),
 379                              self.o.b.eq(t_out),
 380                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 381                             ]
 382             # exponent of b greater than a: shift a down
 383             with m.Elif(elz):
 384                 m.d.comb += [t_inp.eq(self.i.a),
 385                              tdiff.eq(ediffr),
 386                              self.o.a.eq(t_out),
 387                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 388                             ]
 389
 390         m.d.comb += self.o.mid.eq(self.i.mid)
 391         m.d.comb += self.o.z.eq(self.i.z)
 392         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 393         m.d.comb += self.o.oz.eq(self.i.oz)
 394
 395         return m
 396
 397
 398 class FPAddAlignSingle(FPState):
 399
 400     def __init__(self, width, id_wid):
 401         FPState.__init__(self, "align")
 402         self.mod = FPAddAlignSingleMod(width, id_wid)
 403         self.out_a = FPNumIn(None, width)
 404         self.out_b = FPNumIn(None, width)
 405
 406     def setup(self, m, i):
 407         """ links module to inputs and outputs
 408         """
 409         self.mod.setup(m, i)
 410
 411         # NOTE: could be done as comb
 412         m.d.sync += self.out_a.eq(self.mod.out_a)
 413         m.d.sync += self.out_b.eq(self.mod.out_b)
 414
 415     def action(self, m):
 416         m.next = "add_0"
 417
 418
 419 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 420
 421     def __init__(self, width, id_wid):
 422         FPState.__init__(self, "align")
 423         self.width = width
 424         self.id_wid = id_wid
 425         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 426         self.a1o = self.ospec()
 427
 428     def ispec(self):
 429         return FPSCData(self.width, self.id_wid)
 430
 431     def ospec(self):
 432         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 433
 434     def setup(self, m, i):
 435         """ links module to inputs and outputs
 436         """
 437
 438         # chain AddAlignSingle, AddStage0 and AddStage1
 439         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 440         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 441         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 442
 443         chain = StageChain([mod, a0mod, a1mod])
 444         chain.setup(m, i)
 445
 446         self.o = a1mod.o
 447
 448     def process(self, i):
 449         return self.o
 450
 451     def action(self, m):
 452         m.d.sync += self.a1o.eq(self.process(None))
 453         m.next = "normalise_1"
 454
 455
 456 class FPAddStage0Data:
 457
 458     def __init__(self, width, id_wid):
 459         self.z = FPNumBase(width, False)
 460         self.out_do_z = Signal(reset_less=True)
 461         self.oz = Signal(width, reset_less=True)
 462         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 463         self.mid = Signal(id_wid, reset_less=True)
 464
 465     def eq(self, i):
 466         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 467                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 468
 469
 470 class FPAddStage0Mod:
 471
 472     def __init__(self, width, id_wid):
 473         self.width = width
 474         self.id_wid = id_wid
 475         self.i = self.ispec()
 476         self.o = self.ospec()
 477
 478     def ispec(self):
 479         return FPSCData(self.width, self.id_wid)
 480
 481     def ospec(self):
 482         return FPAddStage0Data(self.width, self.id_wid)
 483
 484     def process(self, i):
 485         return self.o
 486
 487     def setup(self, m, i):
 488         """ links module to inputs and outputs
 489         """
 490         m.submodules.add0 = self
 491         m.d.comb += self.i.eq(i)
 492
 493     def elaborate(self, platform):
 494         m = Module()
 495         m.submodules.add0_in_a = self.i.a
 496         m.submodules.add0_in_b = self.i.b
 497         m.submodules.add0_out_z = self.o.z
 498
 499         # store intermediate tests (and zero-extended mantissas)
 500         seq = Signal(reset_less=True)
 501         mge = Signal(reset_less=True)
 502         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 503         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 504         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 505                      mge.eq(self.i.a.m >= self.i.b.m),
 506                      am0.eq(Cat(self.i.a.m, 0)),
 507                      bm0.eq(Cat(self.i.b.m, 0))
 508                     ]
 509         # same-sign (both negative or both positive) add mantissas
 510         with m.If(~self.i.out_do_z):
 511             m.d.comb += self.o.z.e.eq(self.i.a.e)
 512             with m.If(seq):
 513                 m.d.comb += [
 514                     self.o.tot.eq(am0 + bm0),
 515                     self.o.z.s.eq(self.i.a.s)
 516                 ]
 517             # a mantissa greater than b, use a
 518             with m.Elif(mge):
 519                 m.d.comb += [
 520                     self.o.tot.eq(am0 - bm0),
 521                     self.o.z.s.eq(self.i.a.s)
 522                 ]
 523             # b mantissa greater than a, use b
 524             with m.Else():
 525                 m.d.comb += [
 526                     self.o.tot.eq(bm0 - am0),
 527                     self.o.z.s.eq(self.i.b.s)
 528             ]
 529
 530         m.d.comb += self.o.oz.eq(self.i.oz)
 531         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 532         m.d.comb += self.o.mid.eq(self.i.mid)
 533         return m
 534
 535
 536 class FPAddStage0(FPState):
 537     """ First stage of add.  covers same-sign (add) and subtract
 538         special-casing when mantissas are greater or equal, to
 539         give greatest accuracy.
 540     """
 541
 542     def __init__(self, width, id_wid):
 543         FPState.__init__(self, "add_0")
 544         self.mod = FPAddStage0Mod(width)
 545         self.o = self.mod.ospec()
 546
 547     def setup(self, m, i):
 548         """ links module to inputs and outputs
 549         """
 550         self.mod.setup(m, i)
 551
 552         # NOTE: these could be done as combinatorial (merge add0+add1)
 553         m.d.sync += self.o.eq(self.mod.o)
 554
 555     def action(self, m):
 556         m.next = "add_1"
 557
 558
 559 class FPAddStage1Mod(FPState):
 560     """ Second stage of add: preparation for normalisation.
 561         detects when tot sum is too big (tot[27] is kinda a carry bit)
 562     """
 563
 564     def __init__(self, width, id_wid):
 565         self.width = width
 566         self.id_wid = id_wid
 567         self.i = self.ispec()
 568         self.o = self.ospec()
 569
 570     def ispec(self):
 571         return FPAddStage0Data(self.width, self.id_wid)
 572
 573     def ospec(self):
 574         return FPAddStage1Data(self.width, self.id_wid)
 575
 576     def process(self, i):
 577         return self.o
 578
 579     def setup(self, m, i):
 580         """ links module to inputs and outputs
 581         """
 582         m.submodules.add1 = self
 583         m.submodules.add1_out_overflow = self.o.of
 584
 585         m.d.comb += self.i.eq(i)
 586
 587     def elaborate(self, platform):
 588         m = Module()
 589         m.d.comb += self.o.z.eq(self.i.z)
 590         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 591         with m.If(~self.i.out_do_z):
 592             with m.If(self.i.tot[-1]):
 593                 m.d.comb += [
 594                     self.o.z.m.eq(self.i.tot[4:]),
 595                     self.o.of.m0.eq(self.i.tot[4]),
 596                     self.o.of.guard.eq(self.i.tot[3]),
 597                     self.o.of.round_bit.eq(self.i.tot[2]),
 598                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 599                     self.o.z.e.eq(self.i.z.e + 1)
 600             ]
 601             # tot[-1] (MSB) zero case
 602             with m.Else():
 603                 m.d.comb += [
 604                     self.o.z.m.eq(self.i.tot[3:]),
 605                     self.o.of.m0.eq(self.i.tot[3]),
 606                     self.o.of.guard.eq(self.i.tot[2]),
 607                     self.o.of.round_bit.eq(self.i.tot[1]),
 608                     self.o.of.sticky.eq(self.i.tot[0])
 609             ]
 610
 611         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 612         m.d.comb += self.o.oz.eq(self.i.oz)
 613         m.d.comb += self.o.mid.eq(self.i.mid)
 614
 615         return m
 616
 617
 618 class FPAddStage1(FPState):
 619
 620     def __init__(self, width, id_wid):
 621         FPState.__init__(self, "add_1")
 622         self.mod = FPAddStage1Mod(width)
 623         self.out_z = FPNumBase(width, False)
 624         self.out_of = Overflow()
 625         self.norm_stb = Signal()
 626
 627     def setup(self, m, i):
 628         """ links module to inputs and outputs
 629         """
 630         self.mod.setup(m, i)
 631
 632         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 633
 634         m.d.sync += self.out_of.eq(self.mod.out_of)
 635         m.d.sync += self.out_z.eq(self.mod.out_z)
 636         m.d.sync += self.norm_stb.eq(1)
 637
 638     def action(self, m):
 639         m.next = "normalise_1"
 640
 641
 642
 643
 644 class FPOpData:
 645     def __init__(self, width, id_wid):
 646         self.z = FPOp(width)
 647         self.mid = Signal(id_wid, reset_less=True)
 648
 649     def eq(self, i):
 650         return [self.z.eq(i.z), self.mid.eq(i.mid)]
 651
 652     def ports(self):
 653         return [self.z, self.mid]
 654
 655
 656 class FPADDBaseMod:
 657
 658     def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
 659         """ IEEE754 FP Add
 660
 661             * width: bit-width of IEEE754.  supported: 16, 32, 64
 662             * id_wid: an identifier that is sync-connected to the input
 663             * single_cycle: True indicates each stage to complete in 1 clock
 664             * compact: True indicates a reduced number of stages
 665         """
 666         self.width = width
 667         self.id_wid = id_wid
 668         self.single_cycle = single_cycle
 669         self.compact = compact
 670
 671         self.in_t = Trigger()
 672         self.i = self.ispec()
 673         self.o = self.ospec()
 674
 675         self.states = []
 676
 677     def ispec(self):
 678         return FPADDBaseData(self.width, self.id_wid)
 679
 680     def ospec(self):
 681         return FPOpData(self.width, self.id_wid)
 682
 683     def add_state(self, state):
 684         self.states.append(state)
 685         return state
 686
 687     def get_fragment(self, platform=None):
 688         """ creates the HDL code-fragment for FPAdd
 689         """
 690         m = Module()
 691         m.submodules.out_z = self.o.z
 692         m.submodules.in_t = self.in_t
 693         if self.compact:
 694             self.get_compact_fragment(m, platform)
 695         else:
 696             self.get_longer_fragment(m, platform)
 697
 698         with m.FSM() as fsm:
 699
 700             for state in self.states:
 701                 with m.State(state.state_from):
 702                     state.action(m)
 703
 704         return m
 705
 706     def get_longer_fragment(self, m, platform=None):
 707
 708         get = self.add_state(FPGet2Op("get_ops", "special_cases",
 709                                       self.width))
 710         get.setup(m, self.i)
 711         a = get.out_op1
 712         b = get.out_op2
 713         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 714
 715         sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
 716         sc.setup(m, a, b, self.in_mid)
 717
 718         dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
 719         dn.setup(m, a, b, sc.in_mid)
 720
 721         if self.single_cycle:
 722             alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
 723             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 724         else:
 725             alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
 726             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 727
 728         add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
 729         add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)
 730
 731         add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
 732         add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)
 733
 734         if self.single_cycle:
 735             n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
 736             n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
 737         else:
 738             n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
 739             n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)
 740
 741         rn = self.add_state(FPRound(self.width, self.id_wid))
 742         rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)
 743
 744         cor = self.add_state(FPCorrections(self.width, self.id_wid))
 745         cor.setup(m, rn.out_z, rn.in_mid)
 746
 747         pa = self.add_state(FPPack(self.width, self.id_wid))
 748         pa.setup(m, cor.out_z, rn.in_mid)
 749
 750         ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
 751                                     pa.in_mid, self.out_mid))
 752
 753         pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
 754                                     pa.in_mid, self.out_mid))
 755
 756     def get_compact_fragment(self, m, platform=None):
 757
 758
 759         get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
 760         sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
 761         alm = FPAddAlignSingleAdd(self.width, self.id_wid)
 762         n1 = FPNormToPack(self.width, self.id_wid)
 763
 764         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 765
 766         chainlist = [get, sc, alm, n1]
 767         chain = StageChain(chainlist, specallocate=True)
 768         chain.setup(m, self.i)
 769
 770         for mod in chainlist:
 771             sc = self.add_state(mod)
 772
 773         ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
 774                                     n1.out_z.mid, self.o.mid))
 775
 776         #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
 777         #                            sc.o.mid, self.o.mid))
 778
 779
 780 class FPADDBase(FPState):
 781
 782     def __init__(self, width, id_wid=None, single_cycle=False):
 783         """ IEEE754 FP Add
 784
 785             * width: bit-width of IEEE754.  supported: 16, 32, 64
 786             * id_wid: an identifier that is sync-connected to the input
 787             * single_cycle: True indicates each stage to complete in 1 clock
 788         """
 789         FPState.__init__(self, "fpadd")
 790         self.width = width
 791         self.single_cycle = single_cycle
 792         self.mod = FPADDBaseMod(width, id_wid, single_cycle)
 793         self.o = self.ospec()
 794
 795         self.in_t = Trigger()
 796         self.i = self.ispec()
 797
 798         self.z_done = Signal(reset_less=True) # connects to out_z Strobe
 799         self.in_accept = Signal(reset_less=True)
 800         self.add_stb = Signal(reset_less=True)
 801         self.add_ack = Signal(reset=0, reset_less=True)
 802
 803     def ispec(self):
 804         return self.mod.ispec()
 805
 806     def ospec(self):
 807         return self.mod.ospec()
 808
 809     def setup(self, m, i, add_stb, in_mid):
 810         m.d.comb += [self.i.eq(i),
 811                      self.mod.i.eq(self.i),
 812                      self.z_done.eq(self.mod.o.z.trigger),
 813                      #self.add_stb.eq(add_stb),
 814                      self.mod.in_t.stb.eq(self.in_t.stb),
 815                      self.in_t.ack.eq(self.mod.in_t.ack),
 816                      self.o.mid.eq(self.mod.o.mid),
 817                      self.o.z.v.eq(self.mod.o.z.v),
 818                      self.o.z.stb.eq(self.mod.o.z.stb),
 819                      self.mod.o.z.ack.eq(self.o.z.ack),
 820                     ]
 821
 822         m.d.sync += self.add_stb.eq(add_stb)
 823         m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
 824         m.d.sync += self.o.z.ack.eq(0) # likewise
 825         #m.d.sync += self.in_t.stb.eq(0)
 826
 827         m.submodules.fpadd = self.mod
 828
 829     def action(self, m):
 830
 831         # in_accept is set on incoming strobe HIGH and ack LOW.
 832         m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))
 833
 834         #with m.If(self.in_t.ack):
 835         #    m.d.sync += self.in_t.stb.eq(0)
 836         with m.If(~self.z_done):
 837             # not done: test for accepting an incoming operand pair
 838             with m.If(self.in_accept):
 839                 m.d.sync += [
 840                     self.add_ack.eq(1), # acknowledge receipt...
 841                     self.in_t.stb.eq(1), # initiate add
 842                 ]
 843             with m.Else():
 844                 m.d.sync += [self.add_ack.eq(0),
 845                              self.in_t.stb.eq(0),
 846                              self.o.z.ack.eq(1),
 847                             ]
 848         with m.Else():
 849             # done: acknowledge, and write out id and value
 850             m.d.sync += [self.add_ack.eq(1),
 851                          self.in_t.stb.eq(0)
 852                         ]
 853             m.next = "put_z"
 854
 855             return
 856
 857             if self.in_mid is not None:
 858                 m.d.sync += self.out_mid.eq(self.mod.out_mid)
 859
 860             m.d.sync += [
 861               self.out_z.v.eq(self.mod.out_z.v)
 862             ]
 863             # move to output state on detecting z ack
 864             with m.If(self.out_z.trigger):
 865                 m.d.sync += self.out_z.stb.eq(0)
 866                 m.next = "put_z"
 867             with m.Else():
 868                 m.d.sync += self.out_z.stb.eq(1)
 869
 870
 871 class FPADDBasePipe(ControlBase):
 872     def __init__(self, width, id_wid):
 873         ControlBase.__init__(self)
 874         self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
 875         self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
 876         self.pipe3 = FPNormToPack(width, id_wid)
 877
 878         self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
 879
 880     def elaborate(self, platform):
 881         m = Module()
 882         m.submodules.scnorm = self.pipe1
 883         m.submodules.addalign = self.pipe2
 884         m.submodules.normpack = self.pipe3
 885         m.d.comb += self._eqs
 886         return m
 887
 888
 889 class FPADDInMuxPipe(PriorityCombMuxInPipe):
 890     def __init__(self, width, id_wid, num_rows):
 891         self.num_rows = num_rows
 892         def iospec(): return FPADDBaseData(width, id_wid)
 893         stage = PassThroughStage(iospec)
 894         PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
 895
 896
 897 class FPADDMuxOutPipe(CombMuxOutPipe):
 898     def __init__(self, width, id_wid, num_rows):
 899         self.num_rows = num_rows
 900         def iospec(): return FPPackData(width, id_wid)
 901         stage = PassThroughStage(iospec)
 902         CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
 903
 904
 905 class FPADDMuxInOut:
 906     """ Reservation-Station version of FPADD pipeline.
 907
 908         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 909         * 3-stage adder pipeline
 910         * fan-out on outputs (an array of FPPackData: z,mid)
 911
 912         Fan-in and Fan-out are combinatorial.
 913     """
 914     def __init__(self, width, id_wid, num_rows):
 915         self.num_rows = num_rows
 916         self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
 917         self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
 918         self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out
 919
 920         self.p = self.inpipe.p  # kinda annoying,
 921         self.n = self.outpipe.n # use pipe in/out as this class in/out
 922         self._ports = self.inpipe.ports() + self.outpipe.ports()
 923
 924     def elaborate(self, platform):
 925         m = Module()
 926         m.submodules.inpipe = self.inpipe
 927         m.submodules.fpadd = self.fpadd
 928         m.submodules.outpipe = self.outpipe
 929
 930         m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
 931         m.d.comb += self.fpadd.connect_to_next(self.outpipe)
 932
 933         return m
 934
 935     def ports(self):
 936         return self._ports
 937
 938
 939 class FPADD(FPID):
 940     """ FPADD: stages as follows:
 941
 942         FPGetOp (a)
 943            |
 944         FPGetOp (b)
 945            |
 946         FPAddBase---> FPAddBaseMod
 947            |            |
 948         PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
 949
 950         FPAddBase is tricky: it is both a stage and *has* stages.
 951         Connection to FPAddBaseMod therefore requires an in stb/ack
 952         and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
 953         needs to be the thing that raises the incoming stb.
 954     """
 955
 956     def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
 957         """ IEEE754 FP Add
 958
 959             * width: bit-width of IEEE754.  supported: 16, 32, 64
 960             * id_wid: an identifier that is sync-connected to the input
 961             * single_cycle: True indicates each stage to complete in 1 clock
 962         """
 963         self.width = width
 964         self.id_wid = id_wid
 965         self.single_cycle = single_cycle
 966
 967         #self.out_z = FPOp(width)
 968         self.ids = FPID(id_wid)
 969
 970         rs = []
 971         for i in range(rs_sz):
 972             in_a  = FPOp(width)
 973             in_b  = FPOp(width)
 974             in_a.name = "in_a_%d" % i
 975             in_b.name = "in_b_%d" % i
 976             rs.append((in_a, in_b))
 977         self.rs = Array(rs)
 978
 979         res = []
 980         for i in range(rs_sz):
 981             out_z = FPOp(width)
 982             out_z.name = "out_z_%d" % i
 983             res.append(out_z)
 984         self.res = Array(res)
 985
 986         self.states = []
 987
 988     def add_state(self, state):
 989         self.states.append(state)
 990         return state
 991
 992     def get_fragment(self, platform=None):
 993         """ creates the HDL code-fragment for FPAdd
 994         """
 995         m = Module()
 996         m.submodules += self.rs
 997
 998         in_a = self.rs[0][0]
 999         in_b = self.rs[0][1]
1000
1001         geta = self.add_state(FPGetOp("get_a", "get_b",
1002                                       in_a, self.width))
1003         geta.setup(m, in_a)
1004         a = geta.out_op
1005
1006         getb = self.add_state(FPGetOp("get_b", "fpadd",
1007                                       in_b, self.width))
1008         getb.setup(m, in_b)
1009         b = getb.out_op
1010
1011         ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
1012         ab = self.add_state(ab)
1013         abd = ab.ispec() # create an input spec object for FPADDBase
1014         m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
1015         ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
1016         o = ab.o
1017
1018         pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
1019                                     o.mid, "get_a"))
1020
1021         with m.FSM() as fsm:
1022
1023             for state in self.states:
1024                 with m.State(state.state_from):
1025                     state.action(m)
1026
1027         return m
1028
1029
if __name__ == "__main__":
    # hard-wired development toggle: the first branch builds the full
    # FPADD, the second (currently never taken) builds FPADDBase alone
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        operand_a = alu.rs[0][0]
        operand_b = alu.rs[0][1]
        port_list = operand_a.ports() + operand_b.ports()
        port_list = port_list + alu.res[0].ports()
        port_list = port_list + [alu.ids.in_mid, alu.ids.out_mid]
        main(alu, ports=port_list)
    else:
        # NOTE(review): confirm FPADDBase still exposes in_a/in_b/out_z/
        # in_mid/out_mid before switching the toggle above
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = [alu.in_a, alu.in_b] + alu.in_t.ports()
        port_list = port_list + alu.out_z.ports()
        port_list = port_list + [alu.in_mid, alu.out_mid]
        main(alu, ports=port_list)
1043
1044
1045     # works... but don't use, just do "python fname.py convert -t v"
1046     #print (verilog.convert(alu, ports=[
1047     #                        ports=alu.in_a.ports() + \
1048     #                              alu.in_b.ports() + \
1049     #                              alu.out_z.ports())