src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState, FPID
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
  19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  20 from fpcommon.postcalc import FPAddStage1Data
  21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
  22                             FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
  23 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
  24 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
  25 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
  26 from fpcommon.normtopack import FPNormToPack
  27
  28
  29 class FPAddSpecialCasesMod:
  30     """ special cases: NaNs, infs, zeros, denormalised
  31         NOTE: some of these are unique to add.  see "Special Operations"
  32         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
  33     """
  34
  35     def __init__(self, width, id_wid):
  36         self.width = width
  37         self.id_wid = id_wid
  38         self.i = self.ispec()
  39         self.o = self.ospec()
  40
  41     def ispec(self):
  42         return FPADDBaseData(self.width, self.id_wid)
  43
  44     def ospec(self):
  45         return FPSCData(self.width, self.id_wid)
  46
  47     def setup(self, m, i):
  48         """ links module to inputs and outputs
  49         """
  50         m.submodules.specialcases = self
  51         m.d.comb += self.i.eq(i)
  52
  53     def process(self, i):
  54         return self.o
  55
  56     def elaborate(self, platform):
  57         m = Module()
  58
  59         m.submodules.sc_out_z = self.o.z
  60
  61         # decode: XXX really should move to separate stage
  62         a1 = FPNumIn(None, self.width)
  63         b1 = FPNumIn(None, self.width)
  64         m.submodules.sc_decode_a = a1
  65         m.submodules.sc_decode_b = b1
  66         m.d.comb += [a1.decode(self.i.a),
  67                      b1.decode(self.i.b),
  68                     ]
  69
  70         s_nomatch = Signal()
  71         m.d.comb += s_nomatch.eq(a1.s != b1.s)
  72
  73         m_match = Signal()
  74         m.d.comb += m_match.eq(a1.m == b1.m)
  75
  76         # if a is NaN or b is NaN return NaN
  77         with m.If(a1.is_nan | b1.is_nan):
  78             m.d.comb += self.o.out_do_z.eq(1)
  79             m.d.comb += self.o.z.nan(0)
  80
  81         # XXX WEIRDNESS for FP16 non-canonical NaN handling
  82         # under review
  83
  84         ## if a is zero and b is NaN return -b
  85         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
  86         #    m.d.comb += self.o.out_do_z.eq(1)
  87         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
  88
  89         ## if b is zero and a is NaN return -a
  90         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
  91         #    m.d.comb += self.o.out_do_z.eq(1)
  92         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
  93
  94         ## if a is -zero and b is NaN return -b
  95         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
  96         #    m.d.comb += self.o.out_do_z.eq(1)
  97         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
  98
  99         ## if b is -zero and a is NaN return -a
 100         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 101         #    m.d.comb += self.o.out_do_z.eq(1)
 102         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 103
 104         # if a is inf return inf (or NaN)
 105         with m.Elif(a1.is_inf):
 106             m.d.comb += self.o.out_do_z.eq(1)
 107             m.d.comb += self.o.z.inf(a1.s)
 108             # if a is inf and signs don't match return NaN
 109             with m.If(b1.exp_128 & s_nomatch):
 110                 m.d.comb += self.o.z.nan(0)
 111
 112         # if b is inf return inf
 113         with m.Elif(b1.is_inf):
 114             m.d.comb += self.o.out_do_z.eq(1)
 115             m.d.comb += self.o.z.inf(b1.s)
 116
 117         # if a is zero and b zero return signed-a/b
 118         with m.Elif(a1.is_zero & b1.is_zero):
 119             m.d.comb += self.o.out_do_z.eq(1)
 120             m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
 121
 122         # if a is zero return b
 123         with m.Elif(a1.is_zero):
 124             m.d.comb += self.o.out_do_z.eq(1)
 125             m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
 126
 127         # if b is zero return a
 128         with m.Elif(b1.is_zero):
 129             m.d.comb += self.o.out_do_z.eq(1)
 130             m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
 131
 132         # if a equal to -b return zero (+ve zero)
 133         with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
 134             m.d.comb += self.o.out_do_z.eq(1)
 135             m.d.comb += self.o.z.zero(0)
 136
 137         # Denormalised Number checks next, so pass a/b data through
 138         with m.Else():
 139             m.d.comb += self.o.out_do_z.eq(0)
 140             m.d.comb += self.o.a.eq(a1)
 141             m.d.comb += self.o.b.eq(b1)
 142
 143         m.d.comb += self.o.oz.eq(self.o.z.v)
 144         m.d.comb += self.o.mid.eq(self.i.mid)
 145
 146         return m
 147
 148
 149 class FPAddSpecialCases(FPState):
 150     """ special cases: NaNs, infs, zeros, denormalised
 151         NOTE: some of these are unique to add.  see "Special Operations"
 152         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 153     """
 154
 155     def __init__(self, width, id_wid):
 156         FPState.__init__(self, "special_cases")
 157         self.mod = FPAddSpecialCasesMod(width)
 158         self.out_z = self.mod.ospec()
 159         self.out_do_z = Signal(reset_less=True)
 160
 161     def setup(self, m, i):
 162         """ links module to inputs and outputs
 163         """
 164         self.mod.setup(m, i, self.out_do_z)
 165         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 166         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 167
 168     def action(self, m):
 169         self.idsync(m)
 170         with m.If(self.out_do_z):
 171             m.next = "put_z"
 172         with m.Else():
 173             m.next = "denormalise"
 174
 175
 176 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 177     """ special cases: NaNs, infs, zeros, denormalised
 178         NOTE: some of these are unique to add.  see "Special Operations"
 179         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 180     """
 181
 182     def __init__(self, width, id_wid):
 183         FPState.__init__(self, "special_cases")
 184         self.width = width
 185         self.id_wid = id_wid
 186         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 187         self.out = self.ospec()
 188
 189     def ispec(self):
 190         return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
 191
 192     def ospec(self):
 193         return FPSCData(self.width, self.id_wid) # DeNorm ospec
 194
 195     def setup(self, m, i):
 196         """ links module to inputs and outputs
 197         """
 198         smod = FPAddSpecialCasesMod(self.width, self.id_wid)
 199         dmod = FPAddDeNormMod(self.width, self.id_wid)
 200
 201         chain = StageChain([smod, dmod])
 202         chain.setup(m, i)
 203
 204         # only needed for break-out (early-out)
 205         # self.out_do_z = smod.o.out_do_z
 206
 207         self.o = dmod.o
 208
 209     def process(self, i):
 210         return self.o
 211
 212     def action(self, m):
 213         # for break-out (early-out)
 214         #with m.If(self.out_do_z):
 215         #    m.next = "put_z"
 216         #with m.Else():
 217             m.d.sync += self.out.eq(self.process(None))
 218             m.next = "align"
 219
 220
 221 class FPAddAlignMultiMod(FPState):
 222
 223     def __init__(self, width):
 224         self.in_a = FPNumBase(width)
 225         self.in_b = FPNumBase(width)
 226         self.out_a = FPNumIn(None, width)
 227         self.out_b = FPNumIn(None, width)
 228         self.exp_eq = Signal(reset_less=True)
 229
 230     def elaborate(self, platform):
 231         # This one however (single-cycle) will do the shift
 232         # in one go.
 233
 234         m = Module()
 235
 236         m.submodules.align_in_a = self.in_a
 237         m.submodules.align_in_b = self.in_b
 238         m.submodules.align_out_a = self.out_a
 239         m.submodules.align_out_b = self.out_b
 240
 241         # NOTE: this does *not* do single-cycle multi-shifting,
 242         #       it *STAYS* in the align state until exponents match
 243
 244         # exponent of a greater than b: shift b down
 245         m.d.comb += self.exp_eq.eq(0)
 246         m.d.comb += self.out_a.eq(self.in_a)
 247         m.d.comb += self.out_b.eq(self.in_b)
 248         agtb = Signal(reset_less=True)
 249         altb = Signal(reset_less=True)
 250         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 251         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 252         with m.If(agtb):
 253             m.d.comb += self.out_b.shift_down(self.in_b)
 254         # exponent of b greater than a: shift a down
 255         with m.Elif(altb):
 256             m.d.comb += self.out_a.shift_down(self.in_a)
 257         # exponents equal: move to next stage.
 258         with m.Else():
 259             m.d.comb += self.exp_eq.eq(1)
 260         return m
 261
 262
 263 class FPAddAlignMulti(FPState):
 264
 265     def __init__(self, width, id_wid):
 266         FPState.__init__(self, "align")
 267         self.mod = FPAddAlignMultiMod(width)
 268         self.out_a = FPNumIn(None, width)
 269         self.out_b = FPNumIn(None, width)
 270         self.exp_eq = Signal(reset_less=True)
 271
 272     def setup(self, m, in_a, in_b):
 273         """ links module to inputs and outputs
 274         """
 275         m.submodules.align = self.mod
 276         m.d.comb += self.mod.in_a.eq(in_a)
 277         m.d.comb += self.mod.in_b.eq(in_b)
 278         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 279         m.d.sync += self.out_a.eq(self.mod.out_a)
 280         m.d.sync += self.out_b.eq(self.mod.out_b)
 281
 282     def action(self, m):
 283         with m.If(self.exp_eq):
 284             m.next = "add_0"
 285
 286
 287 class FPNumIn2Ops:
 288
 289     def __init__(self, width, id_wid):
 290         self.a = FPNumIn(None, width)
 291         self.b = FPNumIn(None, width)
 292         self.z = FPNumOut(width, False)
 293         self.out_do_z = Signal(reset_less=True)
 294         self.oz = Signal(width, reset_less=True)
 295         self.mid = Signal(id_wid, reset_less=True)
 296
 297     def eq(self, i):
 298         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 299                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 300
 301
 302 class FPAddAlignSingleMod:
 303
 304     def __init__(self, width, id_wid):
 305         self.width = width
 306         self.id_wid = id_wid
 307         self.i = self.ispec()
 308         self.o = self.ospec()
 309
 310     def ispec(self):
 311         return FPSCData(self.width, self.id_wid)
 312
 313     def ospec(self):
 314         return FPNumIn2Ops(self.width, self.id_wid)
 315
 316     def process(self, i):
 317         return self.o
 318
 319     def setup(self, m, i):
 320         """ links module to inputs and outputs
 321         """
 322         m.submodules.align = self
 323         m.d.comb += self.i.eq(i)
 324
 325     def elaborate(self, platform):
 326         """ Aligns A against B or B against A, depending on which has the
 327             greater exponent.  This is done in a *single* cycle using
 328             variable-width bit-shift
 329
 330             the shifter used here is quite expensive in terms of gates.
 331             Mux A or B in (and out) into temporaries, as only one of them
 332             needs to be aligned against the other
 333         """
 334         m = Module()
 335
 336         m.submodules.align_in_a = self.i.a
 337         m.submodules.align_in_b = self.i.b
 338         m.submodules.align_out_a = self.o.a
 339         m.submodules.align_out_b = self.o.b
 340
 341         # temporary (muxed) input and output to be shifted
 342         t_inp = FPNumBase(self.width)
 343         t_out = FPNumIn(None, self.width)
 344         espec = (len(self.i.a.e), True)
 345         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 346         m.submodules.align_t_in = t_inp
 347         m.submodules.align_t_out = t_out
 348         m.submodules.multishift_r = msr
 349
 350         ediff = Signal(espec, reset_less=True)
 351         ediffr = Signal(espec, reset_less=True)
 352         tdiff = Signal(espec, reset_less=True)
 353         elz = Signal(reset_less=True)
 354         egz = Signal(reset_less=True)
 355
 356         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 357         m.d.comb += msr.inp.eq(t_inp.m)
 358         m.d.comb += msr.diff.eq(tdiff)
 359         m.d.comb += t_out.m.eq(msr.m)
 360         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 361         m.d.comb += t_out.s.eq(t_inp.s)
 362
 363         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 364         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 365         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 366         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 367
 368         # default: A-exp == B-exp, A and B untouched (fall through)
 369         m.d.comb += self.o.a.eq(self.i.a)
 370         m.d.comb += self.o.b.eq(self.i.b)
 371         # only one shifter (muxed)
 372         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 373         # exponent of a greater than b: shift b down
 374         with m.If(~self.i.out_do_z):
 375             with m.If(egz):
 376                 m.d.comb += [t_inp.eq(self.i.b),
 377                              tdiff.eq(ediff),
 378                              self.o.b.eq(t_out),
 379                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 380                             ]
 381             # exponent of b greater than a: shift a down
 382             with m.Elif(elz):
 383                 m.d.comb += [t_inp.eq(self.i.a),
 384                              tdiff.eq(ediffr),
 385                              self.o.a.eq(t_out),
 386                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 387                             ]
 388
 389         m.d.comb += self.o.mid.eq(self.i.mid)
 390         m.d.comb += self.o.z.eq(self.i.z)
 391         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 392         m.d.comb += self.o.oz.eq(self.i.oz)
 393
 394         return m
 395
 396
 397 class FPAddAlignSingle(FPState):
 398
 399     def __init__(self, width, id_wid):
 400         FPState.__init__(self, "align")
 401         self.mod = FPAddAlignSingleMod(width, id_wid)
 402         self.out_a = FPNumIn(None, width)
 403         self.out_b = FPNumIn(None, width)
 404
 405     def setup(self, m, i):
 406         """ links module to inputs and outputs
 407         """
 408         self.mod.setup(m, i)
 409
 410         # NOTE: could be done as comb
 411         m.d.sync += self.out_a.eq(self.mod.out_a)
 412         m.d.sync += self.out_b.eq(self.mod.out_b)
 413
 414     def action(self, m):
 415         m.next = "add_0"
 416
 417
 418 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 419
 420     def __init__(self, width, id_wid):
 421         FPState.__init__(self, "align")
 422         self.width = width
 423         self.id_wid = id_wid
 424         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 425         self.a1o = self.ospec()
 426
 427     def ispec(self):
 428         return FPSCData(self.width, self.id_wid)
 429
 430     def ospec(self):
 431         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 432
 433     def setup(self, m, i):
 434         """ links module to inputs and outputs
 435         """
 436
 437         # chain AddAlignSingle, AddStage0 and AddStage1
 438         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 439         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 440         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 441
 442         chain = StageChain([mod, a0mod, a1mod])
 443         chain.setup(m, i)
 444
 445         self.o = a1mod.o
 446
 447     def process(self, i):
 448         return self.o
 449
 450     def action(self, m):
 451         m.d.sync += self.a1o.eq(self.process(None))
 452         m.next = "normalise_1"
 453
 454
 455 class FPAddStage0Data:
 456
 457     def __init__(self, width, id_wid):
 458         self.z = FPNumBase(width, False)
 459         self.out_do_z = Signal(reset_less=True)
 460         self.oz = Signal(width, reset_less=True)
 461         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 462         self.mid = Signal(id_wid, reset_less=True)
 463
 464     def eq(self, i):
 465         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 466                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 467
 468
 469 class FPAddStage0Mod:
 470
 471     def __init__(self, width, id_wid):
 472         self.width = width
 473         self.id_wid = id_wid
 474         self.i = self.ispec()
 475         self.o = self.ospec()
 476
 477     def ispec(self):
 478         return FPSCData(self.width, self.id_wid)
 479
 480     def ospec(self):
 481         return FPAddStage0Data(self.width, self.id_wid)
 482
 483     def process(self, i):
 484         return self.o
 485
 486     def setup(self, m, i):
 487         """ links module to inputs and outputs
 488         """
 489         m.submodules.add0 = self
 490         m.d.comb += self.i.eq(i)
 491
 492     def elaborate(self, platform):
 493         m = Module()
 494         m.submodules.add0_in_a = self.i.a
 495         m.submodules.add0_in_b = self.i.b
 496         m.submodules.add0_out_z = self.o.z
 497
 498         # store intermediate tests (and zero-extended mantissas)
 499         seq = Signal(reset_less=True)
 500         mge = Signal(reset_less=True)
 501         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 502         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 503         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 504                      mge.eq(self.i.a.m >= self.i.b.m),
 505                      am0.eq(Cat(self.i.a.m, 0)),
 506                      bm0.eq(Cat(self.i.b.m, 0))
 507                     ]
 508         # same-sign (both negative or both positive) add mantissas
 509         with m.If(~self.i.out_do_z):
 510             m.d.comb += self.o.z.e.eq(self.i.a.e)
 511             with m.If(seq):
 512                 m.d.comb += [
 513                     self.o.tot.eq(am0 + bm0),
 514                     self.o.z.s.eq(self.i.a.s)
 515                 ]
 516             # a mantissa greater than b, use a
 517             with m.Elif(mge):
 518                 m.d.comb += [
 519                     self.o.tot.eq(am0 - bm0),
 520                     self.o.z.s.eq(self.i.a.s)
 521                 ]
 522             # b mantissa greater than a, use b
 523             with m.Else():
 524                 m.d.comb += [
 525                     self.o.tot.eq(bm0 - am0),
 526                     self.o.z.s.eq(self.i.b.s)
 527             ]
 528
 529         m.d.comb += self.o.oz.eq(self.i.oz)
 530         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 531         m.d.comb += self.o.mid.eq(self.i.mid)
 532         return m
 533
 534
 535 class FPAddStage0(FPState):
 536     """ First stage of add.  covers same-sign (add) and subtract
 537         special-casing when mantissas are greater or equal, to
 538         give greatest accuracy.
 539     """
 540
 541     def __init__(self, width, id_wid):
 542         FPState.__init__(self, "add_0")
 543         self.mod = FPAddStage0Mod(width)
 544         self.o = self.mod.ospec()
 545
 546     def setup(self, m, i):
 547         """ links module to inputs and outputs
 548         """
 549         self.mod.setup(m, i)
 550
 551         # NOTE: these could be done as combinatorial (merge add0+add1)
 552         m.d.sync += self.o.eq(self.mod.o)
 553
 554     def action(self, m):
 555         m.next = "add_1"
 556
 557
 558 class FPAddStage1Mod(FPState):
 559     """ Second stage of add: preparation for normalisation.
 560         detects when tot sum is too big (tot[27] is kinda a carry bit)
 561     """
 562
 563     def __init__(self, width, id_wid):
 564         self.width = width
 565         self.id_wid = id_wid
 566         self.i = self.ispec()
 567         self.o = self.ospec()
 568
 569     def ispec(self):
 570         return FPAddStage0Data(self.width, self.id_wid)
 571
 572     def ospec(self):
 573         return FPAddStage1Data(self.width, self.id_wid)
 574
 575     def process(self, i):
 576         return self.o
 577
 578     def setup(self, m, i):
 579         """ links module to inputs and outputs
 580         """
 581         m.submodules.add1 = self
 582         m.submodules.add1_out_overflow = self.o.of
 583
 584         m.d.comb += self.i.eq(i)
 585
 586     def elaborate(self, platform):
 587         m = Module()
 588         m.d.comb += self.o.z.eq(self.i.z)
 589         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 590         with m.If(~self.i.out_do_z):
 591             with m.If(self.i.tot[-1]):
 592                 m.d.comb += [
 593                     self.o.z.m.eq(self.i.tot[4:]),
 594                     self.o.of.m0.eq(self.i.tot[4]),
 595                     self.o.of.guard.eq(self.i.tot[3]),
 596                     self.o.of.round_bit.eq(self.i.tot[2]),
 597                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 598                     self.o.z.e.eq(self.i.z.e + 1)
 599             ]
 600             # tot[-1] (MSB) zero case
 601             with m.Else():
 602                 m.d.comb += [
 603                     self.o.z.m.eq(self.i.tot[3:]),
 604                     self.o.of.m0.eq(self.i.tot[3]),
 605                     self.o.of.guard.eq(self.i.tot[2]),
 606                     self.o.of.round_bit.eq(self.i.tot[1]),
 607                     self.o.of.sticky.eq(self.i.tot[0])
 608             ]
 609
 610         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 611         m.d.comb += self.o.oz.eq(self.i.oz)
 612         m.d.comb += self.o.mid.eq(self.i.mid)
 613
 614         return m
 615
 616
 617 class FPAddStage1(FPState):
 618
 619     def __init__(self, width, id_wid):
 620         FPState.__init__(self, "add_1")
 621         self.mod = FPAddStage1Mod(width)
 622         self.out_z = FPNumBase(width, False)
 623         self.out_of = Overflow()
 624         self.norm_stb = Signal()
 625
 626     def setup(self, m, i):
 627         """ links module to inputs and outputs
 628         """
 629         self.mod.setup(m, i)
 630
 631         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 632
 633         m.d.sync += self.out_of.eq(self.mod.out_of)
 634         m.d.sync += self.out_z.eq(self.mod.out_z)
 635         m.d.sync += self.norm_stb.eq(1)
 636
 637     def action(self, m):
 638         m.next = "normalise_1"
 639
 640
 641
 642 class FPPutZ(FPState):
 643
 644     def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
 645         FPState.__init__(self, state)
 646         if to_state is None:
 647             to_state = "get_ops"
 648         self.to_state = to_state
 649         self.in_z = in_z
 650         self.out_z = out_z
 651         self.in_mid = in_mid
 652         self.out_mid = out_mid
 653
 654     def action(self, m):
 655         if self.in_mid is not None:
 656             m.d.sync += self.out_mid.eq(self.in_mid)
 657         m.d.sync += [
 658           self.out_z.z.v.eq(self.in_z)
 659         ]
 660         with m.If(self.out_z.z.stb & self.out_z.z.ack):
 661             m.d.sync += self.out_z.z.stb.eq(0)
 662             m.next = self.to_state
 663         with m.Else():
 664             m.d.sync += self.out_z.z.stb.eq(1)
 665
 666
 667 class FPPutZIdx(FPState):
 668
 669     def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
 670         FPState.__init__(self, state)
 671         if to_state is None:
 672             to_state = "get_ops"
 673         self.to_state = to_state
 674         self.in_z = in_z
 675         self.out_zs = out_zs
 676         self.in_mid = in_mid
 677
 678     def action(self, m):
 679         outz_stb = Signal(reset_less=True)
 680         outz_ack = Signal(reset_less=True)
 681         m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
 682                      outz_ack.eq(self.out_zs[self.in_mid].ack),
 683                     ]
 684         m.d.sync += [
 685           self.out_zs[self.in_mid].v.eq(self.in_z.v)
 686         ]
 687         with m.If(outz_stb & outz_ack):
 688             m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
 689             m.next = self.to_state
 690         with m.Else():
 691             m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
 692
 693
 694 class FPOpData:
 695     def __init__(self, width, id_wid):
 696         self.z = FPOp(width)
 697         self.mid = Signal(id_wid, reset_less=True)
 698
 699     def eq(self, i):
 700         return [self.z.eq(i.z), self.mid.eq(i.mid)]
 701
 702     def ports(self):
 703         return [self.z, self.mid]
 704
 705
 706 class FPADDBaseMod:
 707
 708     def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
 709         """ IEEE754 FP Add
 710
 711             * width: bit-width of IEEE754.  supported: 16, 32, 64
 712             * id_wid: an identifier that is sync-connected to the input
 713             * single_cycle: True indicates each stage to complete in 1 clock
 714             * compact: True indicates a reduced number of stages
 715         """
 716         self.width = width
 717         self.id_wid = id_wid
 718         self.single_cycle = single_cycle
 719         self.compact = compact
 720
 721         self.in_t = Trigger()
 722         self.i = self.ispec()
 723         self.o = self.ospec()
 724
 725         self.states = []
 726
 727     def ispec(self):
 728         return FPADDBaseData(self.width, self.id_wid)
 729
 730     def ospec(self):
 731         return FPOpData(self.width, self.id_wid)
 732
 733     def add_state(self, state):
 734         self.states.append(state)
 735         return state
 736
 737     def get_fragment(self, platform=None):
 738         """ creates the HDL code-fragment for FPAdd
 739         """
 740         m = Module()
 741         m.submodules.out_z = self.o.z
 742         m.submodules.in_t = self.in_t
 743         if self.compact:
 744             self.get_compact_fragment(m, platform)
 745         else:
 746             self.get_longer_fragment(m, platform)
 747
 748         with m.FSM() as fsm:
 749
 750             for state in self.states:
 751                 with m.State(state.state_from):
 752                     state.action(m)
 753
 754         return m
 755
 756     def get_longer_fragment(self, m, platform=None):
 757
 758         get = self.add_state(FPGet2Op("get_ops", "special_cases",
 759                                       self.width))
 760         get.setup(m, self.i)
 761         a = get.out_op1
 762         b = get.out_op2
 763         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 764
 765         sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
 766         sc.setup(m, a, b, self.in_mid)
 767
 768         dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
 769         dn.setup(m, a, b, sc.in_mid)
 770
 771         if self.single_cycle:
 772             alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
 773             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 774         else:
 775             alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
 776             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 777
 778         add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
 779         add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)
 780
 781         add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
 782         add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)
 783
 784         if self.single_cycle:
 785             n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
 786             n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
 787         else:
 788             n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
 789             n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)
 790
 791         rn = self.add_state(FPRound(self.width, self.id_wid))
 792         rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)
 793
 794         cor = self.add_state(FPCorrections(self.width, self.id_wid))
 795         cor.setup(m, rn.out_z, rn.in_mid)
 796
 797         pa = self.add_state(FPPack(self.width, self.id_wid))
 798         pa.setup(m, cor.out_z, rn.in_mid)
 799
 800         ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
 801                                     pa.in_mid, self.out_mid))
 802
 803         pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
 804                                     pa.in_mid, self.out_mid))
 805
 806     def get_compact_fragment(self, m, platform=None):
 807
 808
 809         get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
 810         sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
 811         alm = FPAddAlignSingleAdd(self.width, self.id_wid)
 812         n1 = FPNormToPack(self.width, self.id_wid)
 813
 814         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 815
 816         chainlist = [get, sc, alm, n1]
 817         chain = StageChain(chainlist, specallocate=True)
 818         chain.setup(m, self.i)
 819
 820         for mod in chainlist:
 821             sc = self.add_state(mod)
 822
 823         ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
 824                                     n1.out_z.mid, self.o.mid))
 825
 826         #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
 827         #                            sc.o.mid, self.o.mid))
 828
 829
 830 class FPADDBase(FPState):
 831
 832     def __init__(self, width, id_wid=None, single_cycle=False):
 833         """ IEEE754 FP Add
 834
 835             * width: bit-width of IEEE754.  supported: 16, 32, 64
 836             * id_wid: an identifier that is sync-connected to the input
 837             * single_cycle: True indicates each stage to complete in 1 clock
 838         """
 839         FPState.__init__(self, "fpadd")
 840         self.width = width
 841         self.single_cycle = single_cycle
 842         self.mod = FPADDBaseMod(width, id_wid, single_cycle)
 843         self.o = self.ospec()
 844
 845         self.in_t = Trigger()
 846         self.i = self.ispec()
 847
 848         self.z_done = Signal(reset_less=True) # connects to out_z Strobe
 849         self.in_accept = Signal(reset_less=True)
 850         self.add_stb = Signal(reset_less=True)
 851         self.add_ack = Signal(reset=0, reset_less=True)
 852
 853     def ispec(self):
 854         return self.mod.ispec()
 855
 856     def ospec(self):
 857         return self.mod.ospec()
 858
 859     def setup(self, m, i, add_stb, in_mid):
 860         m.d.comb += [self.i.eq(i),
 861                      self.mod.i.eq(self.i),
 862                      self.z_done.eq(self.mod.o.z.trigger),
 863                      #self.add_stb.eq(add_stb),
 864                      self.mod.in_t.stb.eq(self.in_t.stb),
 865                      self.in_t.ack.eq(self.mod.in_t.ack),
 866                      self.o.mid.eq(self.mod.o.mid),
 867                      self.o.z.v.eq(self.mod.o.z.v),
 868                      self.o.z.stb.eq(self.mod.o.z.stb),
 869                      self.mod.o.z.ack.eq(self.o.z.ack),
 870                     ]
 871
 872         m.d.sync += self.add_stb.eq(add_stb)
 873         m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
 874         m.d.sync += self.o.z.ack.eq(0) # likewise
 875         #m.d.sync += self.in_t.stb.eq(0)
 876
 877         m.submodules.fpadd = self.mod
 878
 879     def action(self, m):
 880
 881         # in_accept is set on incoming strobe HIGH and ack LOW.
 882         m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))
 883
 884         #with m.If(self.in_t.ack):
 885         #    m.d.sync += self.in_t.stb.eq(0)
 886         with m.If(~self.z_done):
 887             # not done: test for accepting an incoming operand pair
 888             with m.If(self.in_accept):
 889                 m.d.sync += [
 890                     self.add_ack.eq(1), # acknowledge receipt...
 891                     self.in_t.stb.eq(1), # initiate add
 892                 ]
 893             with m.Else():
 894                 m.d.sync += [self.add_ack.eq(0),
 895                              self.in_t.stb.eq(0),
 896                              self.o.z.ack.eq(1),
 897                             ]
 898         with m.Else():
 899             # done: acknowledge, and write out id and value
 900             m.d.sync += [self.add_ack.eq(1),
 901                          self.in_t.stb.eq(0)
 902                         ]
 903             m.next = "put_z"
 904
 905             return
 906
 907             if self.in_mid is not None:
 908                 m.d.sync += self.out_mid.eq(self.mod.out_mid)
 909
 910             m.d.sync += [
 911               self.out_z.v.eq(self.mod.out_z.v)
 912             ]
 913             # move to output state on detecting z ack
 914             with m.If(self.out_z.trigger):
 915                 m.d.sync += self.out_z.stb.eq(0)
 916                 m.next = "put_z"
 917             with m.Else():
 918                 m.d.sync += self.out_z.stb.eq(1)
 919
 920
 921 class FPADDBasePipe(ControlBase):
 922     def __init__(self, width, id_wid):
 923         ControlBase.__init__(self)
 924         self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
 925         self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
 926         self.pipe3 = FPNormToPack(width, id_wid)
 927
 928         self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
 929
 930     def elaborate(self, platform):
 931         m = Module()
 932         m.submodules.scnorm = self.pipe1
 933         m.submodules.addalign = self.pipe2
 934         m.submodules.normpack = self.pipe3
 935         m.d.comb += self._eqs
 936         return m
 937
 938
 939 class FPADDInMuxPipe(PriorityCombMuxInPipe):
 940     def __init__(self, width, id_wid, num_rows):
 941         self.num_rows = num_rows
 942         def iospec(): return FPADDBaseData(width, id_wid)
 943         stage = PassThroughStage(iospec)
 944         PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
 945
 946
 947 class FPADDMuxOutPipe(CombMuxOutPipe):
 948     def __init__(self, width, id_wid, num_rows):
 949         self.num_rows = num_rows
 950         def iospec(): return FPPackData(width, id_wid)
 951         stage = PassThroughStage(iospec)
 952         CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
 953
 954
 955 class FPADDMuxInOut:
 956     """ Reservation-Station version of FPADD pipeline.
 957
 958         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 959         * 3-stage adder pipeline
 960         * fan-out on outputs (an array of FPPackData: z,mid)
 961
 962         Fan-in and Fan-out are combinatorial.
 963     """
 964     def __init__(self, width, id_wid, num_rows):
 965         self.num_rows = num_rows
 966         self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
 967         self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
 968         self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out
 969
 970         self.p = self.inpipe.p  # kinda annoying,
 971         self.n = self.outpipe.n # use pipe in/out as this class in/out
 972         self._ports = self.inpipe.ports() + self.outpipe.ports()
 973
 974     def elaborate(self, platform):
 975         m = Module()
 976         m.submodules.inpipe = self.inpipe
 977         m.submodules.fpadd = self.fpadd
 978         m.submodules.outpipe = self.outpipe
 979
 980         m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
 981         m.d.comb += self.fpadd.connect_to_next(self.outpipe)
 982
 983         return m
 984
 985     def ports(self):
 986         return self._ports
 987
 988
 989 class FPADD(FPID):
 990     """ FPADD: stages as follows:
 991
 992         FPGetOp (a)
 993            |
 994         FPGetOp (b)
 995            |
 996         FPAddBase---> FPAddBaseMod
 997            |            |
 998         PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
 999
1000         FPAddBase is tricky: it is both a stage and *has* stages.
1001         Connection to FPAddBaseMod therefore requires an in stb/ack
1002         and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
1003         needs to be the thing that raises the incoming stb.
1004     """
1005
1006     def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
1007         """ IEEE754 FP Add
1008
1009             * width: bit-width of IEEE754.  supported: 16, 32, 64
1010             * id_wid: an identifier that is sync-connected to the input
1011             * single_cycle: True indicates each stage to complete in 1 clock
1012         """
1013         self.width = width
1014         self.id_wid = id_wid
1015         self.single_cycle = single_cycle
1016
1017         #self.out_z = FPOp(width)
1018         self.ids = FPID(id_wid)
1019
1020         rs = []
1021         for i in range(rs_sz):
1022             in_a  = FPOp(width)
1023             in_b  = FPOp(width)
1024             in_a.name = "in_a_%d" % i
1025             in_b.name = "in_b_%d" % i
1026             rs.append((in_a, in_b))
1027         self.rs = Array(rs)
1028
1029         res = []
1030         for i in range(rs_sz):
1031             out_z = FPOp(width)
1032             out_z.name = "out_z_%d" % i
1033             res.append(out_z)
1034         self.res = Array(res)
1035
1036         self.states = []
1037
1038     def add_state(self, state):
1039         self.states.append(state)
1040         return state
1041
1042     def get_fragment(self, platform=None):
1043         """ creates the HDL code-fragment for FPAdd
1044         """
1045         m = Module()
1046         m.submodules += self.rs
1047
1048         in_a = self.rs[0][0]
1049         in_b = self.rs[0][1]
1050
1051         geta = self.add_state(FPGetOp("get_a", "get_b",
1052                                       in_a, self.width))
1053         geta.setup(m, in_a)
1054         a = geta.out_op
1055
1056         getb = self.add_state(FPGetOp("get_b", "fpadd",
1057                                       in_b, self.width))
1058         getb.setup(m, in_b)
1059         b = getb.out_op
1060
1061         ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
1062         ab = self.add_state(ab)
1063         abd = ab.ispec() # create an input spec object for FPADDBase
1064         m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
1065         ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
1066         o = ab.o
1067
1068         pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
1069                                     o.mid, "get_a"))
1070
1071         with m.FSM() as fsm:
1072
1073             for state in self.states:
1074                 with m.State(state.state_from):
1075                     state.action(m)
1076
1077         return m
1078
1079
if __name__ == "__main__":
    # the FPADD variant is the one that runs; the FPADDBase variant in the
    # else branch is switched off by the constant condition.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = alu.rs[0][0].ports() + alu.rs[0][1].ports()
        ports = ports + alu.res[0].ports()
        ports = ports + [alu.ids.in_mid, alu.ids.out_mid]
        main(alu, ports=ports)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = [alu.in_a, alu.in_b] + alu.in_t.ports()
        ports = ports + alu.out_z.ports()
        ports = ports + [alu.in_mid, alu.out_mid]
        main(alu, ports=ports)
1093
1094
1095     # works... but don't use, just do "python fname.py convert -t v"
1096     #print (verilog.convert(alu, ports=[
1097     #                        ports=alu.in_a.ports() + \
1098     #                              alu.in_b.ports() + \
1099     #                              alu.out_z.ports())