src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState, FPID
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
  19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  20 from fpcommon.postcalc import FPAddStage1Data
  21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
  22                             FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
  23 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
  24 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
  25
  26
  27 class FPAddSpecialCasesMod:
  28     """ special cases: NaNs, infs, zeros, denormalised
  29         NOTE: some of these are unique to add.  see "Special Operations"
  30         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
  31     """
  32
  33     def __init__(self, width, id_wid):
  34         self.width = width
  35         self.id_wid = id_wid
  36         self.i = self.ispec()
  37         self.o = self.ospec()
  38
  39     def ispec(self):
  40         return FPADDBaseData(self.width, self.id_wid)
  41
  42     def ospec(self):
  43         return FPSCData(self.width, self.id_wid)
  44
  45     def setup(self, m, i):
  46         """ links module to inputs and outputs
  47         """
  48         m.submodules.specialcases = self
  49         m.d.comb += self.i.eq(i)
  50
  51     def process(self, i):
  52         return self.o
  53
  54     def elaborate(self, platform):
  55         m = Module()
  56
  57         m.submodules.sc_out_z = self.o.z
  58
  59         # decode: XXX really should move to separate stage
  60         a1 = FPNumIn(None, self.width)
  61         b1 = FPNumIn(None, self.width)
  62         m.submodules.sc_decode_a = a1
  63         m.submodules.sc_decode_b = b1
  64         m.d.comb += [a1.decode(self.i.a),
  65                      b1.decode(self.i.b),
  66                     ]
  67
  68         s_nomatch = Signal()
  69         m.d.comb += s_nomatch.eq(a1.s != b1.s)
  70
  71         m_match = Signal()
  72         m.d.comb += m_match.eq(a1.m == b1.m)
  73
  74         # if a is NaN or b is NaN return NaN
  75         with m.If(a1.is_nan | b1.is_nan):
  76             m.d.comb += self.o.out_do_z.eq(1)
  77             m.d.comb += self.o.z.nan(0)
  78
  79         # XXX WEIRDNESS for FP16 non-canonical NaN handling
  80         # under review
  81
  82         ## if a is zero and b is NaN return -b
  83         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
  84         #    m.d.comb += self.o.out_do_z.eq(1)
  85         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
  86
  87         ## if b is zero and a is NaN return -a
  88         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
  89         #    m.d.comb += self.o.out_do_z.eq(1)
  90         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
  91
  92         ## if a is -zero and b is NaN return -b
  93         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
  94         #    m.d.comb += self.o.out_do_z.eq(1)
  95         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
  96
  97         ## if b is -zero and a is NaN return -a
  98         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
  99         #    m.d.comb += self.o.out_do_z.eq(1)
 100         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 101
 102         # if a is inf return inf (or NaN)
 103         with m.Elif(a1.is_inf):
 104             m.d.comb += self.o.out_do_z.eq(1)
 105             m.d.comb += self.o.z.inf(a1.s)
 106             # if a is inf and signs don't match return NaN
 107             with m.If(b1.exp_128 & s_nomatch):
 108                 m.d.comb += self.o.z.nan(0)
 109
 110         # if b is inf return inf
 111         with m.Elif(b1.is_inf):
 112             m.d.comb += self.o.out_do_z.eq(1)
 113             m.d.comb += self.o.z.inf(b1.s)
 114
 115         # if a is zero and b zero return signed-a/b
 116         with m.Elif(a1.is_zero & b1.is_zero):
 117             m.d.comb += self.o.out_do_z.eq(1)
 118             m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
 119
 120         # if a is zero return b
 121         with m.Elif(a1.is_zero):
 122             m.d.comb += self.o.out_do_z.eq(1)
 123             m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
 124
 125         # if b is zero return a
 126         with m.Elif(b1.is_zero):
 127             m.d.comb += self.o.out_do_z.eq(1)
 128             m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
 129
 130         # if a equal to -b return zero (+ve zero)
 131         with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
 132             m.d.comb += self.o.out_do_z.eq(1)
 133             m.d.comb += self.o.z.zero(0)
 134
 135         # Denormalised Number checks next, so pass a/b data through
 136         with m.Else():
 137             m.d.comb += self.o.out_do_z.eq(0)
 138             m.d.comb += self.o.a.eq(a1)
 139             m.d.comb += self.o.b.eq(b1)
 140
 141         m.d.comb += self.o.oz.eq(self.o.z.v)
 142         m.d.comb += self.o.mid.eq(self.i.mid)
 143
 144         return m
 145
 146
 147 class FPAddSpecialCases(FPState):
 148     """ special cases: NaNs, infs, zeros, denormalised
 149         NOTE: some of these are unique to add.  see "Special Operations"
 150         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 151     """
 152
 153     def __init__(self, width, id_wid):
 154         FPState.__init__(self, "special_cases")
 155         self.mod = FPAddSpecialCasesMod(width)
 156         self.out_z = self.mod.ospec()
 157         self.out_do_z = Signal(reset_less=True)
 158
 159     def setup(self, m, i):
 160         """ links module to inputs and outputs
 161         """
 162         self.mod.setup(m, i, self.out_do_z)
 163         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 164         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 165
 166     def action(self, m):
 167         self.idsync(m)
 168         with m.If(self.out_do_z):
 169             m.next = "put_z"
 170         with m.Else():
 171             m.next = "denormalise"
 172
 173
 174 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 175     """ special cases: NaNs, infs, zeros, denormalised
 176         NOTE: some of these are unique to add.  see "Special Operations"
 177         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 178     """
 179
 180     def __init__(self, width, id_wid):
 181         FPState.__init__(self, "special_cases")
 182         self.width = width
 183         self.id_wid = id_wid
 184         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 185         self.out = self.ospec()
 186
 187     def ispec(self):
 188         return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
 189
 190     def ospec(self):
 191         return FPSCData(self.width, self.id_wid) # DeNorm ospec
 192
 193     def setup(self, m, i):
 194         """ links module to inputs and outputs
 195         """
 196         smod = FPAddSpecialCasesMod(self.width, self.id_wid)
 197         dmod = FPAddDeNormMod(self.width, self.id_wid)
 198
 199         chain = StageChain([smod, dmod])
 200         chain.setup(m, i)
 201
 202         # only needed for break-out (early-out)
 203         # self.out_do_z = smod.o.out_do_z
 204
 205         self.o = dmod.o
 206
 207     def process(self, i):
 208         return self.o
 209
 210     def action(self, m):
 211         # for break-out (early-out)
 212         #with m.If(self.out_do_z):
 213         #    m.next = "put_z"
 214         #with m.Else():
 215             m.d.sync += self.out.eq(self.process(None))
 216             m.next = "align"
 217
 218
 219 class FPAddAlignMultiMod(FPState):
 220
 221     def __init__(self, width):
 222         self.in_a = FPNumBase(width)
 223         self.in_b = FPNumBase(width)
 224         self.out_a = FPNumIn(None, width)
 225         self.out_b = FPNumIn(None, width)
 226         self.exp_eq = Signal(reset_less=True)
 227
 228     def elaborate(self, platform):
 229         # This one however (single-cycle) will do the shift
 230         # in one go.
 231
 232         m = Module()
 233
 234         m.submodules.align_in_a = self.in_a
 235         m.submodules.align_in_b = self.in_b
 236         m.submodules.align_out_a = self.out_a
 237         m.submodules.align_out_b = self.out_b
 238
 239         # NOTE: this does *not* do single-cycle multi-shifting,
 240         #       it *STAYS* in the align state until exponents match
 241
 242         # exponent of a greater than b: shift b down
 243         m.d.comb += self.exp_eq.eq(0)
 244         m.d.comb += self.out_a.eq(self.in_a)
 245         m.d.comb += self.out_b.eq(self.in_b)
 246         agtb = Signal(reset_less=True)
 247         altb = Signal(reset_less=True)
 248         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 249         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 250         with m.If(agtb):
 251             m.d.comb += self.out_b.shift_down(self.in_b)
 252         # exponent of b greater than a: shift a down
 253         with m.Elif(altb):
 254             m.d.comb += self.out_a.shift_down(self.in_a)
 255         # exponents equal: move to next stage.
 256         with m.Else():
 257             m.d.comb += self.exp_eq.eq(1)
 258         return m
 259
 260
 261 class FPAddAlignMulti(FPState):
 262
 263     def __init__(self, width, id_wid):
 264         FPState.__init__(self, "align")
 265         self.mod = FPAddAlignMultiMod(width)
 266         self.out_a = FPNumIn(None, width)
 267         self.out_b = FPNumIn(None, width)
 268         self.exp_eq = Signal(reset_less=True)
 269
 270     def setup(self, m, in_a, in_b):
 271         """ links module to inputs and outputs
 272         """
 273         m.submodules.align = self.mod
 274         m.d.comb += self.mod.in_a.eq(in_a)
 275         m.d.comb += self.mod.in_b.eq(in_b)
 276         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 277         m.d.sync += self.out_a.eq(self.mod.out_a)
 278         m.d.sync += self.out_b.eq(self.mod.out_b)
 279
 280     def action(self, m):
 281         with m.If(self.exp_eq):
 282             m.next = "add_0"
 283
 284
 285 class FPNumIn2Ops:
 286
 287     def __init__(self, width, id_wid):
 288         self.a = FPNumIn(None, width)
 289         self.b = FPNumIn(None, width)
 290         self.z = FPNumOut(width, False)
 291         self.out_do_z = Signal(reset_less=True)
 292         self.oz = Signal(width, reset_less=True)
 293         self.mid = Signal(id_wid, reset_less=True)
 294
 295     def eq(self, i):
 296         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 297                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 298
 299
 300 class FPAddAlignSingleMod:
 301
 302     def __init__(self, width, id_wid):
 303         self.width = width
 304         self.id_wid = id_wid
 305         self.i = self.ispec()
 306         self.o = self.ospec()
 307
 308     def ispec(self):
 309         return FPSCData(self.width, self.id_wid)
 310
 311     def ospec(self):
 312         return FPNumIn2Ops(self.width, self.id_wid)
 313
 314     def process(self, i):
 315         return self.o
 316
 317     def setup(self, m, i):
 318         """ links module to inputs and outputs
 319         """
 320         m.submodules.align = self
 321         m.d.comb += self.i.eq(i)
 322
 323     def elaborate(self, platform):
 324         """ Aligns A against B or B against A, depending on which has the
 325             greater exponent.  This is done in a *single* cycle using
 326             variable-width bit-shift
 327
 328             the shifter used here is quite expensive in terms of gates.
 329             Mux A or B in (and out) into temporaries, as only one of them
 330             needs to be aligned against the other
 331         """
 332         m = Module()
 333
 334         m.submodules.align_in_a = self.i.a
 335         m.submodules.align_in_b = self.i.b
 336         m.submodules.align_out_a = self.o.a
 337         m.submodules.align_out_b = self.o.b
 338
 339         # temporary (muxed) input and output to be shifted
 340         t_inp = FPNumBase(self.width)
 341         t_out = FPNumIn(None, self.width)
 342         espec = (len(self.i.a.e), True)
 343         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 344         m.submodules.align_t_in = t_inp
 345         m.submodules.align_t_out = t_out
 346         m.submodules.multishift_r = msr
 347
 348         ediff = Signal(espec, reset_less=True)
 349         ediffr = Signal(espec, reset_less=True)
 350         tdiff = Signal(espec, reset_less=True)
 351         elz = Signal(reset_less=True)
 352         egz = Signal(reset_less=True)
 353
 354         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 355         m.d.comb += msr.inp.eq(t_inp.m)
 356         m.d.comb += msr.diff.eq(tdiff)
 357         m.d.comb += t_out.m.eq(msr.m)
 358         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 359         m.d.comb += t_out.s.eq(t_inp.s)
 360
 361         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 362         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 363         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 364         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 365
 366         # default: A-exp == B-exp, A and B untouched (fall through)
 367         m.d.comb += self.o.a.eq(self.i.a)
 368         m.d.comb += self.o.b.eq(self.i.b)
 369         # only one shifter (muxed)
 370         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 371         # exponent of a greater than b: shift b down
 372         with m.If(~self.i.out_do_z):
 373             with m.If(egz):
 374                 m.d.comb += [t_inp.eq(self.i.b),
 375                              tdiff.eq(ediff),
 376                              self.o.b.eq(t_out),
 377                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 378                             ]
 379             # exponent of b greater than a: shift a down
 380             with m.Elif(elz):
 381                 m.d.comb += [t_inp.eq(self.i.a),
 382                              tdiff.eq(ediffr),
 383                              self.o.a.eq(t_out),
 384                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 385                             ]
 386
 387         m.d.comb += self.o.mid.eq(self.i.mid)
 388         m.d.comb += self.o.z.eq(self.i.z)
 389         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 390         m.d.comb += self.o.oz.eq(self.i.oz)
 391
 392         return m
 393
 394
 395 class FPAddAlignSingle(FPState):
 396
 397     def __init__(self, width, id_wid):
 398         FPState.__init__(self, "align")
 399         self.mod = FPAddAlignSingleMod(width, id_wid)
 400         self.out_a = FPNumIn(None, width)
 401         self.out_b = FPNumIn(None, width)
 402
 403     def setup(self, m, i):
 404         """ links module to inputs and outputs
 405         """
 406         self.mod.setup(m, i)
 407
 408         # NOTE: could be done as comb
 409         m.d.sync += self.out_a.eq(self.mod.out_a)
 410         m.d.sync += self.out_b.eq(self.mod.out_b)
 411
 412     def action(self, m):
 413         m.next = "add_0"
 414
 415
 416 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 417
 418     def __init__(self, width, id_wid):
 419         FPState.__init__(self, "align")
 420         self.width = width
 421         self.id_wid = id_wid
 422         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 423         self.a1o = self.ospec()
 424
 425     def ispec(self):
 426         return FPSCData(self.width, self.id_wid)
 427
 428     def ospec(self):
 429         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 430
 431     def setup(self, m, i):
 432         """ links module to inputs and outputs
 433         """
 434
 435         # chain AddAlignSingle, AddStage0 and AddStage1
 436         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 437         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 438         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 439
 440         chain = StageChain([mod, a0mod, a1mod])
 441         chain.setup(m, i)
 442
 443         self.o = a1mod.o
 444
 445     def process(self, i):
 446         return self.o
 447
 448     def action(self, m):
 449         m.d.sync += self.a1o.eq(self.process(None))
 450         m.next = "normalise_1"
 451
 452
 453 class FPAddStage0Data:
 454
 455     def __init__(self, width, id_wid):
 456         self.z = FPNumBase(width, False)
 457         self.out_do_z = Signal(reset_less=True)
 458         self.oz = Signal(width, reset_less=True)
 459         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 460         self.mid = Signal(id_wid, reset_less=True)
 461
 462     def eq(self, i):
 463         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 464                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 465
 466
 467 class FPAddStage0Mod:
 468
 469     def __init__(self, width, id_wid):
 470         self.width = width
 471         self.id_wid = id_wid
 472         self.i = self.ispec()
 473         self.o = self.ospec()
 474
 475     def ispec(self):
 476         return FPSCData(self.width, self.id_wid)
 477
 478     def ospec(self):
 479         return FPAddStage0Data(self.width, self.id_wid)
 480
 481     def process(self, i):
 482         return self.o
 483
 484     def setup(self, m, i):
 485         """ links module to inputs and outputs
 486         """
 487         m.submodules.add0 = self
 488         m.d.comb += self.i.eq(i)
 489
 490     def elaborate(self, platform):
 491         m = Module()
 492         m.submodules.add0_in_a = self.i.a
 493         m.submodules.add0_in_b = self.i.b
 494         m.submodules.add0_out_z = self.o.z
 495
 496         # store intermediate tests (and zero-extended mantissas)
 497         seq = Signal(reset_less=True)
 498         mge = Signal(reset_less=True)
 499         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 500         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 501         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 502                      mge.eq(self.i.a.m >= self.i.b.m),
 503                      am0.eq(Cat(self.i.a.m, 0)),
 504                      bm0.eq(Cat(self.i.b.m, 0))
 505                     ]
 506         # same-sign (both negative or both positive) add mantissas
 507         with m.If(~self.i.out_do_z):
 508             m.d.comb += self.o.z.e.eq(self.i.a.e)
 509             with m.If(seq):
 510                 m.d.comb += [
 511                     self.o.tot.eq(am0 + bm0),
 512                     self.o.z.s.eq(self.i.a.s)
 513                 ]
 514             # a mantissa greater than b, use a
 515             with m.Elif(mge):
 516                 m.d.comb += [
 517                     self.o.tot.eq(am0 - bm0),
 518                     self.o.z.s.eq(self.i.a.s)
 519                 ]
 520             # b mantissa greater than a, use b
 521             with m.Else():
 522                 m.d.comb += [
 523                     self.o.tot.eq(bm0 - am0),
 524                     self.o.z.s.eq(self.i.b.s)
 525             ]
 526
 527         m.d.comb += self.o.oz.eq(self.i.oz)
 528         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 529         m.d.comb += self.o.mid.eq(self.i.mid)
 530         return m
 531
 532
 533 class FPAddStage0(FPState):
 534     """ First stage of add.  covers same-sign (add) and subtract
 535         special-casing when mantissas are greater or equal, to
 536         give greatest accuracy.
 537     """
 538
 539     def __init__(self, width, id_wid):
 540         FPState.__init__(self, "add_0")
 541         self.mod = FPAddStage0Mod(width)
 542         self.o = self.mod.ospec()
 543
 544     def setup(self, m, i):
 545         """ links module to inputs and outputs
 546         """
 547         self.mod.setup(m, i)
 548
 549         # NOTE: these could be done as combinatorial (merge add0+add1)
 550         m.d.sync += self.o.eq(self.mod.o)
 551
 552     def action(self, m):
 553         m.next = "add_1"
 554
 555
 556 class FPAddStage1Mod(FPState):
 557     """ Second stage of add: preparation for normalisation.
 558         detects when tot sum is too big (tot[27] is kinda a carry bit)
 559     """
 560
 561     def __init__(self, width, id_wid):
 562         self.width = width
 563         self.id_wid = id_wid
 564         self.i = self.ispec()
 565         self.o = self.ospec()
 566
 567     def ispec(self):
 568         return FPAddStage0Data(self.width, self.id_wid)
 569
 570     def ospec(self):
 571         return FPAddStage1Data(self.width, self.id_wid)
 572
 573     def process(self, i):
 574         return self.o
 575
 576     def setup(self, m, i):
 577         """ links module to inputs and outputs
 578         """
 579         m.submodules.add1 = self
 580         m.submodules.add1_out_overflow = self.o.of
 581
 582         m.d.comb += self.i.eq(i)
 583
 584     def elaborate(self, platform):
 585         m = Module()
 586         m.d.comb += self.o.z.eq(self.i.z)
 587         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 588         with m.If(~self.i.out_do_z):
 589             with m.If(self.i.tot[-1]):
 590                 m.d.comb += [
 591                     self.o.z.m.eq(self.i.tot[4:]),
 592                     self.o.of.m0.eq(self.i.tot[4]),
 593                     self.o.of.guard.eq(self.i.tot[3]),
 594                     self.o.of.round_bit.eq(self.i.tot[2]),
 595                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 596                     self.o.z.e.eq(self.i.z.e + 1)
 597             ]
 598             # tot[-1] (MSB) zero case
 599             with m.Else():
 600                 m.d.comb += [
 601                     self.o.z.m.eq(self.i.tot[3:]),
 602                     self.o.of.m0.eq(self.i.tot[3]),
 603                     self.o.of.guard.eq(self.i.tot[2]),
 604                     self.o.of.round_bit.eq(self.i.tot[1]),
 605                     self.o.of.sticky.eq(self.i.tot[0])
 606             ]
 607
 608         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 609         m.d.comb += self.o.oz.eq(self.i.oz)
 610         m.d.comb += self.o.mid.eq(self.i.mid)
 611
 612         return m
 613
 614
 615 class FPAddStage1(FPState):
 616
 617     def __init__(self, width, id_wid):
 618         FPState.__init__(self, "add_1")
 619         self.mod = FPAddStage1Mod(width)
 620         self.out_z = FPNumBase(width, False)
 621         self.out_of = Overflow()
 622         self.norm_stb = Signal()
 623
 624     def setup(self, m, i):
 625         """ links module to inputs and outputs
 626         """
 627         self.mod.setup(m, i)
 628
 629         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 630
 631         m.d.sync += self.out_of.eq(self.mod.out_of)
 632         m.d.sync += self.out_z.eq(self.mod.out_z)
 633         m.d.sync += self.norm_stb.eq(1)
 634
 635     def action(self, m):
 636         m.next = "normalise_1"
 637
 638
 639 class FPNormToPack(FPState, UnbufferedPipeline):
 640
 641     def __init__(self, width, id_wid):
 642         FPState.__init__(self, "normalise_1")
 643         self.id_wid = id_wid
 644         self.width = width
 645         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 646
 647     def ispec(self):
 648         return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec
 649
 650     def ospec(self):
 651         return FPPackData(self.width, self.id_wid) # FPPackMod ospec
 652
 653     def setup(self, m, i):
 654         """ links module to inputs and outputs
 655         """
 656
 657         # Normalisation, Rounding Corrections, Pack - in a chain
 658         nmod = FPNorm1ModSingle(self.width, self.id_wid)
 659         rmod = FPRoundMod(self.width, self.id_wid)
 660         cmod = FPCorrectionsMod(self.width, self.id_wid)
 661         pmod = FPPackMod(self.width, self.id_wid)
 662         chain = StageChain([nmod, rmod, cmod, pmod])
 663         chain.setup(m, i)
 664         self.out_z = pmod.ospec()
 665
 666         self.o = pmod.o
 667
 668     def process(self, i):
 669         return self.o
 670
 671     def action(self, m):
 672         m.d.sync += self.out_z.eq(self.process(None))
 673         m.next = "pack_put_z"
 674
 675
 676 class FPPackData:
 677
 678     def __init__(self, width, id_wid):
 679         self.z = Signal(width, reset_less=True)
 680         self.mid = Signal(id_wid, reset_less=True)
 681
 682     def eq(self, i):
 683         return [self.z.eq(i.z), self.mid.eq(i.mid)]
 684
 685     def ports(self):
 686         return [self.z, self.mid]
 687
 688
 689 class FPPackMod:
 690
 691     def __init__(self, width, id_wid):
 692         self.width = width
 693         self.id_wid = id_wid
 694         self.i = self.ispec()
 695         self.o = self.ospec()
 696
 697     def ispec(self):
 698         return FPRoundData(self.width, self.id_wid)
 699
 700     def ospec(self):
 701         return FPPackData(self.width, self.id_wid)
 702
 703     def process(self, i):
 704         return self.o
 705
 706     def setup(self, m, in_z):
 707         """ links module to inputs and outputs
 708         """
 709         m.submodules.pack = self
 710         m.d.comb += self.i.eq(in_z)
 711
 712     def elaborate(self, platform):
 713         m = Module()
 714         z = FPNumOut(self.width, False)
 715         m.submodules.pack_in_z = self.i.z
 716         m.submodules.pack_out_z = z
 717         m.d.comb += self.o.mid.eq(self.i.mid)
 718         with m.If(~self.i.out_do_z):
 719             with m.If(self.i.z.is_overflowed):
 720                 m.d.comb += z.inf(self.i.z.s)
 721             with m.Else():
 722                 m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
 723         with m.Else():
 724             m.d.comb += z.v.eq(self.i.oz)
 725         m.d.comb += self.o.z.eq(z.v)
 726         return m
 727
 728
 729 class FPPack(FPState):
 730
 731     def __init__(self, width, id_wid):
 732         FPState.__init__(self, "pack")
 733         self.mod = FPPackMod(width)
 734         self.out_z = self.ospec()
 735
 736     def ispec(self):
 737         return self.mod.ispec()
 738
 739     def ospec(self):
 740         return self.mod.ospec()
 741
 742     def setup(self, m, in_z):
 743         """ links module to inputs and outputs
 744         """
 745         self.mod.setup(m, in_z)
 746
 747         m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
 748         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)
 749
 750     def action(self, m):
 751         m.next = "pack_put_z"
 752
 753
 754 class FPPutZ(FPState):
 755
 756     def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
 757         FPState.__init__(self, state)
 758         if to_state is None:
 759             to_state = "get_ops"
 760         self.to_state = to_state
 761         self.in_z = in_z
 762         self.out_z = out_z
 763         self.in_mid = in_mid
 764         self.out_mid = out_mid
 765
 766     def action(self, m):
 767         if self.in_mid is not None:
 768             m.d.sync += self.out_mid.eq(self.in_mid)
 769         m.d.sync += [
 770           self.out_z.z.v.eq(self.in_z)
 771         ]
 772         with m.If(self.out_z.z.stb & self.out_z.z.ack):
 773             m.d.sync += self.out_z.z.stb.eq(0)
 774             m.next = self.to_state
 775         with m.Else():
 776             m.d.sync += self.out_z.z.stb.eq(1)
 777
 778
 779 class FPPutZIdx(FPState):
 780
 781     def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
 782         FPState.__init__(self, state)
 783         if to_state is None:
 784             to_state = "get_ops"
 785         self.to_state = to_state
 786         self.in_z = in_z
 787         self.out_zs = out_zs
 788         self.in_mid = in_mid
 789
 790     def action(self, m):
 791         outz_stb = Signal(reset_less=True)
 792         outz_ack = Signal(reset_less=True)
 793         m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
 794                      outz_ack.eq(self.out_zs[self.in_mid].ack),
 795                     ]
 796         m.d.sync += [
 797           self.out_zs[self.in_mid].v.eq(self.in_z.v)
 798         ]
 799         with m.If(outz_stb & outz_ack):
 800             m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
 801             m.next = self.to_state
 802         with m.Else():
 803             m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
 804
 805
 806 class FPOpData:
 807     def __init__(self, width, id_wid):
 808         self.z = FPOp(width)
 809         self.mid = Signal(id_wid, reset_less=True)
 810
 811     def eq(self, i):
 812         return [self.z.eq(i.z), self.mid.eq(i.mid)]
 813
 814     def ports(self):
 815         return [self.z, self.mid]
 816
 817
 818 class FPADDBaseMod:
 819
 820     def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
 821         """ IEEE754 FP Add
 822
 823             * width: bit-width of IEEE754.  supported: 16, 32, 64
 824             * id_wid: an identifier that is sync-connected to the input
 825             * single_cycle: True indicates each stage to complete in 1 clock
 826             * compact: True indicates a reduced number of stages
 827         """
 828         self.width = width
 829         self.id_wid = id_wid
 830         self.single_cycle = single_cycle
 831         self.compact = compact
 832
 833         self.in_t = Trigger()
 834         self.i = self.ispec()
 835         self.o = self.ospec()
 836
 837         self.states = []
 838
 839     def ispec(self):
 840         return FPADDBaseData(self.width, self.id_wid)
 841
 842     def ospec(self):
 843         return FPOpData(self.width, self.id_wid)
 844
 845     def add_state(self, state):
 846         self.states.append(state)
 847         return state
 848
 849     def get_fragment(self, platform=None):
 850         """ creates the HDL code-fragment for FPAdd
 851         """
 852         m = Module()
 853         m.submodules.out_z = self.o.z
 854         m.submodules.in_t = self.in_t
 855         if self.compact:
 856             self.get_compact_fragment(m, platform)
 857         else:
 858             self.get_longer_fragment(m, platform)
 859
 860         with m.FSM() as fsm:
 861
 862             for state in self.states:
 863                 with m.State(state.state_from):
 864                     state.action(m)
 865
 866         return m
 867
    def get_longer_fragment(self, m, platform=None):
        """ builds the non-compact adder: one FSM state per step
            (get_ops, special cases, denorm, align, add0, add1,
            normalise, round, corrections, pack, and two put-z states),
            each registered through add_state().

            NOTE(review): several calls below do not match the
            definitions visible in this file - confirm before using
            this path:
            * self.in_mid, self.out_z and self.out_mid are not set up
              in __init__ (which creates in_t, i, o and states)
            * FPAddSpecialCases.setup, FPAddStage0.setup and
              FPAddStage1.setup are defined as (m, i), FPPack.setup as
              (m, in_z) and FPAddAlignMulti.setup as (m, in_a, in_b),
              but each is called here with additional arguments
            * FPAddStage0 stores its result in "o"; no out_tot / out_z
              attributes are defined on it
            * FPGet2Op is given three arguments here but four
              (including id_wid) in get_compact_fragment
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: one pass (single_cycle) or repeated until aligned
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi variant additionally takes norm_stb
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # "pack_put_z" outputs the packed result of the full calculation
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        # "put_z" outputs the early result from the special-cases state
        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))
 917
 918     def get_compact_fragment(self, m, platform=None):
 919
 920
 921         get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
 922         sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
 923         alm = FPAddAlignSingleAdd(self.width, self.id_wid)
 924         n1 = FPNormToPack(self.width, self.id_wid)
 925
 926         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 927
 928         chainlist = [get, sc, alm, n1]
 929         chain = StageChain(chainlist, specallocate=True)
 930         chain.setup(m, self.i)
 931
 932         for mod in chainlist:
 933             sc = self.add_state(mod)
 934
 935         ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
 936                                     n1.out_z.mid, self.o.mid))
 937
 938         #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
 939         #                            sc.o.mid, self.o.mid))
 940
 941
 942 class FPADDBase(FPState):
 943
 944     def __init__(self, width, id_wid=None, single_cycle=False):
 945         """ IEEE754 FP Add
 946
 947             * width: bit-width of IEEE754.  supported: 16, 32, 64
 948             * id_wid: an identifier that is sync-connected to the input
 949             * single_cycle: True indicates each stage to complete in 1 clock
 950         """
 951         FPState.__init__(self, "fpadd")
 952         self.width = width
 953         self.single_cycle = single_cycle
 954         self.mod = FPADDBaseMod(width, id_wid, single_cycle)
 955         self.o = self.ospec()
 956
 957         self.in_t = Trigger()
 958         self.i = self.ispec()
 959
 960         self.z_done = Signal(reset_less=True) # connects to out_z Strobe
 961         self.in_accept = Signal(reset_less=True)
 962         self.add_stb = Signal(reset_less=True)
 963         self.add_ack = Signal(reset=0, reset_less=True)
 964
 965     def ispec(self):
 966         return self.mod.ispec()
 967
 968     def ospec(self):
 969         return self.mod.ospec()
 970
 971     def setup(self, m, i, add_stb, in_mid):
 972         m.d.comb += [self.i.eq(i),
 973                      self.mod.i.eq(self.i),
 974                      self.z_done.eq(self.mod.o.z.trigger),
 975                      #self.add_stb.eq(add_stb),
 976                      self.mod.in_t.stb.eq(self.in_t.stb),
 977                      self.in_t.ack.eq(self.mod.in_t.ack),
 978                      self.o.mid.eq(self.mod.o.mid),
 979                      self.o.z.v.eq(self.mod.o.z.v),
 980                      self.o.z.stb.eq(self.mod.o.z.stb),
 981                      self.mod.o.z.ack.eq(self.o.z.ack),
 982                     ]
 983
 984         m.d.sync += self.add_stb.eq(add_stb)
 985         m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
 986         m.d.sync += self.o.z.ack.eq(0) # likewise
 987         #m.d.sync += self.in_t.stb.eq(0)
 988
 989         m.submodules.fpadd = self.mod
 990
 991     def action(self, m):
 992
 993         # in_accept is set on incoming strobe HIGH and ack LOW.
 994         m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))
 995
 996         #with m.If(self.in_t.ack):
 997         #    m.d.sync += self.in_t.stb.eq(0)
 998         with m.If(~self.z_done):
 999             # not done: test for accepting an incoming operand pair
1000             with m.If(self.in_accept):
1001                 m.d.sync += [
1002                     self.add_ack.eq(1), # acknowledge receipt...
1003                     self.in_t.stb.eq(1), # initiate add
1004                 ]
1005             with m.Else():
1006                 m.d.sync += [self.add_ack.eq(0),
1007                              self.in_t.stb.eq(0),
1008                              self.o.z.ack.eq(1),
1009                             ]
1010         with m.Else():
1011             # done: acknowledge, and write out id and value
1012             m.d.sync += [self.add_ack.eq(1),
1013                          self.in_t.stb.eq(0)
1014                         ]
1015             m.next = "put_z"
1016
1017             return
1018
1019             if self.in_mid is not None:
1020                 m.d.sync += self.out_mid.eq(self.mod.out_mid)
1021
1022             m.d.sync += [
1023               self.out_z.v.eq(self.mod.out_z.v)
1024             ]
1025             # move to output state on detecting z ack
1026             with m.If(self.out_z.trigger):
1027                 m.d.sync += self.out_z.stb.eq(0)
1028                 m.next = "put_z"
1029             with m.Else():
1030                 m.d.sync += self.out_z.stb.eq(1)
1031
1032
class FPADDBasePipe(ControlBase):
    """ adder pipeline made of three connected stages:
        FPAddSpecialCasesDeNorm -> FPAddAlignSingleAdd -> FPNormToPack
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)

        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # the connection statements are computed once here and applied
        # to the comb domain in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()

        # submodule name -> stage
        named_stages = (
            ("scnorm", self.pipe1),
            ("addalign", self.pipe2),
            ("normpack", self.pipe3),
        )
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)

        m.d.comb += self._eqs
        return m
1049
1050
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe with p_len=num_rows, carrying FPADDBaseData
        through a PassThroughStage
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=num_rows)
1057
1058
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe with n_len=num_rows, carrying FPPackData through a
        PassThroughStage
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=num_rows)
1065
1066
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        Three parts, connected in sequence:

        * inpipe: fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * fpadd: the 3-stage adder pipeline
        * outpipe: fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        fan_in = FPADDInMuxPipe(width, id_wid, num_rows)
        adder = FPADDBasePipe(width, id_wid)
        fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)
        self.inpipe = fan_in
        self.fpadd = adder
        self.outpipe = fan_out

        # this class's own in/out are simply those of the two mux pipes
        self.p = fan_in.p
        self.n = fan_out.n
        self._ports = fan_in.ports() + fan_out.ports()

    def elaborate(self, platform):
        m = Module()

        # each part becomes a submodule named after its attribute
        for name in ("inpipe", "fpadd", "outpipe"):
            setattr(m.submodules, name, getattr(self, name))

        # fan-in output feeds the adder input
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        # adder feeds the fan-out
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ the port list gathered in __init__ (inpipe ports followed by
            outpipe ports)
        """
        return self._ports
1099
1100
class FPADD(FPID):
    """ FPADD: the FSM is made of the following states:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: how many (in_a, in_b) operand pairs, and how many
                     out_z results, are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        def named_op(template, idx):
            # one FPOp whose name is the template filled in with idx
            op = FPOp(width)
            op.name = template % idx
            return op

        # operand pairs: in_a_<i>, in_b_<i>
        self.rs = Array([(named_op("in_a_%d", i), named_op("in_b_%d", i))
                         for i in range(rs_sz)])

        # results: out_z_<i>
        self.res = Array([named_op("out_z_%d", i) for i in range(rs_sz)])

        self.states = []

    def add_state(self, state):
        """ records state at the end of self.states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd: states get_a, get_b,
            fpadd and put_z are registered (using only the first operand
            pair, self.rs[0]) and turned into one FSM
        """
        m = Module()
        m.submodules += self.rs

        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        op_a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        op_b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [
            abd.a.eq(op_a),
            abd.b.eq(op_b),
            abd.mid.eq(self.ids.in_mid),
        ]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        result = ab.o
        self.add_state(FPPutZIdx("put_z", result.z, self.res,
                                 result.mid, "get_a"))

        with m.FSM():
            for st in self.states:
                with m.State(st.state_from):
                    st.action(m)

        return m
1190
1191
if __name__ == "__main__":
    if True:
        # complete FSM adder; expose the first operand pair, the first
        # result and the id in/out as ports
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = alu.rs[0][0].ports() + alu.rs[0][1].ports()
        port_list = port_list + alu.res[0].ports()
        port_list = port_list + [alu.ids.in_mid, alu.ids.out_mid]
        main(alu, ports=port_list)
    else:
        # NOTE(review): FPADDBase.__init__ in this file assigns i, o and
        # in_t, not in_a / in_b / out_z / in_mid / out_mid -- this branch
        # looks stale; confirm before enabling it.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = [alu.in_a, alu.in_b] + alu.in_t.ports()
        port_list = port_list + alu.out_z.ports()
        port_list = port_list + [alu.in_mid, alu.out_mid]
        main(alu, ports=port_list)
1205
1206
1207     # works... but don't use, just do "python fname.py convert -t v"
1208     #print (verilog.convert(alu, ports=[
1209     #                        ports=alu.in_a.ports() + \
1210     #                              alu.in_b.ports() + \
1211     #                              alu.out_z.ports())