src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState, FPID
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
  19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  20 from fpcommon.postcalc import FPAddStage1Data
  21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
  22                             FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
  23 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
  24 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
  25 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
  26
  27
  28 class FPAddSpecialCasesMod:
  29     """ special cases: NaNs, infs, zeros, denormalised
  30         NOTE: some of these are unique to add.  see "Special Operations"
  31         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
  32     """
  33
  34     def __init__(self, width, id_wid):
  35         self.width = width
  36         self.id_wid = id_wid
  37         self.i = self.ispec()
  38         self.o = self.ospec()
  39
  40     def ispec(self):
  41         return FPADDBaseData(self.width, self.id_wid)
  42
  43     def ospec(self):
  44         return FPSCData(self.width, self.id_wid)
  45
  46     def setup(self, m, i):
  47         """ links module to inputs and outputs
  48         """
  49         m.submodules.specialcases = self
  50         m.d.comb += self.i.eq(i)
  51
  52     def process(self, i):
  53         return self.o
  54
  55     def elaborate(self, platform):
  56         m = Module()
  57
  58         m.submodules.sc_out_z = self.o.z
  59
  60         # decode: XXX really should move to separate stage
  61         a1 = FPNumIn(None, self.width)
  62         b1 = FPNumIn(None, self.width)
  63         m.submodules.sc_decode_a = a1
  64         m.submodules.sc_decode_b = b1
  65         m.d.comb += [a1.decode(self.i.a),
  66                      b1.decode(self.i.b),
  67                     ]
  68
  69         s_nomatch = Signal()
  70         m.d.comb += s_nomatch.eq(a1.s != b1.s)
  71
  72         m_match = Signal()
  73         m.d.comb += m_match.eq(a1.m == b1.m)
  74
  75         # if a is NaN or b is NaN return NaN
  76         with m.If(a1.is_nan | b1.is_nan):
  77             m.d.comb += self.o.out_do_z.eq(1)
  78             m.d.comb += self.o.z.nan(0)
  79
  80         # XXX WEIRDNESS for FP16 non-canonical NaN handling
  81         # under review
  82
  83         ## if a is zero and b is NaN return -b
  84         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
  85         #    m.d.comb += self.o.out_do_z.eq(1)
  86         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
  87
  88         ## if b is zero and a is NaN return -a
  89         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
  90         #    m.d.comb += self.o.out_do_z.eq(1)
  91         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
  92
  93         ## if a is -zero and b is NaN return -b
  94         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
  95         #    m.d.comb += self.o.out_do_z.eq(1)
  96         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
  97
  98         ## if b is -zero and a is NaN return -a
  99         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 100         #    m.d.comb += self.o.out_do_z.eq(1)
 101         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 102
 103         # if a is inf return inf (or NaN)
 104         with m.Elif(a1.is_inf):
 105             m.d.comb += self.o.out_do_z.eq(1)
 106             m.d.comb += self.o.z.inf(a1.s)
 107             # if a is inf and signs don't match return NaN
 108             with m.If(b1.exp_128 & s_nomatch):
 109                 m.d.comb += self.o.z.nan(0)
 110
 111         # if b is inf return inf
 112         with m.Elif(b1.is_inf):
 113             m.d.comb += self.o.out_do_z.eq(1)
 114             m.d.comb += self.o.z.inf(b1.s)
 115
 116         # if a is zero and b zero return signed-a/b
 117         with m.Elif(a1.is_zero & b1.is_zero):
 118             m.d.comb += self.o.out_do_z.eq(1)
 119             m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
 120
 121         # if a is zero return b
 122         with m.Elif(a1.is_zero):
 123             m.d.comb += self.o.out_do_z.eq(1)
 124             m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
 125
 126         # if b is zero return a
 127         with m.Elif(b1.is_zero):
 128             m.d.comb += self.o.out_do_z.eq(1)
 129             m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
 130
 131         # if a equal to -b return zero (+ve zero)
 132         with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
 133             m.d.comb += self.o.out_do_z.eq(1)
 134             m.d.comb += self.o.z.zero(0)
 135
 136         # Denormalised Number checks next, so pass a/b data through
 137         with m.Else():
 138             m.d.comb += self.o.out_do_z.eq(0)
 139             m.d.comb += self.o.a.eq(a1)
 140             m.d.comb += self.o.b.eq(b1)
 141
 142         m.d.comb += self.o.oz.eq(self.o.z.v)
 143         m.d.comb += self.o.mid.eq(self.i.mid)
 144
 145         return m
 146
 147
 148 class FPAddSpecialCases(FPState):
 149     """ special cases: NaNs, infs, zeros, denormalised
 150         NOTE: some of these are unique to add.  see "Special Operations"
 151         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 152     """
 153
 154     def __init__(self, width, id_wid):
 155         FPState.__init__(self, "special_cases")
 156         self.mod = FPAddSpecialCasesMod(width)
 157         self.out_z = self.mod.ospec()
 158         self.out_do_z = Signal(reset_less=True)
 159
 160     def setup(self, m, i):
 161         """ links module to inputs and outputs
 162         """
 163         self.mod.setup(m, i, self.out_do_z)
 164         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 165         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 166
 167     def action(self, m):
 168         self.idsync(m)
 169         with m.If(self.out_do_z):
 170             m.next = "put_z"
 171         with m.Else():
 172             m.next = "denormalise"
 173
 174
 175 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 176     """ special cases: NaNs, infs, zeros, denormalised
 177         NOTE: some of these are unique to add.  see "Special Operations"
 178         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 179     """
 180
 181     def __init__(self, width, id_wid):
 182         FPState.__init__(self, "special_cases")
 183         self.width = width
 184         self.id_wid = id_wid
 185         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 186         self.out = self.ospec()
 187
 188     def ispec(self):
 189         return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
 190
 191     def ospec(self):
 192         return FPSCData(self.width, self.id_wid) # DeNorm ospec
 193
 194     def setup(self, m, i):
 195         """ links module to inputs and outputs
 196         """
 197         smod = FPAddSpecialCasesMod(self.width, self.id_wid)
 198         dmod = FPAddDeNormMod(self.width, self.id_wid)
 199
 200         chain = StageChain([smod, dmod])
 201         chain.setup(m, i)
 202
 203         # only needed for break-out (early-out)
 204         # self.out_do_z = smod.o.out_do_z
 205
 206         self.o = dmod.o
 207
 208     def process(self, i):
 209         return self.o
 210
 211     def action(self, m):
 212         # for break-out (early-out)
 213         #with m.If(self.out_do_z):
 214         #    m.next = "put_z"
 215         #with m.Else():
 216             m.d.sync += self.out.eq(self.process(None))
 217             m.next = "align"
 218
 219
 220 class FPAddAlignMultiMod(FPState):
 221
 222     def __init__(self, width):
 223         self.in_a = FPNumBase(width)
 224         self.in_b = FPNumBase(width)
 225         self.out_a = FPNumIn(None, width)
 226         self.out_b = FPNumIn(None, width)
 227         self.exp_eq = Signal(reset_less=True)
 228
 229     def elaborate(self, platform):
 230         # This one however (single-cycle) will do the shift
 231         # in one go.
 232
 233         m = Module()
 234
 235         m.submodules.align_in_a = self.in_a
 236         m.submodules.align_in_b = self.in_b
 237         m.submodules.align_out_a = self.out_a
 238         m.submodules.align_out_b = self.out_b
 239
 240         # NOTE: this does *not* do single-cycle multi-shifting,
 241         #       it *STAYS* in the align state until exponents match
 242
 243         # exponent of a greater than b: shift b down
 244         m.d.comb += self.exp_eq.eq(0)
 245         m.d.comb += self.out_a.eq(self.in_a)
 246         m.d.comb += self.out_b.eq(self.in_b)
 247         agtb = Signal(reset_less=True)
 248         altb = Signal(reset_less=True)
 249         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 250         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 251         with m.If(agtb):
 252             m.d.comb += self.out_b.shift_down(self.in_b)
 253         # exponent of b greater than a: shift a down
 254         with m.Elif(altb):
 255             m.d.comb += self.out_a.shift_down(self.in_a)
 256         # exponents equal: move to next stage.
 257         with m.Else():
 258             m.d.comb += self.exp_eq.eq(1)
 259         return m
 260
 261
 262 class FPAddAlignMulti(FPState):
 263
 264     def __init__(self, width, id_wid):
 265         FPState.__init__(self, "align")
 266         self.mod = FPAddAlignMultiMod(width)
 267         self.out_a = FPNumIn(None, width)
 268         self.out_b = FPNumIn(None, width)
 269         self.exp_eq = Signal(reset_less=True)
 270
 271     def setup(self, m, in_a, in_b):
 272         """ links module to inputs and outputs
 273         """
 274         m.submodules.align = self.mod
 275         m.d.comb += self.mod.in_a.eq(in_a)
 276         m.d.comb += self.mod.in_b.eq(in_b)
 277         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 278         m.d.sync += self.out_a.eq(self.mod.out_a)
 279         m.d.sync += self.out_b.eq(self.mod.out_b)
 280
 281     def action(self, m):
 282         with m.If(self.exp_eq):
 283             m.next = "add_0"
 284
 285
 286 class FPNumIn2Ops:
 287
 288     def __init__(self, width, id_wid):
 289         self.a = FPNumIn(None, width)
 290         self.b = FPNumIn(None, width)
 291         self.z = FPNumOut(width, False)
 292         self.out_do_z = Signal(reset_less=True)
 293         self.oz = Signal(width, reset_less=True)
 294         self.mid = Signal(id_wid, reset_less=True)
 295
 296     def eq(self, i):
 297         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 298                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 299
 300
 301 class FPAddAlignSingleMod:
 302
 303     def __init__(self, width, id_wid):
 304         self.width = width
 305         self.id_wid = id_wid
 306         self.i = self.ispec()
 307         self.o = self.ospec()
 308
 309     def ispec(self):
 310         return FPSCData(self.width, self.id_wid)
 311
 312     def ospec(self):
 313         return FPNumIn2Ops(self.width, self.id_wid)
 314
 315     def process(self, i):
 316         return self.o
 317
 318     def setup(self, m, i):
 319         """ links module to inputs and outputs
 320         """
 321         m.submodules.align = self
 322         m.d.comb += self.i.eq(i)
 323
 324     def elaborate(self, platform):
 325         """ Aligns A against B or B against A, depending on which has the
 326             greater exponent.  This is done in a *single* cycle using
 327             variable-width bit-shift
 328
 329             the shifter used here is quite expensive in terms of gates.
 330             Mux A or B in (and out) into temporaries, as only one of them
 331             needs to be aligned against the other
 332         """
 333         m = Module()
 334
 335         m.submodules.align_in_a = self.i.a
 336         m.submodules.align_in_b = self.i.b
 337         m.submodules.align_out_a = self.o.a
 338         m.submodules.align_out_b = self.o.b
 339
 340         # temporary (muxed) input and output to be shifted
 341         t_inp = FPNumBase(self.width)
 342         t_out = FPNumIn(None, self.width)
 343         espec = (len(self.i.a.e), True)
 344         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 345         m.submodules.align_t_in = t_inp
 346         m.submodules.align_t_out = t_out
 347         m.submodules.multishift_r = msr
 348
 349         ediff = Signal(espec, reset_less=True)
 350         ediffr = Signal(espec, reset_less=True)
 351         tdiff = Signal(espec, reset_less=True)
 352         elz = Signal(reset_less=True)
 353         egz = Signal(reset_less=True)
 354
 355         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 356         m.d.comb += msr.inp.eq(t_inp.m)
 357         m.d.comb += msr.diff.eq(tdiff)
 358         m.d.comb += t_out.m.eq(msr.m)
 359         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 360         m.d.comb += t_out.s.eq(t_inp.s)
 361
 362         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 363         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 364         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 365         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 366
 367         # default: A-exp == B-exp, A and B untouched (fall through)
 368         m.d.comb += self.o.a.eq(self.i.a)
 369         m.d.comb += self.o.b.eq(self.i.b)
 370         # only one shifter (muxed)
 371         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 372         # exponent of a greater than b: shift b down
 373         with m.If(~self.i.out_do_z):
 374             with m.If(egz):
 375                 m.d.comb += [t_inp.eq(self.i.b),
 376                              tdiff.eq(ediff),
 377                              self.o.b.eq(t_out),
 378                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 379                             ]
 380             # exponent of b greater than a: shift a down
 381             with m.Elif(elz):
 382                 m.d.comb += [t_inp.eq(self.i.a),
 383                              tdiff.eq(ediffr),
 384                              self.o.a.eq(t_out),
 385                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 386                             ]
 387
 388         m.d.comb += self.o.mid.eq(self.i.mid)
 389         m.d.comb += self.o.z.eq(self.i.z)
 390         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 391         m.d.comb += self.o.oz.eq(self.i.oz)
 392
 393         return m
 394
 395
 396 class FPAddAlignSingle(FPState):
 397
 398     def __init__(self, width, id_wid):
 399         FPState.__init__(self, "align")
 400         self.mod = FPAddAlignSingleMod(width, id_wid)
 401         self.out_a = FPNumIn(None, width)
 402         self.out_b = FPNumIn(None, width)
 403
 404     def setup(self, m, i):
 405         """ links module to inputs and outputs
 406         """
 407         self.mod.setup(m, i)
 408
 409         # NOTE: could be done as comb
 410         m.d.sync += self.out_a.eq(self.mod.out_a)
 411         m.d.sync += self.out_b.eq(self.mod.out_b)
 412
 413     def action(self, m):
 414         m.next = "add_0"
 415
 416
 417 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 418
 419     def __init__(self, width, id_wid):
 420         FPState.__init__(self, "align")
 421         self.width = width
 422         self.id_wid = id_wid
 423         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 424         self.a1o = self.ospec()
 425
 426     def ispec(self):
 427         return FPSCData(self.width, self.id_wid)
 428
 429     def ospec(self):
 430         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 431
 432     def setup(self, m, i):
 433         """ links module to inputs and outputs
 434         """
 435
 436         # chain AddAlignSingle, AddStage0 and AddStage1
 437         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 438         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 439         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 440
 441         chain = StageChain([mod, a0mod, a1mod])
 442         chain.setup(m, i)
 443
 444         self.o = a1mod.o
 445
 446     def process(self, i):
 447         return self.o
 448
 449     def action(self, m):
 450         m.d.sync += self.a1o.eq(self.process(None))
 451         m.next = "normalise_1"
 452
 453
 454 class FPAddStage0Data:
 455
 456     def __init__(self, width, id_wid):
 457         self.z = FPNumBase(width, False)
 458         self.out_do_z = Signal(reset_less=True)
 459         self.oz = Signal(width, reset_less=True)
 460         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 461         self.mid = Signal(id_wid, reset_less=True)
 462
 463     def eq(self, i):
 464         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 465                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 466
 467
 468 class FPAddStage0Mod:
 469
 470     def __init__(self, width, id_wid):
 471         self.width = width
 472         self.id_wid = id_wid
 473         self.i = self.ispec()
 474         self.o = self.ospec()
 475
 476     def ispec(self):
 477         return FPSCData(self.width, self.id_wid)
 478
 479     def ospec(self):
 480         return FPAddStage0Data(self.width, self.id_wid)
 481
 482     def process(self, i):
 483         return self.o
 484
 485     def setup(self, m, i):
 486         """ links module to inputs and outputs
 487         """
 488         m.submodules.add0 = self
 489         m.d.comb += self.i.eq(i)
 490
 491     def elaborate(self, platform):
 492         m = Module()
 493         m.submodules.add0_in_a = self.i.a
 494         m.submodules.add0_in_b = self.i.b
 495         m.submodules.add0_out_z = self.o.z
 496
 497         # store intermediate tests (and zero-extended mantissas)
 498         seq = Signal(reset_less=True)
 499         mge = Signal(reset_less=True)
 500         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 501         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 502         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 503                      mge.eq(self.i.a.m >= self.i.b.m),
 504                      am0.eq(Cat(self.i.a.m, 0)),
 505                      bm0.eq(Cat(self.i.b.m, 0))
 506                     ]
 507         # same-sign (both negative or both positive) add mantissas
 508         with m.If(~self.i.out_do_z):
 509             m.d.comb += self.o.z.e.eq(self.i.a.e)
 510             with m.If(seq):
 511                 m.d.comb += [
 512                     self.o.tot.eq(am0 + bm0),
 513                     self.o.z.s.eq(self.i.a.s)
 514                 ]
 515             # a mantissa greater than b, use a
 516             with m.Elif(mge):
 517                 m.d.comb += [
 518                     self.o.tot.eq(am0 - bm0),
 519                     self.o.z.s.eq(self.i.a.s)
 520                 ]
 521             # b mantissa greater than a, use b
 522             with m.Else():
 523                 m.d.comb += [
 524                     self.o.tot.eq(bm0 - am0),
 525                     self.o.z.s.eq(self.i.b.s)
 526             ]
 527
 528         m.d.comb += self.o.oz.eq(self.i.oz)
 529         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 530         m.d.comb += self.o.mid.eq(self.i.mid)
 531         return m
 532
 533
 534 class FPAddStage0(FPState):
 535     """ First stage of add.  covers same-sign (add) and subtract
 536         special-casing when mantissas are greater or equal, to
 537         give greatest accuracy.
 538     """
 539
 540     def __init__(self, width, id_wid):
 541         FPState.__init__(self, "add_0")
 542         self.mod = FPAddStage0Mod(width)
 543         self.o = self.mod.ospec()
 544
 545     def setup(self, m, i):
 546         """ links module to inputs and outputs
 547         """
 548         self.mod.setup(m, i)
 549
 550         # NOTE: these could be done as combinatorial (merge add0+add1)
 551         m.d.sync += self.o.eq(self.mod.o)
 552
 553     def action(self, m):
 554         m.next = "add_1"
 555
 556
 557 class FPAddStage1Mod(FPState):
 558     """ Second stage of add: preparation for normalisation.
 559         detects when tot sum is too big (tot[27] is kinda a carry bit)
 560     """
 561
 562     def __init__(self, width, id_wid):
 563         self.width = width
 564         self.id_wid = id_wid
 565         self.i = self.ispec()
 566         self.o = self.ospec()
 567
 568     def ispec(self):
 569         return FPAddStage0Data(self.width, self.id_wid)
 570
 571     def ospec(self):
 572         return FPAddStage1Data(self.width, self.id_wid)
 573
 574     def process(self, i):
 575         return self.o
 576
 577     def setup(self, m, i):
 578         """ links module to inputs and outputs
 579         """
 580         m.submodules.add1 = self
 581         m.submodules.add1_out_overflow = self.o.of
 582
 583         m.d.comb += self.i.eq(i)
 584
 585     def elaborate(self, platform):
 586         m = Module()
 587         m.d.comb += self.o.z.eq(self.i.z)
 588         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 589         with m.If(~self.i.out_do_z):
 590             with m.If(self.i.tot[-1]):
 591                 m.d.comb += [
 592                     self.o.z.m.eq(self.i.tot[4:]),
 593                     self.o.of.m0.eq(self.i.tot[4]),
 594                     self.o.of.guard.eq(self.i.tot[3]),
 595                     self.o.of.round_bit.eq(self.i.tot[2]),
 596                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 597                     self.o.z.e.eq(self.i.z.e + 1)
 598             ]
 599             # tot[-1] (MSB) zero case
 600             with m.Else():
 601                 m.d.comb += [
 602                     self.o.z.m.eq(self.i.tot[3:]),
 603                     self.o.of.m0.eq(self.i.tot[3]),
 604                     self.o.of.guard.eq(self.i.tot[2]),
 605                     self.o.of.round_bit.eq(self.i.tot[1]),
 606                     self.o.of.sticky.eq(self.i.tot[0])
 607             ]
 608
 609         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 610         m.d.comb += self.o.oz.eq(self.i.oz)
 611         m.d.comb += self.o.mid.eq(self.i.mid)
 612
 613         return m
 614
 615
 616 class FPAddStage1(FPState):
 617
 618     def __init__(self, width, id_wid):
 619         FPState.__init__(self, "add_1")
 620         self.mod = FPAddStage1Mod(width)
 621         self.out_z = FPNumBase(width, False)
 622         self.out_of = Overflow()
 623         self.norm_stb = Signal()
 624
 625     def setup(self, m, i):
 626         """ links module to inputs and outputs
 627         """
 628         self.mod.setup(m, i)
 629
 630         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 631
 632         m.d.sync += self.out_of.eq(self.mod.out_of)
 633         m.d.sync += self.out_z.eq(self.mod.out_z)
 634         m.d.sync += self.norm_stb.eq(1)
 635
 636     def action(self, m):
 637         m.next = "normalise_1"
 638
 639
 640 class FPNormToPack(FPState, UnbufferedPipeline):
 641
 642     def __init__(self, width, id_wid):
 643         FPState.__init__(self, "normalise_1")
 644         self.id_wid = id_wid
 645         self.width = width
 646         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 647
 648     def ispec(self):
 649         return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec
 650
 651     def ospec(self):
 652         return FPPackData(self.width, self.id_wid) # FPPackMod ospec
 653
 654     def setup(self, m, i):
 655         """ links module to inputs and outputs
 656         """
 657
 658         # Normalisation, Rounding Corrections, Pack - in a chain
 659         nmod = FPNorm1ModSingle(self.width, self.id_wid)
 660         rmod = FPRoundMod(self.width, self.id_wid)
 661         cmod = FPCorrectionsMod(self.width, self.id_wid)
 662         pmod = FPPackMod(self.width, self.id_wid)
 663         chain = StageChain([nmod, rmod, cmod, pmod])
 664         chain.setup(m, i)
 665         self.out_z = pmod.ospec()
 666
 667         self.o = pmod.o
 668
 669     def process(self, i):
 670         return self.o
 671
 672     def action(self, m):
 673         m.d.sync += self.out_z.eq(self.process(None))
 674         m.next = "pack_put_z"
 675
 676
 677
 678 class FPPutZ(FPState):
 679
 680     def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
 681         FPState.__init__(self, state)
 682         if to_state is None:
 683             to_state = "get_ops"
 684         self.to_state = to_state
 685         self.in_z = in_z
 686         self.out_z = out_z
 687         self.in_mid = in_mid
 688         self.out_mid = out_mid
 689
 690     def action(self, m):
 691         if self.in_mid is not None:
 692             m.d.sync += self.out_mid.eq(self.in_mid)
 693         m.d.sync += [
 694           self.out_z.z.v.eq(self.in_z)
 695         ]
 696         with m.If(self.out_z.z.stb & self.out_z.z.ack):
 697             m.d.sync += self.out_z.z.stb.eq(0)
 698             m.next = self.to_state
 699         with m.Else():
 700             m.d.sync += self.out_z.z.stb.eq(1)
 701
 702
 703 class FPPutZIdx(FPState):
 704
 705     def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
 706         FPState.__init__(self, state)
 707         if to_state is None:
 708             to_state = "get_ops"
 709         self.to_state = to_state
 710         self.in_z = in_z
 711         self.out_zs = out_zs
 712         self.in_mid = in_mid
 713
 714     def action(self, m):
 715         outz_stb = Signal(reset_less=True)
 716         outz_ack = Signal(reset_less=True)
 717         m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
 718                      outz_ack.eq(self.out_zs[self.in_mid].ack),
 719                     ]
 720         m.d.sync += [
 721           self.out_zs[self.in_mid].v.eq(self.in_z.v)
 722         ]
 723         with m.If(outz_stb & outz_ack):
 724             m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
 725             m.next = self.to_state
 726         with m.Else():
 727             m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
 728
 729
 730 class FPOpData:
 731     def __init__(self, width, id_wid):
 732         self.z = FPOp(width)
 733         self.mid = Signal(id_wid, reset_less=True)
 734
 735     def eq(self, i):
 736         return [self.z.eq(i.z), self.mid.eq(i.mid)]
 737
 738     def ports(self):
 739         return [self.z, self.mid]
 740
 741
 742 class FPADDBaseMod:
 743
 744     def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
 745         """ IEEE754 FP Add
 746
 747             * width: bit-width of IEEE754.  supported: 16, 32, 64
 748             * id_wid: an identifier that is sync-connected to the input
 749             * single_cycle: True indicates each stage to complete in 1 clock
 750             * compact: True indicates a reduced number of stages
 751         """
 752         self.width = width
 753         self.id_wid = id_wid
 754         self.single_cycle = single_cycle
 755         self.compact = compact
 756
 757         self.in_t = Trigger()
 758         self.i = self.ispec()
 759         self.o = self.ospec()
 760
 761         self.states = []
 762
 763     def ispec(self):
 764         return FPADDBaseData(self.width, self.id_wid)
 765
 766     def ospec(self):
 767         return FPOpData(self.width, self.id_wid)
 768
 769     def add_state(self, state):
 770         self.states.append(state)
 771         return state
 772
 773     def get_fragment(self, platform=None):
 774         """ creates the HDL code-fragment for FPAdd
 775         """
 776         m = Module()
 777         m.submodules.out_z = self.o.z
 778         m.submodules.in_t = self.in_t
 779         if self.compact:
 780             self.get_compact_fragment(m, platform)
 781         else:
 782             self.get_longer_fragment(m, platform)
 783
 784         with m.FSM() as fsm:
 785
 786             for state in self.states:
 787                 with m.State(state.state_from):
 788                     state.action(m)
 789
 790         return m
 791
 792     def get_longer_fragment(self, m, platform=None):
 793
 794         get = self.add_state(FPGet2Op("get_ops", "special_cases",
 795                                       self.width))
 796         get.setup(m, self.i)
 797         a = get.out_op1
 798         b = get.out_op2
 799         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 800
 801         sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
 802         sc.setup(m, a, b, self.in_mid)
 803
 804         dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
 805         dn.setup(m, a, b, sc.in_mid)
 806
 807         if self.single_cycle:
 808             alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
 809             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 810         else:
 811             alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
 812             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 813
 814         add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
 815         add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)
 816
 817         add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
 818         add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)
 819
 820         if self.single_cycle:
 821             n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
 822             n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
 823         else:
 824             n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
 825             n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)
 826
 827         rn = self.add_state(FPRound(self.width, self.id_wid))
 828         rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)
 829
 830         cor = self.add_state(FPCorrections(self.width, self.id_wid))
 831         cor.setup(m, rn.out_z, rn.in_mid)
 832
 833         pa = self.add_state(FPPack(self.width, self.id_wid))
 834         pa.setup(m, cor.out_z, rn.in_mid)
 835
 836         ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
 837                                     pa.in_mid, self.out_mid))
 838
 839         pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
 840                                     pa.in_mid, self.out_mid))
 841
 842     def get_compact_fragment(self, m, platform=None):
 843
 844
 845         get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
 846         sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
 847         alm = FPAddAlignSingleAdd(self.width, self.id_wid)
 848         n1 = FPNormToPack(self.width, self.id_wid)
 849
 850         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 851
 852         chainlist = [get, sc, alm, n1]
 853         chain = StageChain(chainlist, specallocate=True)
 854         chain.setup(m, self.i)
 855
 856         for mod in chainlist:
 857             sc = self.add_state(mod)
 858
 859         ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
 860                                     n1.out_z.mid, self.o.mid))
 861
 862         #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
 863         #                            sc.o.mid, self.o.mid))
 864
 865
 866 class FPADDBase(FPState):
 867
 868     def __init__(self, width, id_wid=None, single_cycle=False):
 869         """ IEEE754 FP Add
 870
 871             * width: bit-width of IEEE754.  supported: 16, 32, 64
 872             * id_wid: an identifier that is sync-connected to the input
 873             * single_cycle: True indicates each stage to complete in 1 clock
 874         """
 875         FPState.__init__(self, "fpadd")
 876         self.width = width
 877         self.single_cycle = single_cycle
 878         self.mod = FPADDBaseMod(width, id_wid, single_cycle)
 879         self.o = self.ospec()
 880
 881         self.in_t = Trigger()
 882         self.i = self.ispec()
 883
 884         self.z_done = Signal(reset_less=True) # connects to out_z Strobe
 885         self.in_accept = Signal(reset_less=True)
 886         self.add_stb = Signal(reset_less=True)
 887         self.add_ack = Signal(reset=0, reset_less=True)
 888
 889     def ispec(self):
 890         return self.mod.ispec()
 891
 892     def ospec(self):
 893         return self.mod.ospec()
 894
 895     def setup(self, m, i, add_stb, in_mid):
 896         m.d.comb += [self.i.eq(i),
 897                      self.mod.i.eq(self.i),
 898                      self.z_done.eq(self.mod.o.z.trigger),
 899                      #self.add_stb.eq(add_stb),
 900                      self.mod.in_t.stb.eq(self.in_t.stb),
 901                      self.in_t.ack.eq(self.mod.in_t.ack),
 902                      self.o.mid.eq(self.mod.o.mid),
 903                      self.o.z.v.eq(self.mod.o.z.v),
 904                      self.o.z.stb.eq(self.mod.o.z.stb),
 905                      self.mod.o.z.ack.eq(self.o.z.ack),
 906                     ]
 907
 908         m.d.sync += self.add_stb.eq(add_stb)
 909         m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
 910         m.d.sync += self.o.z.ack.eq(0) # likewise
 911         #m.d.sync += self.in_t.stb.eq(0)
 912
 913         m.submodules.fpadd = self.mod
 914
 915     def action(self, m):
 916
 917         # in_accept is set on incoming strobe HIGH and ack LOW.
 918         m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))
 919
 920         #with m.If(self.in_t.ack):
 921         #    m.d.sync += self.in_t.stb.eq(0)
 922         with m.If(~self.z_done):
 923             # not done: test for accepting an incoming operand pair
 924             with m.If(self.in_accept):
 925                 m.d.sync += [
 926                     self.add_ack.eq(1), # acknowledge receipt...
 927                     self.in_t.stb.eq(1), # initiate add
 928                 ]
 929             with m.Else():
 930                 m.d.sync += [self.add_ack.eq(0),
 931                              self.in_t.stb.eq(0),
 932                              self.o.z.ack.eq(1),
 933                             ]
 934         with m.Else():
 935             # done: acknowledge, and write out id and value
 936             m.d.sync += [self.add_ack.eq(1),
 937                          self.in_t.stb.eq(0)
 938                         ]
 939             m.next = "put_z"
 940
 941             return
 942
 943             if self.in_mid is not None:
 944                 m.d.sync += self.out_mid.eq(self.mod.out_mid)
 945
 946             m.d.sync += [
 947               self.out_z.v.eq(self.mod.out_z.v)
 948             ]
 949             # move to output state on detecting z ack
 950             with m.If(self.out_z.trigger):
 951                 m.d.sync += self.out_z.stb.eq(0)
 952                 m.next = "put_z"
 953             with m.Else():
 954                 m.d.sync += self.out_z.stb.eq(1)
 955
 956
 957 class FPADDBasePipe(ControlBase):
 958     def __init__(self, width, id_wid):
 959         ControlBase.__init__(self)
 960         self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
 961         self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
 962         self.pipe3 = FPNormToPack(width, id_wid)
 963
 964         self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
 965
 966     def elaborate(self, platform):
 967         m = Module()
 968         m.submodules.scnorm = self.pipe1
 969         m.submodules.addalign = self.pipe2
 970         m.submodules.normpack = self.pipe3
 971         m.d.comb += self._eqs
 972         return m
 973
 974
 975 class FPADDInMuxPipe(PriorityCombMuxInPipe):
 976     def __init__(self, width, id_wid, num_rows):
 977         self.num_rows = num_rows
 978         def iospec(): return FPADDBaseData(width, id_wid)
 979         stage = PassThroughStage(iospec)
 980         PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
 981
 982
 983 class FPADDMuxOutPipe(CombMuxOutPipe):
 984     def __init__(self, width, id_wid, num_rows):
 985         self.num_rows = num_rows
 986         def iospec(): return FPPackData(width, id_wid)
 987         stage = PassThroughStage(iospec)
 988         CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
 989
 990
 991 class FPADDMuxInOut:
 992     """ Reservation-Station version of FPADD pipeline.
 993
 994         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 995         * 3-stage adder pipeline
 996         * fan-out on outputs (an array of FPPackData: z,mid)
 997
 998         Fan-in and Fan-out are combinatorial.
 999     """
1000     def __init__(self, width, id_wid, num_rows):
1001         self.num_rows = num_rows
1002         self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
1003         self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
1004         self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out
1005
1006         self.p = self.inpipe.p  # kinda annoying,
1007         self.n = self.outpipe.n # use pipe in/out as this class in/out
1008         self._ports = self.inpipe.ports() + self.outpipe.ports()
1009
1010     def elaborate(self, platform):
1011         m = Module()
1012         m.submodules.inpipe = self.inpipe
1013         m.submodules.fpadd = self.fpadd
1014         m.submodules.outpipe = self.outpipe
1015
1016         m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
1017         m.d.comb += self.fpadd.connect_to_next(self.outpipe)
1018
1019         return m
1020
1021     def ports(self):
1022         return self._ports
1023
1024
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of rows to create: each row is an (in_a, in_b)
              operand pair in self.rs plus one result in self.res
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        # NOTE(review): FPID.__init__ is not called on self; the id
        # signals are kept on a separate FPID instance instead.
        self.ids = FPID(id_wid)

        # operand pairs, one (in_a, in_b) tuple per row
        operand_rows = []
        for row in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_{:d}".format(row)
            in_b.name = "in_b_{:d}".format(row)
            operand_rows.append((in_a, in_b))
        self.rs = Array(operand_rows)

        # results, one out_z per row
        result_rows = []
        for row in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_{:d}".format(row)
            result_rows.append(out_z)
        self.res = Array(result_rows)

        # FSM states, in the order they were registered with add_state
        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and hands it back,
            so that creation and registration can be done in one line
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only row 0 of the operand array is wired up here
        first_row = self.rs[0]
        in_a = first_row[0]
        in_b = first_row[1]

        # state "get_a" (next: "get_b")
        get_a = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        get_a.setup(m, in_a)
        op_a = get_a.out_op

        # state "get_b" (next: "fpadd")
        get_b = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        get_b.setup(m, in_b)
        op_b = get_b.out_op

        # the adder state itself
        adder = self.add_state(FPADDBase(self.width, self.id_wid,
                                         self.single_cycle))
        adder_in = adder.ispec() # create an input spec object for FPADDBase
        m.d.sync += [adder_in.a.eq(op_a),
                     adder_in.b.eq(op_b),
                     adder_in.mid.eq(self.ids.in_mid)]
        adder.setup(m, adder_in, get_b.out_decode, self.ids.in_mid)
        result = adder.o

        # state "put_z" (next: "get_a"), given the result array self.res
        self.add_state(FPPutZIdx("put_z", result.z, self.res,
                                 result.mid, "get_a"))

        # one FSM state per registered state object, in registration order
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1114
1115
if __name__ == "__main__":
    # Build a single-cycle 32-bit adder with a 5-bit id and hand it to the
    # nmigen command-line entry point.  The exposed ports are row 0 of the
    # operand/result arrays plus the in/out id signals.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    ports = (alu.rs[0][0].ports() +
             alu.rs[0][1].ports() +
             alu.res[0].ports() +
             [alu.ids.in_mid, alu.ids.out_mid])
    main(alu, ports=ports)
1129
1130
1131     # works... but don't use, just do "python fname.py convert -t v"
1132     #print (verilog.convert(alu, ports=[
1133     #                        ports=alu.in_a.ports() + \
1134     #                              alu.in_b.ports() + \
1135     #                              alu.out_z.ports())