src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
  19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  20
  21
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Combinatorial stage.  Decodes operands a and b from the input
        record and walks a priority chain of special cases.  When one of
        them applies, the finished result is built in self.o.z and
        self.o.out_do_z is set; otherwise out_do_z is cleared and the
        decoded operands are forwarded in self.o.a / self.o.b.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width, id_wid: width of the "mid" tag that
            is copied unchanged from input to output.
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record.  the argument is ignored: the
            input is wired up in setup() instead.
        """
        return self.o

    def elaborate(self, platform):
        """ builds the decode logic and the special-case priority chain
        """
        m = Module()

        m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a),
                     b1.decode(self.i.b),
                    ]

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # The If/Elif chain below is a priority chain: only the first
        # matching case drives the output.

        # if a is NaN or b is NaN return NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(a1.s)
            # if a is inf and signs don't match return NaN
            # (this later assignment overrides the inf written just above.
            #  NOTE(review): exp_128 presumably flags b's exponent being at
            #  the inf/NaN value; b cannot be NaN here - confirm in fpbase)
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(b1.s)

        # if a is zero and b zero return signed-a/b
        # (sign is the AND of both signs: negative only if both are)
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)
            m.d.comb += self.o.a.eq(a1)
            m.d.comb += self.o.b.eq(b1)

        # always forwarded: the packed value of z and the mid tag
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
 140
 141
 142 class FPID:
 143     def __init__(self, id_wid):
 144         self.id_wid = id_wid
 145         if self.id_wid:
 146             self.in_mid = Signal(id_wid, reset_less=True)
 147             self.out_mid = Signal(id_wid, reset_less=True)
 148         else:
 149             self.in_mid = None
 150             self.out_mid = None
 151
 152     def idsync(self, m):
 153         if self.id_wid is not None:
 154             m.d.sync += self.out_mid.eq(self.in_mid)
 155
 156
 157 class FPAddSpecialCases(FPState):
 158     """ special cases: NaNs, infs, zeros, denormalised
 159         NOTE: some of these are unique to add.  see "Special Operations"
 160         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 161     """
 162
 163     def __init__(self, width, id_wid):
 164         FPState.__init__(self, "special_cases")
 165         self.mod = FPAddSpecialCasesMod(width)
 166         self.out_z = self.mod.ospec()
 167         self.out_do_z = Signal(reset_less=True)
 168
 169     def setup(self, m, i):
 170         """ links module to inputs and outputs
 171         """
 172         self.mod.setup(m, i, self.out_do_z)
 173         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 174         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 175
 176     def action(self, m):
 177         self.idsync(m)
 178         with m.If(self.out_do_z):
 179             m.next = "put_z"
 180         with m.Else():
 181             m.next = "denormalise"
 182
 183
 184 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 185     """ special cases: NaNs, infs, zeros, denormalised
 186         NOTE: some of these are unique to add.  see "Special Operations"
 187         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 188     """
 189
 190     def __init__(self, width, id_wid):
 191         FPState.__init__(self, "special_cases")
 192         self.width = width
 193         self.id_wid = id_wid
 194         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 195         self.out = self.ospec()
 196
 197     def ispec(self):
 198         return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
 199
 200     def ospec(self):
 201         return FPSCData(self.width, self.id_wid) # DeNorm ospec
 202
 203     def setup(self, m, i):
 204         """ links module to inputs and outputs
 205         """
 206         smod = FPAddSpecialCasesMod(self.width, self.id_wid)
 207         dmod = FPAddDeNormMod(self.width, self.id_wid)
 208
 209         chain = StageChain([smod, dmod])
 210         chain.setup(m, i)
 211
 212         # only needed for break-out (early-out)
 213         # self.out_do_z = smod.o.out_do_z
 214
 215         self.o = dmod.o
 216
 217     def process(self, i):
 218         return self.o
 219
 220     def action(self, m):
 221         # for break-out (early-out)
 222         #with m.If(self.out_do_z):
 223         #    m.next = "put_z"
 224         #with m.Else():
 225             m.d.sync += self.out.eq(self.process(None))
 226             m.next = "align"
 227
 228
class FPAddAlignMultiMod(FPState):
    """ Alignment step used by the multi-cycle align state.

        Compares the exponents of in_a and in_b, applies shift_down to
        whichever operand has the smaller exponent, and raises exp_eq
        when the exponents are already equal.

        NOTE(review): derives from FPState but __init__ does not call
        FPState.__init__ - confirm whether that is intentional.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)  # high when a.e == b.e

    def elaborate(self, platform):
        """ builds the compare-and-shift logic (purely combinatorial)
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match
        # (shift_down is presumably a single-position shift - see fpbase)

        # defaults: not-equal, operands passed through unchanged.  The
        # branches below override these.
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
 269
 270
 271 class FPAddAlignMulti(FPState):
 272
 273     def __init__(self, width, id_wid):
 274         FPState.__init__(self, "align")
 275         self.mod = FPAddAlignMultiMod(width)
 276         self.out_a = FPNumIn(None, width)
 277         self.out_b = FPNumIn(None, width)
 278         self.exp_eq = Signal(reset_less=True)
 279
 280     def setup(self, m, in_a, in_b):
 281         """ links module to inputs and outputs
 282         """
 283         m.submodules.align = self.mod
 284         m.d.comb += self.mod.in_a.eq(in_a)
 285         m.d.comb += self.mod.in_b.eq(in_b)
 286         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 287         m.d.sync += self.out_a.eq(self.mod.out_a)
 288         m.d.sync += self.out_b.eq(self.mod.out_b)
 289
 290     def action(self, m):
 291         with m.If(self.exp_eq):
 292             m.next = "add_0"
 293
 294
 295 class FPNumIn2Ops:
 296
 297     def __init__(self, width, id_wid):
 298         self.a = FPNumIn(None, width)
 299         self.b = FPNumIn(None, width)
 300         self.z = FPNumOut(width, False)
 301         self.out_do_z = Signal(reset_less=True)
 302         self.oz = Signal(width, reset_less=True)
 303         self.mid = Signal(id_wid, reset_less=True)
 304
 305     def eq(self, i):
 306         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 307                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 308
 309
class FPAddAlignSingleMod:
    """ Single-cycle alignment stage: brings operands a and b to a common
        exponent using one shared multi-bit right shifter.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width, id_wid: width of the mid tag
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPNumIn2Ops)
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record.  the argument is ignored: the
            input is wired up in setup() instead.
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.i.a.e), True)  # signed, same width as exponent
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)   # a.e - b.e
        ediffr = Signal(espec, reset_less=True)  # b.e - a.e
        tdiff = Signal(espec, reset_less=True)   # shift amount selected
        elz = Signal(reset_less=True)            # a.e < b.e
        egz = Signal(reset_less=True)            # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        # exponent goes up by the same amount the mantissa is shifted down
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
        m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
        m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
        m.d.comb += egz.eq(self.i.a.e > self.i.b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.o.a.eq(self.i.a)
        m.d.comb += self.o.b.eq(self.i.b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # no alignment when an earlier stage already produced the result
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(self.i.b),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(self.i.b.s), # whoops forgot sign
                            ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(self.i.a),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(self.i.a.s), # whoops forgot sign
                            ]

        # pass-through fields
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
 403
 404
 405 class FPAddAlignSingle(FPState):
 406
 407     def __init__(self, width, id_wid):
 408         FPState.__init__(self, "align")
 409         self.mod = FPAddAlignSingleMod(width, id_wid)
 410         self.out_a = FPNumIn(None, width)
 411         self.out_b = FPNumIn(None, width)
 412
 413     def setup(self, m, i):
 414         """ links module to inputs and outputs
 415         """
 416         self.mod.setup(m, i)
 417
 418         # NOTE: could be done as comb
 419         m.d.sync += self.out_a.eq(self.mod.out_a)
 420         m.d.sync += self.out_b.eq(self.mod.out_b)
 421
 422     def action(self, m):
 423         m.next = "add_0"
 424
 425
 426 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 427
 428     def __init__(self, width, id_wid):
 429         FPState.__init__(self, "align")
 430         self.width = width
 431         self.id_wid = id_wid
 432         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 433         self.a1o = self.ospec()
 434
 435     def ispec(self):
 436         return FPSCData(self.width, self.id_wid)
 437
 438     def ospec(self):
 439         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 440
 441     def setup(self, m, i):
 442         """ links module to inputs and outputs
 443         """
 444
 445         # chain AddAlignSingle, AddStage0 and AddStage1
 446         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 447         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 448         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 449
 450         chain = StageChain([mod, a0mod, a1mod])
 451         chain.setup(m, i)
 452
 453         self.o = a1mod.o
 454
 455     def process(self, i):
 456         return self.o
 457
 458     def action(self, m):
 459         m.d.sync += self.a1o.eq(self.process(None))
 460         m.next = "normalise_1"
 461
 462
 463 class FPAddStage0Data:
 464
 465     def __init__(self, width, id_wid):
 466         self.z = FPNumBase(width, False)
 467         self.out_do_z = Signal(reset_less=True)
 468         self.oz = Signal(width, reset_less=True)
 469         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 470         self.mid = Signal(id_wid, reset_less=True)
 471
 472     def eq(self, i):
 473         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 474                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 475
 476
class FPAddStage0Mod:
    """ First stage of add (combinatorial): adds or subtracts the two
        aligned mantissas into "tot" and picks the result sign.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width, id_wid: width of the mid tag
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPSCData)
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record.  the argument is ignored: the
            input is wired up in setup() instead.
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the add/subtract logic
        """
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs equal
        mge = Signal(reset_less=True)  # a mantissa >= b mantissa
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
                     mge.eq(self.i.a.m >= self.i.b.m),
                     am0.eq(Cat(self.i.a.m, 0)),
                     bm0.eq(Cat(self.i.b.m, 0))
                    ]
        # nothing to compute when an earlier stage already produced the
        # result (out_do_z set)
        with m.If(~self.i.out_do_z):
            # result exponent is a's
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [
                    self.o.tot.eq(am0 + bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [
                    self.o.tot.eq(am0 - bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [
                    self.o.tot.eq(bm0 - am0),
                    self.o.z.s.eq(self.i.b.s)
            ]

        # pass-through fields
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        return m
 541
 542
 543 class FPAddStage0(FPState):
 544     """ First stage of add.  covers same-sign (add) and subtract
 545         special-casing when mantissas are greater or equal, to
 546         give greatest accuracy.
 547     """
 548
 549     def __init__(self, width, id_wid):
 550         FPState.__init__(self, "add_0")
 551         self.mod = FPAddStage0Mod(width)
 552         self.o = self.mod.ospec()
 553
 554     def setup(self, m, i):
 555         """ links module to inputs and outputs
 556         """
 557         self.mod.setup(m, i)
 558
 559         # NOTE: these could be done as combinatorial (merge add0+add1)
 560         m.d.sync += self.o.eq(self.mod.o)
 561
 562     def action(self, m):
 563         m.next = "add_1"
 564
 565
 566 class FPAddStage1Data:
 567
 568     def __init__(self, width, id_wid):
 569         self.z = FPNumBase(width, False)
 570         self.out_do_z = Signal(reset_less=True)
 571         self.oz = Signal(width, reset_less=True)
 572         self.of = Overflow()
 573         self.mid = Signal(id_wid, reset_less=True)
 574
 575     def eq(self, i):
 576         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 577                 self.of.eq(i.of), self.mid.eq(i.mid)]
 578
 579
 580
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)

        The test in the code is on tot[-1], the MSB of tot (index 27
        presumably refers to the 32-bit format).  The mantissa and the
        m0/guard/round/sticky bits are sliced out of tot accordingly.

        NOTE(review): derives from FPState but __init__ does not call
        FPState.__init__ - confirm whether that is intentional.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width, id_wid: width of the mid tag
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPAddStage0Data)
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record.  the argument is ignored: the
            input is wired up in setup() instead.
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the carry detection and bit-slicing logic
        """
        m = Module()
        # default: z copied from the input; m and e are overridden below
        m.d.comb += self.o.z.eq(self.i.z)
        # tot[-1] (MSB) gets set when the sum overflows. shift result down
        with m.If(~self.i.out_do_z):
            with m.If(self.i.tot[-1]):
                # mantissa taken one bit higher, exponent bumped by one;
                # the two lowest bits of tot are merged into sticky
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[4:]),
                    self.o.of.m0.eq(self.i.tot[4]),
                    self.o.of.guard.eq(self.i.tot[3]),
                    self.o.of.round_bit.eq(self.i.tot[2]),
                    self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
                    self.o.z.e.eq(self.i.z.e + 1)
            ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[3:]),
                    self.o.of.m0.eq(self.i.tot[3]),
                    self.o.of.guard.eq(self.i.tot[2]),
                    self.o.of.round_bit.eq(self.i.tot[1]),
                    self.o.of.sticky.eq(self.i.tot[0])
            ]

        # pass-through fields
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
 638
 639
 640 class FPAddStage1(FPState):
 641
 642     def __init__(self, width, id_wid):
 643         FPState.__init__(self, "add_1")
 644         self.mod = FPAddStage1Mod(width)
 645         self.out_z = FPNumBase(width, False)
 646         self.out_of = Overflow()
 647         self.norm_stb = Signal()
 648
 649     def setup(self, m, i):
 650         """ links module to inputs and outputs
 651         """
 652         self.mod.setup(m, i)
 653
 654         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 655
 656         m.d.sync += self.out_of.eq(self.mod.out_of)
 657         m.d.sync += self.out_z.eq(self.mod.out_z)
 658         m.d.sync += self.norm_stb.eq(1)
 659
 660     def action(self, m):
 661         m.next = "normalise_1"
 662
 663
 664 class FPNorm1Data:
 665
 666     def __init__(self, width, id_wid):
 667         self.roundz = Signal(reset_less=True)
 668         self.z = FPNumBase(width, False)
 669         self.out_do_z = Signal(reset_less=True)
 670         self.oz = Signal(width, reset_less=True)
 671         self.mid = Signal(id_wid, reset_less=True)
 672
 673     def eq(self, i):
 674         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 675                 self.roundz.eq(i.roundz), self.mid.eq(i.mid)]
 676
 677
 678 class FPNorm1ModSingle:
 679
 680     def __init__(self, width, id_wid):
 681         self.width = width
 682         self.id_wid = id_wid
 683         self.i = self.ispec()
 684         self.o = self.ospec()
 685
 686     def ispec(self):
 687         return FPAddStage1Data(self.width, self.id_wid)
 688
 689     def ospec(self):
 690         return FPNorm1Data(self.width, self.id_wid)
 691
 692     def setup(self, m, i):
 693         """ links module to inputs and outputs
 694         """
 695         m.submodules.normalise_1 = self
 696         m.d.comb += self.i.eq(i)
 697
 698     def process(self, i):
 699         return self.o
 700
 701     def elaborate(self, platform):
 702         m = Module()
 703
 704         mwid = self.o.z.m_width+2
 705         pe = PriorityEncoder(mwid)
 706         m.submodules.norm_pe = pe
 707
 708         of = Overflow()
 709         m.d.comb += self.o.roundz.eq(of.roundz)
 710
 711         m.submodules.norm1_out_z = self.o.z
 712         m.submodules.norm1_out_overflow = of
 713         m.submodules.norm1_in_z = self.i.z
 714         m.submodules.norm1_in_overflow = self.i.of
 715
 716         i = self.ispec()
 717         m.submodules.norm1_insel_z = i.z
 718         m.submodules.norm1_insel_overflow = i.of
 719
 720         espec = (len(i.z.e), True)
 721         ediff_n126 = Signal(espec, reset_less=True)
 722         msr = MultiShiftRMerge(mwid, espec)
 723         m.submodules.multishift_r = msr
 724
 725         m.d.comb += i.eq(self.i)
 726         # initialise out from in (overridden below)
 727         m.d.comb += self.o.z.eq(i.z)
 728         m.d.comb += of.eq(i.of)
 729         # normalisation increase/decrease conditions
 730         decrease = Signal(reset_less=True)
 731         increase = Signal(reset_less=True)
 732         m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
 733         m.d.comb += increase.eq(i.z.exp_lt_n126)
 734         # decrease exponent
 735         with m.If(~self.i.out_do_z):
 736             with m.If(decrease):
 737                 # *sigh* not entirely obvious: count leading zeros (clz)
 738                 # with a PriorityEncoder: to find from the MSB
 739                 # we reverse the order of the bits.
 740                 temp_m = Signal(mwid, reset_less=True)
 741                 temp_s = Signal(mwid+1, reset_less=True)
 742                 clz = Signal((len(i.z.e), True), reset_less=True)
 743                 # make sure that the amount to decrease by does NOT
 744                 # go below the minimum non-INF/NaN exponent
 745                 limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
 746                              i.z.exp_sub_n126)
 747                 m.d.comb += [
 748                     # cat round and guard bits back into the mantissa
 749                     temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
 750                     pe.i.eq(temp_m[::-1]),          # inverted
 751                     clz.eq(limclz),                 # count zeros from MSB down
 752                     temp_s.eq(temp_m << clz),       # shift mantissa UP
 753                     self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
 754                     self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
 755                     of.m0.eq(temp_s[2]),          # copy of mantissa[0]
 756                     # overflow in bits 0..1: got shifted too (leave sticky)
 757                     of.guard.eq(temp_s[1]),       # guard
 758                     of.round_bit.eq(temp_s[0]),   # round
 759                 ]
 760             # increase exponent
 761             with m.Elif(increase):
 762                 temp_m = Signal(mwid+1, reset_less=True)
 763                 m.d.comb += [
 764                     temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
 765                                   i.z.m)),
 766                     ediff_n126.eq(i.z.N126 - i.z.e),
 767                     # connect multi-shifter to inp/out mantissa (and ediff)
 768                     msr.inp.eq(temp_m),
 769                     msr.diff.eq(ediff_n126),
 770                     self.o.z.m.eq(msr.m[3:]),
 771                     of.m0.eq(temp_s[3]),   # copy of mantissa[0]
 772                     # overflow in bits 0..1: got shifted too (leave sticky)
 773                     of.guard.eq(temp_s[2]),     # guard
 774                     of.round_bit.eq(temp_s[1]), # round
 775                     of.sticky.eq(temp_s[0]),    # sticky
 776                     self.o.z.e.eq(i.z.e + ediff_n126),
 777                 ]
 778
 779         m.d.comb += self.o.mid.eq(self.i.mid)
 780         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 781         m.d.comb += self.o.oz.eq(self.i.oz)
 782
 783         return m
 784
 785
 786 class FPNorm1ModMulti:
 787
 788     def __init__(self, width, single_cycle=True):
 789         self.width = width
 790         self.in_select = Signal(reset_less=True)
 791         self.in_z = FPNumBase(width, False)
 792         self.in_of = Overflow()
 793         self.temp_z = FPNumBase(width, False)
 794         self.temp_of = Overflow()
 795         self.out_z = FPNumBase(width, False)
 796         self.out_of = Overflow()
 797
 798     def elaborate(self, platform):
 799         m = Module()
 800
 801         m.submodules.norm1_out_z = self.out_z
 802         m.submodules.norm1_out_overflow = self.out_of
 803         m.submodules.norm1_temp_z = self.temp_z
 804         m.submodules.norm1_temp_of = self.temp_of
 805         m.submodules.norm1_in_z = self.in_z
 806         m.submodules.norm1_in_overflow = self.in_of
 807
 808         in_z = FPNumBase(self.width, False)
 809         in_of = Overflow()
 810         m.submodules.norm1_insel_z = in_z
 811         m.submodules.norm1_insel_overflow = in_of
 812
 813         # select which of temp or in z/of to use
 814         with m.If(self.in_select):
 815             m.d.comb += in_z.eq(self.in_z)
 816             m.d.comb += in_of.eq(self.in_of)
 817         with m.Else():
 818             m.d.comb += in_z.eq(self.temp_z)
 819             m.d.comb += in_of.eq(self.temp_of)
 820         # initialise out from in (overridden below)
 821         m.d.comb += self.out_z.eq(in_z)
 822         m.d.comb += self.out_of.eq(in_of)
 823         # normalisation increase/decrease conditions
 824         decrease = Signal(reset_less=True)
 825         increase = Signal(reset_less=True)
 826         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 827         m.d.comb += increase.eq(in_z.exp_lt_n126)
 828         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
 829         # decrease exponent
 830         with m.If(decrease):
 831             m.d.comb += [
 832                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
 833                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
 834                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
 835                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
 836                 self.out_of.round_bit.eq(0),        # reset round bit
 837                 self.out_of.m0.eq(in_of.guard),
 838             ]
 839         # increase exponent
 840         with m.Elif(increase):
 841             m.d.comb += [
 842                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
 843                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
 844                 self.out_of.guard.eq(in_z.m[0]),
 845                 self.out_of.m0.eq(in_z.m[1]),
 846                 self.out_of.round_bit.eq(in_of.guard),
 847                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
 848             ]
 849
 850         return m
 851
 852
 853 class FPNorm1Single(FPState):
 854
 855     def __init__(self, width, id_wid, single_cycle=True):
 856         FPState.__init__(self, "normalise_1")
 857         self.mod = FPNorm1ModSingle(width)
 858         self.o = self.ospec()
 859         self.out_z = FPNumBase(width, False)
 860         self.out_roundz = Signal(reset_less=True)
 861
 862     def ispec(self):
 863         return self.mod.ispec()
 864
 865     def ospec(self):
 866         return self.mod.ospec()
 867
 868     def setup(self, m, i):
 869         """ links module to inputs and outputs
 870         """
 871         self.mod.setup(m, i)
 872
 873     def action(self, m):
 874         m.next = "round"
 875
 876
 877 class FPNorm1Multi(FPState):
 878
 879     def __init__(self, width, id_wid):
 880         FPState.__init__(self, "normalise_1")
 881         self.mod = FPNorm1ModMulti(width)
 882         self.stb = Signal(reset_less=True)
 883         self.ack = Signal(reset=0, reset_less=True)
 884         self.out_norm = Signal(reset_less=True)
 885         self.in_accept = Signal(reset_less=True)
 886         self.temp_z = FPNumBase(width)
 887         self.temp_of = Overflow()
 888         self.out_z = FPNumBase(width)
 889         self.out_roundz = Signal(reset_less=True)
 890
 891     def setup(self, m, in_z, in_of, norm_stb):
 892         """ links module to inputs and outputs
 893         """
 894         self.mod.setup(m, in_z, in_of, norm_stb,
 895                        self.in_accept, self.temp_z, self.temp_of,
 896                        self.out_z, self.out_norm)
 897
 898         m.d.comb += self.stb.eq(norm_stb)
 899         m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state
 900
 901     def action(self, m):
 902         m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
 903         m.d.sync += self.temp_of.eq(self.mod.out_of)
 904         m.d.sync += self.temp_z.eq(self.out_z)
 905         with m.If(self.out_norm):
 906             with m.If(self.in_accept):
 907                 m.d.sync += [
 908                     self.ack.eq(1),
 909                 ]
 910             with m.Else():
 911                 m.d.sync += self.ack.eq(0)
 912         with m.Else():
 913             # normalisation not required (or done).
 914             m.next = "round"
 915             m.d.sync += self.ack.eq(1)
 916             m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
 917
 918
 919 class FPNormToPack(FPState, UnbufferedPipeline):
 920
 921     def __init__(self, width, id_wid):
 922         FPState.__init__(self, "normalise_1")
 923         self.id_wid = id_wid
 924         self.width = width
 925         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 926
 927     def ispec(self):
 928         return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec
 929
 930     def ospec(self):
 931         return FPPackData(self.width, self.id_wid) # FPPackMod ospec
 932
 933     def setup(self, m, i):
 934         """ links module to inputs and outputs
 935         """
 936
 937         # Normalisation, Rounding Corrections, Pack - in a chain
 938         nmod = FPNorm1ModSingle(self.width, self.id_wid)
 939         rmod = FPRoundMod(self.width, self.id_wid)
 940         cmod = FPCorrectionsMod(self.width, self.id_wid)
 941         pmod = FPPackMod(self.width, self.id_wid)
 942         chain = StageChain([nmod, rmod, cmod, pmod])
 943         chain.setup(m, i)
 944         self.out_z = pmod.ospec()
 945
 946         self.o = pmod.o
 947
 948     def process(self, i):
 949         return self.o
 950
 951     def action(self, m):
 952         m.d.sync += self.out_z.eq(self.process(None))
 953         m.next = "pack_put_z"
 954
 955
 956 class FPRoundData:
 957
 958     def __init__(self, width, id_wid):
 959         self.z = FPNumBase(width, False)
 960         self.out_do_z = Signal(reset_less=True)
 961         self.oz = Signal(width, reset_less=True)
 962         self.mid = Signal(id_wid, reset_less=True)
 963
 964     def eq(self, i):
 965         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 966                 self.mid.eq(i.mid)]
 967
 968
 969 class FPRoundMod:
 970
 971     def __init__(self, width, id_wid):
 972         self.width = width
 973         self.id_wid = id_wid
 974         self.i = self.ispec()
 975         self.out_z = self.ospec()
 976
 977     def ispec(self):
 978         return FPNorm1Data(self.width, self.id_wid)
 979
 980     def ospec(self):
 981         return FPRoundData(self.width, self.id_wid)
 982
 983     def process(self, i):
 984         return self.out_z
 985
 986     def setup(self, m, i):
 987         m.submodules.roundz = self
 988         m.d.comb += self.i.eq(i)
 989
 990     def elaborate(self, platform):
 991         m = Module()
 992         m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
 993         with m.If(~self.i.out_do_z):
 994             with m.If(self.i.roundz):
 995                 m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa up
 996                 with m.If(self.i.z.m == self.i.z.m1s): # all 1s
 997                     m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
 998
 999         return m
1000
1001
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its output
        into out_z, then proceeds to "corrections".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the FP number
            * id_wid: width of the identifier carried with the data
        """
        FPState.__init__(self, "round")
        # FPRoundMod takes both width and id_wid (neither has a default)
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in this class; presumably
        # it is provided by FPState - confirm
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod keeps its result (mid included) in out_z: it has
        # no attribute named "o"
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally proceed to corrections
        """
        m.next = "corrections"
1026
1027
class FPCorrectionsMod:
    """ corrections stage: when out_do_z is clear and the number is
        denormalised, the exponent is replaced with the number's N127
        value.  Everything else is passed through unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output spec (same type as the input)
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the stage output (argument is ignored)
        """
        return self.out_z

    def elaborate(self, platform):
        m = Module()
        i, o = self.i, self.out_z

        m.submodules.corr_in_z = i.z
        m.submodules.corr_out_z = o.z

        # default: straight copy (mid, z, out_do_z), adjusted below
        m.d.comb += o.eq(i)
        with m.If(~i.out_do_z):
            with m.If(i.z.is_denormalised):
                m.d.comb += o.z.e.eq(i.z.N127)
        return m
1060
1061
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output into out_z, then proceeds to "pack".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the FP number
            * id_wid: width of the identifier carried with the data
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod takes both width and id_wid (no defaults)
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod keeps its result (mid included) in out_z: it
        # has no attribute named "o"
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally proceed to pack
        """
        m.next = "pack"
1085
1086
class FPPackData:
    """ output of the pack stage: the value z plus the identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the fields as a list: z first, then mid
        """
        return list((self.z, self.mid))
1098
1099
class FPPackMod:
    """ pack stage: produces the output value z.

        With out_do_z set, the value oz from the input is used as-is.
        Otherwise the result is built from the input number: infinity
        (with the input's sign) if it has overflowed, or else created
        from its sign, exponent and mantissa.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input spec
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output spec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def process(self, i):
        """ returns the stage output (argument is ignored)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        i, o = self.i, self.o
        z = FPNumOut(self.width, False)

        m.submodules.pack_in_z = i.z
        m.submodules.pack_out_z = z

        # the identifier passes straight through
        m.d.comb += o.mid.eq(i.mid)
        with m.If(~i.out_do_z):
            with m.If(i.z.is_overflowed):
                m.d.comb += z.inf(i.z.s)
            with m.Else():
                m.d.comb += z.create(i.z.s, i.z.e, i.z.m)
        with m.Else():
            m.d.comb += z.v.eq(i.oz)
        m.d.comb += o.z.eq(z.v)
        return m
1138
1139
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its output into
        out_z, then proceeds to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the FP number
            * id_wid: width of the identifier carried with the data
        """
        FPState.__init__(self, "pack")
        # FPPackMod takes both width and id_wid (neither has a default)
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # out_z and the module output "o" are both FPPackData, whose
        # fields are "z" and "mid" (there is no "v" field, and the
        # module has no "out_z" attribute)
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: unconditionally proceed to pack_put_z
        """
        m.next = "pack_put_z"
1163
1164
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z.z and waits for it
        to be acknowledged before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value to write out
            * out_z: object whose "z" has v, stb and ack
            * in_mid / out_mid: identifier source and destination
              (the copy is skipped when in_mid is None)
            * to_state: next state, "get_ops" when not given
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: drive value (and id), raise strobe until acked
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        out_op = self.out_z.z
        m.d.sync += [out_op.v.eq(self.in_z)]

        with m.If(out_op.stb & out_op.ack):
            # strobe and ack both seen: drop the strobe and move on
            m.d.sync += out_op.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += out_op.stb.eq(1)
1188
1189
class FPPutZIdx(FPState):
    """ FSM state that presents a result on one of several outputs,
        selected by in_mid, and waits for that output to be acknowledged
        before moving to to_state.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: object whose "v" is the value to write out
            * out_zs: indexable collection of outputs (v, stb, ack)
            * in_mid: index selecting which output to use
            * to_state: next state, "get_ops" when not given
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: drive the selected output, raise its strobe
            until acknowledged
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)

        # the output selected by the identifier
        sel_out = self.out_zs[self.in_mid]

        m.d.comb += [outz_stb.eq(sel_out.stb),
                     outz_ack.eq(sel_out.ack),
                    ]
        m.d.sync += [sel_out.v.eq(self.in_z.v)]

        with m.If(outz_stb & outz_ack):
            # strobe and ack both seen: drop the strobe and move on
            m.d.sync += sel_out.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += sel_out.stb.eq(1)
1215
1216
class FPOpData:
    """ an FPOp result (z) together with its identifier (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the fields as a list: z first, then mid
        """
        return list((self.z, self.mid))
1227
1228
class FPADDBaseMod:
    """ FSM-based FP adder: a list of states is built up (either the
        "compact" or the "longer" set) and get_fragment() turns them
        into a single nmigen FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FSM states, in the order they were added with add_state()
        self.states = []

    def ispec(self):
        """ input spec
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output spec
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states with one of the two stage layouts
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            # one FSM state per registered object: the state is named
            # after state_from and its behaviour comes from action()
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ adds the non-compact set of states (one per stage).

            NOTE(review): this method reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__ of this
            class.  Several setup() calls below also pass more arguments
            than the setup() methods of FPNorm1Single, FPNorm1Multi,
            FPRound, FPCorrections and FPPack in this file accept.
            Looks out of date relative to get_compact_fragment - confirm
            before using compact=False.
        """

        # NOTE(review): three arguments here, four (with id_wid) in
        # get_compact_fragment - confirm which one FPGet2Op expects
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single- or multi-cycle version (same setup call)
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single- or multi-cycle version
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from pack, one from special-cases
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ adds the compact set of states: get-ops, special-cases plus
            denormalise, align plus add, normalise-to-pack, and finally
            the "pack_put_z" output state.
        """


        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        # the four objects are linked as a chain, fed from self.i
        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chain member is also an FSM state.  note that "sc" is
        # rebound here: after the loop it refers to the last one (n1)
        for mod in chainlist:
            sc = self.add_state(mod)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1351
1352
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod (which builds an FSM of
        its own), passing operands in via a trigger and moving on to
        "put_z" once the module's output has been triggered.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * i: input data, copied into self.i and on into the module
            * add_stb: incoming strobe (registered into self.add_stb)
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: hand operands to the module, then wait for its
            result and proceed to "put_z"
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state.
            # (statements that used to follow an unconditional "return"
            # here could never execute and have been removed)
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1442
1443
class FPADDBasePipe(ControlBase):
    """ three pipeline stages connected in sequence: special-cases plus
        denormalise, align plus add, normalise-to-pack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # the connecting assignments are kept for use in elaborate()
        pipechain = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(pipechain)

    def elaborate(self, platform):
        m = Module()
        named_pipes = (("scnorm", self.pipe1),
                       ("addalign", self.pipe2),
                       ("normpack", self.pipe3))
        for name, pipe in named_pipes:
            setattr(m.submodules, name, pipe)
        m.d.comb += self._eqs
        return m
1460
1461
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ fan-in: num_rows inputs of FPADDBaseData, using a pass-through
        stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # the pass-through stage's data type
            return FPADDBaseData(width, id_wid)

        stage = PassThroughStage(iospec)
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
1468
1469
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ fan-out: num_rows outputs of FPPackData, using a pass-through
        stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # the pass-through stage's data type
            return FPPackData(width, id_wid)

        stage = PassThroughStage(iospec)
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
1476
1477
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's own in/out are those of the fan-in / fan-out pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder -> fan-out
        m.d.comb += [
            self.inpipe.n.connect_to_next(self.fpadd.p),
            self.fpadd.connect_to_next(self.outpipe),
        ]

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
1510
1511
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand pairs / results to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs, one (in_a, in_b) tuple per row
        operand_pairs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # results, one per row
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is used here
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        # state "get_a" -> "get_b"
        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        # state "get_b" -> "fpadd"
        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        # state "fpadd": the adder itself
        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # state "put_z" -> back to "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        with m.FSM():
            # one FSM state per registered object
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1601
1602
if __name__ == "__main__":
    # the first branch is hard-wired on: the FPADD top-level is built
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports() +
                         alu.rs[0][1].ports() +
                         alu.res[0].ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        # alternative top-level (currently never selected)
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))
1616
1617
1618     # works... but don't use, just do "python fname.py convert -t v"
1619     #print (verilog.convert(alu, ports=[
1620     #                        ports=alu.in_a.ports() + \
1621     #                              alu.in_b.ports() + \
1622     #                              alu.out_z.ports())