src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
  19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  20
  21
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Combinatorial stage.  Decodes the two incoming operands and walks
        a priority chain of special cases.  When one applies, o.out_do_z
        is raised and the result is built in o.z (its value is always
        mirrored onto o.oz).  When none applies, o.out_do_z is cleared and
        the decoded operands are passed through on o.a / o.b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs

            registers this object as submodule "specialcases" of m and
            combinatorially drives self.i from i.
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output data (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a),
                     b1.decode(self.i.b),
                    ]

        # signs differ (used for inf + -inf and for a == -b)
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # mantissas identical (used for a == -b)
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # NOTE: everything below is ONE If/Elif/Else chain, so the order
        # of the tests is the priority order: first match wins.

        # if a is NaN or b is NaN return NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review (the four cases below are disabled)

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(a1.s)
            # if a is inf and signs don't match return NaN
            # (this later assignment overrides the inf just above)
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(b1.s)

        # if a is zero and b zero return signed-a/b
        # (sign is the AND of both signs: negative only if both are)
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)
            m.d.comb += self.o.a.eq(a1)
            m.d.comb += self.o.b.eq(b1)

        # outside the chain: always forward the value of z and the id
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
 140
 141
 142 class FPID:
 143     def __init__(self, id_wid):
 144         self.id_wid = id_wid
 145         if self.id_wid:
 146             self.in_mid = Signal(id_wid, reset_less=True)
 147             self.out_mid = Signal(id_wid, reset_less=True)
 148         else:
 149             self.in_mid = None
 150             self.out_mid = None
 151
 152     def idsync(self, m):
 153         if self.id_wid is not None:
 154             m.d.sync += self.out_mid.eq(self.in_mid)
 155
 156
 157 class FPAddSpecialCases(FPState):
 158     """ special cases: NaNs, infs, zeros, denormalised
 159         NOTE: some of these are unique to add.  see "Special Operations"
 160         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 161     """
 162
 163     def __init__(self, width, id_wid):
 164         FPState.__init__(self, "special_cases")
 165         self.mod = FPAddSpecialCasesMod(width)
 166         self.out_z = self.mod.ospec()
 167         self.out_do_z = Signal(reset_less=True)
 168
 169     def setup(self, m, i):
 170         """ links module to inputs and outputs
 171         """
 172         self.mod.setup(m, i, self.out_do_z)
 173         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 174         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 175
 176     def action(self, m):
 177         self.idsync(m)
 178         with m.If(self.out_do_z):
 179             m.next = "put_z"
 180         with m.Else():
 181             m.next = "denormalise"
 182
 183
 184 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 185     """ special cases: NaNs, infs, zeros, denormalised
 186         NOTE: some of these are unique to add.  see "Special Operations"
 187         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 188     """
 189
 190     def __init__(self, width, id_wid):
 191         FPState.__init__(self, "special_cases")
 192         self.width = width
 193         self.id_wid = id_wid
 194         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 195         self.out = self.ospec()
 196
 197     def ispec(self):
 198         return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
 199
 200     def ospec(self):
 201         return FPSCData(self.width, self.id_wid) # DeNorm ospec
 202
 203     def setup(self, m, i):
 204         """ links module to inputs and outputs
 205         """
 206         smod = FPAddSpecialCasesMod(self.width, self.id_wid)
 207         dmod = FPAddDeNormMod(self.width, self.id_wid)
 208
 209         chain = StageChain([smod, dmod])
 210         chain.setup(m, i)
 211
 212         # only needed for break-out (early-out)
 213         # self.out_do_z = smod.o.out_do_z
 214
 215         self.o = dmod.o
 216
 217     def process(self, i):
 218         return self.o
 219
 220     def action(self, m):
 221         # for break-out (early-out)
 222         #with m.If(self.out_do_z):
 223         #    m.next = "put_z"
 224         #with m.Else():
 225             m.d.sync += self.out.eq(self.process(None))
 226             m.next = "align"
 227
 228
 229 class FPAddAlignMultiMod(FPState):
 230
 231     def __init__(self, width):
 232         self.in_a = FPNumBase(width)
 233         self.in_b = FPNumBase(width)
 234         self.out_a = FPNumIn(None, width)
 235         self.out_b = FPNumIn(None, width)
 236         self.exp_eq = Signal(reset_less=True)
 237
 238     def elaborate(self, platform):
 239         # This one however (single-cycle) will do the shift
 240         # in one go.
 241
 242         m = Module()
 243
 244         m.submodules.align_in_a = self.in_a
 245         m.submodules.align_in_b = self.in_b
 246         m.submodules.align_out_a = self.out_a
 247         m.submodules.align_out_b = self.out_b
 248
 249         # NOTE: this does *not* do single-cycle multi-shifting,
 250         #       it *STAYS* in the align state until exponents match
 251
 252         # exponent of a greater than b: shift b down
 253         m.d.comb += self.exp_eq.eq(0)
 254         m.d.comb += self.out_a.eq(self.in_a)
 255         m.d.comb += self.out_b.eq(self.in_b)
 256         agtb = Signal(reset_less=True)
 257         altb = Signal(reset_less=True)
 258         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 259         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 260         with m.If(agtb):
 261             m.d.comb += self.out_b.shift_down(self.in_b)
 262         # exponent of b greater than a: shift a down
 263         with m.Elif(altb):
 264             m.d.comb += self.out_a.shift_down(self.in_a)
 265         # exponents equal: move to next stage.
 266         with m.Else():
 267             m.d.comb += self.exp_eq.eq(1)
 268         return m
 269
 270
 271 class FPAddAlignMulti(FPState):
 272
 273     def __init__(self, width, id_wid):
 274         FPState.__init__(self, "align")
 275         self.mod = FPAddAlignMultiMod(width)
 276         self.out_a = FPNumIn(None, width)
 277         self.out_b = FPNumIn(None, width)
 278         self.exp_eq = Signal(reset_less=True)
 279
 280     def setup(self, m, in_a, in_b):
 281         """ links module to inputs and outputs
 282         """
 283         m.submodules.align = self.mod
 284         m.d.comb += self.mod.in_a.eq(in_a)
 285         m.d.comb += self.mod.in_b.eq(in_b)
 286         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 287         m.d.sync += self.out_a.eq(self.mod.out_a)
 288         m.d.sync += self.out_b.eq(self.mod.out_b)
 289
 290     def action(self, m):
 291         with m.If(self.exp_eq):
 292             m.next = "add_0"
 293
 294
 295 class FPNumIn2Ops:
 296
 297     def __init__(self, width, id_wid):
 298         self.a = FPNumIn(None, width)
 299         self.b = FPNumIn(None, width)
 300         self.z = FPNumOut(width, False)
 301         self.out_do_z = Signal(reset_less=True)
 302         self.oz = Signal(width, reset_less=True)
 303         self.mid = Signal(id_wid, reset_less=True)
 304
 305     def eq(self, i):
 306         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 307                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 308
 309
 310 class FPAddAlignSingleMod:
 311
 312     def __init__(self, width, id_wid):
 313         self.width = width
 314         self.id_wid = id_wid
 315         self.i = self.ispec()
 316         self.o = self.ospec()
 317
 318     def ispec(self):
 319         return FPSCData(self.width, self.id_wid)
 320
 321     def ospec(self):
 322         return FPNumIn2Ops(self.width, self.id_wid)
 323
 324     def process(self, i):
 325         return self.o
 326
 327     def setup(self, m, i):
 328         """ links module to inputs and outputs
 329         """
 330         m.submodules.align = self
 331         m.d.comb += self.i.eq(i)
 332
 333     def elaborate(self, platform):
 334         """ Aligns A against B or B against A, depending on which has the
 335             greater exponent.  This is done in a *single* cycle using
 336             variable-width bit-shift
 337
 338             the shifter used here is quite expensive in terms of gates.
 339             Mux A or B in (and out) into temporaries, as only one of them
 340             needs to be aligned against the other
 341         """
 342         m = Module()
 343
 344         m.submodules.align_in_a = self.i.a
 345         m.submodules.align_in_b = self.i.b
 346         m.submodules.align_out_a = self.o.a
 347         m.submodules.align_out_b = self.o.b
 348
 349         # temporary (muxed) input and output to be shifted
 350         t_inp = FPNumBase(self.width)
 351         t_out = FPNumIn(None, self.width)
 352         espec = (len(self.i.a.e), True)
 353         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 354         m.submodules.align_t_in = t_inp
 355         m.submodules.align_t_out = t_out
 356         m.submodules.multishift_r = msr
 357
 358         ediff = Signal(espec, reset_less=True)
 359         ediffr = Signal(espec, reset_less=True)
 360         tdiff = Signal(espec, reset_less=True)
 361         elz = Signal(reset_less=True)
 362         egz = Signal(reset_less=True)
 363
 364         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 365         m.d.comb += msr.inp.eq(t_inp.m)
 366         m.d.comb += msr.diff.eq(tdiff)
 367         m.d.comb += t_out.m.eq(msr.m)
 368         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 369         m.d.comb += t_out.s.eq(t_inp.s)
 370
 371         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 372         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 373         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 374         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 375
 376         # default: A-exp == B-exp, A and B untouched (fall through)
 377         m.d.comb += self.o.a.eq(self.i.a)
 378         m.d.comb += self.o.b.eq(self.i.b)
 379         # only one shifter (muxed)
 380         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 381         # exponent of a greater than b: shift b down
 382         with m.If(~self.i.out_do_z):
 383             with m.If(egz):
 384                 m.d.comb += [t_inp.eq(self.i.b),
 385                              tdiff.eq(ediff),
 386                              self.o.b.eq(t_out),
 387                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 388                             ]
 389             # exponent of b greater than a: shift a down
 390             with m.Elif(elz):
 391                 m.d.comb += [t_inp.eq(self.i.a),
 392                              tdiff.eq(ediffr),
 393                              self.o.a.eq(t_out),
 394                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 395                             ]
 396
 397         m.d.comb += self.o.mid.eq(self.i.mid)
 398         m.d.comb += self.o.z.eq(self.i.z)
 399         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 400         m.d.comb += self.o.oz.eq(self.i.oz)
 401
 402         return m
 403
 404
 405 class FPAddAlignSingle(FPState):
 406
 407     def __init__(self, width, id_wid):
 408         FPState.__init__(self, "align")
 409         self.mod = FPAddAlignSingleMod(width, id_wid)
 410         self.out_a = FPNumIn(None, width)
 411         self.out_b = FPNumIn(None, width)
 412
 413     def setup(self, m, i):
 414         """ links module to inputs and outputs
 415         """
 416         self.mod.setup(m, i)
 417
 418         # NOTE: could be done as comb
 419         m.d.sync += self.out_a.eq(self.mod.out_a)
 420         m.d.sync += self.out_b.eq(self.mod.out_b)
 421
 422     def action(self, m):
 423         m.next = "add_0"
 424
 425
 426 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 427
 428     def __init__(self, width, id_wid):
 429         FPState.__init__(self, "align")
 430         self.width = width
 431         self.id_wid = id_wid
 432         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 433         self.a1o = self.ospec()
 434
 435     def ispec(self):
 436         return FPSCData(self.width, self.id_wid)
 437
 438     def ospec(self):
 439         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 440
 441     def setup(self, m, i):
 442         """ links module to inputs and outputs
 443         """
 444
 445         # chain AddAlignSingle, AddStage0 and AddStage1
 446         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 447         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 448         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 449
 450         chain = StageChain([mod, a0mod, a1mod])
 451         chain.setup(m, i)
 452
 453         self.o = a1mod.o
 454
 455     def process(self, i):
 456         return self.o
 457
 458     def action(self, m):
 459         m.d.sync += self.a1o.eq(self.process(None))
 460         m.next = "normalise_1"
 461
 462
 463 class FPAddStage0Data:
 464
 465     def __init__(self, width, id_wid):
 466         self.z = FPNumBase(width, False)
 467         self.out_do_z = Signal(reset_less=True)
 468         self.oz = Signal(width, reset_less=True)
 469         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 470         self.mid = Signal(id_wid, reset_less=True)
 471
 472     def eq(self, i):
 473         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 474                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 475
 476
 477 class FPAddStage0Mod:
 478
 479     def __init__(self, width, id_wid):
 480         self.width = width
 481         self.id_wid = id_wid
 482         self.i = self.ispec()
 483         self.o = self.ospec()
 484
 485     def ispec(self):
 486         return FPSCData(self.width, self.id_wid)
 487
 488     def ospec(self):
 489         return FPAddStage0Data(self.width, self.id_wid)
 490
 491     def process(self, i):
 492         return self.o
 493
 494     def setup(self, m, i):
 495         """ links module to inputs and outputs
 496         """
 497         m.submodules.add0 = self
 498         m.d.comb += self.i.eq(i)
 499
 500     def elaborate(self, platform):
 501         m = Module()
 502         m.submodules.add0_in_a = self.i.a
 503         m.submodules.add0_in_b = self.i.b
 504         m.submodules.add0_out_z = self.o.z
 505
 506         # store intermediate tests (and zero-extended mantissas)
 507         seq = Signal(reset_less=True)
 508         mge = Signal(reset_less=True)
 509         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 510         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 511         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 512                      mge.eq(self.i.a.m >= self.i.b.m),
 513                      am0.eq(Cat(self.i.a.m, 0)),
 514                      bm0.eq(Cat(self.i.b.m, 0))
 515                     ]
 516         # same-sign (both negative or both positive) add mantissas
 517         with m.If(~self.i.out_do_z):
 518             m.d.comb += self.o.z.e.eq(self.i.a.e)
 519             with m.If(seq):
 520                 m.d.comb += [
 521                     self.o.tot.eq(am0 + bm0),
 522                     self.o.z.s.eq(self.i.a.s)
 523                 ]
 524             # a mantissa greater than b, use a
 525             with m.Elif(mge):
 526                 m.d.comb += [
 527                     self.o.tot.eq(am0 - bm0),
 528                     self.o.z.s.eq(self.i.a.s)
 529                 ]
 530             # b mantissa greater than a, use b
 531             with m.Else():
 532                 m.d.comb += [
 533                     self.o.tot.eq(bm0 - am0),
 534                     self.o.z.s.eq(self.i.b.s)
 535             ]
 536
 537         m.d.comb += self.o.oz.eq(self.i.oz)
 538         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 539         m.d.comb += self.o.mid.eq(self.i.mid)
 540         return m
 541
 542
 543 class FPAddStage0(FPState):
 544     """ First stage of add.  covers same-sign (add) and subtract
 545         special-casing when mantissas are greater or equal, to
 546         give greatest accuracy.
 547     """
 548
 549     def __init__(self, width, id_wid):
 550         FPState.__init__(self, "add_0")
 551         self.mod = FPAddStage0Mod(width)
 552         self.o = self.mod.ospec()
 553
 554     def setup(self, m, i):
 555         """ links module to inputs and outputs
 556         """
 557         self.mod.setup(m, i)
 558
 559         # NOTE: these could be done as combinatorial (merge add0+add1)
 560         m.d.sync += self.o.eq(self.mod.o)
 561
 562     def action(self, m):
 563         m.next = "add_1"
 564
 565
 566 class FPAddStage1Data:
 567
 568     def __init__(self, width, id_wid):
 569         self.z = FPNumBase(width, False)
 570         self.out_do_z = Signal(reset_less=True)
 571         self.oz = Signal(width, reset_less=True)
 572         self.of = Overflow()
 573         self.mid = Signal(id_wid, reset_less=True)
 574
 575     def eq(self, i):
 576         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 577                 self.of.eq(i.of), self.mid.eq(i.mid)]
 578
 579
 580
 581 class FPAddStage1Mod(FPState):
 582     """ Second stage of add: preparation for normalisation.
 583         detects when tot sum is too big (tot[27] is kinda a carry bit)
 584     """
 585
 586     def __init__(self, width, id_wid):
 587         self.width = width
 588         self.id_wid = id_wid
 589         self.i = self.ispec()
 590         self.o = self.ospec()
 591
 592     def ispec(self):
 593         return FPAddStage0Data(self.width, self.id_wid)
 594
 595     def ospec(self):
 596         return FPAddStage1Data(self.width, self.id_wid)
 597
 598     def process(self, i):
 599         return self.o
 600
 601     def setup(self, m, i):
 602         """ links module to inputs and outputs
 603         """
 604         m.submodules.add1 = self
 605         m.submodules.add1_out_overflow = self.o.of
 606
 607         m.d.comb += self.i.eq(i)
 608
 609     def elaborate(self, platform):
 610         m = Module()
 611         m.d.comb += self.o.z.eq(self.i.z)
 612         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 613         with m.If(~self.i.out_do_z):
 614             with m.If(self.i.tot[-1]):
 615                 m.d.comb += [
 616                     self.o.z.m.eq(self.i.tot[4:]),
 617                     self.o.of.m0.eq(self.i.tot[4]),
 618                     self.o.of.guard.eq(self.i.tot[3]),
 619                     self.o.of.round_bit.eq(self.i.tot[2]),
 620                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 621                     self.o.z.e.eq(self.i.z.e + 1)
 622             ]
 623             # tot[-1] (MSB) zero case
 624             with m.Else():
 625                 m.d.comb += [
 626                     self.o.z.m.eq(self.i.tot[3:]),
 627                     self.o.of.m0.eq(self.i.tot[3]),
 628                     self.o.of.guard.eq(self.i.tot[2]),
 629                     self.o.of.round_bit.eq(self.i.tot[1]),
 630                     self.o.of.sticky.eq(self.i.tot[0])
 631             ]
 632
 633         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 634         m.d.comb += self.o.oz.eq(self.i.oz)
 635         m.d.comb += self.o.mid.eq(self.i.mid)
 636
 637         return m
 638
 639
 640 class FPAddStage1(FPState):
 641
 642     def __init__(self, width, id_wid):
 643         FPState.__init__(self, "add_1")
 644         self.mod = FPAddStage1Mod(width)
 645         self.out_z = FPNumBase(width, False)
 646         self.out_of = Overflow()
 647         self.norm_stb = Signal()
 648
 649     def setup(self, m, i):
 650         """ links module to inputs and outputs
 651         """
 652         self.mod.setup(m, i)
 653
 654         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 655
 656         m.d.sync += self.out_of.eq(self.mod.out_of)
 657         m.d.sync += self.out_z.eq(self.mod.out_z)
 658         m.d.sync += self.norm_stb.eq(1)
 659
 660     def action(self, m):
 661         m.next = "normalise_1"
 662
 663
 664 class FPNormaliseModSingle:
 665
 666     def __init__(self, width):
 667         self.width = width
 668         self.in_z = self.ispec()
 669         self.out_z = self.ospec()
 670
 671     def ispec(self):
 672         return FPNumBase(self.width, False)
 673
 674     def ospec(self):
 675         return FPNumBase(self.width, False)
 676
 677     def setup(self, m, i):
 678         """ links module to inputs and outputs
 679         """
 680         m.submodules.normalise = self
 681         m.d.comb += self.i.eq(i)
 682
 683     def elaborate(self, platform):
 684         m = Module()
 685
 686         mwid = self.out_z.m_width+2
 687         pe = PriorityEncoder(mwid)
 688         m.submodules.norm_pe = pe
 689
 690         m.submodules.norm1_out_z = self.out_z
 691         m.submodules.norm1_in_z = self.in_z
 692
 693         in_z = FPNumBase(self.width, False)
 694         in_of = Overflow()
 695         m.submodules.norm1_insel_z = in_z
 696         m.submodules.norm1_insel_overflow = in_of
 697
 698         espec = (len(in_z.e), True)
 699         ediff_n126 = Signal(espec, reset_less=True)
 700         msr = MultiShiftRMerge(mwid, espec)
 701         m.submodules.multishift_r = msr
 702
 703         m.d.comb += in_z.eq(self.in_z)
 704         m.d.comb += in_of.eq(self.in_of)
 705         # initialise out from in (overridden below)
 706         m.d.comb += self.out_z.eq(in_z)
 707         m.d.comb += self.out_of.eq(in_of)
 708         # normalisation decrease condition
 709         decrease = Signal(reset_less=True)
 710         m.d.comb += decrease.eq(in_z.m_msbzero)
 711         # decrease exponent
 712         with m.If(decrease):
 713             # *sigh* not entirely obvious: count leading zeros (clz)
 714             # with a PriorityEncoder: to find from the MSB
 715             # we reverse the order of the bits.
 716             temp_m = Signal(mwid, reset_less=True)
 717             temp_s = Signal(mwid+1, reset_less=True)
 718             clz = Signal((len(in_z.e), True), reset_less=True)
 719             m.d.comb += [
 720                 # cat round and guard bits back into the mantissa
 721                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
 722                 pe.i.eq(temp_m[::-1]),          # inverted
 723                 clz.eq(pe.o),                   # count zeros from MSB down
 724                 temp_s.eq(temp_m << clz),       # shift mantissa UP
 725                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
 726                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
 727             ]
 728
 729         return m
 730
 731
 732 class FPNorm1Data:
 733
 734     def __init__(self, width, id_wid):
 735         self.roundz = Signal(reset_less=True)
 736         self.z = FPNumBase(width, False)
 737         self.out_do_z = Signal(reset_less=True)
 738         self.oz = Signal(width, reset_less=True)
 739         self.mid = Signal(id_wid, reset_less=True)
 740
 741     def eq(self, i):
 742         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 743                 self.roundz.eq(i.roundz), self.mid.eq(i.mid)]
 744
 745
 746 class FPNorm1ModSingle:
 747
 748     def __init__(self, width, id_wid):
 749         self.width = width
 750         self.id_wid = id_wid
 751         self.i = self.ispec()
 752         self.o = self.ospec()
 753
 754     def ispec(self):
 755         return FPAddStage1Data(self.width, self.id_wid)
 756
 757     def ospec(self):
 758         return FPNorm1Data(self.width, self.id_wid)
 759
 760     def setup(self, m, i):
 761         """ links module to inputs and outputs
 762         """
 763         m.submodules.normalise_1 = self
 764         m.d.comb += self.i.eq(i)
 765
 766     def process(self, i):
 767         return self.o
 768
 769     def elaborate(self, platform):
 770         m = Module()
 771
 772         mwid = self.o.z.m_width+2
 773         pe = PriorityEncoder(mwid)
 774         m.submodules.norm_pe = pe
 775
 776         of = Overflow()
 777         m.d.comb += self.o.roundz.eq(of.roundz)
 778
 779         m.submodules.norm1_out_z = self.o.z
 780         m.submodules.norm1_out_overflow = of
 781         m.submodules.norm1_in_z = self.i.z
 782         m.submodules.norm1_in_overflow = self.i.of
 783
 784         i = self.ispec()
 785         m.submodules.norm1_insel_z = i.z
 786         m.submodules.norm1_insel_overflow = i.of
 787
 788         espec = (len(i.z.e), True)
 789         ediff_n126 = Signal(espec, reset_less=True)
 790         msr = MultiShiftRMerge(mwid, espec)
 791         m.submodules.multishift_r = msr
 792
 793         m.d.comb += i.eq(self.i)
 794         # initialise out from in (overridden below)
 795         m.d.comb += self.o.z.eq(i.z)
 796         m.d.comb += of.eq(i.of)
 797         # normalisation increase/decrease conditions
 798         decrease = Signal(reset_less=True)
 799         increase = Signal(reset_less=True)
 800         m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
 801         m.d.comb += increase.eq(i.z.exp_lt_n126)
 802         # decrease exponent
 803         with m.If(~self.i.out_do_z):
 804             with m.If(decrease):
 805                 # *sigh* not entirely obvious: count leading zeros (clz)
 806                 # with a PriorityEncoder: to find from the MSB
 807                 # we reverse the order of the bits.
 808                 temp_m = Signal(mwid, reset_less=True)
 809                 temp_s = Signal(mwid+1, reset_less=True)
 810                 clz = Signal((len(i.z.e), True), reset_less=True)
 811                 # make sure that the amount to decrease by does NOT
 812                 # go below the minimum non-INF/NaN exponent
 813                 limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
 814                              i.z.exp_sub_n126)
 815                 m.d.comb += [
 816                     # cat round and guard bits back into the mantissa
 817                     temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
 818                     pe.i.eq(temp_m[::-1]),          # inverted
 819                     clz.eq(limclz),                 # count zeros from MSB down
 820                     temp_s.eq(temp_m << clz),       # shift mantissa UP
 821                     self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
 822                     self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
 823                     of.m0.eq(temp_s[2]),          # copy of mantissa[0]
 824                     # overflow in bits 0..1: got shifted too (leave sticky)
 825                     of.guard.eq(temp_s[1]),       # guard
 826                     of.round_bit.eq(temp_s[0]),   # round
 827                 ]
 828             # increase exponent
 829             with m.Elif(increase):
 830                 temp_m = Signal(mwid+1, reset_less=True)
 831                 m.d.comb += [
 832                     temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
 833                                   i.z.m)),
 834                     ediff_n126.eq(i.z.N126 - i.z.e),
 835                     # connect multi-shifter to inp/out mantissa (and ediff)
 836                     msr.inp.eq(temp_m),
 837                     msr.diff.eq(ediff_n126),
 838                     self.o.z.m.eq(msr.m[3:]),
 839                     of.m0.eq(temp_s[3]),   # copy of mantissa[0]
 840                     # overflow in bits 0..1: got shifted too (leave sticky)
 841                     of.guard.eq(temp_s[2]),     # guard
 842                     of.round_bit.eq(temp_s[1]), # round
 843                     of.sticky.eq(temp_s[0]),    # sticky
 844                     self.o.z.e.eq(i.z.e + ediff_n126),
 845                 ]
 846
 847         m.d.comb += self.o.mid.eq(self.i.mid)
 848         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 849         m.d.comb += self.o.oz.eq(self.i.oz)
 850
 851         return m
 852
 853
 854 class FPNorm1ModMulti:
 855
 856     def __init__(self, width, single_cycle=True):
 857         self.width = width
 858         self.in_select = Signal(reset_less=True)
 859         self.in_z = FPNumBase(width, False)
 860         self.in_of = Overflow()
 861         self.temp_z = FPNumBase(width, False)
 862         self.temp_of = Overflow()
 863         self.out_z = FPNumBase(width, False)
 864         self.out_of = Overflow()
 865
 866     def elaborate(self, platform):
 867         m = Module()
 868
 869         m.submodules.norm1_out_z = self.out_z
 870         m.submodules.norm1_out_overflow = self.out_of
 871         m.submodules.norm1_temp_z = self.temp_z
 872         m.submodules.norm1_temp_of = self.temp_of
 873         m.submodules.norm1_in_z = self.in_z
 874         m.submodules.norm1_in_overflow = self.in_of
 875
 876         in_z = FPNumBase(self.width, False)
 877         in_of = Overflow()
 878         m.submodules.norm1_insel_z = in_z
 879         m.submodules.norm1_insel_overflow = in_of
 880
 881         # select which of temp or in z/of to use
 882         with m.If(self.in_select):
 883             m.d.comb += in_z.eq(self.in_z)
 884             m.d.comb += in_of.eq(self.in_of)
 885         with m.Else():
 886             m.d.comb += in_z.eq(self.temp_z)
 887             m.d.comb += in_of.eq(self.temp_of)
 888         # initialise out from in (overridden below)
 889         m.d.comb += self.out_z.eq(in_z)
 890         m.d.comb += self.out_of.eq(in_of)
 891         # normalisation increase/decrease conditions
 892         decrease = Signal(reset_less=True)
 893         increase = Signal(reset_less=True)
 894         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 895         m.d.comb += increase.eq(in_z.exp_lt_n126)
 896         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
 897         # decrease exponent
 898         with m.If(decrease):
 899             m.d.comb += [
 900                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
 901                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
 902                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
 903                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
 904                 self.out_of.round_bit.eq(0),        # reset round bit
 905                 self.out_of.m0.eq(in_of.guard),
 906             ]
 907         # increase exponent
 908         with m.Elif(increase):
 909             m.d.comb += [
 910                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
 911                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
 912                 self.out_of.guard.eq(in_z.m[0]),
 913                 self.out_of.m0.eq(in_z.m[1]),
 914                 self.out_of.round_bit.eq(in_of.guard),
 915                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
 916             ]
 917
 918         return m
 919
 920
 921 class FPNorm1Single(FPState):
 922
 923     def __init__(self, width, id_wid, single_cycle=True):
 924         FPState.__init__(self, "normalise_1")
 925         self.mod = FPNorm1ModSingle(width)
 926         self.o = self.ospec()
 927         self.out_z = FPNumBase(width, False)
 928         self.out_roundz = Signal(reset_less=True)
 929
 930     def ispec(self):
 931         return self.mod.ispec()
 932
 933     def ospec(self):
 934         return self.mod.ospec()
 935
 936     def setup(self, m, i):
 937         """ links module to inputs and outputs
 938         """
 939         self.mod.setup(m, i)
 940
 941     def action(self, m):
 942         m.next = "round"
 943
 944
 945 class FPNorm1Multi(FPState):
 946
 947     def __init__(self, width, id_wid):
 948         FPState.__init__(self, "normalise_1")
 949         self.mod = FPNorm1ModMulti(width)
 950         self.stb = Signal(reset_less=True)
 951         self.ack = Signal(reset=0, reset_less=True)
 952         self.out_norm = Signal(reset_less=True)
 953         self.in_accept = Signal(reset_less=True)
 954         self.temp_z = FPNumBase(width)
 955         self.temp_of = Overflow()
 956         self.out_z = FPNumBase(width)
 957         self.out_roundz = Signal(reset_less=True)
 958
 959     def setup(self, m, in_z, in_of, norm_stb):
 960         """ links module to inputs and outputs
 961         """
 962         self.mod.setup(m, in_z, in_of, norm_stb,
 963                        self.in_accept, self.temp_z, self.temp_of,
 964                        self.out_z, self.out_norm)
 965
 966         m.d.comb += self.stb.eq(norm_stb)
 967         m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state
 968
 969     def action(self, m):
 970         m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
 971         m.d.sync += self.temp_of.eq(self.mod.out_of)
 972         m.d.sync += self.temp_z.eq(self.out_z)
 973         with m.If(self.out_norm):
 974             with m.If(self.in_accept):
 975                 m.d.sync += [
 976                     self.ack.eq(1),
 977                 ]
 978             with m.Else():
 979                 m.d.sync += self.ack.eq(0)
 980         with m.Else():
 981             # normalisation not required (or done).
 982             m.next = "round"
 983             m.d.sync += self.ack.eq(1)
 984             m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
 985
 986
 987 class FPNormToPack(FPState, UnbufferedPipeline):
 988
 989     def __init__(self, width, id_wid):
 990         FPState.__init__(self, "normalise_1")
 991         self.id_wid = id_wid
 992         self.width = width
 993         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 994
 995     def ispec(self):
 996         return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec
 997
 998     def ospec(self):
 999         return FPPackData(self.width, self.id_wid) # FPPackMod ospec
1000
1001     def setup(self, m, i):
1002         """ links module to inputs and outputs
1003         """
1004
1005         # Normalisation, Rounding Corrections, Pack - in a chain
1006         nmod = FPNorm1ModSingle(self.width, self.id_wid)
1007         rmod = FPRoundMod(self.width, self.id_wid)
1008         cmod = FPCorrectionsMod(self.width, self.id_wid)
1009         pmod = FPPackMod(self.width, self.id_wid)
1010         chain = StageChain([nmod, rmod, cmod, pmod])
1011         chain.setup(m, i)
1012         self.out_z = pmod.ospec()
1013
1014         self.o = pmod.o
1015
1016     def process(self, i):
1017         return self.o
1018
1019     def action(self, m):
1020         m.d.sync += self.out_z.eq(self.process(None))
1021         m.next = "pack_put_z"
1022
1023
class FPRoundData:
    """ data record passed between the round, corrections and pack stages

        * z: the number being processed
        * out_do_z, oz: flag and pre-computed value, copied through
        * mid: identifier carried alongside the data
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i's fields into self
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.mid, i.mid),
        )
        return [dest.eq(src) for dest, src in pairs]
1035
1036
class FPRoundMod:
    """ rounding stage: adds one to the mantissa when roundz is set

        Skipped entirely when the input's out_do_z flag is set.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns this stage's output record; the argument is ignored
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the combinatorial rounding logic
        """
        m = Module()
        src, dest = self.i, self.out_z
        # default: pass everything through (copies mid, z, out_do_z)
        m.d.comb += dest.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                m.d.comb += dest.z.m.eq(src.z.m + 1) # mantissa up
                # mantissa was all 1s: the increment carries into the exponent
                with m.If(src.z.m == src.z.m1s):
                    m.d.comb += dest.z.e.eq(src.z.e + 1) # exponent up

        return m
1068
1069
class FPRound(FPState):
    """ FSM state "round": registers the output of FPRoundMod, then
        moves on to "corrections".

        * width: bit-width of the IEEE754 number
        * id_wid: width of the identifier carried alongside the data
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in this class; presumably
        # inherited from FPState - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally advance to "corrections"
        """
        m.next = "corrections"
1094
1095
class FPCorrectionsMod:
    """ corrections stage: when z is flagged as denormalised, its
        exponent is replaced with z.N127.

        Skipped entirely when the input's out_do_z flag is set.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns this stage's output record; the argument is ignored
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the combinatorial corrections logic
        """
        m = Module()
        src, dest = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dest.z
        # default: pass everything through (copies mid, z, out_do_z)
        m.d.comb += dest.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dest.z.e.eq(src.z.N127)
        return m
1128
1129
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of
        FPCorrectionsMod, then moves on to "pack".

        * width: bit-width of the IEEE754 number
        * id_wid: width of the identifier carried alongside the data
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally advance to "pack"
        """
        m.next = "pack"
1153
1154
class FPPackData:
    """ packed result record: the value z plus the identifier mid
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i's fields into self
        """
        pairs = ((self.z, i.z), (self.mid, i.mid))
        return [dest.eq(src) for dest, src in pairs]

    def ports(self):
        """ returns the record's fields as a list: z, then mid
        """
        return list((self.z, self.mid))
1166
1167
class FPPackMod:
    """ pack stage: produces the final value from the number z.

        When the input's out_do_z flag is set, the pre-computed value oz
        is passed straight through instead.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns this stage's output record; the argument is ignored
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ builds the combinatorial packing logic
        """
        m = Module()
        src = self.i
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = src.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            # an overflowed number packs as infinity with the same sign
            with m.If(src.z.is_overflowed):
                m.d.comb += z.inf(src.z.s)
            with m.Else():
                m.d.comb += z.create(src.z.s, src.z.e, src.z.m)
        with m.Else():
            # result was decided earlier: use it as-is
            m.d.comb += z.v.eq(src.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1206
1207
class FPPack(FPState):
    """ FSM state "pack": registers the output of FPPackMod, then moves
        on to "pack_put_z".

        * width: bit-width of the IEEE754 number
        * id_wid: width of the identifier carried alongside the data
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o", an FPPackData whose
        # fields are z and mid (there is no out_z and no .v on either).
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: unconditionally advance to "pack_put_z"
        """
        m.next = "pack_put_z"
1231
1232
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and waits for it to be
        acknowledged before moving to to_state.

        * state: name of this FSM state
        * in_z: value to be written out
        * out_z: object whose .z has v, stb and ack
        * in_mid, out_mid: identifier source and destination (the copy
          is skipped when in_mid is None)
        * to_state: next state; defaults to "get_ops"
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z, self.out_z = in_z, out_z
        self.in_mid, self.out_mid = in_mid, out_mid

    def action(self, m):
        """ FSM action: hold stb high until stb and ack are both set
        """
        dest = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [dest.v.eq(self.in_z)]
        with m.If(dest.stb & dest.ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1256
1257
class FPPutZIdx(FPState):
    """ FSM state that writes in_z to the entry of out_zs selected by
        in_mid, and waits for that entry to be acknowledged.

        * state: name of this FSM state
        * in_z: object whose .v is the value to write
        * out_zs: indexable collection of outputs with v, stb and ack
        * in_mid: index of the output to use
        * to_state: next state; defaults to "get_ops"
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: hold the selected stb high until stb and ack
            are both set
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # the output selected by in_mid
        target = self.out_zs[self.in_mid]
        m.d.comb += [
            outz_stb.eq(target.stb),
            outz_ack.eq(target.ack),
        ]
        m.d.sync += [target.v.eq(self.in_z.v)]
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += target.stb.eq(1)
1283
1284
class FPOpData:
    """ output record: an FPOp result z plus the identifier mid
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i's fields into self
        """
        pairs = ((self.z, i.z), (self.mid, i.mid))
        return [dest.eq(src) for dest, src in pairs]

    def ports(self):
        """ returns the record's fields as a list: z, then mid
        """
        return list((self.z, self.mid))
1295
1296
class FPADDBaseMod:
    """ FSM-based FP adder: collects a list of state objects and turns
        them into a single FSM in get_fragment().
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # state objects, in the order they were added with add_state()
        self.states = []

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states using one of the two stage layouts
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered object, in registration order
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the long chain of states: one state per stage

            NOTE(review): this path looks out of date relative to the
            rest of the file.  It reads self.in_mid, self.out_z and
            self.out_mid, none of which __init__ assigns, and several
            setup() calls pass more arguments than the setup() methods
            of FPNorm1Single, FPNorm1Multi, FPRound, FPCorrections and
            FPPack accept.
        """

        # NOTE(review): called without id_wid here, but with id_wid in
        # get_compact_fragment - confirm which form FPGet2Op expects.
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or multi-cycle variant
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output state for the result that went through pack
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        # output state for the result taken from special-cases (sc.out_z)
        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced chain of states: get-ops, special-cases
            plus denorm, align plus add, normalise-to-pack, then put-z
        """


        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        # connect the stages together, fed from this module's input
        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also an FSM state
        # (the loop rebinds sc; it ends up referring to the last stage)
        for mod in chainlist:
            sc = self.add_state(mod)

        # final state: write the packed result and id to this module's output
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1419
1420
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod (itself an FSM) and
        bridges its in/out strobe-ack handshakes to the enclosing FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * i: input data record, copied into self.i and on to the module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     # input trigger: forward stb down, ack back up
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     # output: forward id, value and stb up, ack back down
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: start the add when an operand pair is accepted,
            then move to "put_z" once the module signals completion
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and hand over to the output state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
            # the statements that used to follow an early "return" here
            # could never execute and have been removed
1510
1511
class FPADDBasePipe(ControlBase):
    """ three-stage pipelined adder: special-cases plus denorm, then
        align plus add, then normalise-to-pack
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements, added to comb in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages as submodules and wires them up
        """
        m = Module()
        named_stages = (
            ("scnorm", self.pipe1),
            ("addalign", self.pipe2),
            ("normpack", self.pipe3),
        )
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1528
1529
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ fan-in: num_rows inputs of FPADDBaseData, passed through unchanged
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # each call builds a fresh FPADDBaseData record
        stage = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        PriorityCombMuxInPipe.__init__(self, stage, p_len=num_rows)
1536
1537
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ fan-out: num_rows outputs of FPPackData, passed through unchanged
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # each call builds a fresh FPPackData record
        stage = PassThroughStage(lambda: FPPackData(width, id_wid))
        CombMuxOutPipe.__init__(self, stage, n_len=num_rows)
1544
1545
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # fan-in -> adder pipeline -> fan-out
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)
        self.fpadd = FPADDBasePipe(width, id_wid)
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)

        # this class's own in/out are simply those of the two mux pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes as submodules and connects them
            in sequence
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        m.d.comb += [
            self.inpipe.n.connect_to_next(self.fpadd.p),
            self.fpadd.connect_to_next(self.outpipe),
        ]

        return m

    def ports(self):
        """ returns the port list assembled in the constructor
        """
        return self._ports
1578
1579
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is both a stage and a container of stages, so its
        connection to FPAddBaseMod needs an in stb/ack and an out
        stb/ack.  As with the Add1-Norm1 interaction, FPGetOp has to be
        the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) input pairs and of out_z results
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        def named_op(pattern, index):
            # creates an operand and labels it with its row number
            op = FPOp(width)
            op.name = pattern % index
            return op

        # input operand pairs, one pair per row
        self.rs = Array([(named_op("in_a_%d", row), named_op("in_b_%d", row))
                         for row in range(rs_sz)])
        # result operands, one per row
        self.res = Array([named_op("out_z_%d", row) for row in range(rs_sz)])

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first input row is connected here
        first_a, first_b = self.rs[0]

        fetch_a = self.add_state(FPGetOp("get_a", "get_b",
                                         first_a, self.width))
        fetch_a.setup(m, first_a)
        a = fetch_a.out_op

        fetch_b = self.add_state(FPGetOp("get_b", "fpadd",
                                         first_b, self.width))
        fetch_b.setup(m, first_b)
        b = fetch_b.out_op

        adder = self.add_state(FPADDBase(self.width, self.id_wid,
                                         self.single_cycle))
        # input record for the adder, loaded synchronously from a, b and id
        adder_in = adder.ispec()
        m.d.sync += [
            adder_in.a.eq(a),
            adder_in.b.eq(b),
            adder_in.mid.eq(self.ids.in_mid),
        ]
        adder.setup(m, adder_in, fetch_b.out_decode, self.ids.in_mid)
        result = adder.o

        # write the result to the row selected by its id, then restart
        self.add_state(FPPutZIdx("put_z", result.z, self.res,
                                 result.mid, "get_a"))

        # one FSM state per registered object, in registration order
        with m.FSM() as fsm:
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1669
1670
if __name__ == "__main__":
    # developer toggle: True builds the full FPADD, False only FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports()
                 + alu.rs[0][1].ports()
                 + alu.res[0].ports()
                 + [alu.ids.in_mid, alu.ids.out_mid])
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b]
                 + alu.in_t.ports()
                 + alu.out_z.ports()
                 + [alu.in_mid, alu.out_mid])
    main(alu, ports=ports)
1684
1685
1686     # works... but don't use, just do "python fname.py convert -t v"
1687     #print (verilog.convert(alu, ports=[
1688     #                        ports=alu.in_a.ports() + \
1689     #                              alu.in_b.ports() + \
1690     #                              alu.out_z.ports())