src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,                              FPGet2OpMod, FPGet2Op)
  19
  20
  21 class FPSCData:
  22
  23     def __init__(self, width, id_wid):
  24         self.a = FPNumBase(width, True)
  25         self.b = FPNumBase(width, True)
  26         self.z = FPNumOut(width, False)
  27         self.oz = Signal(width, reset_less=True)
  28         self.out_do_z = Signal(reset_less=True)
  29         self.mid = Signal(id_wid, reset_less=True)
  30
  31     def eq(self, i):
  32         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
  33                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
  34
  35
  36 class FPAddSpecialCasesMod:
  37     """ special cases: NaNs, infs, zeros, denormalised
  38         NOTE: some of these are unique to add.  see "Special Operations"
  39         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
  40     """
  41
  42     def __init__(self, width, id_wid):
  43         self.width = width
  44         self.id_wid = id_wid
  45         self.i = self.ispec()
  46         self.o = self.ospec()
  47
  48     def ispec(self):
  49         return FPADDBaseData(self.width, self.id_wid)
  50
  51     def ospec(self):
  52         return FPSCData(self.width, self.id_wid)
  53
  54     def setup(self, m, i):
  55         """ links module to inputs and outputs
  56         """
  57         m.submodules.specialcases = self
  58         m.d.comb += self.i.eq(i)
  59
  60     def process(self, i):
  61         return self.o
  62
  63     def elaborate(self, platform):
  64         m = Module()
  65
  66         m.submodules.sc_out_z = self.o.z
  67
  68         # decode: XXX really should move to separate stage
  69         a1 = FPNumIn(None, self.width)
  70         b1 = FPNumIn(None, self.width)
  71         m.submodules.sc_decode_a = a1
  72         m.submodules.sc_decode_b = b1
  73         m.d.comb += [a1.decode(self.i.a),
  74                      b1.decode(self.i.b),
  75                     ]
  76
  77         s_nomatch = Signal()
  78         m.d.comb += s_nomatch.eq(a1.s != b1.s)
  79
  80         m_match = Signal()
  81         m.d.comb += m_match.eq(a1.m == b1.m)
  82
  83         # if a is NaN or b is NaN return NaN
  84         with m.If(a1.is_nan | b1.is_nan):
  85             m.d.comb += self.o.out_do_z.eq(1)
  86             m.d.comb += self.o.z.nan(0)
  87
  88         # XXX WEIRDNESS for FP16 non-canonical NaN handling
  89         # under review
  90
  91         ## if a is zero and b is NaN return -b
  92         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
  93         #    m.d.comb += self.o.out_do_z.eq(1)
  94         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
  95
  96         ## if b is zero and a is NaN return -a
  97         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
  98         #    m.d.comb += self.o.out_do_z.eq(1)
  99         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 100
 101         ## if a is -zero and b is NaN return -b
 102         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 103         #    m.d.comb += self.o.out_do_z.eq(1)
 104         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 105
 106         ## if b is -zero and a is NaN return -a
 107         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 108         #    m.d.comb += self.o.out_do_z.eq(1)
 109         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 110
 111         # if a is inf return inf (or NaN)
 112         with m.Elif(a1.is_inf):
 113             m.d.comb += self.o.out_do_z.eq(1)
 114             m.d.comb += self.o.z.inf(a1.s)
 115             # if a is inf and signs don't match return NaN
 116             with m.If(b1.exp_128 & s_nomatch):
 117                 m.d.comb += self.o.z.nan(0)
 118
 119         # if b is inf return inf
 120         with m.Elif(b1.is_inf):
 121             m.d.comb += self.o.out_do_z.eq(1)
 122             m.d.comb += self.o.z.inf(b1.s)
 123
 124         # if a is zero and b zero return signed-a/b
 125         with m.Elif(a1.is_zero & b1.is_zero):
 126             m.d.comb += self.o.out_do_z.eq(1)
 127             m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
 128
 129         # if a is zero return b
 130         with m.Elif(a1.is_zero):
 131             m.d.comb += self.o.out_do_z.eq(1)
 132             m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
 133
 134         # if b is zero return a
 135         with m.Elif(b1.is_zero):
 136             m.d.comb += self.o.out_do_z.eq(1)
 137             m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
 138
 139         # if a equal to -b return zero (+ve zero)
 140         with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
 141             m.d.comb += self.o.out_do_z.eq(1)
 142             m.d.comb += self.o.z.zero(0)
 143
 144         # Denormalised Number checks next, so pass a/b data through
 145         with m.Else():
 146             m.d.comb += self.o.out_do_z.eq(0)
 147             m.d.comb += self.o.a.eq(a1)
 148             m.d.comb += self.o.b.eq(b1)
 149
 150         m.d.comb += self.o.oz.eq(self.o.z.v)
 151         m.d.comb += self.o.mid.eq(self.i.mid)
 152
 153         return m
 154
 155
 156 class FPID:
 157     def __init__(self, id_wid):
 158         self.id_wid = id_wid
 159         if self.id_wid:
 160             self.in_mid = Signal(id_wid, reset_less=True)
 161             self.out_mid = Signal(id_wid, reset_less=True)
 162         else:
 163             self.in_mid = None
 164             self.out_mid = None
 165
 166     def idsync(self, m):
 167         if self.id_wid is not None:
 168             m.d.sync += self.out_mid.eq(self.in_mid)
 169
 170
 171 class FPAddSpecialCases(FPState):
 172     """ special cases: NaNs, infs, zeros, denormalised
 173         NOTE: some of these are unique to add.  see "Special Operations"
 174         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 175     """
 176
 177     def __init__(self, width, id_wid):
 178         FPState.__init__(self, "special_cases")
 179         self.mod = FPAddSpecialCasesMod(width)
 180         self.out_z = self.mod.ospec()
 181         self.out_do_z = Signal(reset_less=True)
 182
 183     def setup(self, m, i):
 184         """ links module to inputs and outputs
 185         """
 186         self.mod.setup(m, i, self.out_do_z)
 187         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 188         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 189
 190     def action(self, m):
 191         self.idsync(m)
 192         with m.If(self.out_do_z):
 193             m.next = "put_z"
 194         with m.Else():
 195             m.next = "denormalise"
 196
 197
 198 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 199     """ special cases: NaNs, infs, zeros, denormalised
 200         NOTE: some of these are unique to add.  see "Special Operations"
 201         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 202     """
 203
 204     def __init__(self, width, id_wid):
 205         FPState.__init__(self, "special_cases")
 206         self.width = width
 207         self.id_wid = id_wid
 208         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 209         self.out = self.ospec()
 210
 211     def ispec(self):
 212         return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec
 213
 214     def ospec(self):
 215         return FPSCData(self.width, self.id_wid) # DeNorm ospec
 216
 217     def setup(self, m, i):
 218         """ links module to inputs and outputs
 219         """
 220         smod = FPAddSpecialCasesMod(self.width, self.id_wid)
 221         dmod = FPAddDeNormMod(self.width, self.id_wid)
 222
 223         chain = StageChain([smod, dmod])
 224         chain.setup(m, i)
 225
 226         # only needed for break-out (early-out)
 227         # self.out_do_z = smod.o.out_do_z
 228
 229         self.o = dmod.o
 230
 231     def process(self, i):
 232         return self.o
 233
 234     def action(self, m):
 235         # for break-out (early-out)
 236         #with m.If(self.out_do_z):
 237         #    m.next = "put_z"
 238         #with m.Else():
 239             m.d.sync += self.out.eq(self.process(None))
 240             m.next = "align"
 241
 242
 243 class FPAddDeNormMod(FPState):
 244
 245     def __init__(self, width, id_wid):
 246         self.width = width
 247         self.id_wid = id_wid
 248         self.i = self.ispec()
 249         self.o = self.ospec()
 250
 251     def ispec(self):
 252         return FPSCData(self.width, self.id_wid)
 253
 254     def ospec(self):
 255         return FPSCData(self.width, self.id_wid)
 256
 257     def process(self, i):
 258         return self.o
 259
 260     def setup(self, m, i):
 261         """ links module to inputs and outputs
 262         """
 263         m.submodules.denormalise = self
 264         m.d.comb += self.i.eq(i)
 265
 266     def elaborate(self, platform):
 267         m = Module()
 268         m.submodules.denorm_in_a = self.i.a
 269         m.submodules.denorm_in_b = self.i.b
 270         m.submodules.denorm_out_a = self.o.a
 271         m.submodules.denorm_out_b = self.o.b
 272
 273         with m.If(~self.i.out_do_z):
 274             # XXX hmmm, don't like repeating identical code
 275             m.d.comb += self.o.a.eq(self.i.a)
 276             with m.If(self.i.a.exp_n127):
 277                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 278             with m.Else():
 279                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 280
 281             m.d.comb += self.o.b.eq(self.i.b)
 282             with m.If(self.i.b.exp_n127):
 283                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 284             with m.Else():
 285                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 286
 287         m.d.comb += self.o.mid.eq(self.i.mid)
 288         m.d.comb += self.o.z.eq(self.i.z)
 289         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 290         m.d.comb += self.o.oz.eq(self.i.oz)
 291
 292         return m
 293
 294
 295 class FPAddDeNorm(FPState):
 296
 297     def __init__(self, width, id_wid):
 298         FPState.__init__(self, "denormalise")
 299         self.mod = FPAddDeNormMod(width)
 300         self.out_a = FPNumBase(width)
 301         self.out_b = FPNumBase(width)
 302
 303     def setup(self, m, i):
 304         """ links module to inputs and outputs
 305         """
 306         self.mod.setup(m, i)
 307
 308         m.d.sync += self.out_a.eq(self.mod.out_a)
 309         m.d.sync += self.out_b.eq(self.mod.out_b)
 310
 311     def action(self, m):
 312         # Denormalised Number checks
 313         m.next = "align"
 314
 315
 316 class FPAddAlignMultiMod(FPState):
 317
 318     def __init__(self, width):
 319         self.in_a = FPNumBase(width)
 320         self.in_b = FPNumBase(width)
 321         self.out_a = FPNumIn(None, width)
 322         self.out_b = FPNumIn(None, width)
 323         self.exp_eq = Signal(reset_less=True)
 324
 325     def elaborate(self, platform):
 326         # This one however (single-cycle) will do the shift
 327         # in one go.
 328
 329         m = Module()
 330
 331         m.submodules.align_in_a = self.in_a
 332         m.submodules.align_in_b = self.in_b
 333         m.submodules.align_out_a = self.out_a
 334         m.submodules.align_out_b = self.out_b
 335
 336         # NOTE: this does *not* do single-cycle multi-shifting,
 337         #       it *STAYS* in the align state until exponents match
 338
 339         # exponent of a greater than b: shift b down
 340         m.d.comb += self.exp_eq.eq(0)
 341         m.d.comb += self.out_a.eq(self.in_a)
 342         m.d.comb += self.out_b.eq(self.in_b)
 343         agtb = Signal(reset_less=True)
 344         altb = Signal(reset_less=True)
 345         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 346         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 347         with m.If(agtb):
 348             m.d.comb += self.out_b.shift_down(self.in_b)
 349         # exponent of b greater than a: shift a down
 350         with m.Elif(altb):
 351             m.d.comb += self.out_a.shift_down(self.in_a)
 352         # exponents equal: move to next stage.
 353         with m.Else():
 354             m.d.comb += self.exp_eq.eq(1)
 355         return m
 356
 357
 358 class FPAddAlignMulti(FPState):
 359
 360     def __init__(self, width, id_wid):
 361         FPState.__init__(self, "align")
 362         self.mod = FPAddAlignMultiMod(width)
 363         self.out_a = FPNumIn(None, width)
 364         self.out_b = FPNumIn(None, width)
 365         self.exp_eq = Signal(reset_less=True)
 366
 367     def setup(self, m, in_a, in_b):
 368         """ links module to inputs and outputs
 369         """
 370         m.submodules.align = self.mod
 371         m.d.comb += self.mod.in_a.eq(in_a)
 372         m.d.comb += self.mod.in_b.eq(in_b)
 373         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 374         m.d.sync += self.out_a.eq(self.mod.out_a)
 375         m.d.sync += self.out_b.eq(self.mod.out_b)
 376
 377     def action(self, m):
 378         with m.If(self.exp_eq):
 379             m.next = "add_0"
 380
 381
 382 class FPNumIn2Ops:
 383
 384     def __init__(self, width, id_wid):
 385         self.a = FPNumIn(None, width)
 386         self.b = FPNumIn(None, width)
 387         self.z = FPNumOut(width, False)
 388         self.out_do_z = Signal(reset_less=True)
 389         self.oz = Signal(width, reset_less=True)
 390         self.mid = Signal(id_wid, reset_less=True)
 391
 392     def eq(self, i):
 393         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 394                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 395
 396
 397 class FPAddAlignSingleMod:
 398
 399     def __init__(self, width, id_wid):
 400         self.width = width
 401         self.id_wid = id_wid
 402         self.i = self.ispec()
 403         self.o = self.ospec()
 404
 405     def ispec(self):
 406         return FPSCData(self.width, self.id_wid)
 407
 408     def ospec(self):
 409         return FPNumIn2Ops(self.width, self.id_wid)
 410
 411     def process(self, i):
 412         return self.o
 413
 414     def setup(self, m, i):
 415         """ links module to inputs and outputs
 416         """
 417         m.submodules.align = self
 418         m.d.comb += self.i.eq(i)
 419
 420     def elaborate(self, platform):
 421         """ Aligns A against B or B against A, depending on which has the
 422             greater exponent.  This is done in a *single* cycle using
 423             variable-width bit-shift
 424
 425             the shifter used here is quite expensive in terms of gates.
 426             Mux A or B in (and out) into temporaries, as only one of them
 427             needs to be aligned against the other
 428         """
 429         m = Module()
 430
 431         m.submodules.align_in_a = self.i.a
 432         m.submodules.align_in_b = self.i.b
 433         m.submodules.align_out_a = self.o.a
 434         m.submodules.align_out_b = self.o.b
 435
 436         # temporary (muxed) input and output to be shifted
 437         t_inp = FPNumBase(self.width)
 438         t_out = FPNumIn(None, self.width)
 439         espec = (len(self.i.a.e), True)
 440         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 441         m.submodules.align_t_in = t_inp
 442         m.submodules.align_t_out = t_out
 443         m.submodules.multishift_r = msr
 444
 445         ediff = Signal(espec, reset_less=True)
 446         ediffr = Signal(espec, reset_less=True)
 447         tdiff = Signal(espec, reset_less=True)
 448         elz = Signal(reset_less=True)
 449         egz = Signal(reset_less=True)
 450
 451         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 452         m.d.comb += msr.inp.eq(t_inp.m)
 453         m.d.comb += msr.diff.eq(tdiff)
 454         m.d.comb += t_out.m.eq(msr.m)
 455         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 456         m.d.comb += t_out.s.eq(t_inp.s)
 457
 458         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 459         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 460         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 461         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 462
 463         # default: A-exp == B-exp, A and B untouched (fall through)
 464         m.d.comb += self.o.a.eq(self.i.a)
 465         m.d.comb += self.o.b.eq(self.i.b)
 466         # only one shifter (muxed)
 467         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 468         # exponent of a greater than b: shift b down
 469         with m.If(~self.i.out_do_z):
 470             with m.If(egz):
 471                 m.d.comb += [t_inp.eq(self.i.b),
 472                              tdiff.eq(ediff),
 473                              self.o.b.eq(t_out),
 474                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 475                             ]
 476             # exponent of b greater than a: shift a down
 477             with m.Elif(elz):
 478                 m.d.comb += [t_inp.eq(self.i.a),
 479                              tdiff.eq(ediffr),
 480                              self.o.a.eq(t_out),
 481                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 482                             ]
 483
 484         m.d.comb += self.o.mid.eq(self.i.mid)
 485         m.d.comb += self.o.z.eq(self.i.z)
 486         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 487         m.d.comb += self.o.oz.eq(self.i.oz)
 488
 489         return m
 490
 491
 492 class FPAddAlignSingle(FPState):
 493
 494     def __init__(self, width, id_wid):
 495         FPState.__init__(self, "align")
 496         self.mod = FPAddAlignSingleMod(width, id_wid)
 497         self.out_a = FPNumIn(None, width)
 498         self.out_b = FPNumIn(None, width)
 499
 500     def setup(self, m, i):
 501         """ links module to inputs and outputs
 502         """
 503         self.mod.setup(m, i)
 504
 505         # NOTE: could be done as comb
 506         m.d.sync += self.out_a.eq(self.mod.out_a)
 507         m.d.sync += self.out_b.eq(self.mod.out_b)
 508
 509     def action(self, m):
 510         m.next = "add_0"
 511
 512
 513 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 514
 515     def __init__(self, width, id_wid):
 516         FPState.__init__(self, "align")
 517         self.width = width
 518         self.id_wid = id_wid
 519         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 520         self.a1o = self.ospec()
 521
 522     def ispec(self):
 523         return FPSCData(self.width, self.id_wid)
 524
 525     def ospec(self):
 526         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 527
 528     def setup(self, m, i):
 529         """ links module to inputs and outputs
 530         """
 531
 532         # chain AddAlignSingle, AddStage0 and AddStage1
 533         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 534         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 535         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 536
 537         chain = StageChain([mod, a0mod, a1mod])
 538         chain.setup(m, i)
 539
 540         self.o = a1mod.o
 541
 542     def process(self, i):
 543         return self.o
 544
 545     def action(self, m):
 546         m.d.sync += self.a1o.eq(self.process(None))
 547         m.next = "normalise_1"
 548
 549
 550 class FPAddStage0Data:
 551
 552     def __init__(self, width, id_wid):
 553         self.z = FPNumBase(width, False)
 554         self.out_do_z = Signal(reset_less=True)
 555         self.oz = Signal(width, reset_less=True)
 556         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 557         self.mid = Signal(id_wid, reset_less=True)
 558
 559     def eq(self, i):
 560         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 561                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 562
 563
 564 class FPAddStage0Mod:
 565
 566     def __init__(self, width, id_wid):
 567         self.width = width
 568         self.id_wid = id_wid
 569         self.i = self.ispec()
 570         self.o = self.ospec()
 571
 572     def ispec(self):
 573         return FPSCData(self.width, self.id_wid)
 574
 575     def ospec(self):
 576         return FPAddStage0Data(self.width, self.id_wid)
 577
 578     def process(self, i):
 579         return self.o
 580
 581     def setup(self, m, i):
 582         """ links module to inputs and outputs
 583         """
 584         m.submodules.add0 = self
 585         m.d.comb += self.i.eq(i)
 586
 587     def elaborate(self, platform):
 588         m = Module()
 589         m.submodules.add0_in_a = self.i.a
 590         m.submodules.add0_in_b = self.i.b
 591         m.submodules.add0_out_z = self.o.z
 592
 593         # store intermediate tests (and zero-extended mantissas)
 594         seq = Signal(reset_less=True)
 595         mge = Signal(reset_less=True)
 596         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 597         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 598         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 599                      mge.eq(self.i.a.m >= self.i.b.m),
 600                      am0.eq(Cat(self.i.a.m, 0)),
 601                      bm0.eq(Cat(self.i.b.m, 0))
 602                     ]
 603         # same-sign (both negative or both positive) add mantissas
 604         with m.If(~self.i.out_do_z):
 605             m.d.comb += self.o.z.e.eq(self.i.a.e)
 606             with m.If(seq):
 607                 m.d.comb += [
 608                     self.o.tot.eq(am0 + bm0),
 609                     self.o.z.s.eq(self.i.a.s)
 610                 ]
 611             # a mantissa greater than b, use a
 612             with m.Elif(mge):
 613                 m.d.comb += [
 614                     self.o.tot.eq(am0 - bm0),
 615                     self.o.z.s.eq(self.i.a.s)
 616                 ]
 617             # b mantissa greater than a, use b
 618             with m.Else():
 619                 m.d.comb += [
 620                     self.o.tot.eq(bm0 - am0),
 621                     self.o.z.s.eq(self.i.b.s)
 622             ]
 623
 624         m.d.comb += self.o.oz.eq(self.i.oz)
 625         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 626         m.d.comb += self.o.mid.eq(self.i.mid)
 627         return m
 628
 629
 630 class FPAddStage0(FPState):
 631     """ First stage of add.  covers same-sign (add) and subtract
 632         special-casing when mantissas are greater or equal, to
 633         give greatest accuracy.
 634     """
 635
 636     def __init__(self, width, id_wid):
 637         FPState.__init__(self, "add_0")
 638         self.mod = FPAddStage0Mod(width)
 639         self.o = self.mod.ospec()
 640
 641     def setup(self, m, i):
 642         """ links module to inputs and outputs
 643         """
 644         self.mod.setup(m, i)
 645
 646         # NOTE: these could be done as combinatorial (merge add0+add1)
 647         m.d.sync += self.o.eq(self.mod.o)
 648
 649     def action(self, m):
 650         m.next = "add_1"
 651
 652
 653 class FPAddStage1Data:
 654
 655     def __init__(self, width, id_wid):
 656         self.z = FPNumBase(width, False)
 657         self.out_do_z = Signal(reset_less=True)
 658         self.oz = Signal(width, reset_less=True)
 659         self.of = Overflow()
 660         self.mid = Signal(id_wid, reset_less=True)
 661
 662     def eq(self, i):
 663         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 664                 self.of.eq(i.of), self.mid.eq(i.mid)]
 665
 666
 667
 668 class FPAddStage1Mod(FPState):
 669     """ Second stage of add: preparation for normalisation.
 670         detects when tot sum is too big (tot[27] is kinda a carry bit)
 671     """
 672
 673     def __init__(self, width, id_wid):
 674         self.width = width
 675         self.id_wid = id_wid
 676         self.i = self.ispec()
 677         self.o = self.ospec()
 678
 679     def ispec(self):
 680         return FPAddStage0Data(self.width, self.id_wid)
 681
 682     def ospec(self):
 683         return FPAddStage1Data(self.width, self.id_wid)
 684
 685     def process(self, i):
 686         return self.o
 687
 688     def setup(self, m, i):
 689         """ links module to inputs and outputs
 690         """
 691         m.submodules.add1 = self
 692         m.submodules.add1_out_overflow = self.o.of
 693
 694         m.d.comb += self.i.eq(i)
 695
 696     def elaborate(self, platform):
 697         m = Module()
 698         m.d.comb += self.o.z.eq(self.i.z)
 699         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 700         with m.If(~self.i.out_do_z):
 701             with m.If(self.i.tot[-1]):
 702                 m.d.comb += [
 703                     self.o.z.m.eq(self.i.tot[4:]),
 704                     self.o.of.m0.eq(self.i.tot[4]),
 705                     self.o.of.guard.eq(self.i.tot[3]),
 706                     self.o.of.round_bit.eq(self.i.tot[2]),
 707                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 708                     self.o.z.e.eq(self.i.z.e + 1)
 709             ]
 710             # tot[-1] (MSB) zero case
 711             with m.Else():
 712                 m.d.comb += [
 713                     self.o.z.m.eq(self.i.tot[3:]),
 714                     self.o.of.m0.eq(self.i.tot[3]),
 715                     self.o.of.guard.eq(self.i.tot[2]),
 716                     self.o.of.round_bit.eq(self.i.tot[1]),
 717                     self.o.of.sticky.eq(self.i.tot[0])
 718             ]
 719
 720         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 721         m.d.comb += self.o.oz.eq(self.i.oz)
 722         m.d.comb += self.o.mid.eq(self.i.mid)
 723
 724         return m
 725
 726
 727 class FPAddStage1(FPState):
 728
 729     def __init__(self, width, id_wid):
 730         FPState.__init__(self, "add_1")
 731         self.mod = FPAddStage1Mod(width)
 732         self.out_z = FPNumBase(width, False)
 733         self.out_of = Overflow()
 734         self.norm_stb = Signal()
 735
 736     def setup(self, m, i):
 737         """ links module to inputs and outputs
 738         """
 739         self.mod.setup(m, i)
 740
 741         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 742
 743         m.d.sync += self.out_of.eq(self.mod.out_of)
 744         m.d.sync += self.out_z.eq(self.mod.out_z)
 745         m.d.sync += self.norm_stb.eq(1)
 746
 747     def action(self, m):
 748         m.next = "normalise_1"
 749
 750
 751 class FPNormaliseModSingle:
 752
 753     def __init__(self, width):
 754         self.width = width
 755         self.in_z = self.ispec()
 756         self.out_z = self.ospec()
 757
 758     def ispec(self):
 759         return FPNumBase(self.width, False)
 760
 761     def ospec(self):
 762         return FPNumBase(self.width, False)
 763
 764     def setup(self, m, i):
 765         """ links module to inputs and outputs
 766         """
 767         m.submodules.normalise = self
 768         m.d.comb += self.i.eq(i)
 769
 770     def elaborate(self, platform):
 771         m = Module()
 772
 773         mwid = self.out_z.m_width+2
 774         pe = PriorityEncoder(mwid)
 775         m.submodules.norm_pe = pe
 776
 777         m.submodules.norm1_out_z = self.out_z
 778         m.submodules.norm1_in_z = self.in_z
 779
 780         in_z = FPNumBase(self.width, False)
 781         in_of = Overflow()
 782         m.submodules.norm1_insel_z = in_z
 783         m.submodules.norm1_insel_overflow = in_of
 784
 785         espec = (len(in_z.e), True)
 786         ediff_n126 = Signal(espec, reset_less=True)
 787         msr = MultiShiftRMerge(mwid, espec)
 788         m.submodules.multishift_r = msr
 789
 790         m.d.comb += in_z.eq(self.in_z)
 791         m.d.comb += in_of.eq(self.in_of)
 792         # initialise out from in (overridden below)
 793         m.d.comb += self.out_z.eq(in_z)
 794         m.d.comb += self.out_of.eq(in_of)
 795         # normalisation decrease condition
 796         decrease = Signal(reset_less=True)
 797         m.d.comb += decrease.eq(in_z.m_msbzero)
 798         # decrease exponent
 799         with m.If(decrease):
 800             # *sigh* not entirely obvious: count leading zeros (clz)
 801             # with a PriorityEncoder: to find from the MSB
 802             # we reverse the order of the bits.
 803             temp_m = Signal(mwid, reset_less=True)
 804             temp_s = Signal(mwid+1, reset_less=True)
 805             clz = Signal((len(in_z.e), True), reset_less=True)
 806             m.d.comb += [
 807                 # cat round and guard bits back into the mantissa
 808                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
 809                 pe.i.eq(temp_m[::-1]),          # inverted
 810                 clz.eq(pe.o),                   # count zeros from MSB down
 811                 temp_s.eq(temp_m << clz),       # shift mantissa UP
 812                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
 813                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
 814             ]
 815
 816         return m
 817
 818
 819 class FPNorm1Data:
 820
 821     def __init__(self, width, id_wid):
 822         self.roundz = Signal(reset_less=True)
 823         self.z = FPNumBase(width, False)
 824         self.out_do_z = Signal(reset_less=True)
 825         self.oz = Signal(width, reset_less=True)
 826         self.mid = Signal(id_wid, reset_less=True)
 827
 828     def eq(self, i):
 829         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 830                 self.roundz.eq(i.roundz), self.mid.eq(i.mid)]
 831
 832
 833 class FPNorm1ModSingle:
 834
 835     def __init__(self, width, id_wid):
 836         self.width = width
 837         self.id_wid = id_wid
 838         self.i = self.ispec()
 839         self.o = self.ospec()
 840
 841     def ispec(self):
 842         return FPAddStage1Data(self.width, self.id_wid)
 843
 844     def ospec(self):
 845         return FPNorm1Data(self.width, self.id_wid)
 846
 847     def setup(self, m, i):
 848         """ links module to inputs and outputs
 849         """
 850         m.submodules.normalise_1 = self
 851         m.d.comb += self.i.eq(i)
 852
 853     def process(self, i):
 854         return self.o
 855
 856     def elaborate(self, platform):
 857         m = Module()
 858
 859         mwid = self.o.z.m_width+2
 860         pe = PriorityEncoder(mwid)
 861         m.submodules.norm_pe = pe
 862
 863         of = Overflow()
 864         m.d.comb += self.o.roundz.eq(of.roundz)
 865
 866         m.submodules.norm1_out_z = self.o.z
 867         m.submodules.norm1_out_overflow = of
 868         m.submodules.norm1_in_z = self.i.z
 869         m.submodules.norm1_in_overflow = self.i.of
 870
 871         i = self.ispec()
 872         m.submodules.norm1_insel_z = i.z
 873         m.submodules.norm1_insel_overflow = i.of
 874
 875         espec = (len(i.z.e), True)
 876         ediff_n126 = Signal(espec, reset_less=True)
 877         msr = MultiShiftRMerge(mwid, espec)
 878         m.submodules.multishift_r = msr
 879
 880         m.d.comb += i.eq(self.i)
 881         # initialise out from in (overridden below)
 882         m.d.comb += self.o.z.eq(i.z)
 883         m.d.comb += of.eq(i.of)
 884         # normalisation increase/decrease conditions
 885         decrease = Signal(reset_less=True)
 886         increase = Signal(reset_less=True)
 887         m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
 888         m.d.comb += increase.eq(i.z.exp_lt_n126)
 889         # decrease exponent
 890         with m.If(~self.i.out_do_z):
 891             with m.If(decrease):
 892                 # *sigh* not entirely obvious: count leading zeros (clz)
 893                 # with a PriorityEncoder: to find from the MSB
 894                 # we reverse the order of the bits.
 895                 temp_m = Signal(mwid, reset_less=True)
 896                 temp_s = Signal(mwid+1, reset_less=True)
 897                 clz = Signal((len(i.z.e), True), reset_less=True)
 898                 # make sure that the amount to decrease by does NOT
 899                 # go below the minimum non-INF/NaN exponent
 900                 limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
 901                              i.z.exp_sub_n126)
 902                 m.d.comb += [
 903                     # cat round and guard bits back into the mantissa
 904                     temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
 905                     pe.i.eq(temp_m[::-1]),          # inverted
 906                     clz.eq(limclz),                 # count zeros from MSB down
 907                     temp_s.eq(temp_m << clz),       # shift mantissa UP
 908                     self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
 909                     self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
 910                     of.m0.eq(temp_s[2]),          # copy of mantissa[0]
 911                     # overflow in bits 0..1: got shifted too (leave sticky)
 912                     of.guard.eq(temp_s[1]),       # guard
 913                     of.round_bit.eq(temp_s[0]),   # round
 914                 ]
 915             # increase exponent
 916             with m.Elif(increase):
 917                 temp_m = Signal(mwid+1, reset_less=True)
 918                 m.d.comb += [
 919                     temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
 920                                   i.z.m)),
 921                     ediff_n126.eq(i.z.N126 - i.z.e),
 922                     # connect multi-shifter to inp/out mantissa (and ediff)
 923                     msr.inp.eq(temp_m),
 924                     msr.diff.eq(ediff_n126),
 925                     self.o.z.m.eq(msr.m[3:]),
 926                     of.m0.eq(temp_s[3]),   # copy of mantissa[0]
 927                     # overflow in bits 0..1: got shifted too (leave sticky)
 928                     of.guard.eq(temp_s[2]),     # guard
 929                     of.round_bit.eq(temp_s[1]), # round
 930                     of.sticky.eq(temp_s[0]),    # sticky
 931                     self.o.z.e.eq(i.z.e + ediff_n126),
 932                 ]
 933
 934         m.d.comb += self.o.mid.eq(self.i.mid)
 935         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 936         m.d.comb += self.o.oz.eq(self.i.oz)
 937
 938         return m
 939
 940
 941 class FPNorm1ModMulti:
 942
 943     def __init__(self, width, single_cycle=True):
 944         self.width = width
 945         self.in_select = Signal(reset_less=True)
 946         self.in_z = FPNumBase(width, False)
 947         self.in_of = Overflow()
 948         self.temp_z = FPNumBase(width, False)
 949         self.temp_of = Overflow()
 950         self.out_z = FPNumBase(width, False)
 951         self.out_of = Overflow()
 952
 953     def elaborate(self, platform):
 954         m = Module()
 955
 956         m.submodules.norm1_out_z = self.out_z
 957         m.submodules.norm1_out_overflow = self.out_of
 958         m.submodules.norm1_temp_z = self.temp_z
 959         m.submodules.norm1_temp_of = self.temp_of
 960         m.submodules.norm1_in_z = self.in_z
 961         m.submodules.norm1_in_overflow = self.in_of
 962
 963         in_z = FPNumBase(self.width, False)
 964         in_of = Overflow()
 965         m.submodules.norm1_insel_z = in_z
 966         m.submodules.norm1_insel_overflow = in_of
 967
 968         # select which of temp or in z/of to use
 969         with m.If(self.in_select):
 970             m.d.comb += in_z.eq(self.in_z)
 971             m.d.comb += in_of.eq(self.in_of)
 972         with m.Else():
 973             m.d.comb += in_z.eq(self.temp_z)
 974             m.d.comb += in_of.eq(self.temp_of)
 975         # initialise out from in (overridden below)
 976         m.d.comb += self.out_z.eq(in_z)
 977         m.d.comb += self.out_of.eq(in_of)
 978         # normalisation increase/decrease conditions
 979         decrease = Signal(reset_less=True)
 980         increase = Signal(reset_less=True)
 981         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 982         m.d.comb += increase.eq(in_z.exp_lt_n126)
 983         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
 984         # decrease exponent
 985         with m.If(decrease):
 986             m.d.comb += [
 987                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
 988                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
 989                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
 990                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
 991                 self.out_of.round_bit.eq(0),        # reset round bit
 992                 self.out_of.m0.eq(in_of.guard),
 993             ]
 994         # increase exponent
 995         with m.Elif(increase):
 996             m.d.comb += [
 997                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
 998                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
 999                 self.out_of.guard.eq(in_z.m[0]),
1000                 self.out_of.m0.eq(in_z.m[1]),
1001                 self.out_of.round_bit.eq(in_of.guard),
1002                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
1003             ]
1004
1005         return m
1006
1007
class FPNorm1Single(FPState):
    """ FSM state "normalise_1", single-cycle variant.

        Wraps FPNorm1ModSingle; the state always moves on to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # single_cycle is accepted but not used by this class
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires id_wid as well as width
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"
1030
1031
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1", multi-cycle variant.

        Stays in this state while out_norm is set, and moves to "round"
        once it is clear.  stb/ack/in_accept form the handshake signals
        used by action().
    """

    def __init__(self, width, id_wid):
        # id_wid is accepted but not used by this class
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as defined in this file, has no
        # setup() method - confirm this call can actually succeed.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        # accept new input on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # latch the module's result as the next intermediate value
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            # another normalisation step is still required
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1072
1073
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Combined normalise -> round -> corrections -> pack stage.

        Acts both as the FSM state "normalise_1" and as an
        UnbufferedPipeline whose stage is the object itself.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.width = width
        self.id_wid = id_wid
        # pipeline is its own stage
        UnbufferedPipeline.__init__(self, self)

    def ispec(self):
        """ input format: same as FPNorm1ModSingle's ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: same as FPPackMod's ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding, Corrections, Pack - built in that order
        # and chained together
        ctor_args = (self.width, self.id_wid)
        stage_types = (FPNorm1ModSingle, FPRoundMod,
                       FPCorrectionsMod, FPPackMod)
        stages = [stage_type(*ctor_args) for stage_type in stage_types]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()
        self.o = packer.o

    def process(self, i):
        """ returns the output of the final (pack) module
        """
        return self.o

    def action(self, m):
        result = self.process(None)
        m.d.sync += self.out_z.eq(result)
        m.next = "pack_put_z"
1109
1110
class FPRoundData:
    """ Data bundle passed between the round, corrections and pack stages.

        * z: the number being processed (FPNumBase)
        * out_do_z / oz: bypass flag and pre-computed result
        * mid: identifier, passed along unchanged
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.out_do_z.eq(i.out_do_z))
        assigns.append(self.oz.eq(i.oz))
        assigns.append(self.mid.eq(i.mid))
        return assigns
1122
1123
class FPRoundMod:
    """ Rounding stage.

        Copies the input to the output; when out_do_z is clear and roundz
        is set, the mantissa is incremented, and if the mantissa was all
        1s the exponent is incremented as well.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object (argument is ignored)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp = self.i
        out = self.out_z
        # default: copies mid, z, out_do_z
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.roundz):
                # mantissa up
                m.d.comb += out.z.m.eq(inp.z.m + 1)
                # all 1s: exponent up
                with m.If(inp.z.m == inp.z.m1s):
                    m.d.comb += out.z.e.eq(inp.z.e + 1)

        return m
1155
1156
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its result.

        The module's output is latched into self.out_z on the sync domain;
        the state always moves on to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires id_wid as well as width
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in this class; presumably
        # provided by FPState - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1181
1182
class FPCorrectionsMod:
    """ Corrections stage.

        Copies the input to the output; when out_do_z is clear and the
        number reports is_denormalised, the exponent is replaced by N127.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (same as the input format)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object (argument is ignored)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp = self.i
        out = self.out_z
        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = out.z
        # default: copies mid, z, out_do_z
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                m.d.comb += out.z.e.eq(inp.z.N127)
        return m
1215
1216
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its result.  The state always moves on to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires id_wid as well as width
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
1240
1241
class FPPackData:
    """ Packed result: the final value z (width bits) plus identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the signals of this bundle as a list: z, then mid
        """
        signals = [self.z]
        signals.append(self.mid)
        return signals
1253
1254
class FPPackMod:
    """ Pack stage: produces the final bit-pattern.

        When out_do_z is set the pre-computed oz is used directly;
        otherwise the number is packed with FPNumOut, using inf() if the
        input reports is_overflowed and create() if not.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object (argument is ignored)
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        inp = self.i
        num = inp.z
        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(inp.mid)
        with m.If(~inp.out_do_z):
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            # bypass: result was already decided by an earlier stage
            m.d.comb += z.v.eq(inp.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1293
1294
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its result.
        The state always moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires id_wid as well as width
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # both sides are FPPackData: the value lives in ".z" and the
        # module's output object is ".o" (there is no ".v" or ".out_z")
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
1318
1319
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z.z using stb/ack.

        The strobe is raised until both stb and ack are seen high, at
        which point it is dropped and the FSM moves to to_state
        ("get_ops" when not given).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        port = self.out_z.z
        # the identifier is only forwarded when one was supplied
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z)
        with m.If(port.stb & port.ack):
            # accepted: drop the strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1343
1344
class FPPutZIdx(FPState):
    """ FSM state that presents a result on out_zs[in_mid] using stb/ack.

        Same handshake as FPPutZ, except that the destination is selected
        from out_zs by in_mid.  Moves to to_state ("get_ops" when not
        given) once stb and ack are both high.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # the destination selected by the identifier
        dest = self.out_zs[self.in_mid]
        m.d.comb += outz_stb.eq(dest.stb)
        m.d.comb += outz_ack.eq(dest.ack)
        m.d.sync += dest.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # accepted: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1370
1371
class FPOpData:
    """ Output bundle of the adder: an FPOp (z) plus identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the members of this bundle as a list: z, then mid
        """
        members = [self.z]
        members.append(self.mid)
        return members
1382
1383
class FPADDBaseMod:
    """ FSM-based floating-point adder.

        A list of states is collected (via add_state) by either
        get_compact_fragment or get_longer_fragment, and get_fragment then
        builds one FSM state per entry, calling each state's action().
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FSM states, in the order they were added
        self.states = []

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states (and wire up the stages)
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            # one FSM state per collected stage, named by its state_from
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-stage version of the adder

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__, and it
            calls setup() on FPRound/FPCorrections/FPPack with more
            arguments than those classes accept in this file.  It looks
            stale relative to the compact path - confirm before using
            compact=False.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or multi-cycle variant
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced-stage version of the adder: get-ops,
            special-cases+denorm, align+add, normalise-to-pack, put-z
        """


        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        # chain the stages together, fed from this module's input
        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # every chained stage is also an FSM state
        # (note: this loop rebinds the local name "sc")
        for mod in chainlist:
            sc = self.add_state(mod)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1506
1507
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod.

        Is both a state of an outer FSM and the owner of the inner adder
        (self.mod, which has its own states).  Operands are handed to the
        inner adder via the in_t trigger; once the inner adder's output
        trigger fires (z_done) the outer FSM moves to "put_z".
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            in_mid is accepted but not used here.
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to writing out the result
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1597
1598
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline:
        special-cases+denorm -> align+add -> normalise-to-pack
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # assignments linking the stages; applied in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for label, stage in named_stages:
            setattr(m.submodules, label, stage)
        m.d.comb += self._eqs
        return m
1615
1616
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in: num_rows inputs of FPADDBaseData through a pass-through
        stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=num_rows)
1623
1624
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out: num_rows outputs of FPPackData through a pass-through
        stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=num_rows)
1631
1632
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        # fan-in, add stage, fan-out - created in that order
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)
        self.fpadd = FPADDBasePipe(width, id_wid)
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)

        # this class's in/out are simply the in/out of the two end pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        in_ports = self.inpipe.ports()
        out_ports = self.outpipe.ports()
        self._ports = in_ports + out_ports

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # inpipe -> fpadd -> outpipe
        fanin_to_add = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += fanin_to_add
        add_to_fanout = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += add_to_fanout

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
1665
1666
class FPADD(FPID):
    """ FPADD: outer FSM around the adder.  Stages:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is both a stage of this FSM and the owner of its own
        stages, so the link to FPAddBaseMod needs an in stb/ack and an
        out stb/ack.  As with the Add1-Norm1 interaction, FPGetOp has to
        be the one that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) input pairs and of outputs
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # input operand pairs, one per row
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # results, one per row
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        self.states = []

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first input pair is wired up here
        in_a, in_b = self.rs[0]

        geta = FPGetOp("get_a", "get_b", in_a, self.width)
        self.add_state(geta)
        geta.setup(m, in_a)
        a = geta.out_op

        getb = FPGetOp("get_b", "fpadd", in_b, self.width)
        self.add_state(getb)
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        # create an input spec object for FPADDBase and register into it
        abd = ab.ispec()
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        # one FSM state per collected stage, named by its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1756
1757
if __name__ == "__main__":
    # Build the reservation-station FPADD and hand it to the nmigen CLI,
    # exposing the first operand pair, the first result and the id signals.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=(alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid]))

    # to generate verilog, just do "python fname.py convert -t v"