src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState, FPID
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,
  19                             FPGet2OpMod, FPGet2Op)
  20 from fpadd.specialcases import (FPAddSpecialCasesMod, FPAddSpecialCases,
  21                                 FPAddSpecialCasesDeNorm)
  22 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  23 from fpcommon.postcalc import FPAddStage1Data
  24 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
  25                             FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
  26 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
  27 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
  28 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
  29 from fpcommon.normtopack import FPNormToPack
  30 from fpcommon.putz import (FPPutZ, FPPutZIdx)
  31
  32
  33 class FPAddAlignMultiMod(FPState):
  34
  35     def __init__(self, width):
  36         self.in_a = FPNumBase(width)
  37         self.in_b = FPNumBase(width)
  38         self.out_a = FPNumIn(None, width)
  39         self.out_b = FPNumIn(None, width)
  40         self.exp_eq = Signal(reset_less=True)
  41
  42     def elaborate(self, platform):
  43         # This one however (single-cycle) will do the shift
  44         # in one go.
  45
  46         m = Module()
  47
  48         m.submodules.align_in_a = self.in_a
  49         m.submodules.align_in_b = self.in_b
  50         m.submodules.align_out_a = self.out_a
  51         m.submodules.align_out_b = self.out_b
  52
  53         # NOTE: this does *not* do single-cycle multi-shifting,
  54         #       it *STAYS* in the align state until exponents match
  55
  56         # exponent of a greater than b: shift b down
  57         m.d.comb += self.exp_eq.eq(0)
  58         m.d.comb += self.out_a.eq(self.in_a)
  59         m.d.comb += self.out_b.eq(self.in_b)
  60         agtb = Signal(reset_less=True)
  61         altb = Signal(reset_less=True)
  62         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
  63         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
  64         with m.If(agtb):
  65             m.d.comb += self.out_b.shift_down(self.in_b)
  66         # exponent of b greater than a: shift a down
  67         with m.Elif(altb):
  68             m.d.comb += self.out_a.shift_down(self.in_a)
  69         # exponents equal: move to next stage.
  70         with m.Else():
  71             m.d.comb += self.exp_eq.eq(1)
  72         return m
  73
  74
  75 class FPAddAlignMulti(FPState):
  76
  77     def __init__(self, width, id_wid):
  78         FPState.__init__(self, "align")
  79         self.mod = FPAddAlignMultiMod(width)
  80         self.out_a = FPNumIn(None, width)
  81         self.out_b = FPNumIn(None, width)
  82         self.exp_eq = Signal(reset_less=True)
  83
  84     def setup(self, m, in_a, in_b):
  85         """ links module to inputs and outputs
  86         """
  87         m.submodules.align = self.mod
  88         m.d.comb += self.mod.in_a.eq(in_a)
  89         m.d.comb += self.mod.in_b.eq(in_b)
  90         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
  91         m.d.sync += self.out_a.eq(self.mod.out_a)
  92         m.d.sync += self.out_b.eq(self.mod.out_b)
  93
  94     def action(self, m):
  95         with m.If(self.exp_eq):
  96             m.next = "add_0"
  97
  98
  99 class FPNumIn2Ops:
 100
 101     def __init__(self, width, id_wid):
 102         self.a = FPNumIn(None, width)
 103         self.b = FPNumIn(None, width)
 104         self.z = FPNumOut(width, False)
 105         self.out_do_z = Signal(reset_less=True)
 106         self.oz = Signal(width, reset_less=True)
 107         self.mid = Signal(id_wid, reset_less=True)
 108
 109     def eq(self, i):
 110         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 111                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 112
 113
 114 class FPAddAlignSingleMod:
 115
 116     def __init__(self, width, id_wid):
 117         self.width = width
 118         self.id_wid = id_wid
 119         self.i = self.ispec()
 120         self.o = self.ospec()
 121
 122     def ispec(self):
 123         return FPSCData(self.width, self.id_wid)
 124
 125     def ospec(self):
 126         return FPNumIn2Ops(self.width, self.id_wid)
 127
 128     def process(self, i):
 129         return self.o
 130
 131     def setup(self, m, i):
 132         """ links module to inputs and outputs
 133         """
 134         m.submodules.align = self
 135         m.d.comb += self.i.eq(i)
 136
 137     def elaborate(self, platform):
 138         """ Aligns A against B or B against A, depending on which has the
 139             greater exponent.  This is done in a *single* cycle using
 140             variable-width bit-shift
 141
 142             the shifter used here is quite expensive in terms of gates.
 143             Mux A or B in (and out) into temporaries, as only one of them
 144             needs to be aligned against the other
 145         """
 146         m = Module()
 147
 148         m.submodules.align_in_a = self.i.a
 149         m.submodules.align_in_b = self.i.b
 150         m.submodules.align_out_a = self.o.a
 151         m.submodules.align_out_b = self.o.b
 152
 153         # temporary (muxed) input and output to be shifted
 154         t_inp = FPNumBase(self.width)
 155         t_out = FPNumIn(None, self.width)
 156         espec = (len(self.i.a.e), True)
 157         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 158         m.submodules.align_t_in = t_inp
 159         m.submodules.align_t_out = t_out
 160         m.submodules.multishift_r = msr
 161
 162         ediff = Signal(espec, reset_less=True)
 163         ediffr = Signal(espec, reset_less=True)
 164         tdiff = Signal(espec, reset_less=True)
 165         elz = Signal(reset_less=True)
 166         egz = Signal(reset_less=True)
 167
 168         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 169         m.d.comb += msr.inp.eq(t_inp.m)
 170         m.d.comb += msr.diff.eq(tdiff)
 171         m.d.comb += t_out.m.eq(msr.m)
 172         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 173         m.d.comb += t_out.s.eq(t_inp.s)
 174
 175         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 176         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 177         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 178         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 179
 180         # default: A-exp == B-exp, A and B untouched (fall through)
 181         m.d.comb += self.o.a.eq(self.i.a)
 182         m.d.comb += self.o.b.eq(self.i.b)
 183         # only one shifter (muxed)
 184         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 185         # exponent of a greater than b: shift b down
 186         with m.If(~self.i.out_do_z):
 187             with m.If(egz):
 188                 m.d.comb += [t_inp.eq(self.i.b),
 189                              tdiff.eq(ediff),
 190                              self.o.b.eq(t_out),
 191                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 192                             ]
 193             # exponent of b greater than a: shift a down
 194             with m.Elif(elz):
 195                 m.d.comb += [t_inp.eq(self.i.a),
 196                              tdiff.eq(ediffr),
 197                              self.o.a.eq(t_out),
 198                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 199                             ]
 200
 201         m.d.comb += self.o.mid.eq(self.i.mid)
 202         m.d.comb += self.o.z.eq(self.i.z)
 203         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 204         m.d.comb += self.o.oz.eq(self.i.oz)
 205
 206         return m
 207
 208
 209 class FPAddAlignSingle(FPState):
 210
 211     def __init__(self, width, id_wid):
 212         FPState.__init__(self, "align")
 213         self.mod = FPAddAlignSingleMod(width, id_wid)
 214         self.out_a = FPNumIn(None, width)
 215         self.out_b = FPNumIn(None, width)
 216
 217     def setup(self, m, i):
 218         """ links module to inputs and outputs
 219         """
 220         self.mod.setup(m, i)
 221
 222         # NOTE: could be done as comb
 223         m.d.sync += self.out_a.eq(self.mod.out_a)
 224         m.d.sync += self.out_b.eq(self.mod.out_b)
 225
 226     def action(self, m):
 227         m.next = "add_0"
 228
 229
 230 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 231
 232     def __init__(self, width, id_wid):
 233         FPState.__init__(self, "align")
 234         self.width = width
 235         self.id_wid = id_wid
 236         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 237         self.a1o = self.ospec()
 238
 239     def ispec(self):
 240         return FPSCData(self.width, self.id_wid)
 241
 242     def ospec(self):
 243         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 244
 245     def setup(self, m, i):
 246         """ links module to inputs and outputs
 247         """
 248
 249         # chain AddAlignSingle, AddStage0 and AddStage1
 250         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 251         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 252         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 253
 254         chain = StageChain([mod, a0mod, a1mod])
 255         chain.setup(m, i)
 256
 257         self.o = a1mod.o
 258
 259     def process(self, i):
 260         return self.o
 261
 262     def action(self, m):
 263         m.d.sync += self.a1o.eq(self.process(None))
 264         m.next = "normalise_1"
 265
 266
 267 class FPAddStage0Data:
 268
 269     def __init__(self, width, id_wid):
 270         self.z = FPNumBase(width, False)
 271         self.out_do_z = Signal(reset_less=True)
 272         self.oz = Signal(width, reset_less=True)
 273         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 274         self.mid = Signal(id_wid, reset_less=True)
 275
 276     def eq(self, i):
 277         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 278                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 279
 280
 281 class FPAddStage0Mod:
 282
 283     def __init__(self, width, id_wid):
 284         self.width = width
 285         self.id_wid = id_wid
 286         self.i = self.ispec()
 287         self.o = self.ospec()
 288
 289     def ispec(self):
 290         return FPSCData(self.width, self.id_wid)
 291
 292     def ospec(self):
 293         return FPAddStage0Data(self.width, self.id_wid)
 294
 295     def process(self, i):
 296         return self.o
 297
 298     def setup(self, m, i):
 299         """ links module to inputs and outputs
 300         """
 301         m.submodules.add0 = self
 302         m.d.comb += self.i.eq(i)
 303
 304     def elaborate(self, platform):
 305         m = Module()
 306         m.submodules.add0_in_a = self.i.a
 307         m.submodules.add0_in_b = self.i.b
 308         m.submodules.add0_out_z = self.o.z
 309
 310         # store intermediate tests (and zero-extended mantissas)
 311         seq = Signal(reset_less=True)
 312         mge = Signal(reset_less=True)
 313         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 314         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 315         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 316                      mge.eq(self.i.a.m >= self.i.b.m),
 317                      am0.eq(Cat(self.i.a.m, 0)),
 318                      bm0.eq(Cat(self.i.b.m, 0))
 319                     ]
 320         # same-sign (both negative or both positive) add mantissas
 321         with m.If(~self.i.out_do_z):
 322             m.d.comb += self.o.z.e.eq(self.i.a.e)
 323             with m.If(seq):
 324                 m.d.comb += [
 325                     self.o.tot.eq(am0 + bm0),
 326                     self.o.z.s.eq(self.i.a.s)
 327                 ]
 328             # a mantissa greater than b, use a
 329             with m.Elif(mge):
 330                 m.d.comb += [
 331                     self.o.tot.eq(am0 - bm0),
 332                     self.o.z.s.eq(self.i.a.s)
 333                 ]
 334             # b mantissa greater than a, use b
 335             with m.Else():
 336                 m.d.comb += [
 337                     self.o.tot.eq(bm0 - am0),
 338                     self.o.z.s.eq(self.i.b.s)
 339             ]
 340
 341         m.d.comb += self.o.oz.eq(self.i.oz)
 342         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 343         m.d.comb += self.o.mid.eq(self.i.mid)
 344         return m
 345
 346
 347 class FPAddStage0(FPState):
 348     """ First stage of add.  covers same-sign (add) and subtract
 349         special-casing when mantissas are greater or equal, to
 350         give greatest accuracy.
 351     """
 352
 353     def __init__(self, width, id_wid):
 354         FPState.__init__(self, "add_0")
 355         self.mod = FPAddStage0Mod(width)
 356         self.o = self.mod.ospec()
 357
 358     def setup(self, m, i):
 359         """ links module to inputs and outputs
 360         """
 361         self.mod.setup(m, i)
 362
 363         # NOTE: these could be done as combinatorial (merge add0+add1)
 364         m.d.sync += self.o.eq(self.mod.o)
 365
 366     def action(self, m):
 367         m.next = "add_1"
 368
 369
 370 class FPAddStage1Mod(FPState):
 371     """ Second stage of add: preparation for normalisation.
 372         detects when tot sum is too big (tot[27] is kinda a carry bit)
 373     """
 374
 375     def __init__(self, width, id_wid):
 376         self.width = width
 377         self.id_wid = id_wid
 378         self.i = self.ispec()
 379         self.o = self.ospec()
 380
 381     def ispec(self):
 382         return FPAddStage0Data(self.width, self.id_wid)
 383
 384     def ospec(self):
 385         return FPAddStage1Data(self.width, self.id_wid)
 386
 387     def process(self, i):
 388         return self.o
 389
 390     def setup(self, m, i):
 391         """ links module to inputs and outputs
 392         """
 393         m.submodules.add1 = self
 394         m.submodules.add1_out_overflow = self.o.of
 395
 396         m.d.comb += self.i.eq(i)
 397
 398     def elaborate(self, platform):
 399         m = Module()
 400         m.d.comb += self.o.z.eq(self.i.z)
 401         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 402         with m.If(~self.i.out_do_z):
 403             with m.If(self.i.tot[-1]):
 404                 m.d.comb += [
 405                     self.o.z.m.eq(self.i.tot[4:]),
 406                     self.o.of.m0.eq(self.i.tot[4]),
 407                     self.o.of.guard.eq(self.i.tot[3]),
 408                     self.o.of.round_bit.eq(self.i.tot[2]),
 409                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 410                     self.o.z.e.eq(self.i.z.e + 1)
 411             ]
 412             # tot[-1] (MSB) zero case
 413             with m.Else():
 414                 m.d.comb += [
 415                     self.o.z.m.eq(self.i.tot[3:]),
 416                     self.o.of.m0.eq(self.i.tot[3]),
 417                     self.o.of.guard.eq(self.i.tot[2]),
 418                     self.o.of.round_bit.eq(self.i.tot[1]),
 419                     self.o.of.sticky.eq(self.i.tot[0])
 420             ]
 421
 422         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 423         m.d.comb += self.o.oz.eq(self.i.oz)
 424         m.d.comb += self.o.mid.eq(self.i.mid)
 425
 426         return m
 427
 428
 429 class FPAddStage1(FPState):
 430
 431     def __init__(self, width, id_wid):
 432         FPState.__init__(self, "add_1")
 433         self.mod = FPAddStage1Mod(width)
 434         self.out_z = FPNumBase(width, False)
 435         self.out_of = Overflow()
 436         self.norm_stb = Signal()
 437
 438     def setup(self, m, i):
 439         """ links module to inputs and outputs
 440         """
 441         self.mod.setup(m, i)
 442
 443         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 444
 445         m.d.sync += self.out_of.eq(self.mod.out_of)
 446         m.d.sync += self.out_z.eq(self.mod.out_z)
 447         m.d.sync += self.norm_stb.eq(1)
 448
 449     def action(self, m):
 450         m.next = "normalise_1"
 451
 452
 453
 454
 455 class FPOpData:
 456     def __init__(self, width, id_wid):
 457         self.z = FPOp(width)
 458         self.mid = Signal(id_wid, reset_less=True)
 459
 460     def eq(self, i):
 461         return [self.z.eq(i.z), self.mid.eq(i.mid)]
 462
 463     def ports(self):
 464         return [self.z, self.mid]
 465
 466
 467 class FPADDBaseMod:
 468
 469     def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
 470         """ IEEE754 FP Add
 471
 472             * width: bit-width of IEEE754.  supported: 16, 32, 64
 473             * id_wid: an identifier that is sync-connected to the input
 474             * single_cycle: True indicates each stage to complete in 1 clock
 475             * compact: True indicates a reduced number of stages
 476         """
 477         self.width = width
 478         self.id_wid = id_wid
 479         self.single_cycle = single_cycle
 480         self.compact = compact
 481
 482         self.in_t = Trigger()
 483         self.i = self.ispec()
 484         self.o = self.ospec()
 485
 486         self.states = []
 487
 488     def ispec(self):
 489         return FPADDBaseData(self.width, self.id_wid)
 490
 491     def ospec(self):
 492         return FPOpData(self.width, self.id_wid)
 493
 494     def add_state(self, state):
 495         self.states.append(state)
 496         return state
 497
 498     def get_fragment(self, platform=None):
 499         """ creates the HDL code-fragment for FPAdd
 500         """
 501         m = Module()
 502         m.submodules.out_z = self.o.z
 503         m.submodules.in_t = self.in_t
 504         if self.compact:
 505             self.get_compact_fragment(m, platform)
 506         else:
 507             self.get_longer_fragment(m, platform)
 508
 509         with m.FSM() as fsm:
 510
 511             for state in self.states:
 512                 with m.State(state.state_from):
 513                     state.action(m)
 514
 515         return m
 516
 517     def get_longer_fragment(self, m, platform=None):
 518
 519         get = self.add_state(FPGet2Op("get_ops", "special_cases",
 520                                       self.width))
 521         get.setup(m, self.i)
 522         a = get.out_op1
 523         b = get.out_op2
 524         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 525
 526         sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
 527         sc.setup(m, a, b, self.in_mid)
 528
 529         dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
 530         dn.setup(m, a, b, sc.in_mid)
 531
 532         if self.single_cycle:
 533             alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
 534             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 535         else:
 536             alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
 537             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 538
 539         add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
 540         add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)
 541
 542         add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
 543         add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)
 544
 545         if self.single_cycle:
 546             n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
 547             n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
 548         else:
 549             n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
 550             n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)
 551
 552         rn = self.add_state(FPRound(self.width, self.id_wid))
 553         rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)
 554
 555         cor = self.add_state(FPCorrections(self.width, self.id_wid))
 556         cor.setup(m, rn.out_z, rn.in_mid)
 557
 558         pa = self.add_state(FPPack(self.width, self.id_wid))
 559         pa.setup(m, cor.out_z, rn.in_mid)
 560
 561         ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
 562                                     pa.in_mid, self.out_mid))
 563
 564         pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
 565                                     pa.in_mid, self.out_mid))
 566
 567     def get_compact_fragment(self, m, platform=None):
 568
 569
 570         get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
 571         sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
 572         alm = FPAddAlignSingleAdd(self.width, self.id_wid)
 573         n1 = FPNormToPack(self.width, self.id_wid)
 574
 575         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 576
 577         chainlist = [get, sc, alm, n1]
 578         chain = StageChain(chainlist, specallocate=True)
 579         chain.setup(m, self.i)
 580
 581         for mod in chainlist:
 582             sc = self.add_state(mod)
 583
 584         ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
 585                                     n1.out_z.mid, self.o.mid))
 586
 587         #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
 588         #                            sc.o.mid, self.o.mid))
 589
 590
 591 class FPADDBase(FPState):
 592
 593     def __init__(self, width, id_wid=None, single_cycle=False):
 594         """ IEEE754 FP Add
 595
 596             * width: bit-width of IEEE754.  supported: 16, 32, 64
 597             * id_wid: an identifier that is sync-connected to the input
 598             * single_cycle: True indicates each stage to complete in 1 clock
 599         """
 600         FPState.__init__(self, "fpadd")
 601         self.width = width
 602         self.single_cycle = single_cycle
 603         self.mod = FPADDBaseMod(width, id_wid, single_cycle)
 604         self.o = self.ospec()
 605
 606         self.in_t = Trigger()
 607         self.i = self.ispec()
 608
 609         self.z_done = Signal(reset_less=True) # connects to out_z Strobe
 610         self.in_accept = Signal(reset_less=True)
 611         self.add_stb = Signal(reset_less=True)
 612         self.add_ack = Signal(reset=0, reset_less=True)
 613
 614     def ispec(self):
 615         return self.mod.ispec()
 616
 617     def ospec(self):
 618         return self.mod.ospec()
 619
 620     def setup(self, m, i, add_stb, in_mid):
 621         m.d.comb += [self.i.eq(i),
 622                      self.mod.i.eq(self.i),
 623                      self.z_done.eq(self.mod.o.z.trigger),
 624                      #self.add_stb.eq(add_stb),
 625                      self.mod.in_t.stb.eq(self.in_t.stb),
 626                      self.in_t.ack.eq(self.mod.in_t.ack),
 627                      self.o.mid.eq(self.mod.o.mid),
 628                      self.o.z.v.eq(self.mod.o.z.v),
 629                      self.o.z.stb.eq(self.mod.o.z.stb),
 630                      self.mod.o.z.ack.eq(self.o.z.ack),
 631                     ]
 632
 633         m.d.sync += self.add_stb.eq(add_stb)
 634         m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
 635         m.d.sync += self.o.z.ack.eq(0) # likewise
 636         #m.d.sync += self.in_t.stb.eq(0)
 637
 638         m.submodules.fpadd = self.mod
 639
 640     def action(self, m):
 641
 642         # in_accept is set on incoming strobe HIGH and ack LOW.
 643         m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))
 644
 645         #with m.If(self.in_t.ack):
 646         #    m.d.sync += self.in_t.stb.eq(0)
 647         with m.If(~self.z_done):
 648             # not done: test for accepting an incoming operand pair
 649             with m.If(self.in_accept):
 650                 m.d.sync += [
 651                     self.add_ack.eq(1), # acknowledge receipt...
 652                     self.in_t.stb.eq(1), # initiate add
 653                 ]
 654             with m.Else():
 655                 m.d.sync += [self.add_ack.eq(0),
 656                              self.in_t.stb.eq(0),
 657                              self.o.z.ack.eq(1),
 658                             ]
 659         with m.Else():
 660             # done: acknowledge, and write out id and value
 661             m.d.sync += [self.add_ack.eq(1),
 662                          self.in_t.stb.eq(0)
 663                         ]
 664             m.next = "put_z"
 665
 666             return
 667
 668             if self.in_mid is not None:
 669                 m.d.sync += self.out_mid.eq(self.mod.out_mid)
 670
 671             m.d.sync += [
 672               self.out_z.v.eq(self.mod.out_z.v)
 673             ]
 674             # move to output state on detecting z ack
 675             with m.If(self.out_z.trigger):
 676                 m.d.sync += self.out_z.stb.eq(0)
 677                 m.next = "put_z"
 678             with m.Else():
 679                 m.d.sync += self.out_z.stb.eq(1)
 680
 681
 682 class FPADDBasePipe(ControlBase):
 683     def __init__(self, width, id_wid):
 684         ControlBase.__init__(self)
 685         self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
 686         self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
 687         self.pipe3 = FPNormToPack(width, id_wid)
 688
 689         self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
 690
 691     def elaborate(self, platform):
 692         m = Module()
 693         m.submodules.scnorm = self.pipe1
 694         m.submodules.addalign = self.pipe2
 695         m.submodules.normpack = self.pipe3
 696         m.d.comb += self._eqs
 697         return m
 698
 699
 700 class FPADDInMuxPipe(PriorityCombMuxInPipe):
 701     def __init__(self, width, id_wid, num_rows):
 702         self.num_rows = num_rows
 703         def iospec(): return FPADDBaseData(width, id_wid)
 704         stage = PassThroughStage(iospec)
 705         PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
 706
 707
 708 class FPADDMuxOutPipe(CombMuxOutPipe):
 709     def __init__(self, width, id_wid, num_rows):
 710         self.num_rows = num_rows
 711         def iospec(): return FPPackData(width, id_wid)
 712         stage = PassThroughStage(iospec)
 713         CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
 714
 715
 716 class FPADDMuxInOut:
 717     """ Reservation-Station version of FPADD pipeline.
 718
 719         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 720         * 3-stage adder pipeline
 721         * fan-out on outputs (an array of FPPackData: z,mid)
 722
 723         Fan-in and Fan-out are combinatorial.
 724     """
 725     def __init__(self, width, id_wid, num_rows):
 726         self.num_rows = num_rows
 727         self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
 728         self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
 729         self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out
 730
 731         self.p = self.inpipe.p  # kinda annoying,
 732         self.n = self.outpipe.n # use pipe in/out as this class in/out
 733         self._ports = self.inpipe.ports() + self.outpipe.ports()
 734
 735     def elaborate(self, platform):
 736         m = Module()
 737         m.submodules.inpipe = self.inpipe
 738         m.submodules.fpadd = self.fpadd
 739         m.submodules.outpipe = self.outpipe
 740
 741         m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
 742         m.d.comb += self.fpadd.connect_to_next(self.outpipe)
 743
 744         return m
 745
 746     def ports(self):
 747         return self._ports
 748
 749
 750 class FPADD(FPID):
 751     """ FPADD: stages as follows:
 752
 753         FPGetOp (a)
 754            |
 755         FPGetOp (b)
 756            |
 757         FPAddBase---> FPAddBaseMod
 758            |            |
 759         PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
 760
 761         FPAddBase is tricky: it is both a stage and *has* stages.
 762         Connection to FPAddBaseMod therefore requires an in stb/ack
 763         and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
 764         needs to be the thing that raises the incoming stb.
 765     """
 766
 767     def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
 768         """ IEEE754 FP Add
 769
 770             * width: bit-width of IEEE754.  supported: 16, 32, 64
 771             * id_wid: an identifier that is sync-connected to the input
 772             * single_cycle: True indicates each stage to complete in 1 clock
 773         """
 774         self.width = width
 775         self.id_wid = id_wid
 776         self.single_cycle = single_cycle
 777
 778         #self.out_z = FPOp(width)
 779         self.ids = FPID(id_wid)
 780
 781         rs = []
 782         for i in range(rs_sz):
 783             in_a  = FPOp(width)
 784             in_b  = FPOp(width)
 785             in_a.name = "in_a_%d" % i
 786             in_b.name = "in_b_%d" % i
 787             rs.append((in_a, in_b))
 788         self.rs = Array(rs)
 789
 790         res = []
 791         for i in range(rs_sz):
 792             out_z = FPOp(width)
 793             out_z.name = "out_z_%d" % i
 794             res.append(out_z)
 795         self.res = Array(res)
 796
 797         self.states = []
 798
 799     def add_state(self, state):
 800         self.states.append(state)
 801         return state
 802
 803     def get_fragment(self, platform=None):
 804         """ creates the HDL code-fragment for FPAdd
 805         """
 806         m = Module()
 807         m.submodules += self.rs
 808
 809         in_a = self.rs[0][0]
 810         in_b = self.rs[0][1]
 811
 812         geta = self.add_state(FPGetOp("get_a", "get_b",
 813                                       in_a, self.width))
 814         geta.setup(m, in_a)
 815         a = geta.out_op
 816
 817         getb = self.add_state(FPGetOp("get_b", "fpadd",
 818                                       in_b, self.width))
 819         getb.setup(m, in_b)
 820         b = getb.out_op
 821
 822         ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
 823         ab = self.add_state(ab)
 824         abd = ab.ispec() # create an input spec object for FPADDBase
 825         m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
 826         ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
 827         o = ab.o
 828
 829         pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
 830                                     o.mid, "get_a"))
 831
 832         with m.FSM() as fsm:
 833
 834             for state in self.states:
 835                 with m.State(state.state_from):
 836                     state.action(m)
 837
 838         return m
 839
 840
 841 if __name__ == "__main__":
 842     if True:
 843         alu = FPADD(width=32, id_wid=5, single_cycle=True)
 844         main(alu, ports=alu.rs[0][0].ports() + \
 845                         alu.rs[0][1].ports() + \
 846                         alu.res[0].ports() + \
 847                         [alu.ids.in_mid, alu.ids.out_mid])
 848     else:
 849         alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
 850         main(alu, ports=[alu.in_a, alu.in_b] + \
 851                         alu.in_t.ports() + \
 852                         alu.out_z.ports() + \
 853                         [alu.in_mid, alu.out_mid])
 854
 855
 856     # works... but don't use, just do "python fname.py convert -t v"
 857     #print (verilog.convert(alu, ports=[
 858     #                        ports=alu.in_a.ports() + \
 859     #                              alu.in_b.ports() + \
 860     #                              alu.out_z.ports())