src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState, FPID
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,
  19                             FPGet2OpMod, FPGet2Op)
  20 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  21 from fpcommon.postcalc import FPAddStage1Data
  22 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
  23                             FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
  24 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
  25 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
  26 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
  27 from fpcommon.normtopack import FPNormToPack
  28 from fpcommon.putz import (FPPutZ, FPPutZIdx)
  29
  30 from fpadd.specialcases import (FPAddSpecialCasesMod, FPAddSpecialCases,
  31                                 FPAddSpecialCasesDeNorm)
  32 from fpadd.align import (FPAddAlignMulti, FPAddAlignMultiMod, FPNumIn2Ops,
  33                          FPAddAlignSingleMod, FPAddAlignSingle)
  34 from fpadd.add0 import (FPAddStage0Data, FPAddStage0Mod, FPAddStage0)
  35 from fpadd.add1 import (FPAddStage1Mod, FPAddStage1)
  36 from fpadd.addstages import FPAddAlignSingleAdd
  37
  38
  39 class FPOpData:
  40     def __init__(self, width, id_wid):
  41         self.z = FPOp(width)
  42         self.mid = Signal(id_wid, reset_less=True)
  43
  44     def eq(self, i):
  45         return [self.z.eq(i.z), self.mid.eq(i.mid)]
  46
  47     def ports(self):
  48         return [self.z, self.mid]
  49
  50
class FPADDBaseMod:
    """ FSM-based FP adder assembled from a list of state objects.

        State objects are registered through add_state() and are turned
        into the states of a single nmigen FSM by get_fragment().  Two
        alternative sets of states can be built, selected by "compact".
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        # input and output data bundles, created from the spec functions
        self.i = self.ispec()
        self.o = self.ospec()

        # state objects, in registration order (see add_state)
        self.states = []

    def ispec(self):
        """ returns a new input data object (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list used by get_fragment, and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            Builds either the compact or the longer set of states
            (depending on self.compact), then creates one FSM state per
            registered state object, named by its state_from attribute,
            whose body is produced by the object's action(m).
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the non-compact set of states, wiring each stage's
            setup() to the outputs of the previous one.

            NOTE(review): this method reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned by this class (the
            constructor creates self.i and self.o instead).  This path
            looks stale and would presumably fail with compact=False -
            TODO confirm against the current stage APIs before using it.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or multi-cycle variant.
        # NOTE(review): the id passed here is add0.in_mid, not add1's -
        # confirm whether that is intentional.
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from the packer, one from special-cases
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the compact set of states: four stages joined by a
            StageChain fed from self.i, followed by a "pack_put_z" state.
        """

        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # register every chained stage as an FSM state.  note that this
        # rebinds the local name "sc" on each iteration; the value is not
        # used afterwards.
        for mod in chainlist:
            sc = self.add_state(mod)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
 172
 173
 174 class FPADDBase(FPState):
 175
 176     def __init__(self, width, id_wid=None, single_cycle=False):
 177         """ IEEE754 FP Add
 178
 179             * width: bit-width of IEEE754.  supported: 16, 32, 64
 180             * id_wid: an identifier that is sync-connected to the input
 181             * single_cycle: True indicates each stage to complete in 1 clock
 182         """
 183         FPState.__init__(self, "fpadd")
 184         self.width = width
 185         self.single_cycle = single_cycle
 186         self.mod = FPADDBaseMod(width, id_wid, single_cycle)
 187         self.o = self.ospec()
 188
 189         self.in_t = Trigger()
 190         self.i = self.ispec()
 191
 192         self.z_done = Signal(reset_less=True) # connects to out_z Strobe
 193         self.in_accept = Signal(reset_less=True)
 194         self.add_stb = Signal(reset_less=True)
 195         self.add_ack = Signal(reset=0, reset_less=True)
 196
 197     def ispec(self):
 198         return self.mod.ispec()
 199
 200     def ospec(self):
 201         return self.mod.ospec()
 202
 203     def setup(self, m, i, add_stb, in_mid):
 204         m.d.comb += [self.i.eq(i),
 205                      self.mod.i.eq(self.i),
 206                      self.z_done.eq(self.mod.o.z.trigger),
 207                      #self.add_stb.eq(add_stb),
 208                      self.mod.in_t.stb.eq(self.in_t.stb),
 209                      self.in_t.ack.eq(self.mod.in_t.ack),
 210                      self.o.mid.eq(self.mod.o.mid),
 211                      self.o.z.v.eq(self.mod.o.z.v),
 212                      self.o.z.stb.eq(self.mod.o.z.stb),
 213                      self.mod.o.z.ack.eq(self.o.z.ack),
 214                     ]
 215
 216         m.d.sync += self.add_stb.eq(add_stb)
 217         m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
 218         m.d.sync += self.o.z.ack.eq(0) # likewise
 219         #m.d.sync += self.in_t.stb.eq(0)
 220
 221         m.submodules.fpadd = self.mod
 222
 223     def action(self, m):
 224
 225         # in_accept is set on incoming strobe HIGH and ack LOW.
 226         m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))
 227
 228         #with m.If(self.in_t.ack):
 229         #    m.d.sync += self.in_t.stb.eq(0)
 230         with m.If(~self.z_done):
 231             # not done: test for accepting an incoming operand pair
 232             with m.If(self.in_accept):
 233                 m.d.sync += [
 234                     self.add_ack.eq(1), # acknowledge receipt...
 235                     self.in_t.stb.eq(1), # initiate add
 236                 ]
 237             with m.Else():
 238                 m.d.sync += [self.add_ack.eq(0),
 239                              self.in_t.stb.eq(0),
 240                              self.o.z.ack.eq(1),
 241                             ]
 242         with m.Else():
 243             # done: acknowledge, and write out id and value
 244             m.d.sync += [self.add_ack.eq(1),
 245                          self.in_t.stb.eq(0)
 246                         ]
 247             m.next = "put_z"
 248
 249             return
 250
 251             if self.in_mid is not None:
 252                 m.d.sync += self.out_mid.eq(self.mod.out_mid)
 253
 254             m.d.sync += [
 255               self.out_z.v.eq(self.mod.out_z.v)
 256             ]
 257             # move to output state on detecting z ack
 258             with m.If(self.out_z.trigger):
 259                 m.d.sync += self.out_z.stb.eq(0)
 260                 m.next = "put_z"
 261             with m.Else():
 262                 m.d.sync += self.out_z.stb.eq(1)
 263
 264
 265 class FPADDBasePipe(ControlBase):
 266     def __init__(self, width, id_wid):
 267         ControlBase.__init__(self)
 268         self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
 269         self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
 270         self.pipe3 = FPNormToPack(width, id_wid)
 271
 272         self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
 273
 274     def elaborate(self, platform):
 275         m = Module()
 276         m.submodules.scnorm = self.pipe1
 277         m.submodules.addalign = self.pipe2
 278         m.submodules.normpack = self.pipe3
 279         m.d.comb += self._eqs
 280         return m
 281
 282
 283 class FPADDInMuxPipe(PriorityCombMuxInPipe):
 284     def __init__(self, width, id_wid, num_rows):
 285         self.num_rows = num_rows
 286         def iospec(): return FPADDBaseData(width, id_wid)
 287         stage = PassThroughStage(iospec)
 288         PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
 289
 290
 291 class FPADDMuxOutPipe(CombMuxOutPipe):
 292     def __init__(self, width, id_wid, num_rows):
 293         self.num_rows = num_rows
 294         def iospec(): return FPPackData(width, id_wid)
 295         stage = PassThroughStage(iospec)
 296         CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
 297
 298
 299 class FPADDMuxInOut:
 300     """ Reservation-Station version of FPADD pipeline.
 301
 302         * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
 303         * 3-stage adder pipeline
 304         * fan-out on outputs (an array of FPPackData: z,mid)
 305
 306         Fan-in and Fan-out are combinatorial.
 307     """
 308     def __init__(self, width, id_wid, num_rows):
 309         self.num_rows = num_rows
 310         self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
 311         self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
 312         self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out
 313
 314         self.p = self.inpipe.p  # kinda annoying,
 315         self.n = self.outpipe.n # use pipe in/out as this class in/out
 316         self._ports = self.inpipe.ports() + self.outpipe.ports()
 317
 318     def elaborate(self, platform):
 319         m = Module()
 320         m.submodules.inpipe = self.inpipe
 321         m.submodules.fpadd = self.fpadd
 322         m.submodules.outpipe = self.outpipe
 323
 324         m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
 325         m.d.comb += self.fpadd.connect_to_next(self.outpipe)
 326
 327         return m
 328
 329     def ports(self):
 330         return self._ports
 331
 332
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.

        NOTE(review): this class derives from FPID but its constructor
        does not call FPID.__init__; a separate FPID instance is held in
        self.ids instead - confirm the base class is still needed.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs, and of result
              slots, to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # input operand pairs, named in_a_<n> / in_b_<n>
        rs = []
        for i in range(rs_sz):
            in_a  = FPOp(width)
            in_b  = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # result slots, named out_z_<n>
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # state objects, in registration order (see add_state)
        self.states = []

    def add_state(self, state):
        """ appends state to the list used by get_fragment, and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            Registers four states - "get_a", "get_b", the FPADDBase state
            and "put_z" - then creates one FSM state per registered state
            object, named by its state_from attribute, whose body is
            produced by the object's action(m).
        """
        m = Module()
        # NOTE(review): self.rs is an Array of (in_a, in_b) tuples -
        # confirm that adding it to m.submodules like this is accepted.
        m.submodules += self.rs

        # only the first operand pair is wired up here
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        # operands and id are registered (sync) into the spec object
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # output state: given the adder's z/mid and the result array
        pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                    o.mid, "get_a"))

        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
 422
 423
 424 if __name__ == "__main__":
 425     if True:
 426         alu = FPADD(width=32, id_wid=5, single_cycle=True)
 427         main(alu, ports=alu.rs[0][0].ports() + \
 428                         alu.rs[0][1].ports() + \
 429                         alu.res[0].ports() + \
 430                         [alu.ids.in_mid, alu.ids.out_mid])
 431     else:
 432         alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
 433         main(alu, ports=[alu.in_a, alu.in_b] + \
 434                         alu.in_t.ports() + \
 435                         alu.out_z.ports() + \
 436                         [alu.in_mid, alu.out_mid])
 437
 438
 439     # works... but don't use, just do "python fname.py convert -t v"
 440     #print (verilog.convert(alu, ports=[
 441     #                        ports=alu.in_a.ports() + \
 442     #                              alu.in_b.ports() + \
 443     #                              alu.out_z.ports())