src/add/fpadd/statemachine.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 from fpbase import FPState, FPID
  18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData,
  19                             FPGet2OpMod, FPGet2Op)
  20 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
  21 from fpcommon.postcalc import FPAddStage1Data
  22 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
  23                             FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
  24 from fpcommon.roundz import (FPRoundData, FPRoundMod, FPRound)
  25 from fpcommon.corrections import (FPCorrectionsMod, FPCorrections)
  26 from fpcommon.pack import (FPPackData, FPPackMod, FPPack)
  27 from fpcommon.normtopack import FPNormToPack
  28 from fpcommon.putz import (FPPutZ, FPPutZIdx)
  29
  30 from fpadd.specialcases import (FPAddSpecialCasesMod, FPAddSpecialCases,
  31                                 FPAddSpecialCasesDeNorm)
  32 from fpadd.align import (FPAddAlignMulti, FPAddAlignMultiMod, FPNumIn2Ops,
  33                          FPAddAlignSingleMod, FPAddAlignSingle)
  34 from fpadd.add0 import (FPAddStage0Data, FPAddStage0Mod, FPAddStage0)
  35 from fpadd.add1 import (FPAddStage1Mod, FPAddStage1)
  36 from fpadd.addstages import FPAddAlignSingleAdd
  37
  38
  39 class FPOpData:
  40     def __init__(self, width, id_wid):
  41         self.z = FPOp(width)
  42         self.mid = Signal(id_wid, reset_less=True)
  43
  44     def eq(self, i):
  45         return [self.z.eq(i.z), self.mid.eq(i.mid)]
  46
  47     def ports(self):
  48         return [self.z, self.mid]
  49
  50
  51 class FPADDBaseMod:
  52
  53     def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
  54         """ IEEE754 FP Add
  55
  56             * width: bit-width of IEEE754.  supported: 16, 32, 64
  57             * id_wid: an identifier that is sync-connected to the input
  58             * single_cycle: True indicates each stage to complete in 1 clock
  59             * compact: True indicates a reduced number of stages
  60         """
  61         self.width = width
  62         self.id_wid = id_wid
  63         self.single_cycle = single_cycle
  64         self.compact = compact
  65
  66         self.in_t = Trigger()
  67         self.i = self.ispec()
  68         self.o = self.ospec()
  69
  70         self.states = []
  71
  72     def ispec(self):
  73         return FPADDBaseData(self.width, self.id_wid)
  74
  75     def ospec(self):
  76         return FPOpData(self.width, self.id_wid)
  77
  78     def add_state(self, state):
  79         self.states.append(state)
  80         return state
  81
  82     def get_fragment(self, platform=None):
  83         """ creates the HDL code-fragment for FPAdd
  84         """
  85         m = Module()
  86         m.submodules.out_z = self.o.z
  87         m.submodules.in_t = self.in_t
  88         if self.compact:
  89             self.get_compact_fragment(m, platform)
  90         else:
  91             self.get_longer_fragment(m, platform)
  92
  93         with m.FSM() as fsm:
  94
  95             for state in self.states:
  96                 with m.State(state.state_from):
  97                     state.action(m)
  98
  99         return m
 100
 101     def get_longer_fragment(self, m, platform=None):
 102
 103         get = self.add_state(FPGet2Op("get_ops", "special_cases",
 104                                       self.width))
 105         get.setup(m, self.i)
 106         a = get.out_op1
 107         b = get.out_op2
 108         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 109
 110         sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
 111         sc.setup(m, a, b, self.in_mid)
 112
 113         dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
 114         dn.setup(m, a, b, sc.in_mid)
 115
 116         if self.single_cycle:
 117             alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
 118             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 119         else:
 120             alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
 121             alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
 122
 123         add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
 124         add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)
 125
 126         add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
 127         add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)
 128
 129         if self.single_cycle:
 130             n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
 131             n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
 132         else:
 133             n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
 134             n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)
 135
 136         rn = self.add_state(FPRound(self.width, self.id_wid))
 137         rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)
 138
 139         cor = self.add_state(FPCorrections(self.width, self.id_wid))
 140         cor.setup(m, rn.out_z, rn.in_mid)
 141
 142         pa = self.add_state(FPPack(self.width, self.id_wid))
 143         pa.setup(m, cor.out_z, rn.in_mid)
 144
 145         ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
 146                                     pa.in_mid, self.out_mid))
 147
 148         pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
 149                                     pa.in_mid, self.out_mid))
 150
 151     def get_compact_fragment(self, m, platform=None):
 152
 153         get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
 154         sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
 155         alm = FPAddAlignSingleAdd(self.width, self.id_wid)
 156         n1 = FPNormToPack(self.width, self.id_wid)
 157
 158         get.trigger_setup(m, self.in_t.stb, self.in_t.ack)
 159
 160         chainlist = [get, sc, alm, n1]
 161         chain = StageChain(chainlist, specallocate=True)
 162         chain.setup(m, self.i)
 163
 164         for mod in chainlist:
 165             sc = self.add_state(mod)
 166
 167         ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
 168                                     n1.out_z.mid, self.o.mid))
 169
 170         #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
 171         #                            sc.o.mid, self.o.mid))
 172
 173
 174 class FPADDBase(FPState):
 175
 176     def __init__(self, width, id_wid=None, single_cycle=False):
 177         """ IEEE754 FP Add
 178
 179             * width: bit-width of IEEE754.  supported: 16, 32, 64
 180             * id_wid: an identifier that is sync-connected to the input
 181             * single_cycle: True indicates each stage to complete in 1 clock
 182         """
 183         FPState.__init__(self, "fpadd")
 184         self.width = width
 185         self.single_cycle = single_cycle
 186         self.mod = FPADDBaseMod(width, id_wid, single_cycle)
 187         self.o = self.ospec()
 188
 189         self.in_t = Trigger()
 190         self.i = self.ispec()
 191
 192         self.z_done = Signal(reset_less=True) # connects to out_z Strobe
 193         self.in_accept = Signal(reset_less=True)
 194         self.add_stb = Signal(reset_less=True)
 195         self.add_ack = Signal(reset=0, reset_less=True)
 196
 197     def ispec(self):
 198         return self.mod.ispec()
 199
 200     def ospec(self):
 201         return self.mod.ospec()
 202
 203     def setup(self, m, i, add_stb, in_mid):
 204         m.d.comb += [self.i.eq(i),
 205                      self.mod.i.eq(self.i),
 206                      self.z_done.eq(self.mod.o.z.trigger),
 207                      #self.add_stb.eq(add_stb),
 208                      self.mod.in_t.stb.eq(self.in_t.stb),
 209                      self.in_t.ack.eq(self.mod.in_t.ack),
 210                      self.o.mid.eq(self.mod.o.mid),
 211                      self.o.z.v.eq(self.mod.o.z.v),
 212                      self.o.z.stb.eq(self.mod.o.z.stb),
 213                      self.mod.o.z.ack.eq(self.o.z.ack),
 214                     ]
 215
 216         m.d.sync += self.add_stb.eq(add_stb)
 217         m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
 218         m.d.sync += self.o.z.ack.eq(0) # likewise
 219         #m.d.sync += self.in_t.stb.eq(0)
 220
 221         m.submodules.fpadd = self.mod
 222
 223     def action(self, m):
 224
 225         # in_accept is set on incoming strobe HIGH and ack LOW.
 226         m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))
 227
 228         #with m.If(self.in_t.ack):
 229         #    m.d.sync += self.in_t.stb.eq(0)
 230         with m.If(~self.z_done):
 231             # not done: test for accepting an incoming operand pair
 232             with m.If(self.in_accept):
 233                 m.d.sync += [
 234                     self.add_ack.eq(1), # acknowledge receipt...
 235                     self.in_t.stb.eq(1), # initiate add
 236                 ]
 237             with m.Else():
 238                 m.d.sync += [self.add_ack.eq(0),
 239                              self.in_t.stb.eq(0),
 240                              self.o.z.ack.eq(1),
 241                             ]
 242         with m.Else():
 243             # done: acknowledge, and write out id and value
 244             m.d.sync += [self.add_ack.eq(1),
 245                          self.in_t.stb.eq(0)
 246                         ]
 247             m.next = "put_z"
 248
 249             return
 250
 251             if self.in_mid is not None:
 252                 m.d.sync += self.out_mid.eq(self.mod.out_mid)
 253
 254             m.d.sync += [
 255               self.out_z.v.eq(self.mod.out_z.v)
 256             ]
 257             # move to output state on detecting z ack
 258             with m.If(self.out_z.trigger):
 259                 m.d.sync += self.out_z.stb.eq(0)
 260                 m.next = "put_z"
 261             with m.Else():
 262                 m.d.sync += self.out_z.stb.eq(1)
 263
 264
 265 class FPADD(FPID):
 266     """ FPADD: stages as follows:
 267
 268         FPGetOp (a)
 269            |
 270         FPGetOp (b)
 271            |
 272         FPAddBase---> FPAddBaseMod
 273            |            |
 274         PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
 275
 276         FPAddBase is tricky: it is both a stage and *has* stages.
 277         Connection to FPAddBaseMod therefore requires an in stb/ack
 278         and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
 279         needs to be the thing that raises the incoming stb.
 280     """
 281
 282     def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
 283         """ IEEE754 FP Add
 284
 285             * width: bit-width of IEEE754.  supported: 16, 32, 64
 286             * id_wid: an identifier that is sync-connected to the input
 287             * single_cycle: True indicates each stage to complete in 1 clock
 288         """
 289         self.width = width
 290         self.id_wid = id_wid
 291         self.single_cycle = single_cycle
 292
 293         #self.out_z = FPOp(width)
 294         self.ids = FPID(id_wid)
 295
 296         rs = []
 297         for i in range(rs_sz):
 298             in_a  = FPOp(width)
 299             in_b  = FPOp(width)
 300             in_a.name = "in_a_%d" % i
 301             in_b.name = "in_b_%d" % i
 302             rs.append((in_a, in_b))
 303         self.rs = Array(rs)
 304
 305         res = []
 306         for i in range(rs_sz):
 307             out_z = FPOp(width)
 308             out_z.name = "out_z_%d" % i
 309             res.append(out_z)
 310         self.res = Array(res)
 311
 312         self.states = []
 313
 314     def add_state(self, state):
 315         self.states.append(state)
 316         return state
 317
 318     def get_fragment(self, platform=None):
 319         """ creates the HDL code-fragment for FPAdd
 320         """
 321         m = Module()
 322         m.submodules += self.rs
 323
 324         in_a = self.rs[0][0]
 325         in_b = self.rs[0][1]
 326
 327         geta = self.add_state(FPGetOp("get_a", "get_b",
 328                                       in_a, self.width))
 329         geta.setup(m, in_a)
 330         a = geta.out_op
 331
 332         getb = self.add_state(FPGetOp("get_b", "fpadd",
 333                                       in_b, self.width))
 334         getb.setup(m, in_b)
 335         b = getb.out_op
 336
 337         ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
 338         ab = self.add_state(ab)
 339         abd = ab.ispec() # create an input spec object for FPADDBase
 340         m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
 341         ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
 342         o = ab.o
 343
 344         pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
 345                                     o.mid, "get_a"))
 346
 347         with m.FSM() as fsm:
 348
 349             for state in self.states:
 350                 with m.State(state.state_from):
 351                     state.action(m)
 352
 353         return m
 354
 355
 356 if __name__ == "__main__":
 357     if True:
 358         alu = FPADD(width=32, id_wid=5, single_cycle=True)
 359         main(alu, ports=alu.rs[0][0].ports() + \
 360                         alu.rs[0][1].ports() + \
 361                         alu.res[0].ports() + \
 362                         [alu.ids.in_mid, alu.ids.out_mid])
 363     else:
 364         alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
 365         main(alu, ports=[alu.in_a, alu.in_b] + \
 366                         alu.in_t.ports() + \
 367                         alu.out_z.ports() + \
 368                         [alu.in_mid, alu.out_mid])
 369
 370
 371     # works... but don't use, just do "python fname.py convert -t v"
 372     #print (verilog.convert(alu, ports=[
 373     #                        ports=alu.in_a.ports() + \
 374     #                              alu.in_b.ports() + \
 375     #                              alu.out_z.ports())