src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8
   9 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  10 from fpbase import MultiShiftRMerge, Trigger
  11 #from fpbase import FPNumShiftMultiRight
  12
  13 class FPState(FPBase):
  14     def __init__(self, state_from):
  15         self.state_from = state_from
  16
  17     def set_inputs(self, inputs):
  18         self.inputs = inputs
  19         for k,v in inputs.items():
  20             setattr(self, k, v)
  21
  22     def set_outputs(self, outputs):
  23         self.outputs = outputs
  24         for k,v in outputs.items():
  25             setattr(self, k, v)
  26
  27
  28 class FPGetOpMod:
  29     def __init__(self, width):
  30         self.in_op = FPOp(width)
  31         self.out_op = Signal(width)
  32         self.out_decode = Signal(reset_less=True)
  33
  34     def elaborate(self, platform):
  35         m = Module()
  36         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
  37         m.submodules.get_op_in = self.in_op
  38         #m.submodules.get_op_out = self.out_op
  39         with m.If(self.out_decode):
  40             m.d.comb += [
  41                 self.out_op.eq(self.in_op.v),
  42             ]
  43         return m
  44
  45
  46 class FPGetOp(FPState):
  47     """ gets operand
  48     """
  49
  50     def __init__(self, in_state, out_state, in_op, width):
  51         FPState.__init__(self, in_state)
  52         self.out_state = out_state
  53         self.mod = FPGetOpMod(width)
  54         self.in_op = in_op
  55         self.out_op = Signal(width)
  56         self.out_decode = Signal(reset_less=True)
  57
  58     def setup(self, m, in_op):
  59         """ links module to inputs and outputs
  60         """
  61         setattr(m.submodules, self.state_from, self.mod)
  62         m.d.comb += self.mod.in_op.copy(in_op)
  63         #m.d.comb += self.out_op.eq(self.mod.out_op)
  64         m.d.comb += self.out_decode.eq(self.mod.out_decode)
  65
  66     def action(self, m):
  67         with m.If(self.out_decode):
  68             m.next = self.out_state
  69             m.d.sync += [
  70                 self.in_op.ack.eq(0),
  71                 self.out_op.eq(self.mod.out_op)
  72             ]
  73         with m.Else():
  74             m.d.sync += self.in_op.ack.eq(1)
  75
  76
  77 class FPGet2OpMod(Trigger):
  78     def __init__(self, width):
  79         Trigger.__init__(self)
  80         self.in_op1 = Signal(width, reset_less=True)
  81         self.in_op2 = Signal(width, reset_less=True)
  82         self.out_op1 = FPNumIn(None, width)
  83         self.out_op2 = FPNumIn(None, width)
  84
  85     def elaborate(self, platform):
  86         m = Trigger.elaborate(self, platform)
  87         #m.submodules.get_op_in = self.in_op
  88         m.submodules.get_op1_out = self.out_op1
  89         m.submodules.get_op2_out = self.out_op2
  90         with m.If(self.trigger):
  91             m.d.comb += [
  92                 self.out_op1.decode(self.in_op1),
  93                 self.out_op2.decode(self.in_op2),
  94             ]
  95         return m
  96
  97
  98 class FPGet2Op(FPState):
  99     """ gets operands
 100     """
 101
 102     def __init__(self, in_state, out_state, in_op1, in_op2, width):
 103         FPState.__init__(self, in_state)
 104         self.out_state = out_state
 105         self.mod = FPGet2OpMod(width)
 106         self.in_op1 = in_op1
 107         self.in_op2 = in_op2
 108         self.out_op1 = FPNumIn(None, width)
 109         self.out_op2 = FPNumIn(None, width)
 110         self.in_stb = Signal(reset_less=True)
 111         self.out_ack = Signal(reset_less=True)
 112         self.out_decode = Signal(reset_less=True)
 113
 114     def setup(self, m, in_op1, in_op2, in_stb, in_ack):
 115         """ links module to inputs and outputs
 116         """
 117         m.submodules.get_ops = self.mod
 118         m.d.comb += self.mod.in_op1.eq(in_op1)
 119         m.d.comb += self.mod.in_op2.eq(in_op2)
 120         m.d.comb += self.mod.stb.eq(in_stb)
 121         m.d.comb += self.out_ack.eq(self.mod.ack)
 122         m.d.comb += self.out_decode.eq(self.mod.trigger)
 123         m.d.comb += in_ack.eq(self.mod.ack)
 124
 125     def action(self, m):
 126         with m.If(self.out_decode):
 127             m.next = self.out_state
 128             m.d.sync += [
 129                 self.mod.ack.eq(0),
 130                 #self.out_op1.v.eq(self.mod.out_op1.v),
 131                 #self.out_op2.v.eq(self.mod.out_op2.v),
 132                 self.out_op1.copy(self.mod.out_op1),
 133                 self.out_op2.copy(self.mod.out_op2)
 134             ]
 135         with m.Else():
 136             m.d.sync += self.mod.ack.eq(1)
 137
 138
 139 class FPAddSpecialCasesMod:
 140     """ special cases: NaNs, infs, zeros, denormalised
 141         NOTE: some of these are unique to add.  see "Special Operations"
 142         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 143     """
 144
 145     def __init__(self, width):
 146         self.in_a = FPNumBase(width)
 147         self.in_b = FPNumBase(width)
 148         self.out_z = FPNumOut(width, False)
 149         self.out_do_z = Signal(reset_less=True)
 150
 151     def setup(self, m, in_a, in_b, out_do_z):
 152         """ links module to inputs and outputs
 153         """
 154         m.submodules.specialcases = self
 155         m.d.comb += self.in_a.copy(in_a)
 156         m.d.comb += self.in_b.copy(in_b)
 157         m.d.comb += out_do_z.eq(self.out_do_z)
 158
 159     def elaborate(self, platform):
 160         m = Module()
 161
 162         m.submodules.sc_in_a = self.in_a
 163         m.submodules.sc_in_b = self.in_b
 164         m.submodules.sc_out_z = self.out_z
 165
 166         s_nomatch = Signal()
 167         m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)
 168
 169         m_match = Signal()
 170         m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)
 171
 172         # if a is NaN or b is NaN return NaN
 173         with m.If(self.in_a.is_nan | self.in_b.is_nan):
 174             m.d.comb += self.out_do_z.eq(1)
 175             m.d.comb += self.out_z.nan(0)
 176
 177         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 178         # under review
 179
 180         ## if a is zero and b is NaN return -b
 181         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 182         #    m.d.comb += self.out_do_z.eq(1)
 183         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 184
 185         ## if b is zero and a is NaN return -a
 186         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 187         #    m.d.comb += self.out_do_z.eq(1)
 188         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 189
 190         ## if a is -zero and b is NaN return -b
 191         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 192         #    m.d.comb += self.out_do_z.eq(1)
 193         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 194
 195         ## if b is -zero and a is NaN return -a
 196         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 197         #    m.d.comb += self.out_do_z.eq(1)
 198         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 199
 200         # if a is inf return inf (or NaN)
 201         with m.Elif(self.in_a.is_inf):
 202             m.d.comb += self.out_do_z.eq(1)
 203             m.d.comb += self.out_z.inf(self.in_a.s)
 204             # if a is inf and signs don't match return NaN
 205             with m.If(self.in_b.exp_128 & s_nomatch):
 206                 m.d.comb += self.out_z.nan(0)
 207
 208         # if b is inf return inf
 209         with m.Elif(self.in_b.is_inf):
 210             m.d.comb += self.out_do_z.eq(1)
 211             m.d.comb += self.out_z.inf(self.in_b.s)
 212
 213         # if a is zero and b zero return signed-a/b
 214         with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
 215             m.d.comb += self.out_do_z.eq(1)
 216             m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
 217                                           self.in_b.e,
 218                                           self.in_b.m[3:-1])
 219
 220         # if a is zero return b
 221         with m.Elif(self.in_a.is_zero):
 222             m.d.comb += self.out_do_z.eq(1)
 223             m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
 224                                       self.in_b.m[3:-1])
 225
 226         # if b is zero return a
 227         with m.Elif(self.in_b.is_zero):
 228             m.d.comb += self.out_do_z.eq(1)
 229             m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
 230                                       self.in_a.m[3:-1])
 231
 232         # if a equal to -b return zero (+ve zero)
 233         with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
 234             m.d.comb += self.out_do_z.eq(1)
 235             m.d.comb += self.out_z.zero(0)
 236
 237         # Denormalised Number checks
 238         with m.Else():
 239             m.d.comb += self.out_do_z.eq(0)
 240
 241         return m
 242
 243
 244 class FPID:
 245     def __init__(self, id_wid):
 246         self.id_wid = id_wid
 247         if self.id_wid:
 248             self.in_mid = Signal(id_wid, reset_less=True)
 249             self.out_mid = Signal(id_wid, reset_less=True)
 250         else:
 251             self.in_mid = None
 252             self.out_mid = None
 253
 254     def idsync(self, m):
 255         if self.id_wid is not None:
 256             m.d.sync += self.out_mid.eq(self.in_mid)
 257
 258
 259 class FPAddSpecialCases(FPState, FPID):
 260     """ special cases: NaNs, infs, zeros, denormalised
 261         NOTE: some of these are unique to add.  see "Special Operations"
 262         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 263     """
 264
 265     def __init__(self, width, id_wid):
 266         FPState.__init__(self, "special_cases")
 267         FPID.__init__(self, id_wid)
 268         self.mod = FPAddSpecialCasesMod(width)
 269         self.out_z = FPNumOut(width, False)
 270         self.out_do_z = Signal(reset_less=True)
 271
 272     def setup(self, m, in_a, in_b, in_mid):
 273         """ links module to inputs and outputs
 274         """
 275         self.mod.setup(m, in_a, in_b, self.out_do_z)
 276         if self.in_mid is not None:
 277             m.d.comb += self.in_mid.eq(in_mid)
 278
 279     def action(self, m):
 280         self.idsync(m)
 281         with m.If(self.out_do_z):
 282             m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 283             m.next = "put_z"
 284         with m.Else():
 285             m.next = "denormalise"
 286
 287
 288 class FPAddDeNormMod(FPState):
 289
 290     def __init__(self, width):
 291         self.in_a = FPNumBase(width)
 292         self.in_b = FPNumBase(width)
 293         self.out_a = FPNumBase(width)
 294         self.out_b = FPNumBase(width)
 295
 296     def elaborate(self, platform):
 297         m = Module()
 298         m.submodules.denorm_in_a = self.in_a
 299         m.submodules.denorm_in_b = self.in_b
 300         m.submodules.denorm_out_a = self.out_a
 301         m.submodules.denorm_out_b = self.out_b
 302         # hmmm, don't like repeating identical code
 303         m.d.comb += self.out_a.copy(self.in_a)
 304         with m.If(self.in_a.exp_n127):
 305             m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
 306         with m.Else():
 307             m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit
 308
 309         m.d.comb += self.out_b.copy(self.in_b)
 310         with m.If(self.in_b.exp_n127):
 311             m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit a exponent
 312         with m.Else():
 313             m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit
 314
 315         return m
 316
 317
 318 class FPAddDeNorm(FPState, FPID):
 319
 320     def __init__(self, width, id_wid):
 321         FPState.__init__(self, "denormalise")
 322         FPID.__init__(self, id_wid)
 323         self.mod = FPAddDeNormMod(width)
 324         self.out_a = FPNumBase(width)
 325         self.out_b = FPNumBase(width)
 326
 327     def setup(self, m, in_a, in_b, in_mid):
 328         """ links module to inputs and outputs
 329         """
 330         m.submodules.denormalise = self.mod
 331         m.d.comb += self.mod.in_a.copy(in_a)
 332         m.d.comb += self.mod.in_b.copy(in_b)
 333         if self.in_mid is not None:
 334             m.d.comb += self.in_mid.eq(in_mid)
 335
 336     def action(self, m):
 337         self.idsync(m)
 338         # Denormalised Number checks
 339         m.next = "align"
 340         m.d.sync += self.out_a.copy(self.mod.out_a)
 341         m.d.sync += self.out_b.copy(self.mod.out_b)
 342
 343
 344 class FPAddAlignMultiMod(FPState):
 345
 346     def __init__(self, width):
 347         self.in_a = FPNumBase(width)
 348         self.in_b = FPNumBase(width)
 349         self.out_a = FPNumIn(None, width)
 350         self.out_b = FPNumIn(None, width)
 351         self.exp_eq = Signal(reset_less=True)
 352
 353     def elaborate(self, platform):
 354         # This one however (single-cycle) will do the shift
 355         # in one go.
 356
 357         m = Module()
 358
 359         m.submodules.align_in_a = self.in_a
 360         m.submodules.align_in_b = self.in_b
 361         m.submodules.align_out_a = self.out_a
 362         m.submodules.align_out_b = self.out_b
 363
 364         # NOTE: this does *not* do single-cycle multi-shifting,
 365         #       it *STAYS* in the align state until exponents match
 366
 367         # exponent of a greater than b: shift b down
 368         m.d.comb += self.exp_eq.eq(0)
 369         m.d.comb += self.out_a.copy(self.in_a)
 370         m.d.comb += self.out_b.copy(self.in_b)
 371         agtb = Signal(reset_less=True)
 372         altb = Signal(reset_less=True)
 373         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 374         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 375         with m.If(agtb):
 376             m.d.comb += self.out_b.shift_down(self.in_b)
 377         # exponent of b greater than a: shift a down
 378         with m.Elif(altb):
 379             m.d.comb += self.out_a.shift_down(self.in_a)
 380         # exponents equal: move to next stage.
 381         with m.Else():
 382             m.d.comb += self.exp_eq.eq(1)
 383         return m
 384
 385
 386 class FPAddAlignMulti(FPState, FPID):
 387
 388     def __init__(self, width, id_wid):
 389         FPID.__init__(self, id_wid)
 390         FPState.__init__(self, "align")
 391         self.mod = FPAddAlignMultiMod(width)
 392         self.out_a = FPNumIn(None, width)
 393         self.out_b = FPNumIn(None, width)
 394         self.exp_eq = Signal(reset_less=True)
 395
 396     def setup(self, m, in_a, in_b, in_mid):
 397         """ links module to inputs and outputs
 398         """
 399         m.submodules.align = self.mod
 400         m.d.comb += self.mod.in_a.copy(in_a)
 401         m.d.comb += self.mod.in_b.copy(in_b)
 402         #m.d.comb += self.out_a.copy(self.mod.out_a)
 403         #m.d.comb += self.out_b.copy(self.mod.out_b)
 404         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 405         if self.in_mid is not None:
 406             m.d.comb += self.in_mid.eq(in_mid)
 407
 408     def action(self, m):
 409         self.idsync(m)
 410         m.d.sync += self.out_a.copy(self.mod.out_a)
 411         m.d.sync += self.out_b.copy(self.mod.out_b)
 412         with m.If(self.exp_eq):
 413             m.next = "add_0"
 414
 415
 416 class FPAddAlignSingleMod:
 417
 418     def __init__(self, width):
 419         self.width = width
 420         self.in_a = FPNumBase(width)
 421         self.in_b = FPNumBase(width)
 422         self.out_a = FPNumIn(None, width)
 423         self.out_b = FPNumIn(None, width)
 424
 425     def elaborate(self, platform):
 426         """ Aligns A against B or B against A, depending on which has the
 427             greater exponent.  This is done in a *single* cycle using
 428             variable-width bit-shift
 429
 430             the shifter used here is quite expensive in terms of gates.
 431             Mux A or B in (and out) into temporaries, as only one of them
 432             needs to be aligned against the other
 433         """
 434         m = Module()
 435
 436         m.submodules.align_in_a = self.in_a
 437         m.submodules.align_in_b = self.in_b
 438         m.submodules.align_out_a = self.out_a
 439         m.submodules.align_out_b = self.out_b
 440
 441         # temporary (muxed) input and output to be shifted
 442         t_inp = FPNumBase(self.width)
 443         t_out = FPNumIn(None, self.width)
 444         espec = (len(self.in_a.e), True)
 445         msr = MultiShiftRMerge(self.in_a.m_width, espec)
 446         m.submodules.align_t_in = t_inp
 447         m.submodules.align_t_out = t_out
 448         m.submodules.multishift_r = msr
 449
 450         ediff = Signal(espec, reset_less=True)
 451         ediffr = Signal(espec, reset_less=True)
 452         tdiff = Signal(espec, reset_less=True)
 453         elz = Signal(reset_less=True)
 454         egz = Signal(reset_less=True)
 455
 456         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 457         m.d.comb += msr.inp.eq(t_inp.m)
 458         m.d.comb += msr.diff.eq(tdiff)
 459         m.d.comb += t_out.m.eq(msr.m)
 460         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 461         m.d.comb += t_out.s.eq(t_inp.s)
 462
 463         m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
 464         m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
 465         m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
 466         m.d.comb += egz.eq(self.in_a.e > self.in_b.e)
 467
 468         # default: A-exp == B-exp, A and B untouched (fall through)
 469         m.d.comb += self.out_a.copy(self.in_a)
 470         m.d.comb += self.out_b.copy(self.in_b)
 471         # only one shifter (muxed)
 472         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 473         # exponent of a greater than b: shift b down
 474         with m.If(egz):
 475             m.d.comb += [t_inp.copy(self.in_b),
 476                          tdiff.eq(ediff),
 477                          self.out_b.copy(t_out),
 478                          self.out_b.s.eq(self.in_b.s), # whoops forgot sign
 479                         ]
 480         # exponent of b greater than a: shift a down
 481         with m.Elif(elz):
 482             m.d.comb += [t_inp.copy(self.in_a),
 483                          tdiff.eq(ediffr),
 484                          self.out_a.copy(t_out),
 485                          self.out_a.s.eq(self.in_a.s), # whoops forgot sign
 486                         ]
 487         return m
 488
 489
 490 class FPAddAlignSingle(FPState, FPID):
 491
 492     def __init__(self, width, id_wid):
 493         FPState.__init__(self, "align")
 494         FPID.__init__(self, id_wid)
 495         self.mod = FPAddAlignSingleMod(width)
 496         self.out_a = FPNumIn(None, width)
 497         self.out_b = FPNumIn(None, width)
 498
 499     def setup(self, m, in_a, in_b, in_mid):
 500         """ links module to inputs and outputs
 501         """
 502         m.submodules.align = self.mod
 503         m.d.comb += self.mod.in_a.copy(in_a)
 504         m.d.comb += self.mod.in_b.copy(in_b)
 505         if self.in_mid is not None:
 506             m.d.comb += self.in_mid.eq(in_mid)
 507
 508     def action(self, m):
 509         self.idsync(m)
 510         # NOTE: could be done as comb
 511         m.d.sync += self.out_a.copy(self.mod.out_a)
 512         m.d.sync += self.out_b.copy(self.mod.out_b)
 513         m.next = "add_0"
 514
 515
 516 class FPAddStage0Mod:
 517
 518     def __init__(self, width):
 519         self.in_a = FPNumBase(width)
 520         self.in_b = FPNumBase(width)
 521         self.in_z = FPNumBase(width, False)
 522         self.out_z = FPNumBase(width, False)
 523         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 524
 525     def elaborate(self, platform):
 526         m = Module()
 527         m.submodules.add0_in_a = self.in_a
 528         m.submodules.add0_in_b = self.in_b
 529         m.submodules.add0_out_z = self.out_z
 530
 531         m.d.comb += self.out_z.e.eq(self.in_a.e)
 532
 533         # store intermediate tests (and zero-extended mantissas)
 534         seq = Signal(reset_less=True)
 535         mge = Signal(reset_less=True)
 536         am0 = Signal(len(self.in_a.m)+1, reset_less=True)
 537         bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
 538         m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
 539                      mge.eq(self.in_a.m >= self.in_b.m),
 540                      am0.eq(Cat(self.in_a.m, 0)),
 541                      bm0.eq(Cat(self.in_b.m, 0))
 542                     ]
 543         # same-sign (both negative or both positive) add mantissas
 544         with m.If(seq):
 545             m.d.comb += [
 546                 self.out_tot.eq(am0 + bm0),
 547                 self.out_z.s.eq(self.in_a.s)
 548             ]
 549         # a mantissa greater than b, use a
 550         with m.Elif(mge):
 551             m.d.comb += [
 552                 self.out_tot.eq(am0 - bm0),
 553                 self.out_z.s.eq(self.in_a.s)
 554             ]
 555         # b mantissa greater than a, use b
 556         with m.Else():
 557             m.d.comb += [
 558                 self.out_tot.eq(bm0 - am0),
 559                 self.out_z.s.eq(self.in_b.s)
 560         ]
 561         return m
 562
 563
 564 class FPAddStage0(FPState, FPID):
 565     """ First stage of add.  covers same-sign (add) and subtract
 566         special-casing when mantissas are greater or equal, to
 567         give greatest accuracy.
 568     """
 569
 570     def __init__(self, width, id_wid):
 571         FPState.__init__(self, "add_0")
 572         FPID.__init__(self, id_wid)
 573         self.mod = FPAddStage0Mod(width)
 574         self.out_z = FPNumBase(width, False)
 575         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 576
 577     def setup(self, m, in_a, in_b, in_mid):
 578         """ links module to inputs and outputs
 579         """
 580         m.submodules.add0 = self.mod
 581         m.d.comb += self.mod.in_a.copy(in_a)
 582         m.d.comb += self.mod.in_b.copy(in_b)
 583         if self.in_mid is not None:
 584             m.d.comb += self.in_mid.eq(in_mid)
 585
 586     def action(self, m):
 587         self.idsync(m)
 588         # NOTE: these could be done as combinatorial (merge add0+add1)
 589         m.d.sync += self.out_z.copy(self.mod.out_z)
 590         m.d.sync += self.out_tot.eq(self.mod.out_tot)
 591         m.next = "add_1"
 592
 593
 594 class FPAddStage1Mod(FPState):
 595     """ Second stage of add: preparation for normalisation.
 596         detects when tot sum is too big (tot[27] is kinda a carry bit)
 597     """
 598
 599     def __init__(self, width):
 600         self.out_norm = Signal(reset_less=True)
 601         self.in_z = FPNumBase(width, False)
 602         self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
 603         self.out_z = FPNumBase(width, False)
 604         self.out_of = Overflow()
 605
 606     def elaborate(self, platform):
 607         m = Module()
 608         #m.submodules.norm1_in_overflow = self.in_of
 609         #m.submodules.norm1_out_overflow = self.out_of
 610         #m.submodules.norm1_in_z = self.in_z
 611         #m.submodules.norm1_out_z = self.out_z
 612         m.d.comb += self.out_z.copy(self.in_z)
 613         # tot[27] gets set when the sum overflows. shift result down
 614         with m.If(self.in_tot[-1]):
 615             m.d.comb += [
 616                 self.out_z.m.eq(self.in_tot[4:]),
 617                 self.out_of.m0.eq(self.in_tot[4]),
 618                 self.out_of.guard.eq(self.in_tot[3]),
 619                 self.out_of.round_bit.eq(self.in_tot[2]),
 620                 self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
 621                 self.out_z.e.eq(self.in_z.e + 1)
 622         ]
 623         # tot[27] zero case
 624         with m.Else():
 625             m.d.comb += [
 626                 self.out_z.m.eq(self.in_tot[3:]),
 627                 self.out_of.m0.eq(self.in_tot[3]),
 628                 self.out_of.guard.eq(self.in_tot[2]),
 629                 self.out_of.round_bit.eq(self.in_tot[1]),
 630                 self.out_of.sticky.eq(self.in_tot[0])
 631         ]
 632         return m
 633
 634
 635 class FPAddStage1(FPState, FPID):
 636
 637     def __init__(self, width, id_wid):
 638         FPState.__init__(self, "add_1")
 639         FPID.__init__(self, id_wid)
 640         self.mod = FPAddStage1Mod(width)
 641         self.out_z = FPNumBase(width, False)
 642         self.out_of = Overflow()
 643         self.norm_stb = Signal()
 644
 645     def setup(self, m, in_tot, in_z, in_mid):
 646         """ links module to inputs and outputs
 647         """
 648         m.submodules.add1 = self.mod
 649         m.submodules.add1_out_overflow = self.out_of
 650
 651         m.d.comb += self.mod.in_z.copy(in_z)
 652         m.d.comb += self.mod.in_tot.eq(in_tot)
 653
 654         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 655
 656         if self.in_mid is not None:
 657             m.d.comb += self.in_mid.eq(in_mid)
 658
 659     def action(self, m):
 660         self.idsync(m)
 661         m.d.sync += self.out_of.copy(self.mod.out_of)
 662         m.d.sync += self.out_z.copy(self.mod.out_z)
 663         m.d.sync += self.norm_stb.eq(1)
 664         m.next = "normalise_1"
 665
 666
 667 class FPNorm1ModSingle:
 668
 669     def __init__(self, width):
 670         self.width = width
 671         self.out_norm = Signal(reset_less=True)
 672         self.in_z = FPNumBase(width, False)
 673         self.in_of = Overflow()
 674         self.out_z = FPNumBase(width, False)
 675         self.out_of = Overflow()
 676
 677     def setup(self, m, in_z, in_of, out_z):
 678         """ links module to inputs and outputs
 679         """
 680         m.submodules.normalise_1 = self
 681
 682         m.d.comb += self.in_z.copy(in_z)
 683         m.d.comb += self.in_of.copy(in_of)
 684
 685         m.d.comb += out_z.copy(self.out_z)
 686
 687     def elaborate(self, platform):
 688         m = Module()
 689
 690         mwid = self.out_z.m_width+2
 691         pe = PriorityEncoder(mwid)
 692         m.submodules.norm_pe = pe
 693
 694         m.submodules.norm1_out_z = self.out_z
 695         m.submodules.norm1_out_overflow = self.out_of
 696         m.submodules.norm1_in_z = self.in_z
 697         m.submodules.norm1_in_overflow = self.in_of
 698
 699         in_z = FPNumBase(self.width, False)
 700         in_of = Overflow()
 701         m.submodules.norm1_insel_z = in_z
 702         m.submodules.norm1_insel_overflow = in_of
 703
 704         espec = (len(in_z.e), True)
 705         ediff_n126 = Signal(espec, reset_less=True)
 706         msr = MultiShiftRMerge(mwid, espec)
 707         m.submodules.multishift_r = msr
 708
 709         m.d.comb += in_z.copy(self.in_z)
 710         m.d.comb += in_of.copy(self.in_of)
 711         # initialise out from in (overridden below)
 712         m.d.comb += self.out_z.copy(in_z)
 713         m.d.comb += self.out_of.copy(in_of)
 714         # normalisation increase/decrease conditions
 715         decrease = Signal(reset_less=True)
 716         increase = Signal(reset_less=True)
 717         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 718         m.d.comb += increase.eq(in_z.exp_lt_n126)
 719         # decrease exponent
 720         with m.If(decrease):
 721             # *sigh* not entirely obvious: count leading zeros (clz)
 722             # with a PriorityEncoder: to find from the MSB
 723             # we reverse the order of the bits.
 724             temp_m = Signal(mwid, reset_less=True)
 725             temp_s = Signal(mwid+1, reset_less=True)
 726             clz = Signal((len(in_z.e), True), reset_less=True)
 727             # make sure that the amount to decrease by does NOT
 728             # go below the minimum non-INF/NaN exponent
 729             limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
 730                          in_z.exp_sub_n126)
 731             m.d.comb += [
 732                 # cat round and guard bits back into the mantissa
 733                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
 734                 pe.i.eq(temp_m[::-1]),          # inverted
 735                 clz.eq(limclz),                 # count zeros from MSB down
 736                 temp_s.eq(temp_m << clz),       # shift mantissa UP
 737                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
 738                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
 739                 self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
 740                 # overflow in bits 0..1: got shifted too (leave sticky)
 741                 self.out_of.guard.eq(temp_s[1]),     # guard
 742                 self.out_of.round_bit.eq(temp_s[0]), # round
 743             ]
 744         # increase exponent
 745         with m.Elif(increase):
 746             temp_m = Signal(mwid+1, reset_less=True)
 747             m.d.comb += [
 748                 temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
 749                               in_z.m)),
 750                 ediff_n126.eq(in_z.N126 - in_z.e),
 751                 # connect multi-shifter to inp/out mantissa (and ediff)
 752                 msr.inp.eq(temp_m),
 753                 msr.diff.eq(ediff_n126),
 754                 self.out_z.m.eq(msr.m[3:]),
 755                 self.out_of.m0.eq(temp_s[3]),   # copy of mantissa[0]
 756                 # overflow in bits 0..1: got shifted too (leave sticky)
 757                 self.out_of.guard.eq(temp_s[2]),     # guard
 758                 self.out_of.round_bit.eq(temp_s[1]), # round
 759                 self.out_of.sticky.eq(temp_s[0]), # sticky
 760                 self.out_z.e.eq(in_z.e + ediff_n126),
 761             ]
 762
 763         return m
 764
 765
 766 class FPNorm1ModMulti:
 767
 768     def __init__(self, width, single_cycle=True):
 769         self.width = width
 770         self.in_select = Signal(reset_less=True)
 771         self.out_norm = Signal(reset_less=True)
 772         self.in_z = FPNumBase(width, False)
 773         self.in_of = Overflow()
 774         self.temp_z = FPNumBase(width, False)
 775         self.temp_of = Overflow()
 776         self.out_z = FPNumBase(width, False)
 777         self.out_of = Overflow()
 778
 779     def elaborate(self, platform):
 780         m = Module()
 781
 782         m.submodules.norm1_out_z = self.out_z
 783         m.submodules.norm1_out_overflow = self.out_of
 784         m.submodules.norm1_temp_z = self.temp_z
 785         m.submodules.norm1_temp_of = self.temp_of
 786         m.submodules.norm1_in_z = self.in_z
 787         m.submodules.norm1_in_overflow = self.in_of
 788
 789         in_z = FPNumBase(self.width, False)
 790         in_of = Overflow()
 791         m.submodules.norm1_insel_z = in_z
 792         m.submodules.norm1_insel_overflow = in_of
 793
 794         # select which of temp or in z/of to use
 795         with m.If(self.in_select):
 796             m.d.comb += in_z.copy(self.in_z)
 797             m.d.comb += in_of.copy(self.in_of)
 798         with m.Else():
 799             m.d.comb += in_z.copy(self.temp_z)
 800             m.d.comb += in_of.copy(self.temp_of)
 801         # initialise out from in (overridden below)
 802         m.d.comb += self.out_z.copy(in_z)
 803         m.d.comb += self.out_of.copy(in_of)
 804         # normalisation increase/decrease conditions
 805         decrease = Signal(reset_less=True)
 806         increase = Signal(reset_less=True)
 807         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 808         m.d.comb += increase.eq(in_z.exp_lt_n126)
 809         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
 810         # decrease exponent
 811         with m.If(decrease):
 812             m.d.comb += [
 813                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
 814                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
 815                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
 816                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
 817                 self.out_of.round_bit.eq(0),        # reset round bit
 818                 self.out_of.m0.eq(in_of.guard),
 819             ]
 820         # increase exponent
 821         with m.Elif(increase):
 822             m.d.comb += [
 823                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
 824                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
 825                 self.out_of.guard.eq(in_z.m[0]),
 826                 self.out_of.m0.eq(in_z.m[1]),
 827                 self.out_of.round_bit.eq(in_of.guard),
 828                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
 829             ]
 830
 831         return m
 832
 833
 834 class FPNorm1Single(FPState, FPID):
 835
 836     def __init__(self, width, id_wid, single_cycle=True):
 837         FPID.__init__(self, id_wid)
 838         FPState.__init__(self, "normalise_1")
 839         self.mod = FPNorm1ModSingle(width)
 840         self.out_norm = Signal(reset_less=True)
 841         self.out_z = FPNumBase(width)
 842         self.out_roundz = Signal(reset_less=True)
 843
 844     def setup(self, m, in_z, in_of, in_mid):
 845         """ links module to inputs and outputs
 846         """
 847         self.mod.setup(m, in_z, in_of, self.out_z)
 848
 849         if self.in_mid is not None:
 850             m.d.comb += self.in_mid.eq(in_mid)
 851
 852     def action(self, m):
 853         self.idsync(m)
 854         m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
 855         m.next = "round"
 856
 857
 858 class FPNorm1Multi(FPState, FPID):
 859
 860     def __init__(self, width, id_wid):
 861         FPID.__init__(self, id_wid)
 862         FPState.__init__(self, "normalise_1")
 863         self.mod = FPNorm1ModMulti(width)
 864         self.stb = Signal(reset_less=True)
 865         self.ack = Signal(reset=0, reset_less=True)
 866         self.out_norm = Signal(reset_less=True)
 867         self.in_accept = Signal(reset_less=True)
 868         self.temp_z = FPNumBase(width)
 869         self.temp_of = Overflow()
 870         self.out_z = FPNumBase(width)
 871         self.out_roundz = Signal(reset_less=True)
 872
 873     def setup(self, m, in_z, in_of, norm_stb, in_mid):
 874         """ links module to inputs and outputs
 875         """
 876         self.mod.setup(m, in_z, in_of, norm_stb,
 877                        self.in_accept, self.temp_z, self.temp_of,
 878                        self.out_z, self.out_norm)
 879
 880         m.d.comb += self.stb.eq(norm_stb)
 881         m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state
 882
 883         if self.in_mid is not None:
 884             m.d.comb += self.in_mid.eq(in_mid)
 885
 886     def action(self, m):
 887         self.idsync(m)
 888         m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
 889         m.d.sync += self.temp_of.copy(self.mod.out_of)
 890         m.d.sync += self.temp_z.copy(self.out_z)
 891         with m.If(self.out_norm):
 892             with m.If(self.in_accept):
 893                 m.d.sync += [
 894                     self.ack.eq(1),
 895                 ]
 896             with m.Else():
 897                 m.d.sync += self.ack.eq(0)
 898         with m.Else():
 899             # normalisation not required (or done).
 900             m.next = "round"
 901             m.d.sync += self.ack.eq(1)
 902             m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
 903
 904
 905 class FPNormToPack(FPState, FPID):
 906
 907     def __init__(self, width, id_wid):
 908         FPID.__init__(self, id_wid)
 909         FPState.__init__(self, "normalise_1")
 910         self.width = width
 911
 912     def setup(self, m, in_z, in_of, in_mid):
 913         """ links module to inputs and outputs
 914         """
 915
 916         # Normalisation (chained to input in_z+in_of)
 917         nmod = FPNorm1ModSingle(self.width)
 918         n_out_z = FPNumBase(self.width)
 919         n_out_roundz = Signal(reset_less=True)
 920         nmod.setup(m, in_z, in_of, n_out_z)
 921
 922         # Rounding (chained to normalisation)
 923         rmod = FPRoundMod(self.width)
 924         r_out_z = FPNumBase(self.width)
 925         rmod.setup(m, n_out_z, n_out_roundz)
 926         m.d.comb += n_out_roundz.eq(nmod.out_of.roundz)
 927         m.d.comb += r_out_z.copy(rmod.out_z)
 928
 929         # Corrections (chained to rounding)
 930         cmod = FPCorrectionsMod(self.width)
 931         c_out_z = FPNumBase(self.width)
 932         cmod.setup(m, r_out_z)
 933         m.d.comb += c_out_z.copy(cmod.out_z)
 934
 935         # Pack (chained to corrections)
 936         self.pmod = FPPackMod(self.width)
 937         self.out_z = FPNumBase(self.width)
 938         self.pmod.setup(m, c_out_z)
 939
 940         # Multiplex ID
 941         if self.in_mid is not None:
 942             m.d.comb += self.in_mid.eq(in_mid)
 943
 944     def action(self, m):
 945         self.idsync(m) # copies incoming ID to outgoing
 946         m.d.sync += self.out_z.v.eq(self.pmod.out_z.v) # outputs packed result
 947         m.next = "pack_put_z"
 948
 949
 950 class FPRoundMod:
 951
 952     def __init__(self, width):
 953         self.in_roundz = Signal(reset_less=True)
 954         self.in_z = FPNumBase(width, False)
 955         self.out_z = FPNumBase(width, False)
 956
 957     def setup(self, m, in_z, roundz):
 958         m.submodules.roundz = self
 959
 960         m.d.comb += self.in_z.copy(in_z)
 961         m.d.comb += self.in_roundz.eq(roundz)
 962
 963     def elaborate(self, platform):
 964         m = Module()
 965         m.d.comb += self.out_z.copy(self.in_z)
 966         with m.If(self.in_roundz):
 967             m.d.comb += self.out_z.m.eq(self.in_z.m + 1) # mantissa rounds up
 968             with m.If(self.in_z.m == self.in_z.m1s): # all 1s
 969                 m.d.comb += self.out_z.e.eq(self.in_z.e + 1) # exponent up
 970         return m
 971
 972
 973 class FPRound(FPState, FPID):
 974
 975     def __init__(self, width, id_wid):
 976         FPState.__init__(self, "round")
 977         FPID.__init__(self, id_wid)
 978         self.mod = FPRoundMod(width)
 979         self.out_z = FPNumBase(width)
 980
 981     def setup(self, m, in_z, roundz, in_mid):
 982         """ links module to inputs and outputs
 983         """
 984         self.mod.setup(m, in_z, roundz)
 985
 986         if self.in_mid is not None:
 987             m.d.comb += self.in_mid.eq(in_mid)
 988
 989     def action(self, m):
 990         self.idsync(m)
 991         m.d.sync += self.out_z.copy(self.mod.out_z)
 992         m.next = "corrections"
 993
 994
 995 class FPCorrectionsMod:
 996
 997     def __init__(self, width):
 998         self.in_z = FPNumOut(width, False)
 999         self.out_z = FPNumOut(width, False)
1000
1001     def setup(self, m, in_z):
1002         """ links module to inputs and outputs
1003         """
1004         m.submodules.corrections = self
1005         m.d.comb += self.in_z.copy(in_z)
1006
1007     def elaborate(self, platform):
1008         m = Module()
1009         m.submodules.corr_in_z = self.in_z
1010         m.submodules.corr_out_z = self.out_z
1011         m.d.comb += self.out_z.copy(self.in_z)
1012         with m.If(self.in_z.is_denormalised):
1013             m.d.comb += self.out_z.e.eq(self.in_z.N127)
1014         return m
1015
1016
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its result before moving to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ connects the corrections module to in_z and forwards the
            multiplex ID when one is in use
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state behaviour: latch ID and corrected number, then go
            to "pack"
        """
        self.idsync(m)
        corrected = self.mod.out_z
        m.d.sync += self.out_z.copy(corrected)
        m.next = "pack"
1036
1037
class FPPackMod:
    """ Combinatorial packer: emits infinity (with the input's sign)
        when the input is flagged overflowed, otherwise builds the
        output from the input's sign, exponent and mantissa.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers this module as submodule "pack" and connects its
            input
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ builds the packing logic
        """
        m = Module()
        src = self.in_z
        m.submodules.pack_in_z = src

        with m.If(src.is_overflowed):
            m.d.comb += self.out_z.inf(src.s)
        with m.Else():
            m.d.comb += self.out_z.create(src.s, src.e, src.m)
        return m
1058
1059
class FPPack(FPState, FPID):
    """ FSM state "pack": wraps FPPackMod and registers the packed
        value before moving to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ connects the pack module to in_z and forwards the multiplex
            ID when one is in use
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state behaviour: latch ID and packed value, then go to
            "pack_put_z"
        """
        self.idsync(m)
        packed = self.mod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)
        m.next = "pack_put_z"
1079
1080
class FPPutZ(FPState):
    """ Output FSM state: presents in_z (and the ID, if any) on out_z
        and holds the strobe until it is acknowledged, then returns to
        "get_ops".
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid):
        FPState.__init__(self, state)
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ per-state behaviour for the output handshake
        """
        dest = self.out_z

        # forward the multiplex ID only when one is in use
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z.v)

        with m.If(dest.stb & dest.ack):
            # receiver has taken the value: drop strobe and restart
            m.d.sync += dest.stb.eq(0)
            m.next = "get_ops"
        with m.Else():
            # keep (or start) offering the value
            m.d.sync += dest.stb.eq(1)
1101
1102
class FPADDBaseMod(FPID):
    """ The adder's inner state machine: collects stage objects in
        self.states and emits one FSM state per stage.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a  = Signal(width)
        self.in_b  = Signal(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ appends state to the FSM's stage list and returns it, so a
            stage can be created and registered in one expression
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered stage, in registration order
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def _build_front_end(self, m):
        """ registers and wires the stages shared by both pipelines:
            get_ops -> special cases -> denorm -> align -> add0 -> add1.

            Returns (sc, add0, add1): the special-cases stage and the
            two add stages, which the back ends connect to.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # both align variants take the same constructor and setup args
        if self.single_cycle:
            align_cls = FPAddAlignSingle
        else:
            align_cls = FPAddAlignMulti
        alm = self.add_state(align_cls(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        return sc, add0, add1

    def get_longer_fragment(self, m, platform=None):
        """ builds the full pipeline, with normalise, round, corrections
            and pack as separate states
        """
        sc, add0, add1 = self._build_front_end(m)

        # NOTE(review): the ID source below is add0.in_mid, not
        # add1.in_mid (and rn.in_mid / pa.in_mid further down) - kept
        # exactly as before; confirm these all carry the same ID.
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced pipeline, with normalise-to-pack merged
            into a single FPNormToPack state
        """
        sc, add0, add1 = self._build_front_end(m)

        # NOTE(review): ID source is add0.in_mid, kept exactly as before.
        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1232
1233
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd" of the outer machine: owns an FPADDBaseMod
        (itself a state machine), starts it when operands are strobed
        in, and returns to "get_a" once the inner out_z has triggered.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a  = Signal(width)
        self.in_b  = Signal(width)
        # out_z is not created here: setup() stores the caller's out_z

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the inner adder to operands a/b, the caller's out_z
            and (when an ID is in use) in_mid/out_mid, and registers
            the inner adder as submodule "fpadd"
        """
        self.out_z = out_z
        self.out_mid = out_mid

        # operands, trigger handshake and result
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        # multiplex ID: guarded like the other stages, so that building
        # without an ID (in_mid is None) does not call .eq on None
        if self.in_mid is not None:
            m.d.comb += [self.in_mid.eq(in_mid),
                         self.mod.in_mid.eq(self.in_mid),
                         self.out_mid.eq(self.mod.out_mid),
                        ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ per-state behaviour for "fpadd"
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                            ]
        with m.Else():
            # done: acknowledge and go back for the next operand pair.
            # id and value reach out_z/out_mid through the comb links
            # made in setup().
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "get_a"
1323
1324
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.in_a  = FPOp(width)
        self.in_b  = FPOp(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ appends state to the stage list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.in_a = self.in_a
        m.submodules.in_b = self.in_b
        m.submodules.out_z = self.out_z

        # operand a: state "get_a", then on to "get_b"
        geta = FPGetOp("get_a", "get_b", self.in_a, self.width)
        self.add_state(geta)
        geta.setup(m, self.in_a)

        # operand b: state "get_b", then on to "fpadd"
        getb = FPGetOp("get_b", "fpadd", self.in_b, self.width)
        self.add_state(getb)
        getb.setup(m, self.in_b)

        # the adder proper, strobed by operand b's decode signal
        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        self.add_state(ab)
        ab.setup(m, geta.out_op, getb.out_op, getb.out_decode,
                 self.in_mid, self.out_z, self.out_mid)

        # one FSM state per registered stage, in registration order
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1397
1398
if __name__ == "__main__":
    # Development toggle: the first branch builds the complete FPADD.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.in_a.ports() +
                 alu.in_b.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
        main(alu, ports=ports)
    else:
        # NOTE(review): FPADDBase only gains out_z inside setup(), so
        # this branch looks stale - confirm before enabling it.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b] +
                 alu.in_t.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
        main(alu, ports=ports)

1412
1413
1414     # works... but don't use, just do "python fname.py convert -t v"
1415     #print (verilog.convert(alu, ports=[
1416     #                        ports=alu.in_a.ports() + \
1417     #                              alu.in_b.ports() + \
1418     #                              alu.out_z.ports())