src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8
   9 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  10 from fpbase import MultiShiftRMerge
  11 #from fpbase import FPNumShiftMultiRight
  12
  13 class FPState(FPBase):
  14     def __init__(self, state_from):
  15         self.state_from = state_from
  16
  17     def set_inputs(self, inputs):
  18         self.inputs = inputs
  19         for k,v in inputs.items():
  20             setattr(self, k, v)
  21
  22     def set_outputs(self, outputs):
  23         self.outputs = outputs
  24         for k,v in outputs.items():
  25             setattr(self, k, v)
  26
  27
  28 class FPGetOpMod:
  29     def __init__(self, width):
  30         self.in_op = FPOp(width)
  31         self.out_op = FPNumIn(self.in_op, width)
  32         self.out_decode = Signal(reset_less=True)
  33
  34     def elaborate(self, platform):
  35         m = Module()
  36         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
  37         #m.submodules.get_op_in = self.in_op
  38         m.submodules.get_op_out = self.out_op
  39         with m.If(self.out_decode):
  40             m.d.comb += [
  41                 self.out_op.decode(self.in_op.v),
  42             ]
  43         return m
  44
  45
  46 class FPGetOp(FPState):
  47     """ gets operand
  48     """
  49
  50     def __init__(self, in_state, out_state, in_op, width):
  51         FPState.__init__(self, in_state)
  52         self.out_state = out_state
  53         self.mod = FPGetOpMod(width)
  54         self.in_op = in_op
  55         self.out_op = FPNumIn(in_op, width)
  56         self.out_decode = Signal(reset_less=True)
  57
  58     def setup(self, m, in_op):
  59         """ links module to inputs and outputs
  60         """
  61         setattr(m.submodules, self.state_from, self.mod)
  62         m.d.comb += self.mod.in_op.copy(in_op)
  63         m.d.comb += self.out_op.v.eq(self.mod.out_op.v)
  64         m.d.comb += self.out_decode.eq(self.mod.out_decode)
  65
  66     def action(self, m):
  67         with m.If(self.out_decode):
  68             m.next = self.out_state
  69             m.d.sync += [
  70                 self.in_op.ack.eq(0),
  71                 self.out_op.copy(self.mod.out_op)
  72             ]
  73         with m.Else():
  74             m.d.sync += self.in_op.ack.eq(1)
  75
  76
  77 class FPAddSpecialCasesMod:
  78     """ special cases: NaNs, infs, zeros, denormalised
  79         NOTE: some of these are unique to add.  see "Special Operations"
  80         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
  81     """
  82
  83     def __init__(self, width):
  84         self.in_a = FPNumBase(width)
  85         self.in_b = FPNumBase(width)
  86         self.out_z = FPNumOut(width, False)
  87         self.out_do_z = Signal(reset_less=True)
  88
  89     def setup(self, m, in_a, in_b, out_z, out_do_z):
  90         """ links module to inputs and outputs
  91         """
  92         m.d.comb += self.in_a.copy(in_a)
  93         m.d.comb += self.in_b.copy(in_b)
  94         #m.d.comb += out_z.v.eq(self.out_z.v)
  95         m.d.comb += out_do_z.eq(self.out_do_z)
  96
  97     def elaborate(self, platform):
  98         m = Module()
  99
 100         m.submodules.sc_in_a = self.in_a
 101         m.submodules.sc_in_b = self.in_b
 102         m.submodules.sc_out_z = self.out_z
 103
 104         s_nomatch = Signal()
 105         m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)
 106
 107         m_match = Signal()
 108         m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)
 109
 110         # if a is NaN or b is NaN return NaN
 111         with m.If(self.in_a.is_nan | self.in_b.is_nan):
 112             m.d.comb += self.out_do_z.eq(1)
 113             m.d.comb += self.out_z.nan(0)
 114
 115         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 116         # under review
 117
 118         ## if a is zero and b is NaN return -b
 119         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 120         #    m.d.comb += self.out_do_z.eq(1)
 121         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 122
 123         ## if b is zero and a is NaN return -a
 124         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 125         #    m.d.comb += self.out_do_z.eq(1)
 126         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 127
 128         ## if a is -zero and b is NaN return -b
 129         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 130         #    m.d.comb += self.out_do_z.eq(1)
 131         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 132
 133         ## if b is -zero and a is NaN return -a
 134         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 135         #    m.d.comb += self.out_do_z.eq(1)
 136         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 137
 138         # if a is inf return inf (or NaN)
 139         with m.Elif(self.in_a.is_inf):
 140             m.d.comb += self.out_do_z.eq(1)
 141             m.d.comb += self.out_z.inf(self.in_a.s)
 142             # if a is inf and signs don't match return NaN
 143             with m.If(self.in_b.exp_128 & s_nomatch):
 144                 m.d.comb += self.out_z.nan(0)
 145
 146         # if b is inf return inf
 147         with m.Elif(self.in_b.is_inf):
 148             m.d.comb += self.out_do_z.eq(1)
 149             m.d.comb += self.out_z.inf(self.in_b.s)
 150
 151         # if a is zero and b zero return signed-a/b
 152         with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
 153             m.d.comb += self.out_do_z.eq(1)
 154             m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
 155                                           self.in_b.e,
 156                                           self.in_b.m[3:-1])
 157
 158         # if a is zero return b
 159         with m.Elif(self.in_a.is_zero):
 160             m.d.comb += self.out_do_z.eq(1)
 161             m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
 162                                       self.in_b.m[3:-1])
 163
 164         # if b is zero return a
 165         with m.Elif(self.in_b.is_zero):
 166             m.d.comb += self.out_do_z.eq(1)
 167             m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
 168                                       self.in_a.m[3:-1])
 169
 170         # if a equal to -b return zero (+ve zero)
 171         with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
 172             m.d.comb += self.out_do_z.eq(1)
 173             m.d.comb += self.out_z.zero(0)
 174
 175         # Denormalised Number checks
 176         with m.Else():
 177             m.d.comb += self.out_do_z.eq(0)
 178
 179         return m
 180
 181
 182 class FPAddSpecialCases(FPState):
 183     """ special cases: NaNs, infs, zeros, denormalised
 184         NOTE: some of these are unique to add.  see "Special Operations"
 185         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 186     """
 187
 188     def __init__(self, width):
 189         FPState.__init__(self, "special_cases")
 190         self.mod = FPAddSpecialCasesMod(width)
 191         self.out_z = FPNumOut(width, False)
 192         self.out_do_z = Signal(reset_less=True)
 193
 194     def action(self, m):
 195         with m.If(self.out_do_z):
 196             m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 197             m.next = "put_z"
 198         with m.Else():
 199             m.next = "denormalise"
 200
 201
 202 class FPAddDeNormMod(FPState):
 203
 204     def __init__(self, width):
 205         self.in_a = FPNumBase(width)
 206         self.in_b = FPNumBase(width)
 207         self.out_a = FPNumBase(width)
 208         self.out_b = FPNumBase(width)
 209
 210     def elaborate(self, platform):
 211         m = Module()
 212         m.submodules.denorm_in_a = self.in_a
 213         m.submodules.denorm_in_b = self.in_b
 214         m.submodules.denorm_out_a = self.out_a
 215         m.submodules.denorm_out_b = self.out_b
 216         # hmmm, don't like repeating identical code
 217         m.d.comb += self.out_a.copy(self.in_a)
 218         with m.If(self.in_a.exp_n127):
 219             m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
 220         with m.Else():
 221             m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit
 222
 223         m.d.comb += self.out_b.copy(self.in_b)
 224         with m.If(self.in_b.exp_n127):
 225             m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit a exponent
 226         with m.Else():
 227             m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit
 228
 229         return m
 230
 231
 232 class FPAddDeNorm(FPState):
 233
 234     def __init__(self, width):
 235         FPState.__init__(self, "denormalise")
 236         self.mod = FPAddDeNormMod(width)
 237         self.out_a = FPNumBase(width)
 238         self.out_b = FPNumBase(width)
 239
 240     def setup(self, m, in_a, in_b):
 241         """ links module to inputs and outputs
 242         """
 243         m.submodules.denormalise = self.mod
 244         m.d.comb += self.mod.in_a.copy(in_a)
 245         m.d.comb += self.mod.in_b.copy(in_b)
 246
 247     def action(self, m):
 248         # Denormalised Number checks
 249         m.next = "align"
 250         m.d.sync += self.out_a.copy(self.mod.out_a)
 251         m.d.sync += self.out_b.copy(self.mod.out_b)
 252
 253
 254 class FPAddAlignMultiMod(FPState):
 255
 256     def __init__(self, width):
 257         self.in_a = FPNumBase(width)
 258         self.in_b = FPNumBase(width)
 259         self.out_a = FPNumIn(None, width)
 260         self.out_b = FPNumIn(None, width)
 261         self.exp_eq = Signal(reset_less=True)
 262
 263     def elaborate(self, platform):
 264         # This one however (single-cycle) will do the shift
 265         # in one go.
 266
 267         m = Module()
 268
 269         m.submodules.align_in_a = self.in_a
 270         m.submodules.align_in_b = self.in_b
 271         m.submodules.align_out_a = self.out_a
 272         m.submodules.align_out_b = self.out_b
 273
 274         # NOTE: this does *not* do single-cycle multi-shifting,
 275         #       it *STAYS* in the align state until exponents match
 276
 277         # exponent of a greater than b: shift b down
 278         m.d.comb += self.exp_eq.eq(0)
 279         m.d.comb += self.out_a.copy(self.in_a)
 280         m.d.comb += self.out_b.copy(self.in_b)
 281         agtb = Signal(reset_less=True)
 282         altb = Signal(reset_less=True)
 283         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 284         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 285         with m.If(agtb):
 286             m.d.comb += self.out_b.shift_down(self.in_b)
 287         # exponent of b greater than a: shift a down
 288         with m.Elif(altb):
 289             m.d.comb += self.out_a.shift_down(self.in_a)
 290         # exponents equal: move to next stage.
 291         with m.Else():
 292             m.d.comb += self.exp_eq.eq(1)
 293         return m
 294
 295
 296 class FPAddAlignMulti(FPState):
 297
 298     def __init__(self, width):
 299         FPState.__init__(self, "align")
 300         self.mod = FPAddAlignMultiMod(width)
 301         self.out_a = FPNumIn(None, width)
 302         self.out_b = FPNumIn(None, width)
 303         self.exp_eq = Signal(reset_less=True)
 304
 305     def setup(self, m, in_a, in_b):
 306         """ links module to inputs and outputs
 307         """
 308         m.submodules.align = self.mod
 309         m.d.comb += self.mod.in_a.copy(in_a)
 310         m.d.comb += self.mod.in_b.copy(in_b)
 311         #m.d.comb += self.out_a.copy(self.mod.out_a)
 312         #m.d.comb += self.out_b.copy(self.mod.out_b)
 313         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 314
 315     def action(self, m):
 316         m.d.sync += self.out_a.copy(self.mod.out_a)
 317         m.d.sync += self.out_b.copy(self.mod.out_b)
 318         with m.If(self.exp_eq):
 319             m.next = "add_0"
 320
 321
 322 class FPAddAlignSingleMod:
 323
 324     def __init__(self, width):
 325         self.width = width
 326         self.in_a = FPNumBase(width)
 327         self.in_b = FPNumBase(width)
 328         self.out_a = FPNumIn(None, width)
 329         self.out_b = FPNumIn(None, width)
 330
 331     def elaborate(self, platform):
 332         """ Aligns A against B or B against A, depending on which has the
 333             greater exponent.  This is done in a *single* cycle using
 334             variable-width bit-shift
 335
 336             the shifter used here is quite expensive in terms of gates.
 337             Mux A or B in (and out) into temporaries, as only one of them
 338             needs to be aligned against the other
 339         """
 340         m = Module()
 341
 342         m.submodules.align_in_a = self.in_a
 343         m.submodules.align_in_b = self.in_b
 344         m.submodules.align_out_a = self.out_a
 345         m.submodules.align_out_b = self.out_b
 346
 347         # temporary (muxed) input and output to be shifted
 348         t_inp = FPNumBase(self.width)
 349         t_out = FPNumIn(None, self.width)
 350         espec = (len(self.in_a.e), True)
 351         msr = MultiShiftRMerge(self.in_a.m_width, espec)
 352         m.submodules.align_t_in = t_inp
 353         m.submodules.align_t_out = t_out
 354         m.submodules.multishift_r = msr
 355
 356         ediff = Signal(espec, reset_less=True)
 357         ediffr = Signal(espec, reset_less=True)
 358         tdiff = Signal(espec, reset_less=True)
 359         elz = Signal(reset_less=True)
 360         egz = Signal(reset_less=True)
 361
 362         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 363         m.d.comb += msr.inp.eq(t_inp.m)
 364         m.d.comb += msr.diff.eq(tdiff)
 365         m.d.comb += t_out.m.eq(msr.m)
 366         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 367         m.d.comb += t_out.s.eq(t_inp.s)
 368
 369         m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
 370         m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
 371         m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
 372         m.d.comb += egz.eq(self.in_a.e > self.in_b.e)
 373
 374         # default: A-exp == B-exp, A and B untouched (fall through)
 375         m.d.comb += self.out_a.copy(self.in_a)
 376         m.d.comb += self.out_b.copy(self.in_b)
 377         # only one shifter (muxed)
 378         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 379         # exponent of a greater than b: shift b down
 380         with m.If(egz):
 381             m.d.comb += [t_inp.copy(self.in_b),
 382                          tdiff.eq(ediff),
 383                          self.out_b.copy(t_out),
 384                          self.out_b.s.eq(self.in_b.s), # whoops forgot sign
 385                         ]
 386         # exponent of b greater than a: shift a down
 387         with m.Elif(elz):
 388             m.d.comb += [t_inp.copy(self.in_a),
 389                          tdiff.eq(ediffr),
 390                          self.out_a.copy(t_out),
 391                          self.out_a.s.eq(self.in_a.s), # whoops forgot sign
 392                         ]
 393         return m
 394
 395
 396 class FPAddAlignSingle(FPState):
 397
 398     def __init__(self, width):
 399         FPState.__init__(self, "align")
 400         self.mod = FPAddAlignSingleMod(width)
 401         self.out_a = FPNumIn(None, width)
 402         self.out_b = FPNumIn(None, width)
 403
 404     def setup(self, m, in_a, in_b):
 405         """ links module to inputs and outputs
 406         """
 407         m.submodules.align = self.mod
 408         m.d.comb += self.mod.in_a.copy(in_a)
 409         m.d.comb += self.mod.in_b.copy(in_b)
 410
 411     def action(self, m):
 412         # NOTE: could be done as comb
 413         m.d.sync += self.out_a.copy(self.mod.out_a)
 414         m.d.sync += self.out_b.copy(self.mod.out_b)
 415         m.next = "add_0"
 416
 417
 418 class FPAddStage0Mod:
 419
 420     def __init__(self, width):
 421         self.in_a = FPNumBase(width)
 422         self.in_b = FPNumBase(width)
 423         self.in_z = FPNumBase(width, False)
 424         self.out_z = FPNumBase(width, False)
 425         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 426
 427     def elaborate(self, platform):
 428         m = Module()
 429         m.submodules.add0_in_a = self.in_a
 430         m.submodules.add0_in_b = self.in_b
 431         m.submodules.add0_out_z = self.out_z
 432
 433         m.d.comb += self.out_z.e.eq(self.in_a.e)
 434
 435         # store intermediate tests (and zero-extended mantissas)
 436         seq = Signal(reset_less=True)
 437         mge = Signal(reset_less=True)
 438         am0 = Signal(len(self.in_a.m)+1, reset_less=True)
 439         bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
 440         m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
 441                      mge.eq(self.in_a.m >= self.in_b.m),
 442                      am0.eq(Cat(self.in_a.m, 0)),
 443                      bm0.eq(Cat(self.in_b.m, 0))
 444                     ]
 445         # same-sign (both negative or both positive) add mantissas
 446         with m.If(seq):
 447             m.d.comb += [
 448                 self.out_tot.eq(am0 + bm0),
 449                 self.out_z.s.eq(self.in_a.s)
 450             ]
 451         # a mantissa greater than b, use a
 452         with m.Elif(mge):
 453             m.d.comb += [
 454                 self.out_tot.eq(am0 - bm0),
 455                 self.out_z.s.eq(self.in_a.s)
 456             ]
 457         # b mantissa greater than a, use b
 458         with m.Else():
 459             m.d.comb += [
 460                 self.out_tot.eq(bm0 - am0),
 461                 self.out_z.s.eq(self.in_b.s)
 462         ]
 463         return m
 464
 465
 466 class FPAddStage0(FPState):
 467     """ First stage of add.  covers same-sign (add) and subtract
 468         special-casing when mantissas are greater or equal, to
 469         give greatest accuracy.
 470     """
 471
 472     def __init__(self, width):
 473         FPState.__init__(self, "add_0")
 474         self.mod = FPAddStage0Mod(width)
 475         self.out_z = FPNumBase(width, False)
 476         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 477
 478     def setup(self, m, in_a, in_b):
 479         """ links module to inputs and outputs
 480         """
 481         m.submodules.add0 = self.mod
 482
 483         m.d.comb += self.mod.in_a.copy(in_a)
 484         m.d.comb += self.mod.in_b.copy(in_b)
 485
 486     def action(self, m):
 487         m.next = "add_1"
 488         # NOTE: these could be done as combinatorial (merge add0+add1)
 489         m.d.sync += self.out_z.copy(self.mod.out_z)
 490         m.d.sync += self.out_tot.eq(self.mod.out_tot)
 491
 492
 493 class FPAddStage1Mod(FPState):
 494     """ Second stage of add: preparation for normalisation.
 495         detects when tot sum is too big (tot[27] is kinda a carry bit)
 496     """
 497
 498     def __init__(self, width):
 499         self.out_norm = Signal(reset_less=True)
 500         self.in_z = FPNumBase(width, False)
 501         self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
 502         self.out_z = FPNumBase(width, False)
 503         self.out_of = Overflow()
 504
 505     def elaborate(self, platform):
 506         m = Module()
 507         #m.submodules.norm1_in_overflow = self.in_of
 508         #m.submodules.norm1_out_overflow = self.out_of
 509         #m.submodules.norm1_in_z = self.in_z
 510         #m.submodules.norm1_out_z = self.out_z
 511         m.d.comb += self.out_z.copy(self.in_z)
 512         # tot[27] gets set when the sum overflows. shift result down
 513         with m.If(self.in_tot[-1]):
 514             m.d.comb += [
 515                 self.out_z.m.eq(self.in_tot[4:]),
 516                 self.out_of.m0.eq(self.in_tot[4]),
 517                 self.out_of.guard.eq(self.in_tot[3]),
 518                 self.out_of.round_bit.eq(self.in_tot[2]),
 519                 self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
 520                 self.out_z.e.eq(self.in_z.e + 1)
 521         ]
 522         # tot[27] zero case
 523         with m.Else():
 524             m.d.comb += [
 525                 self.out_z.m.eq(self.in_tot[3:]),
 526                 self.out_of.m0.eq(self.in_tot[3]),
 527                 self.out_of.guard.eq(self.in_tot[2]),
 528                 self.out_of.round_bit.eq(self.in_tot[1]),
 529                 self.out_of.sticky.eq(self.in_tot[0])
 530         ]
 531         return m
 532
 533
 534 class FPAddStage1(FPState):
 535
 536     def __init__(self, width):
 537         FPState.__init__(self, "add_1")
 538         self.mod = FPAddStage1Mod(width)
 539         self.out_z = FPNumBase(width, False)
 540         self.out_of = Overflow()
 541         self.norm_stb = Signal()
 542
 543     def setup(self, m, in_tot, in_z):
 544         """ links module to inputs and outputs
 545         """
 546         m.submodules.add1 = self.mod
 547
 548         m.d.comb += self.mod.in_z.copy(in_z)
 549         m.d.comb += self.mod.in_tot.eq(in_tot)
 550
 551         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 552
 553     def action(self, m):
 554         m.submodules.add1_out_overflow = self.out_of
 555         m.d.sync += self.out_of.copy(self.mod.out_of)
 556         m.d.sync += self.out_z.copy(self.mod.out_z)
 557         m.d.sync += self.norm_stb.eq(1)
 558         m.next = "normalise_1"
 559
 560
 561 class FPNorm1ModSingle:
 562
 563     def __init__(self, width):
 564         self.width = width
 565         self.in_select = Signal(reset_less=True)
 566         self.out_norm = Signal(reset_less=True)
 567         self.in_z = FPNumBase(width, False)
 568         self.in_of = Overflow()
 569         self.temp_z = FPNumBase(width, False)
 570         self.temp_of = Overflow()
 571         self.out_z = FPNumBase(width, False)
 572         self.out_of = Overflow()
 573
 574     def elaborate(self, platform):
 575         m = Module()
 576
 577         mwid = self.out_z.m_width+2
 578         pe = PriorityEncoder(mwid)
 579         m.submodules.norm_pe = pe
 580
 581         m.submodules.norm1_out_z = self.out_z
 582         m.submodules.norm1_out_overflow = self.out_of
 583         m.submodules.norm1_temp_z = self.temp_z
 584         m.submodules.norm1_temp_of = self.temp_of
 585         m.submodules.norm1_in_z = self.in_z
 586         m.submodules.norm1_in_overflow = self.in_of
 587
 588         in_z = FPNumBase(self.width, False)
 589         in_of = Overflow()
 590         m.submodules.norm1_insel_z = in_z
 591         m.submodules.norm1_insel_overflow = in_of
 592
 593         espec = (len(in_z.e), True)
 594         ediff_n126 = Signal(espec, reset_less=True)
 595         msr = MultiShiftRMerge(mwid, espec)
 596         m.submodules.multishift_r = msr
 597
 598         # select which of temp or in z/of to use
 599         with m.If(self.in_select):
 600             m.d.comb += in_z.copy(self.in_z)
 601             m.d.comb += in_of.copy(self.in_of)
 602         with m.Else():
 603             m.d.comb += in_z.copy(self.temp_z)
 604             m.d.comb += in_of.copy(self.temp_of)
 605         # initialise out from in (overridden below)
 606         m.d.comb += self.out_z.copy(in_z)
 607         m.d.comb += self.out_of.copy(in_of)
 608         # normalisation increase/decrease conditions
 609         decrease = Signal(reset_less=True)
 610         increase = Signal(reset_less=True)
 611         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 612         m.d.comb += increase.eq(in_z.exp_lt_n126)
 613         m.d.comb += self.out_norm.eq(0) # loop-end condition
 614         # decrease exponent
 615         with m.If(decrease):
 616             # *sigh* not entirely obvious: count leading zeros (clz)
 617             # with a PriorityEncoder: to find from the MSB
 618             # we reverse the order of the bits.
 619             temp_m = Signal(mwid, reset_less=True)
 620             temp_s = Signal(mwid+1, reset_less=True)
 621             clz = Signal((len(in_z.e), True), reset_less=True)
 622             # make sure that the amount to decrease by does NOT
 623             # go below the minimum non-INF/NaN exponent
 624             limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
 625                          in_z.exp_sub_n126)
 626             m.d.comb += [
 627                 # cat round and guard bits back into the mantissa
 628                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
 629                 pe.i.eq(temp_m[::-1]),          # inverted
 630                 clz.eq(limclz),                 # count zeros from MSB down
 631                 temp_s.eq(temp_m << clz),       # shift mantissa UP
 632                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
 633                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
 634                 self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
 635                 # overflow in bits 0..1: got shifted too (leave sticky)
 636                 self.out_of.guard.eq(temp_s[1]),     # guard
 637                 self.out_of.round_bit.eq(temp_s[0]), # round
 638             ]
 639         # increase exponent
 640         with m.Elif(increase):
 641             temp_m = Signal(mwid+1, reset_less=True)
 642             m.d.comb += [
 643                 temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
 644                               in_z.m)),
 645                 ediff_n126.eq(in_z.N126 - in_z.e),
 646                 # connect multi-shifter to inp/out mantissa (and ediff)
 647                 msr.inp.eq(temp_m),
 648                 msr.diff.eq(ediff_n126),
 649                 self.out_z.m.eq(msr.m[3:]),
 650                 self.out_of.m0.eq(temp_s[3]),   # copy of mantissa[0]
 651                 # overflow in bits 0..1: got shifted too (leave sticky)
 652                 self.out_of.guard.eq(temp_s[2]),     # guard
 653                 self.out_of.round_bit.eq(temp_s[1]), # round
 654                 self.out_of.sticky.eq(temp_s[0]), # sticky
 655                 self.out_z.e.eq(in_z.e + ediff_n126),
 656             ]
 657
 658         return m
 659
 660
 661 class FPNorm1ModMulti:
 662
 663     def __init__(self, width, single_cycle=True):
 664         self.width = width
 665         self.in_select = Signal(reset_less=True)
 666         self.out_norm = Signal(reset_less=True)
 667         self.in_z = FPNumBase(width, False)
 668         self.in_of = Overflow()
 669         self.temp_z = FPNumBase(width, False)
 670         self.temp_of = Overflow()
 671         self.out_z = FPNumBase(width, False)
 672         self.out_of = Overflow()
 673
 674     def elaborate(self, platform):
 675         m = Module()
 676
 677         m.submodules.norm1_out_z = self.out_z
 678         m.submodules.norm1_out_overflow = self.out_of
 679         m.submodules.norm1_temp_z = self.temp_z
 680         m.submodules.norm1_temp_of = self.temp_of
 681         m.submodules.norm1_in_z = self.in_z
 682         m.submodules.norm1_in_overflow = self.in_of
 683
 684         in_z = FPNumBase(self.width, False)
 685         in_of = Overflow()
 686         m.submodules.norm1_insel_z = in_z
 687         m.submodules.norm1_insel_overflow = in_of
 688
 689         # select which of temp or in z/of to use
 690         with m.If(self.in_select):
 691             m.d.comb += in_z.copy(self.in_z)
 692             m.d.comb += in_of.copy(self.in_of)
 693         with m.Else():
 694             m.d.comb += in_z.copy(self.temp_z)
 695             m.d.comb += in_of.copy(self.temp_of)
 696         # initialise out from in (overridden below)
 697         m.d.comb += self.out_z.copy(in_z)
 698         m.d.comb += self.out_of.copy(in_of)
 699         # normalisation increase/decrease conditions
 700         decrease = Signal(reset_less=True)
 701         increase = Signal(reset_less=True)
 702         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 703         m.d.comb += increase.eq(in_z.exp_lt_n126)
 704         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
 705         # decrease exponent
 706         with m.If(decrease):
 707             m.d.comb += [
 708                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
 709                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
 710                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
 711                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
 712                 self.out_of.round_bit.eq(0),        # reset round bit
 713                 self.out_of.m0.eq(in_of.guard),
 714             ]
 715         # increase exponent
 716         with m.Elif(increase):
 717             m.d.comb += [
 718                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
 719                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
 720                 self.out_of.guard.eq(in_z.m[0]),
 721                 self.out_of.m0.eq(in_z.m[1]),
 722                 self.out_of.round_bit.eq(in_of.guard),
 723                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
 724             ]
 725
 726         return m
 727
 728
 729 class FPNorm1(FPState):
 730
 731     def __init__(self, width, single_cycle=True):
 732         FPState.__init__(self, "normalise_1")
 733         if single_cycle:
 734             self.mod = FPNorm1ModSingle(width)
 735         else:
 736             self.mod = FPNorm1ModMulti(width)
 737         self.stb = Signal(reset_less=True)
 738         self.ack = Signal(reset=0, reset_less=True)
 739         self.out_norm = Signal(reset_less=True)
 740         self.in_accept = Signal(reset_less=True)
 741         self.temp_z = FPNumBase(width)
 742         self.temp_of = Overflow()
 743         self.out_z = FPNumBase(width)
 744         self.out_roundz = Signal(reset_less=True)
 745
 746     def setup(self, m, in_z, in_of, norm_stb):
 747         """ links module to inputs and outputs
 748         """
 749         m.submodules.normalise_1 = self.mod
 750
 751         m.d.comb += self.mod.in_z.copy(in_z)
 752         m.d.comb += self.mod.in_of.copy(in_of)
 753
 754         m.d.comb += self.mod.in_select.eq(self.in_accept)
 755         m.d.comb += self.mod.temp_z.copy(self.temp_z)
 756         m.d.comb += self.mod.temp_of.copy(self.temp_of)
 757
 758         m.d.comb += self.out_z.copy(self.mod.out_z)
 759         m.d.comb += self.out_norm.eq(self.mod.out_norm)
 760
 761         m.d.comb += self.stb.eq(norm_stb)
 762         m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state
 763
 764     def action(self, m):
 765
 766         m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
 767         m.d.sync += self.temp_of.copy(self.mod.out_of)
 768         m.d.sync += self.temp_z.copy(self.out_z)
 769         with m.If(self.out_norm):
 770             with m.If(self.in_accept):
 771                 m.d.sync += [
 772                     self.ack.eq(1),
 773                 ]
 774             with m.Else():
 775                 m.d.sync += self.ack.eq(0)
 776         with m.Else():
 777             # normalisation not required (or done).
 778             m.next = "round"
 779             m.d.sync += self.ack.eq(1)
 780             m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
 781
 782
 783 class FPRoundMod:
 784
 785     def __init__(self, width):
 786         self.in_roundz = Signal(reset_less=True)
 787         self.in_z = FPNumBase(width, False)
 788         self.out_z = FPNumBase(width, False)
 789
 790     def elaborate(self, platform):
 791         m = Module()
 792         m.d.comb += self.out_z.copy(self.in_z)
 793         with m.If(self.in_roundz):
 794             m.d.comb += self.out_z.m.eq(self.in_z.m + 1) # mantissa rounds up
 795             with m.If(self.in_z.m == self.in_z.m1s): # all 1s
 796                 m.d.comb += self.out_z.e.eq(self.in_z.e + 1) # exponent up
 797         return m
 798
 799
 800 class FPRound(FPState):
 801
 802     def __init__(self, width):
 803         FPState.__init__(self, "round")
 804         self.mod = FPRoundMod(width)
 805         self.out_z = FPNumBase(width)
 806
 807     def setup(self, m, in_z, roundz):
 808         """ links module to inputs and outputs
 809         """
 810         m.submodules.roundz = self.mod
 811
 812         m.d.comb += self.mod.in_z.copy(in_z)
 813         m.d.comb += self.mod.in_roundz.eq(roundz)
 814
 815     def action(self, m):
 816         m.d.sync += self.out_z.copy(self.mod.out_z)
 817         m.next = "corrections"
 818
 819
 820 class FPCorrectionsMod:
 821
 822     def __init__(self, width):
 823         self.in_z = FPNumOut(width, False)
 824         self.out_z = FPNumOut(width, False)
 825
 826     def elaborate(self, platform):
 827         m = Module()
 828         m.submodules.corr_in_z = self.in_z
 829         m.submodules.corr_out_z = self.out_z
 830         m.d.comb += self.out_z.copy(self.in_z)
 831         with m.If(self.in_z.is_denormalised):
 832             m.d.comb += self.out_z.e.eq(self.in_z.N127)
 833         return m
 834
 835
 836 class FPCorrections(FPState):
 837
 838     def __init__(self, width):
 839         FPState.__init__(self, "corrections")
 840         self.mod = FPCorrectionsMod(width)
 841         self.out_z = FPNumBase(width)
 842
 843     def setup(self, m, in_z):
 844         """ links module to inputs and outputs
 845         """
 846         m.submodules.corrections = self.mod
 847         m.d.comb += self.mod.in_z.copy(in_z)
 848
 849     def action(self, m):
 850         m.d.sync += self.out_z.copy(self.mod.out_z)
 851         m.next = "pack"
 852
 853
 854 class FPPackMod:
 855
 856     def __init__(self, width):
 857         self.in_z = FPNumOut(width, False)
 858         self.out_z = FPNumOut(width, False)
 859
 860     def elaborate(self, platform):
 861         m = Module()
 862         m.submodules.pack_in_z = self.in_z
 863         with m.If(self.in_z.is_overflowed):
 864             m.d.comb += self.out_z.inf(self.in_z.s)
 865         with m.Else():
 866             m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e, self.in_z.m)
 867         return m
 868
 869
 870 class FPPack(FPState):
 871
 872     def __init__(self, width):
 873         FPState.__init__(self, "pack")
 874         self.mod = FPPackMod(width)
 875         self.out_z = FPNumOut(width, False)
 876
 877     def setup(self, m, in_z):
 878         """ links module to inputs and outputs
 879         """
 880         m.submodules.pack = self.mod
 881         m.d.comb += self.mod.in_z.copy(in_z)
 882
 883     def action(self, m):
 884         m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
 885         m.next = "pack_put_z"
 886
 887
 888 class FPPutZ(FPState):
 889
 890     def __init__(self, state, in_z, out_z):
 891         FPState.__init__(self, state)
 892         self.in_z = in_z
 893         self.out_z = out_z
 894
 895     def action(self, m):
 896         m.d.sync += [
 897           self.out_z.v.eq(self.in_z.v)
 898         ]
 899         with m.If(self.out_z.stb & self.out_z.ack):
 900             m.d.sync += self.out_z.stb.eq(0)
 901             m.next = "get_a"
 902         with m.Else():
 903             m.d.sync += self.out_z.stb.eq(1)
 904
 905
 906 class FPADD:
 907
 908     def __init__(self, width, single_cycle=False):
 909         self.width = width
 910         self.single_cycle = single_cycle
 911
 912         self.in_a  = FPOp(width)
 913         self.in_b  = FPOp(width)
 914         self.out_z = FPOp(width)
 915
 916         self.states = []
 917
 918     def add_state(self, state):
 919         self.states.append(state)
 920         return state
 921
 922     def get_fragment(self, platform=None):
 923         """ creates the HDL code-fragment for FPAdd
 924         """
 925         m = Module()
 926         m.submodules.in_a = self.in_a
 927         m.submodules.in_b = self.in_b
 928         m.submodules.out_z = self.out_z
 929
 930         geta = self.add_state(FPGetOp("get_a", "get_b",
 931                                       self.in_a, self.width))
 932         geta.setup(m, self.in_a)
 933         a = geta.out_op
 934
 935         getb = self.add_state(FPGetOp("get_b", "special_cases",
 936                                       self.in_b, self.width))
 937         getb.setup(m, self.in_b)
 938         b = getb.out_op
 939
 940         sc = self.add_state(FPAddSpecialCases(self.width))
 941         sc.mod.setup(m, a, b, sc.out_z, sc.out_do_z)
 942         m.submodules.specialcases = sc.mod
 943
 944         dn = self.add_state(FPAddDeNorm(self.width))
 945         dn.setup(m, a, b)
 946
 947         if self.single_cycle:
 948             alm = self.add_state(FPAddAlignSingle(self.width))
 949             alm.setup(m, dn.out_a, dn.out_b)
 950         else:
 951             alm = self.add_state(FPAddAlignMulti(self.width))
 952             alm.setup(m, dn.out_a, dn.out_b)
 953
 954         add0 = self.add_state(FPAddStage0(self.width))
 955         add0.setup(m, alm.out_a, alm.out_b)
 956
 957         add1 = self.add_state(FPAddStage1(self.width))
 958         add1.setup(m, add0.out_tot, add0.out_z)
 959
 960         n1 = self.add_state(FPNorm1(self.width))
 961         n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb)
 962
 963         rn = self.add_state(FPRound(self.width))
 964         rn.setup(m, n1.out_z, n1.out_roundz)
 965
 966         cor = self.add_state(FPCorrections(self.width))
 967         cor.setup(m, rn.out_z)
 968
 969         pa = self.add_state(FPPack(self.width))
 970         pa.setup(m, cor.out_z)
 971
 972         ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z))
 973
 974         pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z))
 975
 976         with m.FSM() as fsm:
 977
 978             for state in self.states:
 979                 with m.State(state.state_from):
 980                     state.action(m)
 981
 982         return m
 983
 984
 985 if __name__ == "__main__":
 986     alu = FPADD(width=32, single_cycle=True)
 987     main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
 988
 989
 990     # works... but don't use, just do "python fname.py convert -t v"
 991     #print (verilog.convert(alu, ports=[
 992     #                        ports=alu.in_a.ports() + \
 993     #                              alu.in_b.ports() + \
 994     #                              alu.out_z.ports())