src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8
   9 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  10 from fpbase import MultiShiftRMerge, Trigger
  11 #from fpbase import FPNumShiftMultiRight
  12
  13
  14 class FPState(FPBase):
  15     def __init__(self, state_from):
  16         self.state_from = state_from
  17
  18     def set_inputs(self, inputs):
  19         self.inputs = inputs
  20         for k,v in inputs.items():
  21             setattr(self, k, v)
  22
  23     def set_outputs(self, outputs):
  24         self.outputs = outputs
  25         for k,v in outputs.items():
  26             setattr(self, k, v)
  27
  28
  29 class FPGetOpMod:
  30     def __init__(self, width):
  31         self.in_op = FPOp(width)
  32         self.out_op = Signal(width)
  33         self.out_decode = Signal(reset_less=True)
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
  38         m.submodules.get_op_in = self.in_op
  39         #m.submodules.get_op_out = self.out_op
  40         with m.If(self.out_decode):
  41             m.d.comb += [
  42                 self.out_op.eq(self.in_op.v),
  43             ]
  44         return m
  45
  46
  47 class FPGetOp(FPState):
  48     """ gets operand
  49     """
  50
  51     def __init__(self, in_state, out_state, in_op, width):
  52         FPState.__init__(self, in_state)
  53         self.out_state = out_state
  54         self.mod = FPGetOpMod(width)
  55         self.in_op = in_op
  56         self.out_op = Signal(width)
  57         self.out_decode = Signal(reset_less=True)
  58
  59     def setup(self, m, in_op):
  60         """ links module to inputs and outputs
  61         """
  62         setattr(m.submodules, self.state_from, self.mod)
  63         m.d.comb += self.mod.in_op.copy(in_op)
  64         #m.d.comb += self.out_op.eq(self.mod.out_op)
  65         m.d.comb += self.out_decode.eq(self.mod.out_decode)
  66
  67     def action(self, m):
  68         with m.If(self.out_decode):
  69             m.next = self.out_state
  70             m.d.sync += [
  71                 self.in_op.ack.eq(0),
  72                 self.out_op.eq(self.mod.out_op)
  73             ]
  74         with m.Else():
  75             m.d.sync += self.in_op.ack.eq(1)
  76
  77
  78 class FPGet2OpMod(Trigger):
  79     def __init__(self, width):
  80         Trigger.__init__(self)
  81         self.in_op1 = Signal(width, reset_less=True)
  82         self.in_op2 = Signal(width, reset_less=True)
  83         self.out_op1 = FPNumIn(None, width)
  84         self.out_op2 = FPNumIn(None, width)
  85
  86     def elaborate(self, platform):
  87         m = Trigger.elaborate(self, platform)
  88         #m.submodules.get_op_in = self.in_op
  89         m.submodules.get_op1_out = self.out_op1
  90         m.submodules.get_op2_out = self.out_op2
  91         with m.If(self.trigger):
  92             m.d.comb += [
  93                 self.out_op1.decode(self.in_op1),
  94                 self.out_op2.decode(self.in_op2),
  95             ]
  96         return m
  97
  98
  99 class FPGet2Op(FPState):
 100     """ gets operands
 101     """
 102
 103     def __init__(self, in_state, out_state, in_op1, in_op2, width):
 104         FPState.__init__(self, in_state)
 105         self.out_state = out_state
 106         self.mod = FPGet2OpMod(width)
 107         self.in_op1 = in_op1
 108         self.in_op2 = in_op2
 109         self.out_op1 = FPNumIn(None, width)
 110         self.out_op2 = FPNumIn(None, width)
 111         self.in_stb = Signal(reset_less=True)
 112         self.out_ack = Signal(reset_less=True)
 113         self.out_decode = Signal(reset_less=True)
 114
 115     def setup(self, m, in_op1, in_op2, in_stb, in_ack):
 116         """ links module to inputs and outputs
 117         """
 118         m.submodules.get_ops = self.mod
 119         m.d.comb += self.mod.in_op1.eq(in_op1)
 120         m.d.comb += self.mod.in_op2.eq(in_op2)
 121         m.d.comb += self.mod.stb.eq(in_stb)
 122         m.d.comb += self.out_ack.eq(self.mod.ack)
 123         m.d.comb += self.out_decode.eq(self.mod.trigger)
 124         m.d.comb += in_ack.eq(self.mod.ack)
 125
 126     def action(self, m):
 127         with m.If(self.out_decode):
 128             m.next = self.out_state
 129             m.d.sync += [
 130                 self.mod.ack.eq(0),
 131                 #self.out_op1.v.eq(self.mod.out_op1.v),
 132                 #self.out_op2.v.eq(self.mod.out_op2.v),
 133                 self.out_op1.copy(self.mod.out_op1),
 134                 self.out_op2.copy(self.mod.out_op2)
 135             ]
 136         with m.Else():
 137             m.d.sync += self.mod.ack.eq(1)
 138
 139
 140 class FPAddSpecialCasesMod:
 141     """ special cases: NaNs, infs, zeros, denormalised
 142         NOTE: some of these are unique to add.  see "Special Operations"
 143         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 144     """
 145
 146     def __init__(self, width):
 147         self.in_a = FPNumBase(width)
 148         self.in_b = FPNumBase(width)
 149         self.out_z = FPNumOut(width, False)
 150         self.out_do_z = Signal(reset_less=True)
 151
 152     def setup(self, m, in_a, in_b, out_do_z):
 153         """ links module to inputs and outputs
 154         """
 155         m.submodules.specialcases = self
 156         m.d.comb += self.in_a.copy(in_a)
 157         m.d.comb += self.in_b.copy(in_b)
 158         m.d.comb += out_do_z.eq(self.out_do_z)
 159
 160     def elaborate(self, platform):
 161         m = Module()
 162
 163         m.submodules.sc_in_a = self.in_a
 164         m.submodules.sc_in_b = self.in_b
 165         m.submodules.sc_out_z = self.out_z
 166
 167         s_nomatch = Signal()
 168         m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)
 169
 170         m_match = Signal()
 171         m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)
 172
 173         # if a is NaN or b is NaN return NaN
 174         with m.If(self.in_a.is_nan | self.in_b.is_nan):
 175             m.d.comb += self.out_do_z.eq(1)
 176             m.d.comb += self.out_z.nan(0)
 177
 178         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 179         # under review
 180
 181         ## if a is zero and b is NaN return -b
 182         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 183         #    m.d.comb += self.out_do_z.eq(1)
 184         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 185
 186         ## if b is zero and a is NaN return -a
 187         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 188         #    m.d.comb += self.out_do_z.eq(1)
 189         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 190
 191         ## if a is -zero and b is NaN return -b
 192         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 193         #    m.d.comb += self.out_do_z.eq(1)
 194         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 195
 196         ## if b is -zero and a is NaN return -a
 197         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 198         #    m.d.comb += self.out_do_z.eq(1)
 199         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 200
 201         # if a is inf return inf (or NaN)
 202         with m.Elif(self.in_a.is_inf):
 203             m.d.comb += self.out_do_z.eq(1)
 204             m.d.comb += self.out_z.inf(self.in_a.s)
 205             # if a is inf and signs don't match return NaN
 206             with m.If(self.in_b.exp_128 & s_nomatch):
 207                 m.d.comb += self.out_z.nan(0)
 208
 209         # if b is inf return inf
 210         with m.Elif(self.in_b.is_inf):
 211             m.d.comb += self.out_do_z.eq(1)
 212             m.d.comb += self.out_z.inf(self.in_b.s)
 213
 214         # if a is zero and b zero return signed-a/b
 215         with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
 216             m.d.comb += self.out_do_z.eq(1)
 217             m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
 218                                           self.in_b.e,
 219                                           self.in_b.m[3:-1])
 220
 221         # if a is zero return b
 222         with m.Elif(self.in_a.is_zero):
 223             m.d.comb += self.out_do_z.eq(1)
 224             m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
 225                                       self.in_b.m[3:-1])
 226
 227         # if b is zero return a
 228         with m.Elif(self.in_b.is_zero):
 229             m.d.comb += self.out_do_z.eq(1)
 230             m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
 231                                       self.in_a.m[3:-1])
 232
 233         # if a equal to -b return zero (+ve zero)
 234         with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
 235             m.d.comb += self.out_do_z.eq(1)
 236             m.d.comb += self.out_z.zero(0)
 237
 238         # Denormalised Number checks
 239         with m.Else():
 240             m.d.comb += self.out_do_z.eq(0)
 241
 242         return m
 243
 244
 245 class FPID:
 246     def __init__(self, id_wid):
 247         self.id_wid = id_wid
 248         if self.id_wid:
 249             self.in_mid = Signal(id_wid, reset_less=True)
 250             self.out_mid = Signal(id_wid, reset_less=True)
 251         else:
 252             self.in_mid = None
 253             self.out_mid = None
 254
 255     def idsync(self, m):
 256         if self.id_wid is not None:
 257             m.d.sync += self.out_mid.eq(self.in_mid)
 258
 259
 260 class FPAddSpecialCases(FPState, FPID):
 261     """ special cases: NaNs, infs, zeros, denormalised
 262         NOTE: some of these are unique to add.  see "Special Operations"
 263         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 264     """
 265
 266     def __init__(self, width, id_wid):
 267         FPState.__init__(self, "special_cases")
 268         FPID.__init__(self, id_wid)
 269         self.mod = FPAddSpecialCasesMod(width)
 270         self.out_z = FPNumOut(width, False)
 271         self.out_do_z = Signal(reset_less=True)
 272
 273     def setup(self, m, in_a, in_b, in_mid):
 274         """ links module to inputs and outputs
 275         """
 276         self.mod.setup(m, in_a, in_b, self.out_do_z)
 277         if self.in_mid is not None:
 278             m.d.comb += self.in_mid.eq(in_mid)
 279
 280     def action(self, m):
 281         self.idsync(m)
 282         with m.If(self.out_do_z):
 283             m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 284             m.next = "put_z"
 285         with m.Else():
 286             m.next = "denormalise"
 287
 288
 289 class FPAddSpecialCasesDeNorm(FPState, FPID):
 290     """ special cases: NaNs, infs, zeros, denormalised
 291         NOTE: some of these are unique to add.  see "Special Operations"
 292         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 293     """
 294
 295     def __init__(self, width, id_wid):
 296         FPState.__init__(self, "special_cases")
 297         FPID.__init__(self, id_wid)
 298         self.smod = FPAddSpecialCasesMod(width)
 299         self.out_z = FPNumOut(width, False)
 300         self.out_do_z = Signal(reset_less=True)
 301
 302         self.dmod = FPAddDeNormMod(width)
 303         self.out_a = FPNumBase(width)
 304         self.out_b = FPNumBase(width)
 305
 306     def setup(self, m, in_a, in_b, in_mid):
 307         """ links module to inputs and outputs
 308         """
 309         self.smod.setup(m, in_a, in_b, self.out_do_z)
 310         self.dmod.setup(m, in_a, in_b)
 311         if self.in_mid is not None:
 312             m.d.comb += self.in_mid.eq(in_mid)
 313
 314     def action(self, m):
 315         self.idsync(m)
 316         with m.If(self.out_do_z):
 317             m.d.sync += self.out_z.v.eq(self.smod.out_z.v) # only take output
 318             m.next = "put_z"
 319         with m.Else():
 320             m.next = "align"
 321             m.d.sync += self.out_a.copy(self.dmod.out_a)
 322             m.d.sync += self.out_b.copy(self.dmod.out_b)
 323
 324
 325 class FPAddDeNormMod(FPState):
 326
 327     def __init__(self, width):
 328         self.in_a = FPNumBase(width)
 329         self.in_b = FPNumBase(width)
 330         self.out_a = FPNumBase(width)
 331         self.out_b = FPNumBase(width)
 332
 333     def setup(self, m, in_a, in_b):
 334         """ links module to inputs and outputs
 335         """
 336         m.submodules.denormalise = self
 337         m.d.comb += self.in_a.copy(in_a)
 338         m.d.comb += self.in_b.copy(in_b)
 339
 340     def elaborate(self, platform):
 341         m = Module()
 342         m.submodules.denorm_in_a = self.in_a
 343         m.submodules.denorm_in_b = self.in_b
 344         m.submodules.denorm_out_a = self.out_a
 345         m.submodules.denorm_out_b = self.out_b
 346         # hmmm, don't like repeating identical code
 347         m.d.comb += self.out_a.copy(self.in_a)
 348         with m.If(self.in_a.exp_n127):
 349             m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
 350         with m.Else():
 351             m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit
 352
 353         m.d.comb += self.out_b.copy(self.in_b)
 354         with m.If(self.in_b.exp_n127):
 355             m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit a exponent
 356         with m.Else():
 357             m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit
 358
 359         return m
 360
 361
 362 class FPAddDeNorm(FPState, FPID):
 363
 364     def __init__(self, width, id_wid):
 365         FPState.__init__(self, "denormalise")
 366         FPID.__init__(self, id_wid)
 367         self.mod = FPAddDeNormMod(width)
 368         self.out_a = FPNumBase(width)
 369         self.out_b = FPNumBase(width)
 370
 371     def setup(self, m, in_a, in_b, in_mid):
 372         """ links module to inputs and outputs
 373         """
 374         self.mod.setup(m, in_a, in_b)
 375         if self.in_mid is not None:
 376             m.d.comb += self.in_mid.eq(in_mid)
 377
 378     def action(self, m):
 379         self.idsync(m)
 380         # Denormalised Number checks
 381         m.next = "align"
 382         m.d.sync += self.out_a.copy(self.mod.out_a)
 383         m.d.sync += self.out_b.copy(self.mod.out_b)
 384
 385
 386 class FPAddAlignMultiMod(FPState):
 387
 388     def __init__(self, width):
 389         self.in_a = FPNumBase(width)
 390         self.in_b = FPNumBase(width)
 391         self.out_a = FPNumIn(None, width)
 392         self.out_b = FPNumIn(None, width)
 393         self.exp_eq = Signal(reset_less=True)
 394
 395     def elaborate(self, platform):
 396         # This one however (single-cycle) will do the shift
 397         # in one go.
 398
 399         m = Module()
 400
 401         m.submodules.align_in_a = self.in_a
 402         m.submodules.align_in_b = self.in_b
 403         m.submodules.align_out_a = self.out_a
 404         m.submodules.align_out_b = self.out_b
 405
 406         # NOTE: this does *not* do single-cycle multi-shifting,
 407         #       it *STAYS* in the align state until exponents match
 408
 409         # exponent of a greater than b: shift b down
 410         m.d.comb += self.exp_eq.eq(0)
 411         m.d.comb += self.out_a.copy(self.in_a)
 412         m.d.comb += self.out_b.copy(self.in_b)
 413         agtb = Signal(reset_less=True)
 414         altb = Signal(reset_less=True)
 415         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 416         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 417         with m.If(agtb):
 418             m.d.comb += self.out_b.shift_down(self.in_b)
 419         # exponent of b greater than a: shift a down
 420         with m.Elif(altb):
 421             m.d.comb += self.out_a.shift_down(self.in_a)
 422         # exponents equal: move to next stage.
 423         with m.Else():
 424             m.d.comb += self.exp_eq.eq(1)
 425         return m
 426
 427
 428 class FPAddAlignMulti(FPState, FPID):
 429
 430     def __init__(self, width, id_wid):
 431         FPID.__init__(self, id_wid)
 432         FPState.__init__(self, "align")
 433         self.mod = FPAddAlignMultiMod(width)
 434         self.out_a = FPNumIn(None, width)
 435         self.out_b = FPNumIn(None, width)
 436         self.exp_eq = Signal(reset_less=True)
 437
 438     def setup(self, m, in_a, in_b, in_mid):
 439         """ links module to inputs and outputs
 440         """
 441         m.submodules.align = self.mod
 442         m.d.comb += self.mod.in_a.copy(in_a)
 443         m.d.comb += self.mod.in_b.copy(in_b)
 444         #m.d.comb += self.out_a.copy(self.mod.out_a)
 445         #m.d.comb += self.out_b.copy(self.mod.out_b)
 446         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 447         if self.in_mid is not None:
 448             m.d.comb += self.in_mid.eq(in_mid)
 449
 450     def action(self, m):
 451         self.idsync(m)
 452         m.d.sync += self.out_a.copy(self.mod.out_a)
 453         m.d.sync += self.out_b.copy(self.mod.out_b)
 454         with m.If(self.exp_eq):
 455             m.next = "add_0"
 456
 457
 458 class FPAddAlignSingleMod:
 459
 460     def __init__(self, width):
 461         self.width = width
 462         self.in_a = FPNumBase(width)
 463         self.in_b = FPNumBase(width)
 464         self.out_a = FPNumIn(None, width)
 465         self.out_b = FPNumIn(None, width)
 466
 467     def setup(self, m, in_a, in_b):
 468         """ links module to inputs and outputs
 469         """
 470         m.submodules.align = self
 471         m.d.comb += self.in_a.copy(in_a)
 472         m.d.comb += self.in_b.copy(in_b)
 473
 474     def elaborate(self, platform):
 475         """ Aligns A against B or B against A, depending on which has the
 476             greater exponent.  This is done in a *single* cycle using
 477             variable-width bit-shift
 478
 479             the shifter used here is quite expensive in terms of gates.
 480             Mux A or B in (and out) into temporaries, as only one of them
 481             needs to be aligned against the other
 482         """
 483         m = Module()
 484
 485         m.submodules.align_in_a = self.in_a
 486         m.submodules.align_in_b = self.in_b
 487         m.submodules.align_out_a = self.out_a
 488         m.submodules.align_out_b = self.out_b
 489
 490         # temporary (muxed) input and output to be shifted
 491         t_inp = FPNumBase(self.width)
 492         t_out = FPNumIn(None, self.width)
 493         espec = (len(self.in_a.e), True)
 494         msr = MultiShiftRMerge(self.in_a.m_width, espec)
 495         m.submodules.align_t_in = t_inp
 496         m.submodules.align_t_out = t_out
 497         m.submodules.multishift_r = msr
 498
 499         ediff = Signal(espec, reset_less=True)
 500         ediffr = Signal(espec, reset_less=True)
 501         tdiff = Signal(espec, reset_less=True)
 502         elz = Signal(reset_less=True)
 503         egz = Signal(reset_less=True)
 504
 505         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 506         m.d.comb += msr.inp.eq(t_inp.m)
 507         m.d.comb += msr.diff.eq(tdiff)
 508         m.d.comb += t_out.m.eq(msr.m)
 509         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 510         m.d.comb += t_out.s.eq(t_inp.s)
 511
 512         m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
 513         m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
 514         m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
 515         m.d.comb += egz.eq(self.in_a.e > self.in_b.e)
 516
 517         # default: A-exp == B-exp, A and B untouched (fall through)
 518         m.d.comb += self.out_a.copy(self.in_a)
 519         m.d.comb += self.out_b.copy(self.in_b)
 520         # only one shifter (muxed)
 521         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 522         # exponent of a greater than b: shift b down
 523         with m.If(egz):
 524             m.d.comb += [t_inp.copy(self.in_b),
 525                          tdiff.eq(ediff),
 526                          self.out_b.copy(t_out),
 527                          self.out_b.s.eq(self.in_b.s), # whoops forgot sign
 528                         ]
 529         # exponent of b greater than a: shift a down
 530         with m.Elif(elz):
 531             m.d.comb += [t_inp.copy(self.in_a),
 532                          tdiff.eq(ediffr),
 533                          self.out_a.copy(t_out),
 534                          self.out_a.s.eq(self.in_a.s), # whoops forgot sign
 535                         ]
 536         return m
 537
 538
 539 class FPAddAlignSingle(FPState, FPID):
 540
 541     def __init__(self, width, id_wid):
 542         FPState.__init__(self, "align")
 543         FPID.__init__(self, id_wid)
 544         self.mod = FPAddAlignSingleMod(width)
 545         self.out_a = FPNumIn(None, width)
 546         self.out_b = FPNumIn(None, width)
 547
 548     def setup(self, m, in_a, in_b, in_mid):
 549         """ links module to inputs and outputs
 550         """
 551         self.mod.setup(m, in_a, in_b)
 552         if self.in_mid is not None:
 553             m.d.comb += self.in_mid.eq(in_mid)
 554
 555     def action(self, m):
 556         self.idsync(m)
 557         # NOTE: could be done as comb
 558         m.d.sync += self.out_a.copy(self.mod.out_a)
 559         m.d.sync += self.out_b.copy(self.mod.out_b)
 560         m.next = "add_0"
 561
 562
 563 class FPAddAlignSingleAdd(FPState, FPID):
 564
 565     def __init__(self, width, id_wid):
 566         FPState.__init__(self, "align")
 567         FPID.__init__(self, id_wid)
 568         self.mod = FPAddAlignSingleMod(width)
 569         self.out_a = FPNumIn(None, width)
 570         self.out_b = FPNumIn(None, width)
 571
 572         self.a0mod = FPAddStage0Mod(width)
 573         self.a0_out_z = FPNumBase(width, False)
 574         self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)
 575         self.a0_out_z = FPNumBase(width, False)
 576
 577         self.a1mod = FPAddStage1Mod(width)
 578         self.out_z = FPNumBase(width, False)
 579         self.out_of = Overflow()
 580
 581     def setup(self, m, in_a, in_b, in_mid):
 582         """ links module to inputs and outputs
 583         """
 584         self.mod.setup(m, in_a, in_b)
 585         m.d.comb += self.out_a.copy(self.mod.out_a)
 586         m.d.comb += self.out_b.copy(self.mod.out_b)
 587
 588         self.a0mod.setup(m, self.out_a, self.out_b)
 589         m.d.comb += self.a0_out_z.copy(self.a0mod.out_z)
 590         m.d.comb += self.out_tot.eq(self.a0mod.out_tot)
 591
 592         self.a1mod.setup(m, self.out_tot, self.a0_out_z)
 593
 594         if self.in_mid is not None:
 595             m.d.comb += self.in_mid.eq(in_mid)
 596
 597     def action(self, m):
 598         self.idsync(m)
 599         m.d.sync += self.out_of.copy(self.a1mod.out_of)
 600         m.d.sync += self.out_z.copy(self.a1mod.out_z)
 601         m.next = "normalise_1"
 602
 603
 604 class FPAddStage0Mod:
 605
 606     def __init__(self, width):
 607         self.in_a = FPNumBase(width)
 608         self.in_b = FPNumBase(width)
 609         self.in_z = FPNumBase(width, False)
 610         self.out_z = FPNumBase(width, False)
 611         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 612
 613     def setup(self, m, in_a, in_b):
 614         """ links module to inputs and outputs
 615         """
 616         m.submodules.add0 = self
 617         m.d.comb += self.in_a.copy(in_a)
 618         m.d.comb += self.in_b.copy(in_b)
 619
 620     def elaborate(self, platform):
 621         m = Module()
 622         m.submodules.add0_in_a = self.in_a
 623         m.submodules.add0_in_b = self.in_b
 624         m.submodules.add0_out_z = self.out_z
 625
 626         m.d.comb += self.out_z.e.eq(self.in_a.e)
 627
 628         # store intermediate tests (and zero-extended mantissas)
 629         seq = Signal(reset_less=True)
 630         mge = Signal(reset_less=True)
 631         am0 = Signal(len(self.in_a.m)+1, reset_less=True)
 632         bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
 633         m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
 634                      mge.eq(self.in_a.m >= self.in_b.m),
 635                      am0.eq(Cat(self.in_a.m, 0)),
 636                      bm0.eq(Cat(self.in_b.m, 0))
 637                     ]
 638         # same-sign (both negative or both positive) add mantissas
 639         with m.If(seq):
 640             m.d.comb += [
 641                 self.out_tot.eq(am0 + bm0),
 642                 self.out_z.s.eq(self.in_a.s)
 643             ]
 644         # a mantissa greater than b, use a
 645         with m.Elif(mge):
 646             m.d.comb += [
 647                 self.out_tot.eq(am0 - bm0),
 648                 self.out_z.s.eq(self.in_a.s)
 649             ]
 650         # b mantissa greater than a, use b
 651         with m.Else():
 652             m.d.comb += [
 653                 self.out_tot.eq(bm0 - am0),
 654                 self.out_z.s.eq(self.in_b.s)
 655         ]
 656         return m
 657
 658
 659 class FPAddStage0(FPState, FPID):
 660     """ First stage of add.  covers same-sign (add) and subtract
 661         special-casing when mantissas are greater or equal, to
 662         give greatest accuracy.
 663     """
 664
 665     def __init__(self, width, id_wid):
 666         FPState.__init__(self, "add_0")
 667         FPID.__init__(self, id_wid)
 668         self.mod = FPAddStage0Mod(width)
 669         self.out_z = FPNumBase(width, False)
 670         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 671
 672     def setup(self, m, in_a, in_b, in_mid):
 673         """ links module to inputs and outputs
 674         """
 675         self.mod.setup(m, in_a, in_b)
 676         if self.in_mid is not None:
 677             m.d.comb += self.in_mid.eq(in_mid)
 678
 679     def action(self, m):
 680         self.idsync(m)
 681         # NOTE: these could be done as combinatorial (merge add0+add1)
 682         m.d.sync += self.out_z.copy(self.mod.out_z)
 683         m.d.sync += self.out_tot.eq(self.mod.out_tot)
 684         m.next = "add_1"
 685
 686
 687 class FPAddStage1Mod(FPState):
 688     """ Second stage of add: preparation for normalisation.
 689         detects when tot sum is too big (tot[27] is kinda a carry bit)
 690     """
 691
 692     def __init__(self, width):
 693         self.out_norm = Signal(reset_less=True)
 694         self.in_z = FPNumBase(width, False)
 695         self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
 696         self.out_z = FPNumBase(width, False)
 697         self.out_of = Overflow()
 698
 699     def setup(self, m, in_tot, in_z):
 700         """ links module to inputs and outputs
 701         """
 702         m.submodules.add1 = self
 703         m.submodules.add1_out_overflow = self.out_of
 704
 705         m.d.comb += self.in_z.copy(in_z)
 706         m.d.comb += self.in_tot.eq(in_tot)
 707
 708     def elaborate(self, platform):
 709         m = Module()
 710         #m.submodules.norm1_in_overflow = self.in_of
 711         #m.submodules.norm1_out_overflow = self.out_of
 712         #m.submodules.norm1_in_z = self.in_z
 713         #m.submodules.norm1_out_z = self.out_z
 714         m.d.comb += self.out_z.copy(self.in_z)
 715         # tot[27] gets set when the sum overflows. shift result down
 716         with m.If(self.in_tot[-1]):
 717             m.d.comb += [
 718                 self.out_z.m.eq(self.in_tot[4:]),
 719                 self.out_of.m0.eq(self.in_tot[4]),
 720                 self.out_of.guard.eq(self.in_tot[3]),
 721                 self.out_of.round_bit.eq(self.in_tot[2]),
 722                 self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
 723                 self.out_z.e.eq(self.in_z.e + 1)
 724         ]
 725         # tot[27] zero case
 726         with m.Else():
 727             m.d.comb += [
 728                 self.out_z.m.eq(self.in_tot[3:]),
 729                 self.out_of.m0.eq(self.in_tot[3]),
 730                 self.out_of.guard.eq(self.in_tot[2]),
 731                 self.out_of.round_bit.eq(self.in_tot[1]),
 732                 self.out_of.sticky.eq(self.in_tot[0])
 733         ]
 734         return m
 735
 736
 737 class FPAddStage1(FPState, FPID):
 738
 739     def __init__(self, width, id_wid):
 740         FPState.__init__(self, "add_1")
 741         FPID.__init__(self, id_wid)
 742         self.mod = FPAddStage1Mod(width)
 743         self.out_z = FPNumBase(width, False)
 744         self.out_of = Overflow()
 745         self.norm_stb = Signal()
 746
 747     def setup(self, m, in_tot, in_z, in_mid):
 748         """ links module to inputs and outputs
 749         """
 750         self.mod.setup(m, in_tot, in_z)
 751
 752         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 753
 754         if self.in_mid is not None:
 755             m.d.comb += self.in_mid.eq(in_mid)
 756
 757     def action(self, m):
 758         self.idsync(m)
 759         m.d.sync += self.out_of.copy(self.mod.out_of)
 760         m.d.sync += self.out_z.copy(self.mod.out_z)
 761         m.d.sync += self.norm_stb.eq(1)
 762         m.next = "normalise_1"
 763
 764
 765 class FPNorm1ModSingle:
 766
 767     def __init__(self, width):
 768         self.width = width
 769         self.out_norm = Signal(reset_less=True)
 770         self.in_z = FPNumBase(width, False)
 771         self.in_of = Overflow()
 772         self.out_z = FPNumBase(width, False)
 773         self.out_of = Overflow()
 774
 775     def setup(self, m, in_z, in_of, out_z):
 776         """ links module to inputs and outputs
 777         """
 778         m.submodules.normalise_1 = self
 779
 780         m.d.comb += self.in_z.copy(in_z)
 781         m.d.comb += self.in_of.copy(in_of)
 782
 783         m.d.comb += out_z.copy(self.out_z)
 784
 785     def elaborate(self, platform):
 786         m = Module()
 787
 788         mwid = self.out_z.m_width+2
 789         pe = PriorityEncoder(mwid)
 790         m.submodules.norm_pe = pe
 791
 792         m.submodules.norm1_out_z = self.out_z
 793         m.submodules.norm1_out_overflow = self.out_of
 794         m.submodules.norm1_in_z = self.in_z
 795         m.submodules.norm1_in_overflow = self.in_of
 796
 797         in_z = FPNumBase(self.width, False)
 798         in_of = Overflow()
 799         m.submodules.norm1_insel_z = in_z
 800         m.submodules.norm1_insel_overflow = in_of
 801
 802         espec = (len(in_z.e), True)
 803         ediff_n126 = Signal(espec, reset_less=True)
 804         msr = MultiShiftRMerge(mwid, espec)
 805         m.submodules.multishift_r = msr
 806
 807         m.d.comb += in_z.copy(self.in_z)
 808         m.d.comb += in_of.copy(self.in_of)
 809         # initialise out from in (overridden below)
 810         m.d.comb += self.out_z.copy(in_z)
 811         m.d.comb += self.out_of.copy(in_of)
 812         # normalisation increase/decrease conditions
 813         decrease = Signal(reset_less=True)
 814         increase = Signal(reset_less=True)
 815         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 816         m.d.comb += increase.eq(in_z.exp_lt_n126)
 817         # decrease exponent
 818         with m.If(decrease):
 819             # *sigh* not entirely obvious: count leading zeros (clz)
 820             # with a PriorityEncoder: to find from the MSB
 821             # we reverse the order of the bits.
 822             temp_m = Signal(mwid, reset_less=True)
 823             temp_s = Signal(mwid+1, reset_less=True)
 824             clz = Signal((len(in_z.e), True), reset_less=True)
 825             # make sure that the amount to decrease by does NOT
 826             # go below the minimum non-INF/NaN exponent
 827             limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
 828                          in_z.exp_sub_n126)
 829             m.d.comb += [
 830                 # cat round and guard bits back into the mantissa
 831                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
 832                 pe.i.eq(temp_m[::-1]),          # inverted
 833                 clz.eq(limclz),                 # count zeros from MSB down
 834                 temp_s.eq(temp_m << clz),       # shift mantissa UP
 835                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
 836                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
 837                 self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
 838                 # overflow in bits 0..1: got shifted too (leave sticky)
 839                 self.out_of.guard.eq(temp_s[1]),     # guard
 840                 self.out_of.round_bit.eq(temp_s[0]), # round
 841             ]
 842         # increase exponent
 843         with m.Elif(increase):
 844             temp_m = Signal(mwid+1, reset_less=True)
 845             m.d.comb += [
 846                 temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
 847                               in_z.m)),
 848                 ediff_n126.eq(in_z.N126 - in_z.e),
 849                 # connect multi-shifter to inp/out mantissa (and ediff)
 850                 msr.inp.eq(temp_m),
 851                 msr.diff.eq(ediff_n126),
 852                 self.out_z.m.eq(msr.m[3:]),
 853                 self.out_of.m0.eq(temp_s[3]),   # copy of mantissa[0]
 854                 # overflow in bits 0..1: got shifted too (leave sticky)
 855                 self.out_of.guard.eq(temp_s[2]),     # guard
 856                 self.out_of.round_bit.eq(temp_s[1]), # round
 857                 self.out_of.sticky.eq(temp_s[0]), # sticky
 858                 self.out_z.e.eq(in_z.e + ediff_n126),
 859             ]
 860
 861         return m
 862
 863
 864 class FPNorm1ModMulti:
 865
 866     def __init__(self, width, single_cycle=True):
 867         self.width = width
 868         self.in_select = Signal(reset_less=True)
 869         self.out_norm = Signal(reset_less=True)
 870         self.in_z = FPNumBase(width, False)
 871         self.in_of = Overflow()
 872         self.temp_z = FPNumBase(width, False)
 873         self.temp_of = Overflow()
 874         self.out_z = FPNumBase(width, False)
 875         self.out_of = Overflow()
 876
 877     def elaborate(self, platform):
 878         m = Module()
 879
 880         m.submodules.norm1_out_z = self.out_z
 881         m.submodules.norm1_out_overflow = self.out_of
 882         m.submodules.norm1_temp_z = self.temp_z
 883         m.submodules.norm1_temp_of = self.temp_of
 884         m.submodules.norm1_in_z = self.in_z
 885         m.submodules.norm1_in_overflow = self.in_of
 886
 887         in_z = FPNumBase(self.width, False)
 888         in_of = Overflow()
 889         m.submodules.norm1_insel_z = in_z
 890         m.submodules.norm1_insel_overflow = in_of
 891
 892         # select which of temp or in z/of to use
 893         with m.If(self.in_select):
 894             m.d.comb += in_z.copy(self.in_z)
 895             m.d.comb += in_of.copy(self.in_of)
 896         with m.Else():
 897             m.d.comb += in_z.copy(self.temp_z)
 898             m.d.comb += in_of.copy(self.temp_of)
 899         # initialise out from in (overridden below)
 900         m.d.comb += self.out_z.copy(in_z)
 901         m.d.comb += self.out_of.copy(in_of)
 902         # normalisation increase/decrease conditions
 903         decrease = Signal(reset_less=True)
 904         increase = Signal(reset_less=True)
 905         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 906         m.d.comb += increase.eq(in_z.exp_lt_n126)
 907         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
 908         # decrease exponent
 909         with m.If(decrease):
 910             m.d.comb += [
 911                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
 912                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
 913                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
 914                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
 915                 self.out_of.round_bit.eq(0),        # reset round bit
 916                 self.out_of.m0.eq(in_of.guard),
 917             ]
 918         # increase exponent
 919         with m.Elif(increase):
 920             m.d.comb += [
 921                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
 922                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
 923                 self.out_of.guard.eq(in_z.m[0]),
 924                 self.out_of.m0.eq(in_z.m[1]),
 925                 self.out_of.round_bit.eq(in_of.guard),
 926                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
 927             ]
 928
 929         return m
 930
 931
 932 class FPNorm1Single(FPState, FPID):
 933
 934     def __init__(self, width, id_wid, single_cycle=True):
 935         FPID.__init__(self, id_wid)
 936         FPState.__init__(self, "normalise_1")
 937         self.mod = FPNorm1ModSingle(width)
 938         self.out_norm = Signal(reset_less=True)
 939         self.out_z = FPNumBase(width)
 940         self.out_roundz = Signal(reset_less=True)
 941
 942     def setup(self, m, in_z, in_of, in_mid):
 943         """ links module to inputs and outputs
 944         """
 945         self.mod.setup(m, in_z, in_of, self.out_z)
 946
 947         if self.in_mid is not None:
 948             m.d.comb += self.in_mid.eq(in_mid)
 949
 950     def action(self, m):
 951         self.idsync(m)
 952         m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
 953         m.next = "round"
 954
 955
 956 class FPNorm1Multi(FPState, FPID):
 957
 958     def __init__(self, width, id_wid):
 959         FPID.__init__(self, id_wid)
 960         FPState.__init__(self, "normalise_1")
 961         self.mod = FPNorm1ModMulti(width)
 962         self.stb = Signal(reset_less=True)
 963         self.ack = Signal(reset=0, reset_less=True)
 964         self.out_norm = Signal(reset_less=True)
 965         self.in_accept = Signal(reset_less=True)
 966         self.temp_z = FPNumBase(width)
 967         self.temp_of = Overflow()
 968         self.out_z = FPNumBase(width)
 969         self.out_roundz = Signal(reset_less=True)
 970
 971     def setup(self, m, in_z, in_of, norm_stb, in_mid):
 972         """ links module to inputs and outputs
 973         """
 974         self.mod.setup(m, in_z, in_of, norm_stb,
 975                        self.in_accept, self.temp_z, self.temp_of,
 976                        self.out_z, self.out_norm)
 977
 978         m.d.comb += self.stb.eq(norm_stb)
 979         m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state
 980
 981         if self.in_mid is not None:
 982             m.d.comb += self.in_mid.eq(in_mid)
 983
 984     def action(self, m):
 985         self.idsync(m)
 986         m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
 987         m.d.sync += self.temp_of.copy(self.mod.out_of)
 988         m.d.sync += self.temp_z.copy(self.out_z)
 989         with m.If(self.out_norm):
 990             with m.If(self.in_accept):
 991                 m.d.sync += [
 992                     self.ack.eq(1),
 993                 ]
 994             with m.Else():
 995                 m.d.sync += self.ack.eq(0)
 996         with m.Else():
 997             # normalisation not required (or done).
 998             m.next = "round"
 999             m.d.sync += self.ack.eq(1)
1000             m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1001
1002
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" of the compact pipeline

        chains normalise -> round -> corrections -> pack combinatorially
        and registers the packed value on the way to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ creates the four chained modules and links them together

            self.pmod and self.out_z only exist after this has been called
        """
        wid = self.width

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(wid)
        n_out_z = FPNumBase(wid)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(wid)
        r_out_z = FPNumBase(wid)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += [
            n_out_roundz.eq(nmod.out_of.roundz),
            r_out_z.copy(rmod.out_z),
        ]

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(wid)
        c_out_z = FPNumBase(wid)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(wid)
        self.out_z = FPNumBase(wid)
        self.pmod.setup(m, c_out_z)

        # Multiplex ID
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements performed while the FSM is in this state
        """
        self.idsync(m) # copies incoming ID to outgoing
        # outputs packed result
        m.d.sync += [self.out_z.v.eq(self.pmod.out_z.v)]
        m.next = "pack_put_z"
1046
1047
class FPRoundMod:
    """ rounding: out_z is in_z, with the mantissa incremented when
        in_roundz is set
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module as "roundz" and connects its inputs
        """
        m.submodules.roundz = self

        m.d.comb += [
            self.in_z.copy(in_z),
            self.in_roundz.eq(roundz),
        ]

    def elaborate(self, platform):
        m = Module()
        src, dst = self.in_z, self.out_z

        # default: pass the number through untouched
        m.d.comb += dst.copy(src)
        with m.If(self.in_roundz):
            # mantissa rounds up
            m.d.comb += dst.m.eq(src.m + 1)
            # mantissa was all 1s: exponent goes up as well
            with m.If(src.m == src.m1s):
                m.d.comb += dst.e.eq(src.e + 1)
        return m
1069
1070
class FPRound(FPState, FPID):
    """ FSM state "round": wraps FPRoundMod, then moves to "corrections"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ connects the wrapped module and (when in use) the incoming ID
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements performed while the FSM is in this state
        """
        self.idsync(m)
        # register the rounded number
        m.d.sync += [self.out_z.copy(self.mod.out_z)]
        m.next = "corrections"
1091
1092
class FPCorrectionsMod:
    """ corrections: out_z is in_z, except that the exponent is replaced
        by in_z.N127 when in_z.is_denormalised is set
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers this module as "corrections" and connects its input
        """
        m.submodules.corrections = self
        m.d.comb += [self.in_z.copy(in_z)]

    def elaborate(self, platform):
        m = Module()
        src, dst = self.in_z, self.out_z
        m.submodules.corr_in_z = src
        m.submodules.corr_out_z = dst

        # default: pass-through (exponent overridden below)
        m.d.comb += dst.copy(src)
        with m.If(src.is_denormalised):
            m.d.comb += dst.e.eq(src.N127)
        return m
1113
1114
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": wraps FPCorrectionsMod, then moves to "pack"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ connects the wrapped module and (when in use) the incoming ID
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements performed while the FSM is in this state
        """
        self.idsync(m)
        # register the corrected number
        m.d.sync += [self.out_z.copy(self.mod.out_z)]
        m.next = "pack"
1134
1135
class FPPackMod:
    """ pack: builds out_z from in_z, substituting infinity (with in_z's
        sign) when in_z.is_overflowed is set
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers this module as "pack" and connects its input
        """
        m.submodules.pack = self
        m.d.comb += [self.in_z.copy(in_z)]

    def elaborate(self, platform):
        m = Module()
        src, dst = self.in_z, self.out_z
        m.submodules.pack_in_z = src

        with m.If(src.is_overflowed):
            m.d.comb += dst.inf(src.s)
        with m.Else():
            m.d.comb += dst.create(src.s, src.e, src.m)
        return m
1156
1157
class FPPack(FPState, FPID):
    """ FSM state "pack": wraps FPPackMod, then moves to "pack_put_z"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ connects the wrapped module and (when in use) the incoming ID
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements performed while the FSM is in this state
        """
        self.idsync(m)
        # register the packed value
        m.d.sync += [self.out_z.v.eq(self.mod.out_z.v)]
        m.next = "pack_put_z"
1177
1178
class FPPutZ(FPState):
    """ output state: presents in_z on out_z and holds the strobe up
        until stb and ack are both set, then moves to to_state
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z, out_z: source and destination of the value
            * in_mid, out_mid: source and destination of the ID
              (the ID is skipped when in_mid is None)
            * to_state: state to go to afterwards (default "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ statements performed while the FSM is in this state
        """
        dest = self.out_z

        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z.v)

        with m.If(dest.stb & dest.ack):
            # accepted: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1202
1203
class FPPutZIdx(FPState):
    """ output state: as FPPutZ, except that the destination is the
        entry of out_zs selected by in_mid
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source of the value
            * out_zs: indexable collection of destinations
            * in_mid: index into out_zs
            * to_state: state to go to afterwards (default "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ statements performed while the FSM is in this state
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        dest = self.out_zs[self.in_mid]  # the selected destination

        m.d.comb += [
            outz_stb.eq(dest.stb),
            outz_ack.eq(dest.ack),
        ]
        m.d.sync += dest.v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            # accepted: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1229
1230
class FPADDBaseMod(FPID):
    """ FP adder core: an FSM assembled from a list of FPState stages
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a  = Signal(width)
        self.in_b  = Signal(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ records a stage for inclusion in the FSM and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t

        # populate self.states with the requested flavour of pipeline
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        # one FSM state per recorded stage
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ sets up the long pipeline: one FSM state per processing step
        """
        wid, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, wid))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a, b = get.out_op1, get.out_op2

        sc = self.add_state(FPAddSpecialCases(wid, idw))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(wid, idw))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: both variants are constructed and set up identically
        align = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align(wid, idw))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(wid, idw))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(wid, idw))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant also needs the strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(wid, idw))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(wid, idw))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(wid, idw))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(wid, idw))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(wid, idw))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: "pack_put_z" emits the packed result,
        # "put_z" emits the special-cases stage's out_z
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ sets up the short pipeline: several steps merged per FSM state
        """
        wid, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, wid))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(wid, idw))
        sc.setup(m, get.out_op1, get.out_op2, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(wid, idw))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(wid, idw))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        # two output states: "pack_put_z" emits the packed result,
        # "put_z" emits the special-cases stage's out_z
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1347
1348
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd": wraps a complete FPADDBaseMod

        passes an operand pair to the inner adder and waits for its
        result (z_done) before moving on to state "put_z".
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a  = Signal(width)
        self.in_b  = Signal(width)
        # NOTE: out_z and out_mid are not created here: setup() stores
        # the objects supplied by the caller.

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the inner adder to this state's inputs and outputs

            * m: parent Module; the inner adder is added as "fpadd"
            * a, b: operands
            * add_stb: incoming strobe (registered into self.add_stb)
            * in_mid: incoming ID
            * out_z, out_mid: caller-supplied result and ID outputs
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ statements performed while the FSM is in this state
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state.
            # (the unreachable statements that used to follow an early
            # "return" at this point have been removed.)
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1440
class ResArray:
    """ an Array of rs_sz result operands (FPOp), plus a single incoming
        result (in_z) and its ID (in_mid)
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width of each operand
            * id_wid: bit-width of the ID
            * rs_sz: number of result entries to create.  Previously read
              from an undefined name; now a parameter defaulting to 2,
              the same default as FPADD's rs_sz.
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ connects the incoming result and ID (comb) to in_z / in_mid
        """
        m.d.comb += [self.in_z.copy(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for ResArray
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports() of every result entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1472
1473
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand pairs (self.rs) and of result
              entries (self.res) to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        rs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # result entries, named out_z_<n>
        res = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            res.append(out_z)
        self.res = Array(res)

        self.states = []

    def add_state(self, state):
        """ records a stage for inclusion in the FSM and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected
        in_a, in_b = self.rs[0]

        out_z = FPOp(self.width)
        out_mid = Signal(self.id_wid, reset_less=True)
        m.submodules.out_z = out_z

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        ab.setup(m, geta.out_op, getb.out_op, getb.out_decode,
                 self.ids.in_mid, out_z, out_mid)

        # output state: result goes to the self.res entry picked by out_mid
        self.add_state(FPPutZIdx("put_z", ab.out_z, self.res,
                                 out_mid, "get_a"))

        # one FSM state per recorded stage
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1565
1566
if __name__ == "__main__":
    # the "else" arm is a disabled alternative (FPADDBase on its own)
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports() +
                 alu.rs[0][1].ports() +
                 alu.res[0].ports() +
                 [alu.ids.in_mid, alu.ids.out_mid])
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b] +
                 alu.in_t.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
    main(alu, ports=ports)
1580
1581
1582     # works... but don't use, just do "python fname.py convert -t v"
1583     #print (verilog.convert(alu, ports=[
1584     #                        ports=alu.in_a.ports() + \
1585     #                              alu.in_b.ports() + \
1586     #                              alu.out_z.ports())