src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
  13 from multipipe import CombMultiOutPipeline
  14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
  15
  16 #from fpbase import FPNumShiftMultiRight
  17
  18
  19 class FPState(FPBase):
  20     def __init__(self, state_from):
  21         self.state_from = state_from
  22
  23     def set_inputs(self, inputs):
  24         self.inputs = inputs
  25         for k,v in inputs.items():
  26             setattr(self, k, v)
  27
  28     def set_outputs(self, outputs):
  29         self.outputs = outputs
  30         for k,v in outputs.items():
  31             setattr(self, k, v)
  32
  33
  34 class FPGetSyncOpsMod:
  35     def __init__(self, width, num_ops=2):
  36         self.width = width
  37         self.num_ops = num_ops
  38         inops = []
  39         outops = []
  40         for i in range(num_ops):
  41             inops.append(Signal(width, reset_less=True))
  42             outops.append(Signal(width, reset_less=True))
  43         self.in_op = inops
  44         self.out_op = outops
  45         self.stb = Signal(num_ops)
  46         self.ack = Signal()
  47         self.ready = Signal(reset_less=True)
  48         self.out_decode = Signal(reset_less=True)
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  53         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  54         with m.If(self.out_decode):
  55             for i in range(self.num_ops):
  56                 m.d.comb += [
  57                         self.out_op[i].eq(self.in_op[i]),
  58                 ]
  59         return m
  60
  61     def ports(self):
  62         return self.in_op + self.out_op + [self.stb, self.ack]
  63
  64
  65 class FPOps(Trigger):
  66     def __init__(self, width, num_ops):
  67         Trigger.__init__(self)
  68         self.width = width
  69         self.num_ops = num_ops
  70
  71         res = []
  72         for i in range(num_ops):
  73             res.append(Signal(width))
  74         self.v  = Array(res)
  75
  76     def ports(self):
  77         res = []
  78         for i in range(self.num_ops):
  79             res.append(self.v[i])
  80         res.append(self.ack)
  81         res.append(self.stb)
  82         return res
  83
  84
  85 class InputGroup:
  86     def __init__(self, width, num_ops=2, num_rows=4):
  87         self.width = width
  88         self.num_ops = num_ops
  89         self.num_rows = num_rows
  90         self.mmax = int(log(self.num_rows) / log(2))
  91         self.rs = []
  92         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  93         for i in range(num_rows):
  94             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  95         self.rs = Array(self.rs)
  96
  97         self.out_op = FPOps(width, num_ops)
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101
 102         pe = PriorityEncoder(self.num_rows)
 103         m.submodules.selector = pe
 104         m.submodules.out_op = self.out_op
 105         m.submodules += self.rs
 106
 107         # connect priority encoder
 108         in_ready = []
 109         for i in range(self.num_rows):
 110             in_ready.append(self.rs[i].ready)
 111         m.d.comb += pe.i.eq(Cat(*in_ready))
 112
 113         active = Signal(reset_less=True)
 114         out_en = Signal(reset_less=True)
 115         m.d.comb += active.eq(~pe.n) # encoder active
 116         m.d.comb += out_en.eq(active & self.out_op.trigger)
 117
 118         # encoder active: ack relevant input, record MID, pass output
 119         with m.If(out_en):
 120             rs = self.rs[pe.o]
 121             m.d.sync += self.mid.eq(pe.o)
 122             m.d.sync += rs.ack.eq(0)
 123             m.d.sync += self.out_op.stb.eq(0)
 124             for j in range(self.num_ops):
 125                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 126         with m.Else():
 127             m.d.sync += self.out_op.stb.eq(1)
 128             # acks all default to zero
 129             for i in range(self.num_rows):
 130                 m.d.sync += self.rs[i].ack.eq(1)
 131
 132         return m
 133
 134     def ports(self):
 135         res = []
 136         for i in range(self.num_rows):
 137             inop = self.rs[i]
 138             res += inop.in_op + [inop.stb]
 139         return self.out_op.ports() + res + [self.mid]
 140
 141
 142 class FPGetOpMod:
 143     def __init__(self, width):
 144         self.in_op = FPOp(width)
 145         self.out_op = Signal(width)
 146         self.out_decode = Signal(reset_less=True)
 147
 148     def elaborate(self, platform):
 149         m = Module()
 150         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 151         m.submodules.get_op_in = self.in_op
 152         #m.submodules.get_op_out = self.out_op
 153         with m.If(self.out_decode):
 154             m.d.comb += [
 155                 self.out_op.eq(self.in_op.v),
 156             ]
 157         return m
 158
 159
 160 class FPGetOp(FPState):
 161     """ gets operand
 162     """
 163
 164     def __init__(self, in_state, out_state, in_op, width):
 165         FPState.__init__(self, in_state)
 166         self.out_state = out_state
 167         self.mod = FPGetOpMod(width)
 168         self.in_op = in_op
 169         self.out_op = Signal(width)
 170         self.out_decode = Signal(reset_less=True)
 171
 172     def setup(self, m, in_op):
 173         """ links module to inputs and outputs
 174         """
 175         setattr(m.submodules, self.state_from, self.mod)
 176         m.d.comb += self.mod.in_op.eq(in_op)
 177         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 178
 179     def action(self, m):
 180         with m.If(self.out_decode):
 181             m.next = self.out_state
 182             m.d.sync += [
 183                 self.in_op.ack.eq(0),
 184                 self.out_op.eq(self.mod.out_op)
 185             ]
 186         with m.Else():
 187             m.d.sync += self.in_op.ack.eq(1)
 188
 189
 190 class FPGet2OpMod(Trigger):
 191     def __init__(self, width, id_wid):
 192         Trigger.__init__(self)
 193         self.width = width
 194         self.id_wid = id_wid
 195         self.i = self.ispec()
 196         self.o = self.ospec()
 197
 198     def ispec(self):
 199         return FPADDBaseData(self.width, self.id_wid)
 200
 201     def ospec(self):
 202         return FPNumBase2Ops(self.width, self.id_wid)
 203
 204     def elaborate(self, platform):
 205         m = Trigger.elaborate(self, platform)
 206         m.submodules.get_op1_out = self.o.a
 207         m.submodules.get_op2_out = self.o.b
 208         out_op1 = FPNumIn(None, self.width)
 209         out_op2 = FPNumIn(None, self.width)
 210         with m.If(self.trigger):
 211             m.d.comb += [
 212                 out_op1.decode(self.i.a),
 213                 out_op2.decode(self.i.b),
 214                 self.o.a.eq(out_op1),
 215                 self.o.b.eq(out_op2),
 216                 self.o.mid.eq(self.i.mid)
 217             ]
 218         return m
 219
 220
 221 class FPGet2Op(FPState):
 222     """ gets operands
 223     """
 224
 225     def __init__(self, in_state, out_state, width, id_wid):
 226         FPState.__init__(self, in_state)
 227         self.out_state = out_state
 228         self.mod = FPGet2OpMod(width, id_wid)
 229         self.o = self.mod.ospec()
 230         self.in_stb = Signal(reset_less=True)
 231         self.out_ack = Signal(reset_less=True)
 232         self.out_decode = Signal(reset_less=True)
 233
 234     def setup(self, m, i, in_stb, in_ack):
 235         """ links module to inputs and outputs
 236         """
 237         m.submodules.get_ops = self.mod
 238         m.d.comb += self.mod.i.eq(i)
 239         m.d.comb += self.mod.stb.eq(in_stb)
 240         m.d.comb += self.out_ack.eq(self.mod.ack)
 241         m.d.comb += self.out_decode.eq(self.mod.trigger)
 242         m.d.comb += in_ack.eq(self.mod.ack)
 243
 244     def action(self, m):
 245         with m.If(self.out_decode):
 246             m.next = self.out_state
 247             m.d.sync += [
 248                 self.mod.ack.eq(0),
 249                 self.o.eq(self.mod.o),
 250             ]
 251         with m.Else():
 252             m.d.sync += self.mod.ack.eq(1)
 253
 254
 255 class FPNumBase2Ops:
 256
 257     def __init__(self, width, id_wid, m_extra=True):
 258         self.a = FPNumBase(width, m_extra)
 259         self.b = FPNumBase(width, m_extra)
 260         self.mid = Signal(id_wid, reset_less=True)
 261
 262     def eq(self, i):
 263         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 264
 265
 266 class FPSCData:
 267
 268     def __init__(self, width, id_wid):
 269         self.a = FPNumBase(width, True)
 270         self.b = FPNumBase(width, True)
 271         self.z = FPNumOut(width, False)
 272         self.oz = Signal(width, reset_less=True)
 273         self.out_do_z = Signal(reset_less=True)
 274         self.mid = Signal(id_wid, reset_less=True)
 275
 276     def eq(self, i):
 277         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 278                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 279
 280
 281 class FPAddSpecialCasesMod:
 282     """ special cases: NaNs, infs, zeros, denormalised
 283         NOTE: some of these are unique to add.  see "Special Operations"
 284         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 285     """
 286
 287     def __init__(self, width, id_wid):
 288         self.width = width
 289         self.id_wid = id_wid
 290         self.i = self.ispec()
 291         self.o = self.ospec()
 292
 293     def ispec(self):
 294         return FPNumBase2Ops(self.width, self.id_wid)
 295
 296     def ospec(self):
 297         return FPSCData(self.width, self.id_wid)
 298
 299     def setup(self, m, i):
 300         """ links module to inputs and outputs
 301         """
 302         m.submodules.specialcases = self
 303         m.d.comb += self.i.eq(i)
 304
 305     def process(self, i):
 306         return self.o
 307
 308     def elaborate(self, platform):
 309         m = Module()
 310
 311         m.submodules.sc_in_a = self.i.a
 312         m.submodules.sc_in_b = self.i.b
 313         m.submodules.sc_out_z = self.o.z
 314
 315         s_nomatch = Signal()
 316         m.d.comb += s_nomatch.eq(self.i.a.s != self.i.b.s)
 317
 318         m_match = Signal()
 319         m.d.comb += m_match.eq(self.i.a.m == self.i.b.m)
 320
 321         # if a is NaN or b is NaN return NaN
 322         with m.If(self.i.a.is_nan | self.i.b.is_nan):
 323             m.d.comb += self.o.out_do_z.eq(1)
 324             m.d.comb += self.o.z.nan(0)
 325
 326         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 327         # under review
 328
 329         ## if a is zero and b is NaN return -b
 330         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 331         #    m.d.comb += self.o.out_do_z.eq(1)
 332         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 333
 334         ## if b is zero and a is NaN return -a
 335         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 336         #    m.d.comb += self.o.out_do_z.eq(1)
 337         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 338
 339         ## if a is -zero and b is NaN return -b
 340         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 341         #    m.d.comb += self.o.out_do_z.eq(1)
 342         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 343
 344         ## if b is -zero and a is NaN return -a
 345         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 346         #    m.d.comb += self.o.out_do_z.eq(1)
 347         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 348
 349         # if a is inf return inf (or NaN)
 350         with m.Elif(self.i.a.is_inf):
 351             m.d.comb += self.o.out_do_z.eq(1)
 352             m.d.comb += self.o.z.inf(self.i.a.s)
 353             # if a is inf and signs don't match return NaN
 354             with m.If(self.i.b.exp_128 & s_nomatch):
 355                 m.d.comb += self.o.z.nan(0)
 356
 357         # if b is inf return inf
 358         with m.Elif(self.i.b.is_inf):
 359             m.d.comb += self.o.out_do_z.eq(1)
 360             m.d.comb += self.o.z.inf(self.i.b.s)
 361
 362         # if a is zero and b zero return signed-a/b
 363         with m.Elif(self.i.a.is_zero & self.i.b.is_zero):
 364             m.d.comb += self.o.out_do_z.eq(1)
 365             m.d.comb += self.o.z.create(self.i.a.s & self.i.b.s,
 366                                           self.i.b.e,
 367                                           self.i.b.m[3:-1])
 368
 369         # if a is zero return b
 370         with m.Elif(self.i.a.is_zero):
 371             m.d.comb += self.o.out_do_z.eq(1)
 372             m.d.comb += self.o.z.create(self.i.b.s, self.i.b.e,
 373                                       self.i.b.m[3:-1])
 374
 375         # if b is zero return a
 376         with m.Elif(self.i.b.is_zero):
 377             m.d.comb += self.o.out_do_z.eq(1)
 378             m.d.comb += self.o.z.create(self.i.a.s, self.i.a.e,
 379                                       self.i.a.m[3:-1])
 380
 381         # if a equal to -b return zero (+ve zero)
 382         with m.Elif(s_nomatch & m_match & (self.i.a.e == self.i.b.e)):
 383             m.d.comb += self.o.out_do_z.eq(1)
 384             m.d.comb += self.o.z.zero(0)
 385
 386         # Denormalised Number checks next, so pass a/b data through
 387         with m.Else():
 388             m.d.comb += self.o.out_do_z.eq(0)
 389             m.d.comb += self.o.a.eq(self.i.a)
 390             m.d.comb += self.o.b.eq(self.i.b)
 391
 392         m.d.comb += self.o.oz.eq(self.o.z.v)
 393         m.d.comb += self.o.mid.eq(self.i.mid)
 394
 395         return m
 396
 397
 398 class FPID:
 399     def __init__(self, id_wid):
 400         self.id_wid = id_wid
 401         if self.id_wid:
 402             self.in_mid = Signal(id_wid, reset_less=True)
 403             self.out_mid = Signal(id_wid, reset_less=True)
 404         else:
 405             self.in_mid = None
 406             self.out_mid = None
 407
 408     def idsync(self, m):
 409         if self.id_wid is not None:
 410             m.d.sync += self.out_mid.eq(self.in_mid)
 411
 412
 413 class FPAddSpecialCases(FPState):
 414     """ special cases: NaNs, infs, zeros, denormalised
 415         NOTE: some of these are unique to add.  see "Special Operations"
 416         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 417     """
 418
 419     def __init__(self, width, id_wid):
 420         FPState.__init__(self, "special_cases")
 421         self.mod = FPAddSpecialCasesMod(width)
 422         self.out_z = self.mod.ospec()
 423         self.out_do_z = Signal(reset_less=True)
 424
 425     def setup(self, m, i):
 426         """ links module to inputs and outputs
 427         """
 428         self.mod.setup(m, i, self.out_do_z)
 429         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 430         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 431
 432     def action(self, m):
 433         self.idsync(m)
 434         with m.If(self.out_do_z):
 435             m.next = "put_z"
 436         with m.Else():
 437             m.next = "denormalise"
 438
 439
 440 class FPAddSpecialCasesDeNorm(FPState):
 441     """ special cases: NaNs, infs, zeros, denormalised
 442         NOTE: some of these are unique to add.  see "Special Operations"
 443         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 444     """
 445
 446     def __init__(self, width, id_wid):
 447         FPState.__init__(self, "special_cases")
 448         self.smod = FPAddSpecialCasesMod(width, id_wid)
 449         self.dmod = FPAddDeNormMod(width, id_wid)
 450         self.o = self.ospec()
 451
 452     def ispec(self):
 453         return self.smod.ispec()
 454
 455     def ospec(self):
 456         return self.dmod.ospec()
 457
 458     def setup(self, m, i):
 459         """ links module to inputs and outputs
 460         """
 461         # these only needed for break-out (early-out)
 462         # out_z = self.smod.ospec()
 463         # out_do_z = Signal(reset_less=True)
 464         self.smod.setup(m, i)
 465         self.dmod.setup(m, self.smod.o)
 466         #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
 467
 468         # out_do_z=True, only needed for early-out (split pipeline)
 469         #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
 470         #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
 471
 472         # out_do_z=False
 473         m.d.sync += self.o.eq(self.dmod.o)
 474
 475     def process(self, i):
 476         return self.o
 477
 478     def action(self, m):
 479         #with m.If(self.out_do_z):
 480         #    m.next = "put_z"
 481         #with m.Else():
 482             m.next = "align"
 483
 484
 485 class FPAddDeNormMod(FPState):
 486
 487     def __init__(self, width, id_wid):
 488         self.width = width
 489         self.id_wid = id_wid
 490         self.i = self.ispec()
 491         self.o = self.ospec()
 492
 493     def ispec(self):
 494         return FPSCData(self.width, self.id_wid)
 495
 496     def ospec(self):
 497         return FPSCData(self.width, self.id_wid)
 498
 499     def setup(self, m, i):
 500         """ links module to inputs and outputs
 501         """
 502         m.submodules.denormalise = self
 503         m.d.comb += self.i.eq(i)
 504
 505     def elaborate(self, platform):
 506         m = Module()
 507         m.submodules.denorm_in_a = self.i.a
 508         m.submodules.denorm_in_b = self.i.b
 509         m.submodules.denorm_out_a = self.o.a
 510         m.submodules.denorm_out_b = self.o.b
 511
 512         with m.If(~self.i.out_do_z):
 513             # XXX hmmm, don't like repeating identical code
 514             m.d.comb += self.o.a.eq(self.i.a)
 515             with m.If(self.i.a.exp_n127):
 516                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 517             with m.Else():
 518                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 519
 520             m.d.comb += self.o.b.eq(self.i.b)
 521             with m.If(self.i.b.exp_n127):
 522                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 523             with m.Else():
 524                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 525
 526         m.d.comb += self.o.mid.eq(self.i.mid)
 527         m.d.comb += self.o.z.eq(self.i.z)
 528         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 529         m.d.comb += self.o.oz.eq(self.i.oz)
 530
 531         return m
 532
 533
 534 class FPAddDeNorm(FPState):
 535
 536     def __init__(self, width, id_wid):
 537         FPState.__init__(self, "denormalise")
 538         self.mod = FPAddDeNormMod(width)
 539         self.out_a = FPNumBase(width)
 540         self.out_b = FPNumBase(width)
 541
 542     def setup(self, m, i):
 543         """ links module to inputs and outputs
 544         """
 545         self.mod.setup(m, i)
 546
 547         m.d.sync += self.out_a.eq(self.mod.out_a)
 548         m.d.sync += self.out_b.eq(self.mod.out_b)
 549
 550     def action(self, m):
 551         # Denormalised Number checks
 552         m.next = "align"
 553
 554
 555 class FPAddAlignMultiMod(FPState):
 556
 557     def __init__(self, width):
 558         self.in_a = FPNumBase(width)
 559         self.in_b = FPNumBase(width)
 560         self.out_a = FPNumIn(None, width)
 561         self.out_b = FPNumIn(None, width)
 562         self.exp_eq = Signal(reset_less=True)
 563
 564     def elaborate(self, platform):
 565         # This one however (single-cycle) will do the shift
 566         # in one go.
 567
 568         m = Module()
 569
 570         m.submodules.align_in_a = self.in_a
 571         m.submodules.align_in_b = self.in_b
 572         m.submodules.align_out_a = self.out_a
 573         m.submodules.align_out_b = self.out_b
 574
 575         # NOTE: this does *not* do single-cycle multi-shifting,
 576         #       it *STAYS* in the align state until exponents match
 577
 578         # exponent of a greater than b: shift b down
 579         m.d.comb += self.exp_eq.eq(0)
 580         m.d.comb += self.out_a.eq(self.in_a)
 581         m.d.comb += self.out_b.eq(self.in_b)
 582         agtb = Signal(reset_less=True)
 583         altb = Signal(reset_less=True)
 584         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 585         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 586         with m.If(agtb):
 587             m.d.comb += self.out_b.shift_down(self.in_b)
 588         # exponent of b greater than a: shift a down
 589         with m.Elif(altb):
 590             m.d.comb += self.out_a.shift_down(self.in_a)
 591         # exponents equal: move to next stage.
 592         with m.Else():
 593             m.d.comb += self.exp_eq.eq(1)
 594         return m
 595
 596
 597 class FPAddAlignMulti(FPState):
 598
 599     def __init__(self, width, id_wid):
 600         FPState.__init__(self, "align")
 601         self.mod = FPAddAlignMultiMod(width)
 602         self.out_a = FPNumIn(None, width)
 603         self.out_b = FPNumIn(None, width)
 604         self.exp_eq = Signal(reset_less=True)
 605
 606     def setup(self, m, in_a, in_b):
 607         """ links module to inputs and outputs
 608         """
 609         m.submodules.align = self.mod
 610         m.d.comb += self.mod.in_a.eq(in_a)
 611         m.d.comb += self.mod.in_b.eq(in_b)
 612         #m.d.comb += self.out_a.eq(self.mod.out_a)
 613         #m.d.comb += self.out_b.eq(self.mod.out_b)
 614         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 615         m.d.sync += self.out_a.eq(self.mod.out_a)
 616         m.d.sync += self.out_b.eq(self.mod.out_b)
 617
 618     def action(self, m):
 619         with m.If(self.exp_eq):
 620             m.next = "add_0"
 621
 622
 623 class FPNumIn2Ops:
 624
 625     def __init__(self, width, id_wid):
 626         self.a = FPNumIn(None, width)
 627         self.b = FPNumIn(None, width)
 628         self.z = FPNumOut(width, False)
 629         self.out_do_z = Signal(reset_less=True)
 630         self.oz = Signal(width, reset_less=True)
 631         self.mid = Signal(id_wid, reset_less=True)
 632
 633     def eq(self, i):
 634         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 635                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 636
 637
 638 class FPAddAlignSingleMod:
 639
 640     def __init__(self, width, id_wid):
 641         self.width = width
 642         self.id_wid = id_wid
 643         self.i = self.ispec()
 644         self.o = self.ospec()
 645
 646     def ispec(self):
 647         return FPSCData(self.width, self.id_wid)
 648
 649     def ospec(self):
 650         return FPNumIn2Ops(self.width, self.id_wid)
 651
 652     def process(self, i):
 653         return self.o
 654
 655     def setup(self, m, i):
 656         """ links module to inputs and outputs
 657         """
 658         m.submodules.align = self
 659         m.d.comb += self.i.eq(i)
 660
 661     def elaborate(self, platform):
 662         """ Aligns A against B or B against A, depending on which has the
 663             greater exponent.  This is done in a *single* cycle using
 664             variable-width bit-shift
 665
 666             the shifter used here is quite expensive in terms of gates.
 667             Mux A or B in (and out) into temporaries, as only one of them
 668             needs to be aligned against the other
 669         """
 670         m = Module()
 671
 672         m.submodules.align_in_a = self.i.a
 673         m.submodules.align_in_b = self.i.b
 674         m.submodules.align_out_a = self.o.a
 675         m.submodules.align_out_b = self.o.b
 676
 677         # temporary (muxed) input and output to be shifted
 678         t_inp = FPNumBase(self.width)
 679         t_out = FPNumIn(None, self.width)
 680         espec = (len(self.i.a.e), True)
 681         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 682         m.submodules.align_t_in = t_inp
 683         m.submodules.align_t_out = t_out
 684         m.submodules.multishift_r = msr
 685
 686         ediff = Signal(espec, reset_less=True)
 687         ediffr = Signal(espec, reset_less=True)
 688         tdiff = Signal(espec, reset_less=True)
 689         elz = Signal(reset_less=True)
 690         egz = Signal(reset_less=True)
 691
 692         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 693         m.d.comb += msr.inp.eq(t_inp.m)
 694         m.d.comb += msr.diff.eq(tdiff)
 695         m.d.comb += t_out.m.eq(msr.m)
 696         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 697         m.d.comb += t_out.s.eq(t_inp.s)
 698
 699         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 700         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 701         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 702         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 703
 704         # default: A-exp == B-exp, A and B untouched (fall through)
 705         m.d.comb += self.o.a.eq(self.i.a)
 706         m.d.comb += self.o.b.eq(self.i.b)
 707         # only one shifter (muxed)
 708         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 709         # exponent of a greater than b: shift b down
 710         with m.If(~self.i.out_do_z):
 711             with m.If(egz):
 712                 m.d.comb += [t_inp.eq(self.i.b),
 713                              tdiff.eq(ediff),
 714                              self.o.b.eq(t_out),
 715                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 716                             ]
 717             # exponent of b greater than a: shift a down
 718             with m.Elif(elz):
 719                 m.d.comb += [t_inp.eq(self.i.a),
 720                              tdiff.eq(ediffr),
 721                              self.o.a.eq(t_out),
 722                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 723                             ]
 724
 725         m.d.comb += self.o.mid.eq(self.i.mid)
 726         m.d.comb += self.o.z.eq(self.i.z)
 727         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 728         m.d.comb += self.o.oz.eq(self.i.oz)
 729
 730         return m
 731
 732
 733 class FPAddAlignSingle(FPState):
 734
 735     def __init__(self, width, id_wid):
 736         FPState.__init__(self, "align")
 737         self.mod = FPAddAlignSingleMod(width, id_wid)
 738         self.out_a = FPNumIn(None, width)
 739         self.out_b = FPNumIn(None, width)
 740
 741     def setup(self, m, i):
 742         """ links module to inputs and outputs
 743         """
 744         self.mod.setup(m, i)
 745
 746         # NOTE: could be done as comb
 747         m.d.sync += self.out_a.eq(self.mod.out_a)
 748         m.d.sync += self.out_b.eq(self.mod.out_b)
 749
 750     def action(self, m):
 751         m.next = "add_0"
 752
 753
 754 class FPAddAlignSingleAdd(FPState):
 755
 756     def __init__(self, width, id_wid):
 757         FPState.__init__(self, "align")
 758         self.width = width
 759         self.id_wid = id_wid
 760         self.a1o = self.ospec()
 761
 762     def ispec(self):
 763         return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
 764
 765     def ospec(self):
 766         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 767
 768     def setup(self, m, i):
 769         """ links module to inputs and outputs
 770         """
 771
 772         # chain AddAlignSingle, AddStage0 and AddStage1
 773         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 774         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 775         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 776
 777         chain = StageChain([mod, a0mod, a1mod])
 778         chain.setup(m, i)
 779
 780         m.d.sync += self.a1o.eq(a1mod.o)
 781
 782     def process(self, i):
 783         return self.a1o
 784
 785     def action(self, m):
 786         m.next = "normalise_1"
 787
 788
 789 class FPAddStage0Data:
 790
 791     def __init__(self, width, id_wid):
 792         self.z = FPNumBase(width, False)
 793         self.out_do_z = Signal(reset_less=True)
 794         self.oz = Signal(width, reset_less=True)
 795         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 796         self.mid = Signal(id_wid, reset_less=True)
 797
 798     def eq(self, i):
 799         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 800                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 801
 802
 803 class FPAddStage0Mod:
 804
 805     def __init__(self, width, id_wid):
 806         self.width = width
 807         self.id_wid = id_wid
 808         self.i = self.ispec()
 809         self.o = self.ospec()
 810
 811     def ispec(self):
 812         return FPSCData(self.width, self.id_wid)
 813
 814     def ospec(self):
 815         return FPAddStage0Data(self.width, self.id_wid)
 816
 817     def process(self, i):
 818         return self.o
 819
 820     def setup(self, m, i):
 821         """ links module to inputs and outputs
 822         """
 823         m.submodules.add0 = self
 824         m.d.comb += self.i.eq(i)
 825
 826     def elaborate(self, platform):
 827         m = Module()
 828         m.submodules.add0_in_a = self.i.a
 829         m.submodules.add0_in_b = self.i.b
 830         m.submodules.add0_out_z = self.o.z
 831
 832         # store intermediate tests (and zero-extended mantissas)
 833         seq = Signal(reset_less=True)
 834         mge = Signal(reset_less=True)
 835         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 836         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 837         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 838                      mge.eq(self.i.a.m >= self.i.b.m),
 839                      am0.eq(Cat(self.i.a.m, 0)),
 840                      bm0.eq(Cat(self.i.b.m, 0))
 841                     ]
 842         # same-sign (both negative or both positive) add mantissas
 843         with m.If(~self.i.out_do_z):
 844             m.d.comb += self.o.z.e.eq(self.i.a.e)
 845             with m.If(seq):
 846                 m.d.comb += [
 847                     self.o.tot.eq(am0 + bm0),
 848                     self.o.z.s.eq(self.i.a.s)
 849                 ]
 850             # a mantissa greater than b, use a
 851             with m.Elif(mge):
 852                 m.d.comb += [
 853                     self.o.tot.eq(am0 - bm0),
 854                     self.o.z.s.eq(self.i.a.s)
 855                 ]
 856             # b mantissa greater than a, use b
 857             with m.Else():
 858                 m.d.comb += [
 859                     self.o.tot.eq(bm0 - am0),
 860                     self.o.z.s.eq(self.i.b.s)
 861             ]
 862
 863         m.d.comb += self.o.oz.eq(self.i.oz)
 864         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 865         m.d.comb += self.o.mid.eq(self.i.mid)
 866         return m
 867
 868
 869 class FPAddStage0(FPState):
 870     """ First stage of add.  covers same-sign (add) and subtract
 871         special-casing when mantissas are greater or equal, to
 872         give greatest accuracy.
 873     """
 874
 875     def __init__(self, width, id_wid):
 876         FPState.__init__(self, "add_0")
 877         self.mod = FPAddStage0Mod(width)
 878         self.o = self.mod.ospec()
 879
 880     def setup(self, m, i):
 881         """ links module to inputs and outputs
 882         """
 883         self.mod.setup(m, i)
 884
 885         # NOTE: these could be done as combinatorial (merge add0+add1)
 886         m.d.sync += self.o.eq(self.mod.o)
 887
 888     def action(self, m):
 889         m.next = "add_1"
 890
 891
 892 class FPAddStage1Data:
 893
 894     def __init__(self, width, id_wid):
 895         self.z = FPNumBase(width, False)
 896         self.out_do_z = Signal(reset_less=True)
 897         self.oz = Signal(width, reset_less=True)
 898         self.of = Overflow()
 899         self.mid = Signal(id_wid, reset_less=True)
 900
 901     def eq(self, i):
 902         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 903                 self.of.eq(i.of), self.mid.eq(i.mid)]
 904
 905
 906
 907 class FPAddStage1Mod(FPState):
 908     """ Second stage of add: preparation for normalisation.
 909         detects when tot sum is too big (tot[27] is kinda a carry bit)
 910     """
 911
 912     def __init__(self, width, id_wid):
 913         self.width = width
 914         self.id_wid = id_wid
 915         self.i = self.ispec()
 916         self.o = self.ospec()
 917
 918     def ispec(self):
 919         return FPAddStage0Data(self.width, self.id_wid)
 920
 921     def ospec(self):
 922         return FPAddStage1Data(self.width, self.id_wid)
 923
 924     def process(self, i):
 925         return self.o
 926
 927     def setup(self, m, i):
 928         """ links module to inputs and outputs
 929         """
 930         m.submodules.add1 = self
 931         m.submodules.add1_out_overflow = self.o.of
 932
 933         m.d.comb += self.i.eq(i)
 934
 935     def elaborate(self, platform):
 936         m = Module()
 937         #m.submodules.norm1_in_overflow = self.in_of
 938         #m.submodules.norm1_out_overflow = self.out_of
 939         #m.submodules.norm1_in_z = self.in_z
 940         #m.submodules.norm1_out_z = self.out_z
 941         m.d.comb += self.o.z.eq(self.i.z)
 942         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 943         with m.If(~self.i.out_do_z):
 944             with m.If(self.i.tot[-1]):
 945                 m.d.comb += [
 946                     self.o.z.m.eq(self.i.tot[4:]),
 947                     self.o.of.m0.eq(self.i.tot[4]),
 948                     self.o.of.guard.eq(self.i.tot[3]),
 949                     self.o.of.round_bit.eq(self.i.tot[2]),
 950                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 951                     self.o.z.e.eq(self.i.z.e + 1)
 952             ]
 953             # tot[-1] (MSB) zero case
 954             with m.Else():
 955                 m.d.comb += [
 956                     self.o.z.m.eq(self.i.tot[3:]),
 957                     self.o.of.m0.eq(self.i.tot[3]),
 958                     self.o.of.guard.eq(self.i.tot[2]),
 959                     self.o.of.round_bit.eq(self.i.tot[1]),
 960                     self.o.of.sticky.eq(self.i.tot[0])
 961             ]
 962
 963         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 964         m.d.comb += self.o.oz.eq(self.i.oz)
 965         m.d.comb += self.o.mid.eq(self.i.mid)
 966
 967         return m
 968
 969
 970 class FPAddStage1(FPState):
 971
 972     def __init__(self, width, id_wid):
 973         FPState.__init__(self, "add_1")
 974         self.mod = FPAddStage1Mod(width)
 975         self.out_z = FPNumBase(width, False)
 976         self.out_of = Overflow()
 977         self.norm_stb = Signal()
 978
 979     def setup(self, m, i):
 980         """ links module to inputs and outputs
 981         """
 982         self.mod.setup(m, i)
 983
 984         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 985
 986         m.d.sync += self.out_of.eq(self.mod.out_of)
 987         m.d.sync += self.out_z.eq(self.mod.out_z)
 988         m.d.sync += self.norm_stb.eq(1)
 989
 990     def action(self, m):
 991         m.next = "normalise_1"
 992
 993
 994 class FPNormaliseModSingle:
 995
 996     def __init__(self, width):
 997         self.width = width
 998         self.in_z = self.ispec()
 999         self.out_z = self.ospec()
1000
1001     def ispec(self):
1002         return FPNumBase(self.width, False)
1003
1004     def ospec(self):
1005         return FPNumBase(self.width, False)
1006
1007     def setup(self, m, i):
1008         """ links module to inputs and outputs
1009         """
1010         m.submodules.normalise = self
1011         m.d.comb += self.i.eq(i)
1012
1013     def elaborate(self, platform):
1014         m = Module()
1015
1016         mwid = self.out_z.m_width+2
1017         pe = PriorityEncoder(mwid)
1018         m.submodules.norm_pe = pe
1019
1020         m.submodules.norm1_out_z = self.out_z
1021         m.submodules.norm1_in_z = self.in_z
1022
1023         in_z = FPNumBase(self.width, False)
1024         in_of = Overflow()
1025         m.submodules.norm1_insel_z = in_z
1026         m.submodules.norm1_insel_overflow = in_of
1027
1028         espec = (len(in_z.e), True)
1029         ediff_n126 = Signal(espec, reset_less=True)
1030         msr = MultiShiftRMerge(mwid, espec)
1031         m.submodules.multishift_r = msr
1032
1033         m.d.comb += in_z.eq(self.in_z)
1034         m.d.comb += in_of.eq(self.in_of)
1035         # initialise out from in (overridden below)
1036         m.d.comb += self.out_z.eq(in_z)
1037         m.d.comb += self.out_of.eq(in_of)
1038         # normalisation decrease condition
1039         decrease = Signal(reset_less=True)
1040         m.d.comb += decrease.eq(in_z.m_msbzero)
1041         # decrease exponent
1042         with m.If(decrease):
1043             # *sigh* not entirely obvious: count leading zeros (clz)
1044             # with a PriorityEncoder: to find from the MSB
1045             # we reverse the order of the bits.
1046             temp_m = Signal(mwid, reset_less=True)
1047             temp_s = Signal(mwid+1, reset_less=True)
1048             clz = Signal((len(in_z.e), True), reset_less=True)
1049             m.d.comb += [
1050                 # cat round and guard bits back into the mantissa
1051                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
1052                 pe.i.eq(temp_m[::-1]),          # inverted
1053                 clz.eq(pe.o),                   # count zeros from MSB down
1054                 temp_s.eq(temp_m << clz),       # shift mantissa UP
1055                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
1056                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
1057             ]
1058
1059         return m
1060
class FPNorm1Data:
    """ Output record of normalisation: the number z, the "round up"
        flag roundz, the bypass value oz with its enable out_do_z,
        and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1073
1074
class FPNorm1ModSingle:
    """ Single-cycle normalisation of the add result.

        Either shifts the mantissa up (decreasing the exponent, limited
        so that it does not drop below the minimum exponent) or shifts
        it down through a MultiShiftRMerge (increasing the exponent up
        to N126), updating the guard/round/sticky bits to match, and
        outputs the resulting "round up" decision in o.roundz.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: the output format of add stage 1
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record of normalisation
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the (combinatorial) output record; i is unused
        """
        return self.o

    def elaborate(self, platform):
        """ creates the combinatorial normalisation logic
        """
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        # local overflow record: only its roundz result leaves the module
        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # internal copy of the input record
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits must come from the shifter output
                    # (same layout as temp_m: bit 0 sticky, 1 round,
                    # 2 guard, 3 upwards mantissa).  temp_s belongs to
                    # the decrease branch and is not driven here.
                    of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),      # guard
                    of.round_bit.eq(msr.m[1]),  # round
                    of.sticky.eq(msr.m[0]),     # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1181
1182
class FPNorm1ModMulti:
    """ One step of multi-cycle normalisation.

        Selects either the fresh input (in_z/in_of) or the value from
        the previous step (temp_z/temp_of) according to in_select, then
        shifts the mantissa by a single bit up or down, adjusting the
        exponent by one.  out_norm is set while a further step was
        needed (decrease or increase condition true).
    """

    def __init__(self, width, single_cycle=True):
        # NOTE(review): single_cycle is accepted but not used here.
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven in elaborate(): must exist on the instance
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """ creates the combinatorial single-bit normalisation step
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                # bits shifted out of round are accumulated into sticky
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1248
1249
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" using the single-cycle normalisation
        module (FPNorm1ModSingle).
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # NOTE(review): single_cycle is accepted but not used here.
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: always move on to "round"
        """
        m.next = "round"
1272
1273
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" using the multi-cycle normalisation
        module (FPNorm1ModMulti): the state is re-entered, feeding the
        previous result back in through temp_z/temp_of, until the
        module reports (out_norm low) that no further step is needed.
    """

    def __init__(self, width, id_wid):
        # NOTE(review): id_wid is accepted but not used here.
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as defined in this file, has no
        # setup() method, so this call looks stale - confirm the intended
        # wiring of in_accept/temp_z/temp_of/out_z/out_norm to the module.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ FSM action: latch the step result, then either stay in this
            state (more normalisation needed) or move on to "round"
        """
        # a new input is accepted when strobed and not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # keep this step's result for feeding back into the next step
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1314
1315
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact variant): normalisation,
        rounding, corrections and packing chained combinatorially,
        with only the packed result being registered.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        """ input record: same as the ispec of FPNorm1ModSingle
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record: same as the ospec of FPPackMod
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the chain of modules, connects it to i and latches
            the packed output into self.out_z
        """
        # Normalisation, Rounding, Corrections, Pack - in that order
        stages = [
            FPNorm1ModSingle(self.width, self.id_wid),
            FPRoundMod(self.width, self.id_wid),
            FPCorrectionsMod(self.width, self.id_wid),
            FPPackMod(self.width, self.id_wid),
        ]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()

        # register the id and the packed result
        m.d.sync += self.out_z.mid.eq(packer.o.mid)
        m.d.sync += self.out_z.z.v.eq(packer.o.z.v)

    def process(self, i):
        """ returns the registered output record; i is unused
        """
        return self.out_z

    def action(self, m):
        """ FSM action: always move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1350
1351
class FPRoundData:
    """ Record passed between the round, corrections and pack stages:
        the number z, the bypass value oz with its enable out_do_z,
        and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1363
1364
class FPRoundMod:
    """ Rounding stage: when the input's roundz flag is set (and the
        bypass out_do_z is clear) the mantissa is incremented, and the
        exponent too if the mantissa was all ones.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record: the output format of normalisation
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record of this stage
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the (combinatorial) output record; i is unused
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module with m and connects i to its input
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial rounding logic
        """
        m = Module()
        src, dst = self.i, self.out_z

        # default: copies mid, z, out_do_z (z.m / z.e overridden below)
        m.d.comb += dst.eq(src)

        do_round = ~src.out_do_z & src.roundz
        with m.If(do_round):
            m.d.comb += dst.z.m.eq(src.z.m + 1) # mantissa up
            with m.If(src.z.m == src.z.m1s): # all 1s
                m.d.comb += dst.z.e.eq(src.z.e + 1) # exponent up

        return m
1396
1397
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined by FPState in this file;
        # presumably it is inherited from FPBase - confirm it exists.
        self.idsync(m)
        # FPRoundMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: always move on to "corrections"
        """
        m.next = "corrections"
1422
1423
class FPCorrectionsMod:
    """ Corrections stage: when the (non-bypassed) number is flagged
        as denormalised, its exponent is replaced with N127.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record: the output format of rounding
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record: same format as the input
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the (combinatorial) output record; i is unused
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module with m and connects i to its input
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial corrections logic
        """
        m = Module()
        src, dst = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z

        # default: copies mid, z, out_do_z (z.e overridden below)
        m.d.comb += dst.eq(src)

        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)

        return m
1456
1457
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: always move on to "pack"
        """
        m.next = "pack"
1481
1482
class FPPackData:
    """ Output record of the pack stage: the packed number z and the
        id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
1491
1492
class FPPackMod:
    """ Pack stage: produces the final value, which is the bypass
        value oz when out_do_z is set, infinity (with z's sign) when z
        is flagged as overflowed, and otherwise the number created from
        z's sign, exponent and mantissa.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: the round/corrections format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record of this stage
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the (combinatorial) output record; i is unused
        """
        return self.o

    def setup(self, m, in_z):
        """ registers this module with m and connects in_z to its input
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ creates the combinatorial packing logic
        """
        m = Module()
        src, dst = self.i, self.o
        m.submodules.pack_in_z = src.z
        m.d.comb += dst.mid.eq(src.mid)

        # bypass: an earlier stage already decided the result
        with m.If(src.out_do_z):
            m.d.comb += dst.z.v.eq(src.oz)
        # overflowed: result is infinity, keeping the sign
        with m.Elif(src.z.is_overflowed):
            m.d.comb += dst.z.inf(src.z.s)
        # normal case: assemble from sign, exponent and mantissa
        with m.Else():
            m.d.comb += dst.z.create(src.z.s, src.z.e, src.z.m)

        return m
1528
1529
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # out_z is an FPPackData: the packed value lives at .z.v, and
        # the module's result record is self.mod.o
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: always move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1553
1554
class FPPutZ(FPState):
    """ FSM state that presents a result on an output port and waits
        for it to be acknowledged before moving to to_state
        ("get_ops" unless specified).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy value (and id, if any) to the output, raise
            stb, and leave the state once stb and ack are both set
        """
        port = self.out_z.z

        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z.v)

        with m.If(port.stb & port.ack):
            # receiver has taken the value: drop strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1578
1579
class FPPutZIdx(FPState):
    """ FSM state that presents a result on one of several output
        ports, selected by in_mid, and waits for that port to be
        acknowledged before moving to to_state ("get_ops" unless
        specified).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy value to the selected output, raise its
            stb, and leave the state once its stb and ack are both set
        """
        port = self.out_zs[self.in_mid]

        # local copies of the selected port's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += outz_stb.eq(port.stb)
        m.d.comb += outz_ack.eq(port.ack)

        m.d.sync += port.v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            # receiver has taken the value: drop strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1605
class FPADDBaseData:
    """ Input record of the adder: the two operands a and b, each
        "width" bits, plus the id (mid).
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a  = Signal(width)
        self.b  = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        fields = ("a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]

    def ports(self):
        """ returns the signals of this record as a list
        """
        return list((self.a, self.b, self.mid))
1620
class FPOpData:
    """ Output record of the adder: the result operand z (an FPOp)
        plus the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the members of this record as a list
        """
        return list((self.z, self.mid))
1631
1632
class FPADDBaseMod:
    """ FSM-based IEEE754 adder: collects a list of FPState objects
        (either the "compact" or the "longer" set of stages) and emits
        one FSM state per entry.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FPState objects, in the order they were added by add_state()
        self.states = []

    def ispec(self):
        """ input record: operands a, b and the id
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record: result operand z and the id
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered FPState, named by its state_from
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ sets up the full (one state per stage) pipeline of states
        """
        # NOTE(review): this path looks stale.  It reads self.in_mid,
        # self.out_z and self.out_mid, none of which are set in
        # __init__, and several setup() calls below pass argument lists
        # that do not match the definitions visible in this file (e.g.
        # FPAddStage0.setup and FPRound.setup take (m, i)).  Confirm
        # before using compact=False.

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or iterative variant
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or iterative variant
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the packed (normal path) result
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        # output of the special-cases result
        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ sets up the reduced pipeline of states: get operands,
            special-cases + denormalise, align + add, normalise-to-pack,
            then output the result
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1752
1753
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps and drives an FPADDBaseMod submodule.

        Holds the FPADDBaseMod instance (self.mod), creates its own
        input/output records from the module's ispec/ospec (self.i and
        self.o), and starts the module through the self.in_t trigger.
        When the module's z output triggers (z_done) the FSM moves on
        to the "put_z" state.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ returns a new input record, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output record, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links this state to the wrapped module and registers it on m

            * m: Module receiving the connections and the submodule
            * i: input record, copied combinatorially into self.i
            * add_stb: incoming strobe, registered (sync) into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     # input trigger: stb goes down to the module,
                     # ack comes back up from it
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     # output: mid, value and stb come up from the module,
                     # ack goes back down to it
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        # add_stb is registered here rather than connected combinatorially
        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM behaviour for the "fpadd" state

            While z_done is low: if in_accept is high, acknowledge the
            operands and raise the module's input strobe; otherwise drop
            both and raise the output ack.  When z_done is high:
            acknowledge, drop the input strobe and go to "put_z".
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move to the output state.  (the
            # result and id are already available combinatorially on
            # self.o, connected in setup.)
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1843
1844
class FPADDStageOut:
    """ Output record of the add stage: a result z and an identifier mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the z Signal
            * id_wid: bit-width of the mid Signal
        """
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid into
            this record (z first, then mid)
        """
        assigns = []
        for dest, src in ((self.z, i.z), (self.mid, i.mid)):
            assigns.append(dest.eq(src))
        return assigns

    def ports(self):
        """ returns the record's members as a list: [z, mid]
        """
        members = [self.z]
        members.append(self.mid)
        return members
1855
1856
# matches the format of FPADDStageOut, allows eq function to do assignments
class PlaceHolder:
    """ Empty attribute container: callers attach fields (such as z and
        mid) to an instance after creating it.
    """
1859
1860
class FPAddBaseStage:
    """ Pipeline stage description: input spec, output spec and the
        processing function (a plain "+" of the a and b operands).
    """

    def __init__(self, width, id_wid):
        """ * width: width passed to the input and output records
            * id_wid: identifier width passed to the same records
        """
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ returns a new input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (FPADDStageOut)
        """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ returns an object whose z is i.a + i.b and whose mid is i.mid
        """
        result = PlaceHolder()
        # same field names as FPADDStageOut, so its eq() can consume this
        result.z = i.a + i.b
        result.mid = i.mid
        return result
1877
1878
class FPADDBasePipe1(UnbufferedPipeline):
    """ UnbufferedPipeline built around a single FPAddBaseStage.
    """
    def __init__(self, width, id_wid):
        """ * width, id_wid: passed straight through to FPAddBaseStage
        """
        super().__init__(FPAddBaseStage(width, id_wid))
1883
1884
class FPADDBasePipe(ControlBase):
    """ ControlBase containing one FPADDBasePipe1, connected at
        construction time via self.connect().
    """
    def __init__(self, width, id_wid):
        """ * width, id_wid: passed straight through to FPADDBasePipe1
        """
        super().__init__()
        self.pipe1 = FPADDBasePipe1(width, id_wid)
        # connect() returns the assignments; they are only added to a
        # Module later, in elaborate()
        chain = [self.pipe1]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ returns a Module holding pipe1 as a submodule plus the
            connection assignments recorded in __init__
        """
        mod = Module()
        mod.submodules.pipe1 = self.pipe1
        mod.d.comb += self._eqs
        return mod
1896
1897
class PriorityCombPipeline(CombMultiInPipeline):
    """ CombMultiInPipeline whose input mux is an InputPriorityArbiter.
    """
    def __init__(self, stage, p_len):
        """ * stage: the stage handed to CombMultiInPipeline
            * p_len: number of inputs, given to both arbiter and pipeline
        """
        # the arbiter is constructed with a reference to this pipeline
        # before the base class has been initialised
        arbiter = InputPriorityArbiter(self, p_len)
        super().__init__(stage, p_len=p_len, p_mux=arbiter)

    def ports(self):
        """ returns the ports of the input mux
        """
        mux = self.p_mux
        return mux.ports()
1905
1906
class FPAddInPassThruStage:
    """ Pass-through stage: input and output are both FPADDBaseData and
        process() hands the input back unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new FPADDBaseData record
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format is identical to the input format
        """
        return self.ispec()

    def process(self, i):
        """ returns i itself: no processing is done
        """
        return i
1913
1914
class FPADDInMuxPipe(PriorityCombPipeline):
    """ Fan-in: PriorityCombPipeline with num_rows inputs and a
        pass-through stage.
    """
    def __init__(self, width, id_width, num_rows):
        """ * width, id_width: passed to FPAddInPassThruStage
            * num_rows: number of inputs (p_len of the pipeline)
        """
        self.num_rows = num_rows
        passthru = FPAddInPassThruStage(width, id_width)
        super().__init__(passthru, p_len=self.num_rows)

    def ports(self):
        """ returns a flat list: valid/ready/data ports of every input,
            followed by ready/valid/data ports of the single output
        """
        collected = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            collected.extend([prev.i_valid, prev.o_ready] +
                             prev.i_data.ports())
        nxt = self.n
        collected.extend([nxt.i_ready, nxt.o_valid] + nxt.o_data.ports())
        return collected
1931
1932
class MuxCombPipeline(CombMultiOutPipeline):
    """ CombMultiOutPipeline in which one object serves as both the
        stage and the n-way output multiplexer.
    """
    def __init__(self, stage, n_len):
        """ * stage: used as the stage *and* as n_mux
            * n_len: number of outputs
        """
        # HACK: stage is also the n-way multiplexer
        super().__init__(stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        incoming = self.p.i_data
        stage.m_id = incoming.mid

    def ports(self):
        """ returns the ports of self.p_mux
        """
        # NOTE(review): __init__ only hands n_mux to the base class;
        # confirm that a p_mux attribute exists on a multi-out pipeline.
        # (FPADDMuxOutPipe in this file overrides ports().)
        mux = self.p_mux
        return mux.ports()
1943
1944
class FPAddOutPassThruStage:
    """ Pass-through stage: input and output are both FPADDStageOut and
        process() hands the input back unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new FPADDStageOut record
        """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ output format is identical to the input format
        """
        return self.ispec()

    def process(self, i):
        """ returns i itself: no processing is done
        """
        return i
1951
1952
class FPADDMuxOutPipe(MuxCombPipeline):
    """ Fan-out: MuxCombPipeline with num_rows outputs and a
        pass-through stage.
    """
    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to FPAddOutPassThruStage
            * num_rows: number of outputs (n_len of the pipeline)
        """
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        super().__init__(passthru, n_len=self.num_rows)

    def ports(self):
        """ returns a flat list: valid/ready/data ports of the single
            input, followed by ready/valid/data ports of every output
        """
        prev = self.p
        collected = [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            collected.extend([nxt.i_ready, nxt.o_valid] +
                             nxt.o_data.ports())
        return collected
1968
1969
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        Three chained pieces: a fan-in pipe (num_rows inputs), the add
        pipeline, and a fan-out pipe (num_rows outputs).
    """
    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to all three pipes
            * num_rows: number of inputs of the fan-in and of outputs
              of the fan-out
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's own in/out are simply the fan-in's inputs and
        # the fan-out's outputs
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        # port list is computed once, here, and returned by ports()
        port_list = self.inpipe.ports()
        self._ports = port_list + self.outpipe.ports()

    def elaborate(self, platform):
        """ returns a Module containing the three pipes as submodules,
            with fan-in connected to the adder and adder to fan-out
        """
        mod = Module()
        mod.submodules.inpipe = self.inpipe
        mod.submodules.fpadd = self.fpadd
        mod.submodules.outpipe = self.outpipe

        # fan-in output -> adder input
        mod.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        # adder output -> fan-out input
        mod.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return mod

    def ports(self):
        """ returns the port list assembled in __init__
        """
        return self._ports
1998
1999
class ResArray:
    """ Array of rs_sz result operands (FPOp), plus one incoming operand
        (in_z) and its identifier (in_mid).
    """
    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: width given to every FPOp created here
            * id_wid: bit-width of the in_mid Signal
            * rs_sz: number of result entries to create.  This used to be
              read from an undefined name; the default of 2 matches the
              default used by FPADD in this file.
        """
        self.width = width
        self.id_wid = id_wid
        self.rs_sz = rs_sz
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially copies in_z and in_mid into this object's
            own in_z / in_mid

            * m: Module that receives the assignments
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates a Module with in_z and every result entry added
            as submodules

            * platform: not used here
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the ports of all result entries, as one flat list
        """
        return [port for z in self.res for port in z.ports()]
2031
2032
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of result
              entries that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # one (in_a, in_b) operand pair per row
        operand_pairs = []
        for row in range(rs_sz):
            op_a, op_b = FPOp(width), FPOp(width)
            op_a.name = "in_a_%d" % row
            op_b.name = "in_b_%d" % row
            operand_pairs.append((op_a, op_b))
        self.rs = Array(operand_pairs)

        # one result operand per row
        results = []
        for row in range(rs_sz):
            op_z = FPOp(width)
            op_z.name = "out_z_%d" % row
            results.append(op_z)
        self.res = Array(results)

        # FSM states, in the order add_state() was called
        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it, so
            that creation and registration can be a single expression
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            Only the first operand pair, self.rs[0], is wired up.
        """
        m = Module()
        m.submodules += self.rs

        first_row = self.rs[0]
        in_a, in_b = first_row[0], first_row[1]

        # state "get_a": fetch operand a, then go to "get_b"
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        # state "get_b": fetch operand b, then go to "fpadd"
        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        # state "fpadd": the adder itself
        adder = self.add_state(FPADDBase(self.width, self.id_wid,
                                         self.single_cycle))
        abd = adder.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        adder.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = adder.o

        # state "put_z": store into self.res, then go back to "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                 o.mid, "get_a"))

        # every registered state becomes one FSM state, named after its
        # state_from, whose behaviour is supplied by its action()
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2122
2123
if __name__ == "__main__":
    # hard-wired toggle: True runs the full FPADD, False the bare FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        # ports: first operand pair, first result entry, and the id signals
        main(alu, ports=(alu.rs[0][0].ports() +
                         alu.rs[0][1].ports() +
                         alu.res[0].ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        # NOTE(review): FPADDBase.__init__ in this file creates i / o / in_t
        # rather than in_a, in_b, out_z, in_mid and out_mid, so this branch
        # looks stale -- confirm before enabling it.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())