src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
  13 from multipipe import CombMultiOutPipeline
  14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
  15
  16 #from fpbase import FPNumShiftMultiRight
  17
  18
  19 class FPState(FPBase):
  20     def __init__(self, state_from):
  21         self.state_from = state_from
  22
  23     def set_inputs(self, inputs):
  24         self.inputs = inputs
  25         for k,v in inputs.items():
  26             setattr(self, k, v)
  27
  28     def set_outputs(self, outputs):
  29         self.outputs = outputs
  30         for k,v in outputs.items():
  31             setattr(self, k, v)
  32
  33
  34 class FPGetSyncOpsMod:
  35     def __init__(self, width, num_ops=2):
  36         self.width = width
  37         self.num_ops = num_ops
  38         inops = []
  39         outops = []
  40         for i in range(num_ops):
  41             inops.append(Signal(width, reset_less=True))
  42             outops.append(Signal(width, reset_less=True))
  43         self.in_op = inops
  44         self.out_op = outops
  45         self.stb = Signal(num_ops)
  46         self.ack = Signal()
  47         self.ready = Signal(reset_less=True)
  48         self.out_decode = Signal(reset_less=True)
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  53         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  54         with m.If(self.out_decode):
  55             for i in range(self.num_ops):
  56                 m.d.comb += [
  57                         self.out_op[i].eq(self.in_op[i]),
  58                 ]
  59         return m
  60
  61     def ports(self):
  62         return self.in_op + self.out_op + [self.stb, self.ack]
  63
  64
  65 class FPOps(Trigger):
  66     def __init__(self, width, num_ops):
  67         Trigger.__init__(self)
  68         self.width = width
  69         self.num_ops = num_ops
  70
  71         res = []
  72         for i in range(num_ops):
  73             res.append(Signal(width))
  74         self.v  = Array(res)
  75
  76     def ports(self):
  77         res = []
  78         for i in range(self.num_ops):
  79             res.append(self.v[i])
  80         res.append(self.ack)
  81         res.append(self.stb)
  82         return res
  83
  84
  85 class InputGroup:
  86     def __init__(self, width, num_ops=2, num_rows=4):
  87         self.width = width
  88         self.num_ops = num_ops
  89         self.num_rows = num_rows
  90         self.mmax = int(log(self.num_rows) / log(2))
  91         self.rs = []
  92         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  93         for i in range(num_rows):
  94             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  95         self.rs = Array(self.rs)
  96
  97         self.out_op = FPOps(width, num_ops)
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101
 102         pe = PriorityEncoder(self.num_rows)
 103         m.submodules.selector = pe
 104         m.submodules.out_op = self.out_op
 105         m.submodules += self.rs
 106
 107         # connect priority encoder
 108         in_ready = []
 109         for i in range(self.num_rows):
 110             in_ready.append(self.rs[i].ready)
 111         m.d.comb += pe.i.eq(Cat(*in_ready))
 112
 113         active = Signal(reset_less=True)
 114         out_en = Signal(reset_less=True)
 115         m.d.comb += active.eq(~pe.n) # encoder active
 116         m.d.comb += out_en.eq(active & self.out_op.trigger)
 117
 118         # encoder active: ack relevant input, record MID, pass output
 119         with m.If(out_en):
 120             rs = self.rs[pe.o]
 121             m.d.sync += self.mid.eq(pe.o)
 122             m.d.sync += rs.ack.eq(0)
 123             m.d.sync += self.out_op.stb.eq(0)
 124             for j in range(self.num_ops):
 125                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 126         with m.Else():
 127             m.d.sync += self.out_op.stb.eq(1)
 128             # acks all default to zero
 129             for i in range(self.num_rows):
 130                 m.d.sync += self.rs[i].ack.eq(1)
 131
 132         return m
 133
 134     def ports(self):
 135         res = []
 136         for i in range(self.num_rows):
 137             inop = self.rs[i]
 138             res += inop.in_op + [inop.stb]
 139         return self.out_op.ports() + res + [self.mid]
 140
 141
 142 class FPGetOpMod:
 143     def __init__(self, width):
 144         self.in_op = FPOp(width)
 145         self.out_op = Signal(width)
 146         self.out_decode = Signal(reset_less=True)
 147
 148     def elaborate(self, platform):
 149         m = Module()
 150         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 151         m.submodules.get_op_in = self.in_op
 152         #m.submodules.get_op_out = self.out_op
 153         with m.If(self.out_decode):
 154             m.d.comb += [
 155                 self.out_op.eq(self.in_op.v),
 156             ]
 157         return m
 158
 159
 160 class FPGetOp(FPState):
 161     """ gets operand
 162     """
 163
 164     def __init__(self, in_state, out_state, in_op, width):
 165         FPState.__init__(self, in_state)
 166         self.out_state = out_state
 167         self.mod = FPGetOpMod(width)
 168         self.in_op = in_op
 169         self.out_op = Signal(width)
 170         self.out_decode = Signal(reset_less=True)
 171
 172     def setup(self, m, in_op):
 173         """ links module to inputs and outputs
 174         """
 175         setattr(m.submodules, self.state_from, self.mod)
 176         m.d.comb += self.mod.in_op.eq(in_op)
 177         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 178
 179     def action(self, m):
 180         with m.If(self.out_decode):
 181             m.next = self.out_state
 182             m.d.sync += [
 183                 self.in_op.ack.eq(0),
 184                 self.out_op.eq(self.mod.out_op)
 185             ]
 186         with m.Else():
 187             m.d.sync += self.in_op.ack.eq(1)
 188
 189
 190 class FPGet2OpMod(Trigger):
 191     def __init__(self, width, id_wid):
 192         Trigger.__init__(self)
 193         self.width = width
 194         self.id_wid = id_wid
 195         self.i = self.ispec()
 196         self.o = self.ospec()
 197
 198     def ispec(self):
 199         return FPADDBaseData(self.width, self.id_wid)
 200
 201     def ospec(self):
 202         return FPNumBase2Ops(self.width, self.id_wid)
 203
 204     def elaborate(self, platform):
 205         m = Trigger.elaborate(self, platform)
 206         m.submodules.get_op1_out = self.o.a
 207         m.submodules.get_op2_out = self.o.b
 208         out_op1 = FPNumIn(None, self.width)
 209         out_op2 = FPNumIn(None, self.width)
 210         with m.If(self.trigger):
 211             m.d.comb += [
 212                 out_op1.decode(self.i.a),
 213                 out_op2.decode(self.i.b),
 214                 self.o.a.eq(out_op1),
 215                 self.o.b.eq(out_op2),
 216                 self.o.mid.eq(self.i.mid)
 217             ]
 218         return m
 219
 220
 221 class FPGet2Op(FPState):
 222     """ gets operands
 223     """
 224
 225     def __init__(self, in_state, out_state, width, id_wid):
 226         FPState.__init__(self, in_state)
 227         self.out_state = out_state
 228         self.mod = FPGet2OpMod(width, id_wid)
 229         self.o = self.mod.ospec()
 230         self.in_stb = Signal(reset_less=True)
 231         self.out_ack = Signal(reset_less=True)
 232         self.out_decode = Signal(reset_less=True)
 233
 234     def setup(self, m, i, in_stb, in_ack):
 235         """ links module to inputs and outputs
 236         """
 237         m.submodules.get_ops = self.mod
 238         m.d.comb += self.mod.i.eq(i)
 239         m.d.comb += self.mod.stb.eq(in_stb)
 240         m.d.comb += self.out_ack.eq(self.mod.ack)
 241         m.d.comb += self.out_decode.eq(self.mod.trigger)
 242         m.d.comb += in_ack.eq(self.mod.ack)
 243
 244     def action(self, m):
 245         with m.If(self.out_decode):
 246             m.next = self.out_state
 247             m.d.sync += [
 248                 self.mod.ack.eq(0),
 249                 self.o.eq(self.mod.o),
 250             ]
 251         with m.Else():
 252             m.d.sync += self.mod.ack.eq(1)
 253
 254
 255 class FPNumBase2Ops:
 256
 257     def __init__(self, width, id_wid, m_extra=True):
 258         self.a = FPNumBase(width, m_extra)
 259         self.b = FPNumBase(width, m_extra)
 260         self.mid = Signal(id_wid, reset_less=True)
 261
 262     def eq(self, i):
 263         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 264
 265
 266 class FPSCData:
 267
 268     def __init__(self, width, id_wid):
 269         self.a = FPNumBase(width, True)
 270         self.b = FPNumBase(width, True)
 271         self.z = FPNumOut(width, False)
 272         self.oz = Signal(width, reset_less=True)
 273         self.out_do_z = Signal(reset_less=True)
 274         self.mid = Signal(id_wid, reset_less=True)
 275
 276     def eq(self, i):
 277         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 278                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 279
 280
 281 class FPAddSpecialCasesMod:
 282     """ special cases: NaNs, infs, zeros, denormalised
 283         NOTE: some of these are unique to add.  see "Special Operations"
 284         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 285     """
 286
 287     def __init__(self, width, id_wid):
 288         self.width = width
 289         self.id_wid = id_wid
 290         self.i = self.ispec()
 291         self.o = self.ospec()
 292
 293     def ispec(self):
 294         return FPNumBase2Ops(self.width, self.id_wid)
 295
 296     def ospec(self):
 297         return FPSCData(self.width, self.id_wid)
 298
 299     def setup(self, m, i):
 300         """ links module to inputs and outputs
 301         """
 302         m.submodules.specialcases = self
 303         m.d.comb += self.i.eq(i)
 304
 305     def process(self, i):
 306         return self.o
 307
 308     def elaborate(self, platform):
 309         m = Module()
 310
 311         m.submodules.sc_in_a = self.i.a
 312         m.submodules.sc_in_b = self.i.b
 313         m.submodules.sc_out_z = self.o.z
 314
 315         s_nomatch = Signal()
 316         m.d.comb += s_nomatch.eq(self.i.a.s != self.i.b.s)
 317
 318         m_match = Signal()
 319         m.d.comb += m_match.eq(self.i.a.m == self.i.b.m)
 320
 321         # if a is NaN or b is NaN return NaN
 322         with m.If(self.i.a.is_nan | self.i.b.is_nan):
 323             m.d.comb += self.o.out_do_z.eq(1)
 324             m.d.comb += self.o.z.nan(0)
 325
 326         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 327         # under review
 328
 329         ## if a is zero and b is NaN return -b
 330         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 331         #    m.d.comb += self.o.out_do_z.eq(1)
 332         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 333
 334         ## if b is zero and a is NaN return -a
 335         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 336         #    m.d.comb += self.o.out_do_z.eq(1)
 337         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 338
 339         ## if a is -zero and b is NaN return -b
 340         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 341         #    m.d.comb += self.o.out_do_z.eq(1)
 342         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 343
 344         ## if b is -zero and a is NaN return -a
 345         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 346         #    m.d.comb += self.o.out_do_z.eq(1)
 347         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 348
 349         # if a is inf return inf (or NaN)
 350         with m.Elif(self.i.a.is_inf):
 351             m.d.comb += self.o.out_do_z.eq(1)
 352             m.d.comb += self.o.z.inf(self.i.a.s)
 353             # if a is inf and signs don't match return NaN
 354             with m.If(self.i.b.exp_128 & s_nomatch):
 355                 m.d.comb += self.o.z.nan(0)
 356
 357         # if b is inf return inf
 358         with m.Elif(self.i.b.is_inf):
 359             m.d.comb += self.o.out_do_z.eq(1)
 360             m.d.comb += self.o.z.inf(self.i.b.s)
 361
 362         # if a is zero and b zero return signed-a/b
 363         with m.Elif(self.i.a.is_zero & self.i.b.is_zero):
 364             m.d.comb += self.o.out_do_z.eq(1)
 365             m.d.comb += self.o.z.create(self.i.a.s & self.i.b.s,
 366                                           self.i.b.e,
 367                                           self.i.b.m[3:-1])
 368
 369         # if a is zero return b
 370         with m.Elif(self.i.a.is_zero):
 371             m.d.comb += self.o.out_do_z.eq(1)
 372             m.d.comb += self.o.z.create(self.i.b.s, self.i.b.e,
 373                                       self.i.b.m[3:-1])
 374
 375         # if b is zero return a
 376         with m.Elif(self.i.b.is_zero):
 377             m.d.comb += self.o.out_do_z.eq(1)
 378             m.d.comb += self.o.z.create(self.i.a.s, self.i.a.e,
 379                                       self.i.a.m[3:-1])
 380
 381         # if a equal to -b return zero (+ve zero)
 382         with m.Elif(s_nomatch & m_match & (self.i.a.e == self.i.b.e)):
 383             m.d.comb += self.o.out_do_z.eq(1)
 384             m.d.comb += self.o.z.zero(0)
 385
 386         # Denormalised Number checks next, so pass a/b data through
 387         with m.Else():
 388             m.d.comb += self.o.out_do_z.eq(0)
 389             m.d.comb += self.o.a.eq(self.i.a)
 390             m.d.comb += self.o.b.eq(self.i.b)
 391
 392         m.d.comb += self.o.oz.eq(self.o.z.v)
 393         m.d.comb += self.o.mid.eq(self.i.mid)
 394
 395         return m
 396
 397
 398 class FPID:
 399     def __init__(self, id_wid):
 400         self.id_wid = id_wid
 401         if self.id_wid:
 402             self.in_mid = Signal(id_wid, reset_less=True)
 403             self.out_mid = Signal(id_wid, reset_less=True)
 404         else:
 405             self.in_mid = None
 406             self.out_mid = None
 407
 408     def idsync(self, m):
 409         if self.id_wid is not None:
 410             m.d.sync += self.out_mid.eq(self.in_mid)
 411
 412
 413 class FPAddSpecialCases(FPState):
 414     """ special cases: NaNs, infs, zeros, denormalised
 415         NOTE: some of these are unique to add.  see "Special Operations"
 416         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 417     """
 418
 419     def __init__(self, width, id_wid):
 420         FPState.__init__(self, "special_cases")
 421         self.mod = FPAddSpecialCasesMod(width)
 422         self.out_z = self.mod.ospec()
 423         self.out_do_z = Signal(reset_less=True)
 424
 425     def setup(self, m, i):
 426         """ links module to inputs and outputs
 427         """
 428         self.mod.setup(m, i, self.out_do_z)
 429         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 430         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 431
 432     def action(self, m):
 433         self.idsync(m)
 434         with m.If(self.out_do_z):
 435             m.next = "put_z"
 436         with m.Else():
 437             m.next = "denormalise"
 438
 439
 440 class FPAddSpecialCasesDeNorm(FPState):
 441     """ special cases: NaNs, infs, zeros, denormalised
 442         NOTE: some of these are unique to add.  see "Special Operations"
 443         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 444     """
 445
 446     def __init__(self, width, id_wid):
 447         FPState.__init__(self, "special_cases")
 448         self.smod = FPAddSpecialCasesMod(width, id_wid)
 449         self.dmod = FPAddDeNormMod(width, id_wid)
 450         self.o = self.ospec()
 451
 452     def ispec(self):
 453         return self.smod.ispec()
 454
 455     def ospec(self):
 456         return self.dmod.ospec()
 457
 458     def setup(self, m, i):
 459         """ links module to inputs and outputs
 460         """
 461         # these only needed for break-out (early-out)
 462         # out_z = self.smod.ospec()
 463         # out_do_z = Signal(reset_less=True)
 464         self.smod.setup(m, i)
 465         self.dmod.setup(m, self.smod.o)
 466         #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
 467
 468         # out_do_z=True, only needed for early-out (split pipeline)
 469         #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
 470         #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
 471
 472         # out_do_z=False
 473         m.d.sync += self.o.eq(self.dmod.o)
 474
 475     def process(self, i):
 476         return self.o
 477
 478     def action(self, m):
 479         #with m.If(self.out_do_z):
 480         #    m.next = "put_z"
 481         #with m.Else():
 482             m.next = "align"
 483
 484
 485 class FPAddDeNormMod(FPState):
 486
 487     def __init__(self, width, id_wid):
 488         self.width = width
 489         self.id_wid = id_wid
 490         self.i = self.ispec()
 491         self.o = self.ospec()
 492
 493     def ispec(self):
 494         return FPSCData(self.width, self.id_wid)
 495
 496     def ospec(self):
 497         return FPSCData(self.width, self.id_wid)
 498
 499     def setup(self, m, i):
 500         """ links module to inputs and outputs
 501         """
 502         m.submodules.denormalise = self
 503         m.d.comb += self.i.eq(i)
 504
 505     def elaborate(self, platform):
 506         m = Module()
 507         m.submodules.denorm_in_a = self.i.a
 508         m.submodules.denorm_in_b = self.i.b
 509         m.submodules.denorm_out_a = self.o.a
 510         m.submodules.denorm_out_b = self.o.b
 511
 512         with m.If(~self.i.out_do_z):
 513             # XXX hmmm, don't like repeating identical code
 514             m.d.comb += self.o.a.eq(self.i.a)
 515             with m.If(self.i.a.exp_n127):
 516                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 517             with m.Else():
 518                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 519
 520             m.d.comb += self.o.b.eq(self.i.b)
 521             with m.If(self.i.b.exp_n127):
 522                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 523             with m.Else():
 524                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 525
 526         m.d.comb += self.o.mid.eq(self.i.mid)
 527         m.d.comb += self.o.z.eq(self.i.z)
 528         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 529         m.d.comb += self.o.oz.eq(self.i.oz)
 530
 531         return m
 532
 533
 534 class FPAddDeNorm(FPState):
 535
 536     def __init__(self, width, id_wid):
 537         FPState.__init__(self, "denormalise")
 538         self.mod = FPAddDeNormMod(width)
 539         self.out_a = FPNumBase(width)
 540         self.out_b = FPNumBase(width)
 541
 542     def setup(self, m, i):
 543         """ links module to inputs and outputs
 544         """
 545         self.mod.setup(m, i)
 546
 547         m.d.sync += self.out_a.eq(self.mod.out_a)
 548         m.d.sync += self.out_b.eq(self.mod.out_b)
 549
 550     def action(self, m):
 551         # Denormalised Number checks
 552         m.next = "align"
 553
 554
 555 class FPAddAlignMultiMod(FPState):
 556
 557     def __init__(self, width):
 558         self.in_a = FPNumBase(width)
 559         self.in_b = FPNumBase(width)
 560         self.out_a = FPNumIn(None, width)
 561         self.out_b = FPNumIn(None, width)
 562         self.exp_eq = Signal(reset_less=True)
 563
 564     def elaborate(self, platform):
 565         # This one however (single-cycle) will do the shift
 566         # in one go.
 567
 568         m = Module()
 569
 570         m.submodules.align_in_a = self.in_a
 571         m.submodules.align_in_b = self.in_b
 572         m.submodules.align_out_a = self.out_a
 573         m.submodules.align_out_b = self.out_b
 574
 575         # NOTE: this does *not* do single-cycle multi-shifting,
 576         #       it *STAYS* in the align state until exponents match
 577
 578         # exponent of a greater than b: shift b down
 579         m.d.comb += self.exp_eq.eq(0)
 580         m.d.comb += self.out_a.eq(self.in_a)
 581         m.d.comb += self.out_b.eq(self.in_b)
 582         agtb = Signal(reset_less=True)
 583         altb = Signal(reset_less=True)
 584         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 585         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 586         with m.If(agtb):
 587             m.d.comb += self.out_b.shift_down(self.in_b)
 588         # exponent of b greater than a: shift a down
 589         with m.Elif(altb):
 590             m.d.comb += self.out_a.shift_down(self.in_a)
 591         # exponents equal: move to next stage.
 592         with m.Else():
 593             m.d.comb += self.exp_eq.eq(1)
 594         return m
 595
 596
 597 class FPAddAlignMulti(FPState):
 598
 599     def __init__(self, width, id_wid):
 600         FPState.__init__(self, "align")
 601         self.mod = FPAddAlignMultiMod(width)
 602         self.out_a = FPNumIn(None, width)
 603         self.out_b = FPNumIn(None, width)
 604         self.exp_eq = Signal(reset_less=True)
 605
 606     def setup(self, m, in_a, in_b):
 607         """ links module to inputs and outputs
 608         """
 609         m.submodules.align = self.mod
 610         m.d.comb += self.mod.in_a.eq(in_a)
 611         m.d.comb += self.mod.in_b.eq(in_b)
 612         #m.d.comb += self.out_a.eq(self.mod.out_a)
 613         #m.d.comb += self.out_b.eq(self.mod.out_b)
 614         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 615         m.d.sync += self.out_a.eq(self.mod.out_a)
 616         m.d.sync += self.out_b.eq(self.mod.out_b)
 617
 618     def action(self, m):
 619         with m.If(self.exp_eq):
 620             m.next = "add_0"
 621
 622
 623 class FPNumIn2Ops:
 624
 625     def __init__(self, width, id_wid):
 626         self.a = FPNumIn(None, width)
 627         self.b = FPNumIn(None, width)
 628         self.z = FPNumOut(width, False)
 629         self.out_do_z = Signal(reset_less=True)
 630         self.oz = Signal(width, reset_less=True)
 631         self.mid = Signal(id_wid, reset_less=True)
 632
 633     def eq(self, i):
 634         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 635                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 636
 637
 638 class FPAddAlignSingleMod:
 639
 640     def __init__(self, width, id_wid):
 641         self.width = width
 642         self.id_wid = id_wid
 643         self.i = self.ispec()
 644         self.o = self.ospec()
 645
 646     def ispec(self):
 647         return FPSCData(self.width, self.id_wid)
 648
 649     def ospec(self):
 650         return FPNumIn2Ops(self.width, self.id_wid)
 651
 652     def process(self, i):
 653         return self.o
 654
 655     def setup(self, m, i):
 656         """ links module to inputs and outputs
 657         """
 658         m.submodules.align = self
 659         m.d.comb += self.i.eq(i)
 660
 661     def elaborate(self, platform):
 662         """ Aligns A against B or B against A, depending on which has the
 663             greater exponent.  This is done in a *single* cycle using
 664             variable-width bit-shift
 665
 666             the shifter used here is quite expensive in terms of gates.
 667             Mux A or B in (and out) into temporaries, as only one of them
 668             needs to be aligned against the other
 669         """
 670         m = Module()
 671
 672         m.submodules.align_in_a = self.i.a
 673         m.submodules.align_in_b = self.i.b
 674         m.submodules.align_out_a = self.o.a
 675         m.submodules.align_out_b = self.o.b
 676
 677         # temporary (muxed) input and output to be shifted
 678         t_inp = FPNumBase(self.width)
 679         t_out = FPNumIn(None, self.width)
 680         espec = (len(self.i.a.e), True)
 681         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 682         m.submodules.align_t_in = t_inp
 683         m.submodules.align_t_out = t_out
 684         m.submodules.multishift_r = msr
 685
 686         ediff = Signal(espec, reset_less=True)
 687         ediffr = Signal(espec, reset_less=True)
 688         tdiff = Signal(espec, reset_less=True)
 689         elz = Signal(reset_less=True)
 690         egz = Signal(reset_less=True)
 691
 692         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 693         m.d.comb += msr.inp.eq(t_inp.m)
 694         m.d.comb += msr.diff.eq(tdiff)
 695         m.d.comb += t_out.m.eq(msr.m)
 696         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 697         m.d.comb += t_out.s.eq(t_inp.s)
 698
 699         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 700         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 701         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 702         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 703
 704         # default: A-exp == B-exp, A and B untouched (fall through)
 705         m.d.comb += self.o.a.eq(self.i.a)
 706         m.d.comb += self.o.b.eq(self.i.b)
 707         # only one shifter (muxed)
 708         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 709         # exponent of a greater than b: shift b down
 710         with m.If(~self.i.out_do_z):
 711             with m.If(egz):
 712                 m.d.comb += [t_inp.eq(self.i.b),
 713                              tdiff.eq(ediff),
 714                              self.o.b.eq(t_out),
 715                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 716                             ]
 717             # exponent of b greater than a: shift a down
 718             with m.Elif(elz):
 719                 m.d.comb += [t_inp.eq(self.i.a),
 720                              tdiff.eq(ediffr),
 721                              self.o.a.eq(t_out),
 722                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 723                             ]
 724
 725         m.d.comb += self.o.mid.eq(self.i.mid)
 726         m.d.comb += self.o.z.eq(self.i.z)
 727         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 728         m.d.comb += self.o.oz.eq(self.i.oz)
 729
 730         return m
 731
 732
 733 class FPAddAlignSingle(FPState):
 734
 735     def __init__(self, width, id_wid):
 736         FPState.__init__(self, "align")
 737         self.mod = FPAddAlignSingleMod(width, id_wid)
 738         self.out_a = FPNumIn(None, width)
 739         self.out_b = FPNumIn(None, width)
 740
 741     def setup(self, m, i):
 742         """ links module to inputs and outputs
 743         """
 744         self.mod.setup(m, i)
 745
 746         # NOTE: could be done as comb
 747         m.d.sync += self.out_a.eq(self.mod.out_a)
 748         m.d.sync += self.out_b.eq(self.mod.out_b)
 749
 750     def action(self, m):
 751         m.next = "add_0"
 752
 753
 754 class FPAddAlignSingleAdd(FPState):
 755
 756     def __init__(self, width, id_wid):
 757         FPState.__init__(self, "align")
 758         self.width = width
 759         self.id_wid = id_wid
 760         self.a1o = self.ospec()
 761
 762     def ispec(self):
 763         return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
 764
 765     def ospec(self):
 766         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 767
 768     def setup(self, m, i):
 769         """ links module to inputs and outputs
 770         """
 771
 772         # chain AddAlignSingle, AddStage0 and AddStage1
 773         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 774         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 775         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 776
 777         chain = StageChain([mod, a0mod, a1mod])
 778         chain.setup(m, i)
 779
 780         m.d.sync += self.a1o.eq(a1mod.o)
 781
 782     def process(self, i):
 783         return self.a1o
 784
 785     def action(self, m):
 786         m.next = "normalise_1"
 787
 788
 789 class FPAddStage0Data:
 790
 791     def __init__(self, width, id_wid):
 792         self.z = FPNumBase(width, False)
 793         self.out_do_z = Signal(reset_less=True)
 794         self.oz = Signal(width, reset_less=True)
 795         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 796         self.mid = Signal(id_wid, reset_less=True)
 797
 798     def eq(self, i):
 799         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 800                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 801
 802
 803 class FPAddStage0Mod:
 804
 805     def __init__(self, width, id_wid):
 806         self.width = width
 807         self.id_wid = id_wid
 808         self.i = self.ispec()
 809         self.o = self.ospec()
 810
 811     def ispec(self):
 812         return FPSCData(self.width, self.id_wid)
 813
 814     def ospec(self):
 815         return FPAddStage0Data(self.width, self.id_wid)
 816
 817     def process(self, i):
 818         return self.o
 819
 820     def setup(self, m, i):
 821         """ links module to inputs and outputs
 822         """
 823         m.submodules.add0 = self
 824         m.d.comb += self.i.eq(i)
 825
 826     def elaborate(self, platform):
 827         m = Module()
 828         m.submodules.add0_in_a = self.i.a
 829         m.submodules.add0_in_b = self.i.b
 830         m.submodules.add0_out_z = self.o.z
 831
 832         # store intermediate tests (and zero-extended mantissas)
 833         seq = Signal(reset_less=True)
 834         mge = Signal(reset_less=True)
 835         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 836         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 837         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 838                      mge.eq(self.i.a.m >= self.i.b.m),
 839                      am0.eq(Cat(self.i.a.m, 0)),
 840                      bm0.eq(Cat(self.i.b.m, 0))
 841                     ]
 842         # same-sign (both negative or both positive) add mantissas
 843         with m.If(~self.i.out_do_z):
 844             m.d.comb += self.o.z.e.eq(self.i.a.e)
 845             with m.If(seq):
 846                 m.d.comb += [
 847                     self.o.tot.eq(am0 + bm0),
 848                     self.o.z.s.eq(self.i.a.s)
 849                 ]
 850             # a mantissa greater than b, use a
 851             with m.Elif(mge):
 852                 m.d.comb += [
 853                     self.o.tot.eq(am0 - bm0),
 854                     self.o.z.s.eq(self.i.a.s)
 855                 ]
 856             # b mantissa greater than a, use b
 857             with m.Else():
 858                 m.d.comb += [
 859                     self.o.tot.eq(bm0 - am0),
 860                     self.o.z.s.eq(self.i.b.s)
 861             ]
 862
 863         m.d.comb += self.o.oz.eq(self.i.oz)
 864         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 865         m.d.comb += self.o.mid.eq(self.i.mid)
 866         return m
 867
 868
 869 class FPAddStage0(FPState):
 870     """ First stage of add.  covers same-sign (add) and subtract
 871         special-casing when mantissas are greater or equal, to
 872         give greatest accuracy.
 873     """
 874
 875     def __init__(self, width, id_wid):
 876         FPState.__init__(self, "add_0")
 877         self.mod = FPAddStage0Mod(width)
 878         self.o = self.mod.ospec()
 879
 880     def setup(self, m, i):
 881         """ links module to inputs and outputs
 882         """
 883         self.mod.setup(m, i)
 884
 885         # NOTE: these could be done as combinatorial (merge add0+add1)
 886         m.d.sync += self.o.eq(self.mod.o)
 887
 888     def action(self, m):
 889         m.next = "add_1"
 890
 891
 892 class FPAddStage1Data:
 893
 894     def __init__(self, width, id_wid):
 895         self.z = FPNumBase(width, False)
 896         self.out_do_z = Signal(reset_less=True)
 897         self.oz = Signal(width, reset_less=True)
 898         self.of = Overflow()
 899         self.mid = Signal(id_wid, reset_less=True)
 900
 901     def eq(self, i):
 902         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 903                 self.of.eq(i.of), self.mid.eq(i.mid)]
 904
 905
 906
 907 class FPAddStage1Mod(FPState):
 908     """ Second stage of add: preparation for normalisation.
 909         detects when tot sum is too big (tot[27] is kinda a carry bit)
 910     """
 911
 912     def __init__(self, width, id_wid):
 913         self.width = width
 914         self.id_wid = id_wid
 915         self.i = self.ispec()
 916         self.o = self.ospec()
 917
 918     def ispec(self):
 919         return FPAddStage0Data(self.width, self.id_wid)
 920
 921     def ospec(self):
 922         return FPAddStage1Data(self.width, self.id_wid)
 923
 924     def process(self, i):
 925         return self.o
 926
 927     def setup(self, m, i):
 928         """ links module to inputs and outputs
 929         """
 930         m.submodules.add1 = self
 931         m.submodules.add1_out_overflow = self.o.of
 932
 933         m.d.comb += self.i.eq(i)
 934
 935     def elaborate(self, platform):
 936         m = Module()
 937         #m.submodules.norm1_in_overflow = self.in_of
 938         #m.submodules.norm1_out_overflow = self.out_of
 939         #m.submodules.norm1_in_z = self.in_z
 940         #m.submodules.norm1_out_z = self.out_z
 941         m.d.comb += self.o.z.eq(self.i.z)
 942         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 943         with m.If(~self.i.out_do_z):
 944             with m.If(self.i.tot[-1]):
 945                 m.d.comb += [
 946                     self.o.z.m.eq(self.i.tot[4:]),
 947                     self.o.of.m0.eq(self.i.tot[4]),
 948                     self.o.of.guard.eq(self.i.tot[3]),
 949                     self.o.of.round_bit.eq(self.i.tot[2]),
 950                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 951                     self.o.z.e.eq(self.i.z.e + 1)
 952             ]
 953             # tot[-1] (MSB) zero case
 954             with m.Else():
 955                 m.d.comb += [
 956                     self.o.z.m.eq(self.i.tot[3:]),
 957                     self.o.of.m0.eq(self.i.tot[3]),
 958                     self.o.of.guard.eq(self.i.tot[2]),
 959                     self.o.of.round_bit.eq(self.i.tot[1]),
 960                     self.o.of.sticky.eq(self.i.tot[0])
 961             ]
 962
 963         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 964         m.d.comb += self.o.oz.eq(self.i.oz)
 965         m.d.comb += self.o.mid.eq(self.i.mid)
 966
 967         return m
 968
 969
 970 class FPAddStage1(FPState):
 971
 972     def __init__(self, width, id_wid):
 973         FPState.__init__(self, "add_1")
 974         self.mod = FPAddStage1Mod(width)
 975         self.out_z = FPNumBase(width, False)
 976         self.out_of = Overflow()
 977         self.norm_stb = Signal()
 978
 979     def setup(self, m, i):
 980         """ links module to inputs and outputs
 981         """
 982         self.mod.setup(m, i)
 983
 984         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 985
 986         m.d.sync += self.out_of.eq(self.mod.out_of)
 987         m.d.sync += self.out_z.eq(self.mod.out_z)
 988         m.d.sync += self.norm_stb.eq(1)
 989
 990     def action(self, m):
 991         m.next = "normalise_1"
 992
 993
 994 class FPNormaliseModSingle:
 995
 996     def __init__(self, width):
 997         self.width = width
 998         self.in_z = self.ispec()
 999         self.out_z = self.ospec()
1000
1001     def ispec(self):
1002         return FPNumBase(self.width, False)
1003
1004     def ospec(self):
1005         return FPNumBase(self.width, False)
1006
1007     def setup(self, m, i):
1008         """ links module to inputs and outputs
1009         """
1010         m.submodules.normalise = self
1011         m.d.comb += self.i.eq(i)
1012
1013     def elaborate(self, platform):
1014         m = Module()
1015
1016         mwid = self.out_z.m_width+2
1017         pe = PriorityEncoder(mwid)
1018         m.submodules.norm_pe = pe
1019
1020         m.submodules.norm1_out_z = self.out_z
1021         m.submodules.norm1_in_z = self.in_z
1022
1023         in_z = FPNumBase(self.width, False)
1024         in_of = Overflow()
1025         m.submodules.norm1_insel_z = in_z
1026         m.submodules.norm1_insel_overflow = in_of
1027
1028         espec = (len(in_z.e), True)
1029         ediff_n126 = Signal(espec, reset_less=True)
1030         msr = MultiShiftRMerge(mwid, espec)
1031         m.submodules.multishift_r = msr
1032
1033         m.d.comb += in_z.eq(self.in_z)
1034         m.d.comb += in_of.eq(self.in_of)
1035         # initialise out from in (overridden below)
1036         m.d.comb += self.out_z.eq(in_z)
1037         m.d.comb += self.out_of.eq(in_of)
1038         # normalisation decrease condition
1039         decrease = Signal(reset_less=True)
1040         m.d.comb += decrease.eq(in_z.m_msbzero)
1041         # decrease exponent
1042         with m.If(decrease):
1043             # *sigh* not entirely obvious: count leading zeros (clz)
1044             # with a PriorityEncoder: to find from the MSB
1045             # we reverse the order of the bits.
1046             temp_m = Signal(mwid, reset_less=True)
1047             temp_s = Signal(mwid+1, reset_less=True)
1048             clz = Signal((len(in_z.e), True), reset_less=True)
1049             m.d.comb += [
1050                 # cat round and guard bits back into the mantissa
1051                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
1052                 pe.i.eq(temp_m[::-1]),          # inverted
1053                 clz.eq(pe.o),                   # count zeros from MSB down
1054                 temp_s.eq(temp_m << clz),       # shift mantissa UP
1055                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
1056                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
1057             ]
1058
1059         return m
1060
class FPNorm1Data:
    """ Record of the signals produced by normalisation: the number z,
        the roundz flag, the out_do_z flag with its oz value, and mid.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into the same-named field of this record
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1073
1074
class FPNorm1ModSingle:
    """ Single-cycle (fully combinatorial) normalisation.

        Unless out_do_z is set, one of two adjustments is made:

        * decrease: mantissa MSB is zero and the exponent is above the
          minimum.  The mantissa (with guard and round bits) is shifted
          up by the number of leading zeros, limited so the exponent
          does not go below the minimum, and the exponent is reduced.
        * increase: the exponent is below the minimum.  The mantissa
          (with guard, round and sticky) is shifted down through a
          MultiShiftRMerge and the exponent is raised to the minimum.

        The roundz flag of the resulting overflow bits is output for
        use by the rounding stage.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: the output record of add stage 1
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns this module's output record (argument is unused)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input record, used for all the tests below
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come from the bottom of the
                    # shifter output.  (temp_s is only driven in the
                    # "decrease" branch, so reading it here would always
                    # give zero.)
                    of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),     # guard
                    of.round_bit.eq(msr.m[1]), # round
                    of.sticky.eq(msr.m[0]),    # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1181
1182
class FPNorm1ModMulti:
    """ One step of multi-cycle normalisation.

        Each evaluation shifts the mantissa by at most one bit:

        * decrease: mantissa MSB is zero and exponent is above the
          minimum.  Mantissa is shifted up by one (taking the guard bit
          in at the bottom) and the exponent is decremented.
        * increase: exponent is below the minimum.  Mantissa is shifted
          down by one (into guard/round/sticky) and the exponent is
          incremented.

        in_select chooses the source: in_z/in_of when set, otherwise
        temp_z/temp_of.  out_norm is set whenever a shift was performed.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven in elaborate(): high when a decrease or increase occurred
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                # must come after the shift above so that it overrides bit 0
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1248
1249
class FPNorm1Single(FPState):
    """ FSM state "normalise_1", single-cycle variant: wraps
        FPNorm1ModSingle and moves straight on to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle takes (width, id_wid): both must be supplied
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input format: same as the wrapped module's
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: same as the wrapped module's
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ unconditionally moves the FSM on to "round"
        """
        m.next = "round"
1272
1273
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1", multi-cycle variant.

        Stays in this state for as long as out_norm is set, feeding the
        normalisation module's output back in through temp_z / temp_of,
        then moves on to "round" and latches the roundz flag.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as defined in this file, has no
        # setup() method, so this call looks like it will fail with an
        # AttributeError.  The intended wiring needs to be confirmed.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ accepts new input when strobed and not yet acknowledged;
            loops while out_norm is set, otherwise proceeds to "round"
        """
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed this cycle's result back in as next cycle's temp input
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1314
1315
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact pipeline): chains normalisation,
        rounding, corrections and packing together combinatorially and
        registers the packed result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        """ input format: that of the first module in the chain
            (FPNorm1ModSingle)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: that of the last module in the chain
            (FPPackMod)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the four-module chain, connects i to its input, and
            registers the final (packed) output into self.out_z
        """
        # Normalisation, Rounding, Corrections, Pack - in that order
        stage_types = (FPNorm1ModSingle, FPRoundMod,
                       FPCorrectionsMod, FPPackMod)
        stages = [mk(self.width, self.id_wid) for mk in stage_types]
        StageChain(stages).setup(m, i)

        pmod = stages[-1]
        self.out_z = pmod.ospec()

        m.d.sync += [
            self.out_z.mid.eq(pmod.o.mid),
            self.out_z.z.eq(pmod.o.z),   # outputs packed result
        ]

    def process(self, i):
        """ returns the registered output record (argument is unused)
        """
        return self.out_z

    def action(self, m):
        """ unconditionally moves the FSM on to "pack_put_z"
        """
        m.next = "pack_put_z"
1350
1351
class FPRoundData:
    """ Record of the signals produced by rounding: the number z,
        the out_do_z flag with its oz value, and mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into the same-named field of this record
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1363
1364
class FPRoundMod:
    """ Rounding module: when the input's roundz flag is set (and
        out_do_z is clear) the mantissa is incremented, and if the
        mantissa was all 1s the exponent is incremented too.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns this module's output record (argument is unused)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as submodule "roundz" of m and
            connects i to the module's input
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.out_z

        # default: copy mid, z, out_do_z (and oz) through unmodified
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # round up: add one to the mantissa...
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                # ...and if it was all 1s, bump the exponent as well
                with m.If(src.z.m == src.z.m1s):
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1396
1397
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod takes (width, id_wid): both must be supplied
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: same as the wrapped module's
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: same as the wrapped module's
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in the part of the file
        # reviewed here; confirm that a base class still provides it.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod's output record is named "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ unconditionally moves the FSM on to "corrections"
        """
        m.next = "corrections"
1422
1423
class FPCorrectionsMod:
    """ Corrections module: copies the input to the output, except that
        when the number is flagged as denormalised (and out_do_z is
        clear) the output exponent is replaced with the N127 value.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns this module's output record (argument is unused)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as submodule "corrections" of m and
            connects i to the module's input
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z

        # default: copy mid, z, out_do_z (and oz) through unmodified
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1456
1457
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod takes (width, id_wid): both must be supplied
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: same as the wrapped module's
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: same as the wrapped module's
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod's output record is named "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ unconditionally moves the FSM on to "pack"
        """
        m.next = "pack"
1481
1482
class FPPackData:
    """ Record holding a packed result: z (a plain Signal of the full
        width) and the mid identifier.
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        fields = ("z", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1491
1492
class FPPackMod:
    """ Packing module: produces the final bit-pattern.

        * out_do_z set: the pre-computed value oz is output as-is
        * number overflowed: infinity (with the number's sign) is output
        * otherwise: sign, exponent and mantissa are packed together
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPPackData
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns this module's output record (argument is unused)
        """
        return self.o

    def setup(self, m, in_z):
        """ registers this module as submodule "pack" of m and
            connects in_z to the module's input
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        num = self.i.z
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z

        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            # bypass: use the value that was supplied in oz
            m.d.comb += z.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1531
1532
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers the packed
        result and its id.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod takes (width, id_wid): both must be supplied
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: same as the wrapped module's
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: same as the wrapped module's
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # out_z is an FPPackData (fields z and mid), and FPPackMod's
        # output record is named "o": the packed value is therefore
        # copied from mod.o.z to out_z.z
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ unconditionally moves the FSM on to "pack_put_z"
        """
        m.next = "pack_put_z"
1556
1557
class FPPutZ(FPState):
    """ FSM state that presents a result on an output port and waits
        for it to be acknowledged before moving to the next state
        (which defaults to "get_ops").
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ copies the value (and the id, if there is one) to the
            output; raises stb until stb and ack are both high, then
            drops stb and changes state
        """
        port = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z)

        with m.If(port.stb & port.ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1581
1582
class FPPutZIdx(FPState):
    """ FSM state that presents a result on one of several output
        ports, selected by in_mid, and waits for that port to be
        acknowledged before moving to the next state (which defaults
        to "get_ops").
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ copies the value to the selected output; raises its stb
            until stb and ack are both high, then drops stb and
            changes state
        """
        # the output port picked out by the id
        port = self.out_zs[self.in_mid]

        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += outz_stb.eq(port.stb)
        m.d.comb += outz_ack.eq(port.ack)
        m.d.sync += port.v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1608
class FPADDBaseData:
    """ Input record of the adder: the two operands a and b (each a
        Signal of the full width) plus the mid identifier.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        fields = ("a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]

    def ports(self):
        """ returns the record's signals as a list: a, b, mid
        """
        return [getattr(self, name) for name in ("a", "b", "mid")]
1623
class FPOpData:
    """ Output record of the adder: z (an FPOp of the given width)
        plus the mid identifier.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        fields = ("z", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]

    def ports(self):
        """ returns the record's members as a list: z, mid
        """
        return [getattr(self, name) for name in ("z", "mid")]
1634
1635
class FPADDBaseMod:
    """ Builds the FP adder as a finite state machine.

        A list of state objects is assembled (either the "compact" or
        the "longer" set); get_fragment then creates one FSM state per
        entry, named by the object's state_from, whose body is whatever
        the object's action() method adds.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # state objects, in the order in which they were added
        self.states = []

    def ispec(self):
        """ input format: FPADDBaseData (operands a and b, plus mid)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPOpData (result z, plus mid)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it, so
            that creation and registration can be done in one expression
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states (and wire the stages together)
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            # one FSM state per registered state object
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the longer, one-state-per-operation variant

            NOTE(review): this method does not appear to match the rest
            of the file as it currently stands, and needs confirming
            before use:

            * it reads self.in_mid, self.out_z and self.out_mid, none
              of which are set in __init__
            * several setup() calls pass more arguments than the
              setup(m, i) / setup(m, in_z) signatures of FPAddStage1,
              FPNorm1Single, FPRound, FPCorrections and FPPack accept
            * FPGet2Op is given three arguments here but four in
              get_compact_fragment
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or multi-cycle
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from the packed result, and one
        # ("put_z") fed from the special-cases state's out_z
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the compact variant: get operands, special-cases plus
            denormalise, align plus add, normalise-to-pack, put result.
            Each state's output is passed as the next state's input.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1755
1756
class FPADDBase(FPState):
    """ FSM state ("fpadd") wrapping an FPADDBaseMod submodule.

        setup() connects this object's input/output records and handshake
        signals to the wrapped module; action() contains the logic that
        runs while the FSM is in the "fpadd" state and moves the FSM on
        to "put_z" once z_done is seen.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ returns a new input-spec object, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output-spec object, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links this state to its wrapped module inside module m.

            * m: module receiving the statements and the "fpadd" submodule
            * i: input data, copied combinatorially into self.i
            * add_stb: incoming strobe; registered into self.add_stb
            * in_mid: accepted for interface compatibility; not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ logic for the "fpadd" FSM state.

            While z_done is low:
            * if in_accept is high, add_ack and in_t.stb are set to 1
            * otherwise add_ack and in_t.stb are set to 0, o.z.ack to 1

            When z_done is high: add_ack is set to 1, in_t.stb to 0, and
            the FSM's next state is "put_z".

            The statements that used to follow an unconditional "return"
            in the z_done branch have been removed: they were unreachable
            and used attributes (in_mid, out_mid, out_z) that this class
            does not define.
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1846
1847
class FPADDStageOut:
    """ Output record of the add stage: a value (z) and an id (mid).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z
            * id_wid: bit-width of mid
        """
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid into
            this record (z first, then mid)
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("z", "mid")]

    def ports(self):
        """ returns the record's signals as a list: [z, mid]
        """
        return list((self.z, self.mid))
1858
1859
class PlaceHolder:
    """ Empty attribute container.  Callers attach the same fields that
        FPADDStageOut has (z, mid), so that an eq function can do the
        assignments from it.
    """
1862
1863
class FPAddBaseStage:
    """ Stage description: input spec, output spec and the processing
        that maps one onto the other.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to the input and output specs
            * id_wid: passed on to the input and output specs
        """
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ returns a new FPADDBaseData input record
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new FPADDStageOut output record
        """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ returns a PlaceHolder whose z is i.a + i.b and whose mid is
            i.mid passed straight through
        """
        result = PlaceHolder()
        result.z, result.mid = i.a + i.b, i.mid
        return result
1880
1881
class FPADDBasePipe1(UnbufferedPipeline):
    """ UnbufferedPipeline built around a single FPAddBaseStage.
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: handed to the FPAddBaseStage constructor
        """
        UnbufferedPipeline.__init__(self, FPAddBaseStage(width, id_wid))
1886
1887
class FPADDBasePipe(ControlBase):
    """ ControlBase containing one FPADDBasePipe1, connected via connect().
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: handed to the FPADDBasePipe1 constructor
        """
        ControlBase.__init__(self)
        self.pipe1 = FPADDBasePipe1(width, id_wid)
        chain = [self.pipe1]
        # connect() is called once here; its result is added to the
        # combinatorial domain when the module is elaborated
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ returns a Module holding pipe1 as a submodule plus the stored
            connection statements
        """
        module = Module()
        module.submodules.pipe1 = self.pipe1
        module.d.comb += self._eqs
        return module
1899
1900
class PriorityCombPipeline(CombMultiInPipeline):
    """ CombMultiInPipeline that uses an InputPriorityArbiter as its p_mux.
    """

    def __init__(self, stage, p_len):
        """ * stage: the stage handed to CombMultiInPipeline
            * p_len: number of inputs; also given to the arbiter
        """
        # the arbiter is created (with a reference to this pipeline)
        # before the base class is initialised
        arbiter = InputPriorityArbiter(self, p_len)
        CombMultiInPipeline.__init__(self, stage, p_len=p_len, p_mux=arbiter)

    def ports(self):
        """ returns the ports of self.p_mux
            (presumably stored by the base-class constructor; verify)
        """
        mux = self.p_mux
        return mux.ports()
1908
1909
class FPAddInPassThruStage:
    """ Pass-through stage whose input and output are both FPADDBaseData.
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: passed on to FPADDBaseData by ispec()
        """
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new FPADDBaseData record
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format is the same as the input format
        """
        return self.ispec()

    def process(self, i):
        """ returns the input unchanged
        """
        return i
1916
1917
class FPADDInMuxPipe(PriorityCombPipeline):
    """ PriorityCombPipeline with num_rows inputs around a pass-through
        stage (FPAddInPassThruStage).
    """

    def __init__(self, width, id_width, num_rows):
        """ * width, id_width: handed to FPAddInPassThruStage
            * num_rows: number of inputs (p_len)
        """
        self.num_rows = num_rows
        PriorityCombPipeline.__init__(
            self, FPAddInPassThruStage(width, id_width),
            p_len=self.num_rows)

    def ports(self):
        """ returns a flat list: for every input, its i_valid, o_ready and
            data ports; then the output's i_ready, o_valid and data ports
        """
        res = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            res.extend([prev.i_valid, prev.o_ready])
            res.extend(prev.i_data.ports())
        res.extend([self.n.i_ready, self.n.o_valid])
        res.extend(self.n.o_data.ports())
        return res
1934
1935
class MuxCombPipeline(CombMultiOutPipeline):
    """ CombMultiOutPipeline in which the stage object is also used as
        the n-way multiplexer.
    """

    def __init__(self, stage, n_len):
        """ * stage: used both as the stage and as n_mux
            * n_len: number of outputs
        """
        # HACK: stage is also the n-way multiplexer
        CombMultiOutPipeline.__init__(self, stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        input_mid = self.p.i_data.mid
        stage.m_id = input_mid

    def ports(self):
        """ returns the ports of self.p_mux
        """
        # NOTE(review): the constructor passes n_mux, not p_mux; whether
        # the base class provides a p_mux attribute cannot be seen from
        # here -- confirm before relying on this method.
        mux = self.p_mux
        return mux.ports()
1946
1947
class FPAddOutPassThruStage:
    """ Pass-through stage whose input and output are both FPADDStageOut.
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: passed on to FPADDStageOut by ispec()
        """
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new FPADDStageOut record
        """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ output format is the same as the input format
        """
        return self.ispec()

    def process(self, i):
        """ returns the input unchanged
        """
        return i
1954
1955
class FPADDMuxOutPipe(MuxCombPipeline):
    """ MuxCombPipeline with num_rows outputs around a pass-through
        stage (FPAddOutPassThruStage).
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to FPAddOutPassThruStage
            * num_rows: number of outputs (n_len)
        """
        self.num_rows = num_rows
        MuxCombPipeline.__init__(
            self, FPAddOutPassThruStage(width, id_wid),
            n_len=self.num_rows)

    def ports(self):
        """ returns a flat list: the input's i_valid, o_ready and data
            ports; then, for every output, its i_ready, o_valid and data
            ports
        """
        res = [self.p.i_valid, self.p.o_ready]
        res.extend(self.p.i_data.ports())
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            res.extend([nxt.i_ready, nxt.o_valid])
            res.extend(nxt.o_data.ports())
        return res
1971
1972
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        Three pipes are chained: a fan-in pipe (FPADDInMuxPipe), the add
        pipe (FPADDBasePipe) and a fan-out pipe (FPADDMuxOutPipe).  This
        object's p and n are the fan-in pipe's p and the fan-out pipe's n.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to all three pipes
            * num_rows: number of fan-in inputs and fan-out outputs
        """
        self.num_rows = num_rows

        fan_in = FPADDInMuxPipe(width, id_wid, num_rows)
        adder = FPADDBasePipe(width, id_wid)
        fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)
        self.inpipe, self.fpadd, self.outpipe = fan_in, adder, fan_out

        # use pipe in/out as this class in/out
        self.p = fan_in.p
        self.n = fan_out.n

        # port list is computed once, at construction time
        port_list = fan_in.ports()
        self._ports = port_list + fan_out.ports()

    def elaborate(self, platform):
        """ returns a Module with the three pipes as submodules, connected
            fan-in -> add -> fan-out
        """
        module = Module()
        module.submodules.inpipe = self.inpipe
        module.submodules.fpadd = self.fpadd
        module.submodules.outpipe = self.outpipe

        in_to_add = self.inpipe.n.connect_to_next(self.fpadd.p)
        module.d.comb += in_to_add
        add_to_out = self.fpadd.connect_to_next(self.outpipe)
        module.d.comb += add_to_out

        return module

    def ports(self):
        """ returns the port list built by the constructor
        """
        return self._ports
2001
2002
class ResArray:
    """ Array of result operands (FPOp), plus one input operand and id.
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width given to every FPOp
            * id_wid: bit-width of in_mid
            * rs_sz: number of result entries to create.  This used to be
              read from a name that was not defined in this scope; it is
              now a keyword argument (default 2, the same default that
              FPADD uses).
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially copies in_z and in_mid into this object's
            in_z and in_mid, inside module m
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for ResArray: a Module with in_z
            and the result entries added as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every result entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2034
2035
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of result
              entries that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        # the id signals live in a separate FPID object
        self.ids = FPID(id_wid)

        # operand pairs: all created before any of the result entries
        rs = []
        for row in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % row
            in_b.name = "in_b_%d" % row
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # result entries
        res = []
        for row in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % row
            res.append(out_z)
        self.res = Array(res)

        # FSM states, in the order they were registered by add_state()
        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            States are registered in the order get_a, get_b, fpadd, put_z;
            an FSM is then built with one state per registered entry,
            keyed by its state_from.  Only operand pair 0 is connected.
        """
        m = Module()
        m.submodules += self.rs

        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(
            FPADDBase(self.width, self.id_wid, self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        # the two fetched operands and the id are registered into abd
        m.d.sync += [abd.a.eq(geta.out_op),
                     abd.b.eq(getb.out_op),
                     abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        self.add_state(
            FPPutZIdx("put_z", ab.o.z, self.res, ab.o.mid, "get_a"))

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2125
2126
if __name__ == "__main__":
    # hard-wired selector: the first branch (full FPADD) is always taken
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        # ports: operand pair 0, result entry 0, then the two id signals
        main(alu, ports=[*alu.rs[0][0].ports(),
                         *alu.rs[0][1].ports(),
                         *alu.res[0].ports(),
                         alu.ids.in_mid, alu.ids.out_mid])
    else:
        # NOTE(review): never executed.  The attributes used below
        # (in_a, in_b, out_z, in_mid, out_mid) are not set by
        # FPADDBase.__init__ -- confirm before enabling this branch.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b,
                         *alu.in_t.ports(),
                         *alu.out_z.ports(),
                         alu.in_mid, alu.out_mid])
2140
2141
2142     # works... but don't use, just do "python fname.py convert -t v"
2143     #print (verilog.convert(alu, ports=[
2144     #                        ports=alu.in_a.ports() + \
2145     #                              alu.in_b.ports() + \
2146     #                              alu.out_z.ports())