src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
  13 from multipipe import CombMultiOutPipeline
  14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
  15
  16 #from fpbase import FPNumShiftMultiRight
  17
  18
  19 class FPState(FPBase):
  20     def __init__(self, state_from):
  21         self.state_from = state_from
  22
  23     def set_inputs(self, inputs):
  24         self.inputs = inputs
  25         for k,v in inputs.items():
  26             setattr(self, k, v)
  27
  28     def set_outputs(self, outputs):
  29         self.outputs = outputs
  30         for k,v in outputs.items():
  31             setattr(self, k, v)
  32
  33
  34 class FPGetSyncOpsMod:
  35     def __init__(self, width, num_ops=2):
  36         self.width = width
  37         self.num_ops = num_ops
  38         inops = []
  39         outops = []
  40         for i in range(num_ops):
  41             inops.append(Signal(width, reset_less=True))
  42             outops.append(Signal(width, reset_less=True))
  43         self.in_op = inops
  44         self.out_op = outops
  45         self.stb = Signal(num_ops)
  46         self.ack = Signal()
  47         self.ready = Signal(reset_less=True)
  48         self.out_decode = Signal(reset_less=True)
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  53         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  54         with m.If(self.out_decode):
  55             for i in range(self.num_ops):
  56                 m.d.comb += [
  57                         self.out_op[i].eq(self.in_op[i]),
  58                 ]
  59         return m
  60
  61     def ports(self):
  62         return self.in_op + self.out_op + [self.stb, self.ack]
  63
  64
  65 class FPOps(Trigger):
  66     def __init__(self, width, num_ops):
  67         Trigger.__init__(self)
  68         self.width = width
  69         self.num_ops = num_ops
  70
  71         res = []
  72         for i in range(num_ops):
  73             res.append(Signal(width))
  74         self.v  = Array(res)
  75
  76     def ports(self):
  77         res = []
  78         for i in range(self.num_ops):
  79             res.append(self.v[i])
  80         res.append(self.ack)
  81         res.append(self.stb)
  82         return res
  83
  84
  85 class InputGroup:
  86     def __init__(self, width, num_ops=2, num_rows=4):
  87         self.width = width
  88         self.num_ops = num_ops
  89         self.num_rows = num_rows
  90         self.mmax = int(log(self.num_rows) / log(2))
  91         self.rs = []
  92         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  93         for i in range(num_rows):
  94             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  95         self.rs = Array(self.rs)
  96
  97         self.out_op = FPOps(width, num_ops)
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101
 102         pe = PriorityEncoder(self.num_rows)
 103         m.submodules.selector = pe
 104         m.submodules.out_op = self.out_op
 105         m.submodules += self.rs
 106
 107         # connect priority encoder
 108         in_ready = []
 109         for i in range(self.num_rows):
 110             in_ready.append(self.rs[i].ready)
 111         m.d.comb += pe.i.eq(Cat(*in_ready))
 112
 113         active = Signal(reset_less=True)
 114         out_en = Signal(reset_less=True)
 115         m.d.comb += active.eq(~pe.n) # encoder active
 116         m.d.comb += out_en.eq(active & self.out_op.trigger)
 117
 118         # encoder active: ack relevant input, record MID, pass output
 119         with m.If(out_en):
 120             rs = self.rs[pe.o]
 121             m.d.sync += self.mid.eq(pe.o)
 122             m.d.sync += rs.ack.eq(0)
 123             m.d.sync += self.out_op.stb.eq(0)
 124             for j in range(self.num_ops):
 125                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 126         with m.Else():
 127             m.d.sync += self.out_op.stb.eq(1)
 128             # acks all default to zero
 129             for i in range(self.num_rows):
 130                 m.d.sync += self.rs[i].ack.eq(1)
 131
 132         return m
 133
 134     def ports(self):
 135         res = []
 136         for i in range(self.num_rows):
 137             inop = self.rs[i]
 138             res += inop.in_op + [inop.stb]
 139         return self.out_op.ports() + res + [self.mid]
 140
 141
 142 class FPGetOpMod:
 143     def __init__(self, width):
 144         self.in_op = FPOp(width)
 145         self.out_op = Signal(width)
 146         self.out_decode = Signal(reset_less=True)
 147
 148     def elaborate(self, platform):
 149         m = Module()
 150         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 151         m.submodules.get_op_in = self.in_op
 152         #m.submodules.get_op_out = self.out_op
 153         with m.If(self.out_decode):
 154             m.d.comb += [
 155                 self.out_op.eq(self.in_op.v),
 156             ]
 157         return m
 158
 159
 160 class FPGetOp(FPState):
 161     """ gets operand
 162     """
 163
 164     def __init__(self, in_state, out_state, in_op, width):
 165         FPState.__init__(self, in_state)
 166         self.out_state = out_state
 167         self.mod = FPGetOpMod(width)
 168         self.in_op = in_op
 169         self.out_op = Signal(width)
 170         self.out_decode = Signal(reset_less=True)
 171
 172     def setup(self, m, in_op):
 173         """ links module to inputs and outputs
 174         """
 175         setattr(m.submodules, self.state_from, self.mod)
 176         m.d.comb += self.mod.in_op.eq(in_op)
 177         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 178
 179     def action(self, m):
 180         with m.If(self.out_decode):
 181             m.next = self.out_state
 182             m.d.sync += [
 183                 self.in_op.ack.eq(0),
 184                 self.out_op.eq(self.mod.out_op)
 185             ]
 186         with m.Else():
 187             m.d.sync += self.in_op.ack.eq(1)
 188
 189
 190 class FPGet2OpMod(Trigger):
 191     def __init__(self, width, id_wid):
 192         Trigger.__init__(self)
 193         self.width = width
 194         self.id_wid = id_wid
 195         self.i = self.ispec()
 196         self.o = self.ospec()
 197
 198     def ispec(self):
 199         return FPADDBaseData(self.width, self.id_wid)
 200
 201     def ospec(self):
 202         return FPNumBase2Ops(self.width, self.id_wid)
 203
 204     def elaborate(self, platform):
 205         m = Trigger.elaborate(self, platform)
 206         m.submodules.get_op1_out = self.o.a
 207         m.submodules.get_op2_out = self.o.b
 208         out_op1 = FPNumIn(None, self.width)
 209         out_op2 = FPNumIn(None, self.width)
 210         with m.If(self.trigger):
 211             m.d.comb += [
 212                 out_op1.decode(self.i.a),
 213                 out_op2.decode(self.i.b),
 214                 self.o.a.eq(out_op1),
 215                 self.o.b.eq(out_op2),
 216                 self.o.mid.eq(self.i.mid)
 217             ]
 218         return m
 219
 220
 221 class FPGet2Op(FPState):
 222     """ gets operands
 223     """
 224
 225     def __init__(self, in_state, out_state, width, id_wid):
 226         FPState.__init__(self, in_state)
 227         self.out_state = out_state
 228         self.mod = FPGet2OpMod(width, id_wid)
 229         self.o = self.mod.ospec()
 230         self.in_stb = Signal(reset_less=True)
 231         self.out_ack = Signal(reset_less=True)
 232         self.out_decode = Signal(reset_less=True)
 233
 234     def setup(self, m, i, in_stb, in_ack):
 235         """ links module to inputs and outputs
 236         """
 237         m.submodules.get_ops = self.mod
 238         m.d.comb += self.mod.i.eq(i)
 239         m.d.comb += self.mod.stb.eq(in_stb)
 240         m.d.comb += self.out_ack.eq(self.mod.ack)
 241         m.d.comb += self.out_decode.eq(self.mod.trigger)
 242         m.d.comb += in_ack.eq(self.mod.ack)
 243
 244     def action(self, m):
 245         with m.If(self.out_decode):
 246             m.next = self.out_state
 247             m.d.sync += [
 248                 self.mod.ack.eq(0),
 249                 self.o.eq(self.mod.o),
 250             ]
 251         with m.Else():
 252             m.d.sync += self.mod.ack.eq(1)
 253
 254
 255 class FPNumBase2Ops:
 256
 257     def __init__(self, width, id_wid, m_extra=True):
 258         self.a = FPNumBase(width, m_extra)
 259         self.b = FPNumBase(width, m_extra)
 260         self.mid = Signal(id_wid, reset_less=True)
 261
 262     def eq(self, i):
 263         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 264
 265
 266 class FPSCData:
 267
 268     def __init__(self, width, id_wid):
 269         self.a = FPNumBase(width, True)
 270         self.b = FPNumBase(width, True)
 271         self.z = FPNumOut(width, False)
 272         self.oz = Signal(width, reset_less=True)
 273         self.out_do_z = Signal(reset_less=True)
 274         self.mid = Signal(id_wid, reset_less=True)
 275
 276     def eq(self, i):
 277         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 278                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 279
 280
 281 class FPAddSpecialCasesMod:
 282     """ special cases: NaNs, infs, zeros, denormalised
 283         NOTE: some of these are unique to add.  see "Special Operations"
 284         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 285     """
 286
 287     def __init__(self, width, id_wid):
 288         self.width = width
 289         self.id_wid = id_wid
 290         self.i = self.ispec()
 291         self.o = self.ospec()
 292
 293     def ispec(self):
 294         return FPNumBase2Ops(self.width, self.id_wid)
 295
 296     def ospec(self):
 297         return FPSCData(self.width, self.id_wid)
 298
 299     def setup(self, m, i):
 300         """ links module to inputs and outputs
 301         """
 302         m.submodules.specialcases = self
 303         m.d.comb += self.i.eq(i)
 304
 305     def process(self, i):
 306         return self.o
 307
 308     def elaborate(self, platform):
 309         m = Module()
 310
 311         m.submodules.sc_in_a = self.i.a
 312         m.submodules.sc_in_b = self.i.b
 313         m.submodules.sc_out_z = self.o.z
 314
 315         s_nomatch = Signal()
 316         m.d.comb += s_nomatch.eq(self.i.a.s != self.i.b.s)
 317
 318         m_match = Signal()
 319         m.d.comb += m_match.eq(self.i.a.m == self.i.b.m)
 320
 321         # if a is NaN or b is NaN return NaN
 322         with m.If(self.i.a.is_nan | self.i.b.is_nan):
 323             m.d.comb += self.o.out_do_z.eq(1)
 324             m.d.comb += self.o.z.nan(0)
 325
 326         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 327         # under review
 328
 329         ## if a is zero and b is NaN return -b
 330         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 331         #    m.d.comb += self.o.out_do_z.eq(1)
 332         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 333
 334         ## if b is zero and a is NaN return -a
 335         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 336         #    m.d.comb += self.o.out_do_z.eq(1)
 337         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 338
 339         ## if a is -zero and b is NaN return -b
 340         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 341         #    m.d.comb += self.o.out_do_z.eq(1)
 342         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 343
 344         ## if b is -zero and a is NaN return -a
 345         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 346         #    m.d.comb += self.o.out_do_z.eq(1)
 347         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 348
 349         # if a is inf return inf (or NaN)
 350         with m.Elif(self.i.a.is_inf):
 351             m.d.comb += self.o.out_do_z.eq(1)
 352             m.d.comb += self.o.z.inf(self.i.a.s)
 353             # if a is inf and signs don't match return NaN
 354             with m.If(self.i.b.exp_128 & s_nomatch):
 355                 m.d.comb += self.o.z.nan(0)
 356
 357         # if b is inf return inf
 358         with m.Elif(self.i.b.is_inf):
 359             m.d.comb += self.o.out_do_z.eq(1)
 360             m.d.comb += self.o.z.inf(self.i.b.s)
 361
 362         # if a is zero and b zero return signed-a/b
 363         with m.Elif(self.i.a.is_zero & self.i.b.is_zero):
 364             m.d.comb += self.o.out_do_z.eq(1)
 365             m.d.comb += self.o.z.create(self.i.a.s & self.i.b.s,
 366                                           self.i.b.e,
 367                                           self.i.b.m[3:-1])
 368
 369         # if a is zero return b
 370         with m.Elif(self.i.a.is_zero):
 371             m.d.comb += self.o.out_do_z.eq(1)
 372             m.d.comb += self.o.z.create(self.i.b.s, self.i.b.e,
 373                                       self.i.b.m[3:-1])
 374
 375         # if b is zero return a
 376         with m.Elif(self.i.b.is_zero):
 377             m.d.comb += self.o.out_do_z.eq(1)
 378             m.d.comb += self.o.z.create(self.i.a.s, self.i.a.e,
 379                                       self.i.a.m[3:-1])
 380
 381         # if a equal to -b return zero (+ve zero)
 382         with m.Elif(s_nomatch & m_match & (self.i.a.e == self.i.b.e)):
 383             m.d.comb += self.o.out_do_z.eq(1)
 384             m.d.comb += self.o.z.zero(0)
 385
 386         # Denormalised Number checks next, so pass a/b data through
 387         with m.Else():
 388             m.d.comb += self.o.out_do_z.eq(0)
 389             m.d.comb += self.o.a.eq(self.i.a)
 390             m.d.comb += self.o.b.eq(self.i.b)
 391
 392         m.d.comb += self.o.oz.eq(self.o.z.v)
 393         m.d.comb += self.o.mid.eq(self.i.mid)
 394
 395         return m
 396
 397
 398 class FPID:
 399     def __init__(self, id_wid):
 400         self.id_wid = id_wid
 401         if self.id_wid:
 402             self.in_mid = Signal(id_wid, reset_less=True)
 403             self.out_mid = Signal(id_wid, reset_less=True)
 404         else:
 405             self.in_mid = None
 406             self.out_mid = None
 407
 408     def idsync(self, m):
 409         if self.id_wid is not None:
 410             m.d.sync += self.out_mid.eq(self.in_mid)
 411
 412
 413 class FPAddSpecialCases(FPState):
 414     """ special cases: NaNs, infs, zeros, denormalised
 415         NOTE: some of these are unique to add.  see "Special Operations"
 416         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 417     """
 418
 419     def __init__(self, width, id_wid):
 420         FPState.__init__(self, "special_cases")
 421         self.mod = FPAddSpecialCasesMod(width)
 422         self.out_z = self.mod.ospec()
 423         self.out_do_z = Signal(reset_less=True)
 424
 425     def setup(self, m, i):
 426         """ links module to inputs and outputs
 427         """
 428         self.mod.setup(m, i, self.out_do_z)
 429         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 430         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 431
 432     def action(self, m):
 433         self.idsync(m)
 434         with m.If(self.out_do_z):
 435             m.next = "put_z"
 436         with m.Else():
 437             m.next = "denormalise"
 438
 439
 440 class FPAddSpecialCasesDeNorm(FPState):
 441     """ special cases: NaNs, infs, zeros, denormalised
 442         NOTE: some of these are unique to add.  see "Special Operations"
 443         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 444     """
 445
 446     def __init__(self, width, id_wid):
 447         FPState.__init__(self, "special_cases")
 448         self.smod = FPAddSpecialCasesMod(width, id_wid)
 449         self.dmod = FPAddDeNormMod(width, id_wid)
 450         self.o = self.ospec()
 451
 452     def ispec(self):
 453         return self.smod.ispec()
 454
 455     def ospec(self):
 456         return self.dmod.ospec()
 457
 458     def setup(self, m, i):
 459         """ links module to inputs and outputs
 460         """
 461         # these only needed for break-out (early-out)
 462         # out_z = self.smod.ospec()
 463         # out_do_z = Signal(reset_less=True)
 464         self.smod.setup(m, i)
 465         self.dmod.setup(m, self.smod.o)
 466         #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
 467
 468         # out_do_z=True, only needed for early-out (split pipeline)
 469         #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
 470         #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
 471
 472         # out_do_z=False
 473         m.d.sync += self.o.eq(self.dmod.o)
 474
 475     def process(self, i):
 476         return self.o
 477
 478     def action(self, m):
 479         #with m.If(self.out_do_z):
 480         #    m.next = "put_z"
 481         #with m.Else():
 482             m.next = "align"
 483
 484
 485 class FPAddDeNormMod(FPState):
 486
 487     def __init__(self, width, id_wid):
 488         self.width = width
 489         self.id_wid = id_wid
 490         self.i = self.ispec()
 491         self.o = self.ospec()
 492
 493     def ispec(self):
 494         return FPSCData(self.width, self.id_wid)
 495
 496     def ospec(self):
 497         return FPSCData(self.width, self.id_wid)
 498
 499     def setup(self, m, i):
 500         """ links module to inputs and outputs
 501         """
 502         m.submodules.denormalise = self
 503         m.d.comb += self.i.eq(i)
 504
 505     def elaborate(self, platform):
 506         m = Module()
 507         m.submodules.denorm_in_a = self.i.a
 508         m.submodules.denorm_in_b = self.i.b
 509         m.submodules.denorm_out_a = self.o.a
 510         m.submodules.denorm_out_b = self.o.b
 511
 512         with m.If(~self.i.out_do_z):
 513             # XXX hmmm, don't like repeating identical code
 514             m.d.comb += self.o.a.eq(self.i.a)
 515             with m.If(self.i.a.exp_n127):
 516                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 517             with m.Else():
 518                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 519
 520             m.d.comb += self.o.b.eq(self.i.b)
 521             with m.If(self.i.b.exp_n127):
 522                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 523             with m.Else():
 524                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 525
 526         m.d.comb += self.o.mid.eq(self.i.mid)
 527         m.d.comb += self.o.z.eq(self.i.z)
 528         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 529         m.d.comb += self.o.oz.eq(self.i.oz)
 530
 531         return m
 532
 533
 534 class FPAddDeNorm(FPState):
 535
 536     def __init__(self, width, id_wid):
 537         FPState.__init__(self, "denormalise")
 538         self.mod = FPAddDeNormMod(width)
 539         self.out_a = FPNumBase(width)
 540         self.out_b = FPNumBase(width)
 541
 542     def setup(self, m, i):
 543         """ links module to inputs and outputs
 544         """
 545         self.mod.setup(m, i)
 546
 547         m.d.sync += self.out_a.eq(self.mod.out_a)
 548         m.d.sync += self.out_b.eq(self.mod.out_b)
 549
 550     def action(self, m):
 551         # Denormalised Number checks
 552         m.next = "align"
 553
 554
 555 class FPAddAlignMultiMod(FPState):
 556
 557     def __init__(self, width):
 558         self.in_a = FPNumBase(width)
 559         self.in_b = FPNumBase(width)
 560         self.out_a = FPNumIn(None, width)
 561         self.out_b = FPNumIn(None, width)
 562         self.exp_eq = Signal(reset_less=True)
 563
 564     def elaborate(self, platform):
 565         # This one however (single-cycle) will do the shift
 566         # in one go.
 567
 568         m = Module()
 569
 570         m.submodules.align_in_a = self.in_a
 571         m.submodules.align_in_b = self.in_b
 572         m.submodules.align_out_a = self.out_a
 573         m.submodules.align_out_b = self.out_b
 574
 575         # NOTE: this does *not* do single-cycle multi-shifting,
 576         #       it *STAYS* in the align state until exponents match
 577
 578         # exponent of a greater than b: shift b down
 579         m.d.comb += self.exp_eq.eq(0)
 580         m.d.comb += self.out_a.eq(self.in_a)
 581         m.d.comb += self.out_b.eq(self.in_b)
 582         agtb = Signal(reset_less=True)
 583         altb = Signal(reset_less=True)
 584         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 585         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 586         with m.If(agtb):
 587             m.d.comb += self.out_b.shift_down(self.in_b)
 588         # exponent of b greater than a: shift a down
 589         with m.Elif(altb):
 590             m.d.comb += self.out_a.shift_down(self.in_a)
 591         # exponents equal: move to next stage.
 592         with m.Else():
 593             m.d.comb += self.exp_eq.eq(1)
 594         return m
 595
 596
 597 class FPAddAlignMulti(FPState):
 598
 599     def __init__(self, width, id_wid):
 600         FPState.__init__(self, "align")
 601         self.mod = FPAddAlignMultiMod(width)
 602         self.out_a = FPNumIn(None, width)
 603         self.out_b = FPNumIn(None, width)
 604         self.exp_eq = Signal(reset_less=True)
 605
 606     def setup(self, m, in_a, in_b):
 607         """ links module to inputs and outputs
 608         """
 609         m.submodules.align = self.mod
 610         m.d.comb += self.mod.in_a.eq(in_a)
 611         m.d.comb += self.mod.in_b.eq(in_b)
 612         #m.d.comb += self.out_a.eq(self.mod.out_a)
 613         #m.d.comb += self.out_b.eq(self.mod.out_b)
 614         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 615         m.d.sync += self.out_a.eq(self.mod.out_a)
 616         m.d.sync += self.out_b.eq(self.mod.out_b)
 617
 618     def action(self, m):
 619         with m.If(self.exp_eq):
 620             m.next = "add_0"
 621
 622
 623 class FPNumIn2Ops:
 624
 625     def __init__(self, width, id_wid):
 626         self.a = FPNumIn(None, width)
 627         self.b = FPNumIn(None, width)
 628         self.z = FPNumOut(width, False)
 629         self.out_do_z = Signal(reset_less=True)
 630         self.oz = Signal(width, reset_less=True)
 631         self.mid = Signal(id_wid, reset_less=True)
 632
 633     def eq(self, i):
 634         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 635                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 636
 637
 638 class FPAddAlignSingleMod:
 639
 640     def __init__(self, width, id_wid):
 641         self.width = width
 642         self.id_wid = id_wid
 643         self.i = self.ispec()
 644         self.o = self.ospec()
 645
 646     def ispec(self):
 647         return FPSCData(self.width, self.id_wid)
 648
 649     def ospec(self):
 650         return FPNumIn2Ops(self.width, self.id_wid)
 651
 652     def process(self, i):
 653         return self.o
 654
 655     def setup(self, m, i):
 656         """ links module to inputs and outputs
 657         """
 658         m.submodules.align = self
 659         m.d.comb += self.i.eq(i)
 660
 661     def elaborate(self, platform):
 662         """ Aligns A against B or B against A, depending on which has the
 663             greater exponent.  This is done in a *single* cycle using
 664             variable-width bit-shift
 665
 666             the shifter used here is quite expensive in terms of gates.
 667             Mux A or B in (and out) into temporaries, as only one of them
 668             needs to be aligned against the other
 669         """
 670         m = Module()
 671
 672         m.submodules.align_in_a = self.i.a
 673         m.submodules.align_in_b = self.i.b
 674         m.submodules.align_out_a = self.o.a
 675         m.submodules.align_out_b = self.o.b
 676
 677         # temporary (muxed) input and output to be shifted
 678         t_inp = FPNumBase(self.width)
 679         t_out = FPNumIn(None, self.width)
 680         espec = (len(self.i.a.e), True)
 681         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 682         m.submodules.align_t_in = t_inp
 683         m.submodules.align_t_out = t_out
 684         m.submodules.multishift_r = msr
 685
 686         ediff = Signal(espec, reset_less=True)
 687         ediffr = Signal(espec, reset_less=True)
 688         tdiff = Signal(espec, reset_less=True)
 689         elz = Signal(reset_less=True)
 690         egz = Signal(reset_less=True)
 691
 692         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 693         m.d.comb += msr.inp.eq(t_inp.m)
 694         m.d.comb += msr.diff.eq(tdiff)
 695         m.d.comb += t_out.m.eq(msr.m)
 696         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 697         m.d.comb += t_out.s.eq(t_inp.s)
 698
 699         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 700         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 701         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 702         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 703
 704         # default: A-exp == B-exp, A and B untouched (fall through)
 705         m.d.comb += self.o.a.eq(self.i.a)
 706         m.d.comb += self.o.b.eq(self.i.b)
 707         # only one shifter (muxed)
 708         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 709         # exponent of a greater than b: shift b down
 710         with m.If(~self.i.out_do_z):
 711             with m.If(egz):
 712                 m.d.comb += [t_inp.eq(self.i.b),
 713                              tdiff.eq(ediff),
 714                              self.o.b.eq(t_out),
 715                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 716                             ]
 717             # exponent of b greater than a: shift a down
 718             with m.Elif(elz):
 719                 m.d.comb += [t_inp.eq(self.i.a),
 720                              tdiff.eq(ediffr),
 721                              self.o.a.eq(t_out),
 722                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 723                             ]
 724
 725         m.d.comb += self.o.mid.eq(self.i.mid)
 726         m.d.comb += self.o.z.eq(self.i.z)
 727         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 728         m.d.comb += self.o.oz.eq(self.i.oz)
 729
 730         return m
 731
 732
 733 class FPAddAlignSingle(FPState):
 734
 735     def __init__(self, width, id_wid):
 736         FPState.__init__(self, "align")
 737         self.mod = FPAddAlignSingleMod(width, id_wid)
 738         self.out_a = FPNumIn(None, width)
 739         self.out_b = FPNumIn(None, width)
 740
 741     def setup(self, m, i):
 742         """ links module to inputs and outputs
 743         """
 744         self.mod.setup(m, i)
 745
 746         # NOTE: could be done as comb
 747         m.d.sync += self.out_a.eq(self.mod.out_a)
 748         m.d.sync += self.out_b.eq(self.mod.out_b)
 749
 750     def action(self, m):
 751         m.next = "add_0"
 752
 753
 754 class FPAddAlignSingleAdd(FPState):
 755
 756     def __init__(self, width, id_wid):
 757         FPState.__init__(self, "align")
 758         self.width = width
 759         self.id_wid = id_wid
 760         self.a1o = self.ospec()
 761
 762     def ispec(self):
 763         return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
 764
 765     def ospec(self):
 766         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 767
 768     def setup(self, m, i):
 769         """ links module to inputs and outputs
 770         """
 771
 772         # chain AddAlignSingle, AddStage0 and AddStage1
 773         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 774         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 775         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 776
 777         chain = StageChain([mod, a0mod, a1mod])
 778         chain.setup(m, i)
 779
 780         m.d.sync += self.a1o.eq(a1mod.o)
 781
 782     def process(self, i):
 783         return self.a1o
 784
 785     def action(self, m):
 786         m.next = "normalise_1"
 787
 788
 789 class FPAddStage0Data:
 790
 791     def __init__(self, width, id_wid):
 792         self.z = FPNumBase(width, False)
 793         self.out_do_z = Signal(reset_less=True)
 794         self.oz = Signal(width, reset_less=True)
 795         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 796         self.mid = Signal(id_wid, reset_less=True)
 797
 798     def eq(self, i):
 799         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 800                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 801
 802
 803 class FPAddStage0Mod:
 804
 805     def __init__(self, width, id_wid):
 806         self.width = width
 807         self.id_wid = id_wid
 808         self.i = self.ispec()
 809         self.o = self.ospec()
 810
 811     def ispec(self):
 812         return FPSCData(self.width, self.id_wid)
 813
 814     def ospec(self):
 815         return FPAddStage0Data(self.width, self.id_wid)
 816
 817     def process(self, i):
 818         return self.o
 819
 820     def setup(self, m, i):
 821         """ links module to inputs and outputs
 822         """
 823         m.submodules.add0 = self
 824         m.d.comb += self.i.eq(i)
 825
 826     def elaborate(self, platform):
 827         m = Module()
 828         m.submodules.add0_in_a = self.i.a
 829         m.submodules.add0_in_b = self.i.b
 830         m.submodules.add0_out_z = self.o.z
 831
 832         # store intermediate tests (and zero-extended mantissas)
 833         seq = Signal(reset_less=True)
 834         mge = Signal(reset_less=True)
 835         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 836         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 837         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 838                      mge.eq(self.i.a.m >= self.i.b.m),
 839                      am0.eq(Cat(self.i.a.m, 0)),
 840                      bm0.eq(Cat(self.i.b.m, 0))
 841                     ]
 842         # same-sign (both negative or both positive) add mantissas
 843         with m.If(~self.i.out_do_z):
 844             m.d.comb += self.o.z.e.eq(self.i.a.e)
 845             with m.If(seq):
 846                 m.d.comb += [
 847                     self.o.tot.eq(am0 + bm0),
 848                     self.o.z.s.eq(self.i.a.s)
 849                 ]
 850             # a mantissa greater than b, use a
 851             with m.Elif(mge):
 852                 m.d.comb += [
 853                     self.o.tot.eq(am0 - bm0),
 854                     self.o.z.s.eq(self.i.a.s)
 855                 ]
 856             # b mantissa greater than a, use b
 857             with m.Else():
 858                 m.d.comb += [
 859                     self.o.tot.eq(bm0 - am0),
 860                     self.o.z.s.eq(self.i.b.s)
 861             ]
 862
 863         m.d.comb += self.o.oz.eq(self.i.oz)
 864         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 865         m.d.comb += self.o.mid.eq(self.i.mid)
 866         return m
 867
 868
 869 class FPAddStage0(FPState):
 870     """ First stage of add.  covers same-sign (add) and subtract
 871         special-casing when mantissas are greater or equal, to
 872         give greatest accuracy.
 873     """
 874
 875     def __init__(self, width, id_wid):
 876         FPState.__init__(self, "add_0")
 877         self.mod = FPAddStage0Mod(width)
 878         self.o = self.mod.ospec()
 879
 880     def setup(self, m, i):
 881         """ links module to inputs and outputs
 882         """
 883         self.mod.setup(m, i)
 884
 885         # NOTE: these could be done as combinatorial (merge add0+add1)
 886         m.d.sync += self.o.eq(self.mod.o)
 887
 888     def action(self, m):
 889         m.next = "add_1"
 890
 891
 892 class FPAddStage1Data:
 893
 894     def __init__(self, width, id_wid):
 895         self.z = FPNumBase(width, False)
 896         self.out_do_z = Signal(reset_less=True)
 897         self.oz = Signal(width, reset_less=True)
 898         self.of = Overflow()
 899         self.mid = Signal(id_wid, reset_less=True)
 900
 901     def eq(self, i):
 902         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 903                 self.of.eq(i.of), self.mid.eq(i.mid)]
 904
 905
 906
 907 class FPAddStage1Mod(FPState):
 908     """ Second stage of add: preparation for normalisation.
 909         detects when tot sum is too big (tot[27] is kinda a carry bit)
 910     """
 911
 912     def __init__(self, width, id_wid):
 913         self.width = width
 914         self.id_wid = id_wid
 915         self.i = self.ispec()
 916         self.o = self.ospec()
 917
 918     def ispec(self):
 919         return FPAddStage0Data(self.width, self.id_wid)
 920
 921     def ospec(self):
 922         return FPAddStage1Data(self.width, self.id_wid)
 923
 924     def process(self, i):
 925         return self.o
 926
 927     def setup(self, m, i):
 928         """ links module to inputs and outputs
 929         """
 930         m.submodules.add1 = self
 931         m.submodules.add1_out_overflow = self.o.of
 932
 933         m.d.comb += self.i.eq(i)
 934
 935     def elaborate(self, platform):
 936         m = Module()
 937         #m.submodules.norm1_in_overflow = self.in_of
 938         #m.submodules.norm1_out_overflow = self.out_of
 939         #m.submodules.norm1_in_z = self.in_z
 940         #m.submodules.norm1_out_z = self.out_z
 941         m.d.comb += self.o.z.eq(self.i.z)
 942         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 943         with m.If(~self.i.out_do_z):
 944             with m.If(self.i.tot[-1]):
 945                 m.d.comb += [
 946                     self.o.z.m.eq(self.i.tot[4:]),
 947                     self.o.of.m0.eq(self.i.tot[4]),
 948                     self.o.of.guard.eq(self.i.tot[3]),
 949                     self.o.of.round_bit.eq(self.i.tot[2]),
 950                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 951                     self.o.z.e.eq(self.i.z.e + 1)
 952             ]
 953             # tot[-1] (MSB) zero case
 954             with m.Else():
 955                 m.d.comb += [
 956                     self.o.z.m.eq(self.i.tot[3:]),
 957                     self.o.of.m0.eq(self.i.tot[3]),
 958                     self.o.of.guard.eq(self.i.tot[2]),
 959                     self.o.of.round_bit.eq(self.i.tot[1]),
 960                     self.o.of.sticky.eq(self.i.tot[0])
 961             ]
 962
 963         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 964         m.d.comb += self.o.oz.eq(self.i.oz)
 965         m.d.comb += self.o.mid.eq(self.i.mid)
 966
 967         return m
 968
 969
 970 class FPAddStage1(FPState):
 971
 972     def __init__(self, width, id_wid):
 973         FPState.__init__(self, "add_1")
 974         self.mod = FPAddStage1Mod(width)
 975         self.out_z = FPNumBase(width, False)
 976         self.out_of = Overflow()
 977         self.norm_stb = Signal()
 978
 979     def setup(self, m, i):
 980         """ links module to inputs and outputs
 981         """
 982         self.mod.setup(m, i)
 983
 984         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 985
 986         m.d.sync += self.out_of.eq(self.mod.out_of)
 987         m.d.sync += self.out_z.eq(self.mod.out_z)
 988         m.d.sync += self.norm_stb.eq(1)
 989
 990     def action(self, m):
 991         m.next = "normalise_1"
 992
 993
 994 class FPNormaliseModSingle:
 995
 996     def __init__(self, width):
 997         self.width = width
 998         self.in_z = self.ispec()
 999         self.out_z = self.ospec()
1000
1001     def ispec(self):
1002         return FPNumBase(self.width, False)
1003
1004     def ospec(self):
1005         return FPNumBase(self.width, False)
1006
1007     def setup(self, m, i):
1008         """ links module to inputs and outputs
1009         """
1010         m.submodules.normalise = self
1011         m.d.comb += self.i.eq(i)
1012
1013     def elaborate(self, platform):
1014         m = Module()
1015
1016         mwid = self.out_z.m_width+2
1017         pe = PriorityEncoder(mwid)
1018         m.submodules.norm_pe = pe
1019
1020         m.submodules.norm1_out_z = self.out_z
1021         m.submodules.norm1_in_z = self.in_z
1022
1023         in_z = FPNumBase(self.width, False)
1024         in_of = Overflow()
1025         m.submodules.norm1_insel_z = in_z
1026         m.submodules.norm1_insel_overflow = in_of
1027
1028         espec = (len(in_z.e), True)
1029         ediff_n126 = Signal(espec, reset_less=True)
1030         msr = MultiShiftRMerge(mwid, espec)
1031         m.submodules.multishift_r = msr
1032
1033         m.d.comb += in_z.eq(self.in_z)
1034         m.d.comb += in_of.eq(self.in_of)
1035         # initialise out from in (overridden below)
1036         m.d.comb += self.out_z.eq(in_z)
1037         m.d.comb += self.out_of.eq(in_of)
1038         # normalisation decrease condition
1039         decrease = Signal(reset_less=True)
1040         m.d.comb += decrease.eq(in_z.m_msbzero)
1041         # decrease exponent
1042         with m.If(decrease):
1043             # *sigh* not entirely obvious: count leading zeros (clz)
1044             # with a PriorityEncoder: to find from the MSB
1045             # we reverse the order of the bits.
1046             temp_m = Signal(mwid, reset_less=True)
1047             temp_s = Signal(mwid+1, reset_less=True)
1048             clz = Signal((len(in_z.e), True), reset_less=True)
1049             m.d.comb += [
1050                 # cat round and guard bits back into the mantissa
1051                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
1052                 pe.i.eq(temp_m[::-1]),          # inverted
1053                 clz.eq(pe.o),                   # count zeros from MSB down
1054                 temp_s.eq(temp_m << clz),       # shift mantissa UP
1055                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
1056                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
1057             ]
1058
1059         return m
1060
class FPNorm1Data:
    """ Output record of the normalisation stage.

        Fields: roundz (flag), z (number), out_do_z (flag),
        oz (width-bit value) and mid (id_wid-bit identifier).
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        copies = []
        for name in ("z", "out_do_z", "oz", "roundz", "mid"):
            copies.append(getattr(self, name).eq(getattr(i, name)))
        return copies
1073
1074
class FPNorm1ModSingle:
    """ Single-cycle normalisation stage.

        Takes the output of add stage 1 and either shifts the mantissa
        up (reducing the exponent) or down (raising the exponent), and
        produces the roundz flag from the resulting overflow bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: the output format of add stage 1
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record of this stage
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        """ creates the combinatorial normalisation logic
        """
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input record, used by all the logic below
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the low bits must come from the multi-shifter output:
                    # temp_s belongs to the decrease branch and is not
                    # driven when this branch is active
                    of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too
                    of.guard.eq(msr.m[2]),     # guard
                    of.round_bit.eq(msr.m[1]), # round
                    of.sticky.eq(msr.m[0]),    # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1181
1182
class FPNorm1ModMulti:
    """ One-bit-per-step normalisation.

        Each evaluation moves the mantissa by a single bit (up or down)
        and adjusts the exponent by one.  out_norm is raised while a
        further step is still required.  in_select chooses between the
        fresh input (in_z/in_of) and the fed-back intermediate value
        (temp_z/temp_of).
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven in elaborate(): high while another step is needed
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ creates the combinatorial logic for one normalisation step
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1248
1249
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" using the single-cycle normalisation
        module; moves on to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action for this state: always move on to "round"
        """
        m.next = "round"
1272
1273
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" using the one-bit-per-step module.

        Stays in this state, feeding the module's output back through
        temp_z/temp_of, until out_norm drops; then moves on to "round".
    """

    def __init__(self, width, id_wid):
        super().__init__("normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ connects the wrapped module and the strobe input
        """
        # NOTE(review): FPNorm1ModMulti, as visible in this file, does not
        # show a setup() method taking these arguments - confirm.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: loop while normalising, else go to "round"
        """
        accept = (~self.ack) & (self.stb)
        m.d.comb += self.in_accept.eq(accept)
        # feed the module's result back as the next intermediate value
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            # another step is required: ack only as the input is accepted
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1314
1315
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact variant).

        Chains normalisation, rounding, corrections and packing
        together and registers the packed result.
    """

    def __init__(self, width, id_wid):
        super().__init__("normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        """ input record format (same as FPNorm1ModSingle's ispec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record format (same as FPPackMod's ospec)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the four-module chain and latches the packer's output
        """
        w, idw = self.width, self.id_wid

        # Normalisation, Rounding Corrections, Pack - in a chain
        norm = FPNorm1ModSingle(w, idw)
        rnd = FPRoundMod(w, idw)
        corr = FPCorrectionsMod(w, idw)
        pack = FPPackMod(w, idw)
        StageChain([norm, rnd, corr, pack]).setup(m, i)
        self.out_z = pack.ospec()

        m.d.sync += [
            self.out_z.mid.eq(pack.o.mid),
            self.out_z.z.v.eq(pack.o.z.v),  # outputs packed result
        ]

    def process(self, i):
        """ returns the registered output record (the argument is not used)
        """
        return self.out_z

    def action(self, m):
        """ FSM action for this state: always move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1350
1351
class FPRoundData:
    """ Record passed out of the rounding and corrections stages.

        Fields: z (number), out_do_z (flag), oz (width-bit value)
        and mid (id_wid-bit identifier).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        field_names = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name))
                for name in field_names]
1363
1364
class FPRoundMod:
    """ Rounding stage: when roundz is set (and out_do_z is clear) the
        mantissa is incremented, and the exponent too if the mantissa
        was all ones.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record: the output format of the normalisation stage
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record of this stage
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module in m and connects i to the input record
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial rounding logic
        """
        m = Module()
        src, dst = self.i, self.out_z

        # default: copies mid, z, out_do_z (and oz)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # mantissa up
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                # mantissa was all 1s: exponent up as well
                with m.If(src.z.m == src.z.m1s):
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1396
1397
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod, registers its output and
        moves on to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined by FPState as shown at the
        # top of this file; presumably inherited from FPBase - confirm.
        self.idsync(m)
        # FPRoundMod publishes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action for this state: always move on to "corrections"
        """
        m.next = "corrections"
1422
1423
class FPCorrectionsMod:
    """ Corrections stage: copies its input across, replacing the
        exponent with N127 when the number is flagged as denormalised
        (and out_do_z is clear).
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record format (identical to the input format)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module in m and connects i to the input record
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial corrections logic
        """
        m = Module()
        src, dst = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z

        # default: copies mid, z, out_do_z (and oz)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1456
1457
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod, registers its
        output and moves on to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPCorrectionsMod publishes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action for this state: always move on to "pack"
        """
        m.next = "pack"
1481
1482
class FPPackData:
    """ Output record of the pack stage: z (an FPNumOut) plus the
        id_wid-bit identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
1491
1492
class FPPackMod:
    """ Pack stage: produces the final value.

        With out_do_z clear, the output is either infinity (when the
        input is flagged as overflowed) or is created from the input's
        sign, exponent and mantissa.  With out_do_z set, the incoming
        oz value is used directly.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record format
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ registers this module in m and connects in_z to the input record
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ creates the combinatorial packing logic
        """
        m = Module()
        num = self.i.z
        packed = self.o.z
        m.submodules.pack_in_z = num
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            # overflowed: result is infinity carrying the input's sign
            with m.If(num.is_overflowed):
                m.d.comb += packed.inf(num.s)
            with m.Else():
                m.d.comb += packed.create(num.s, num.e, num.m)
        with m.Else():
            # result was already decided upstream: use oz as-is
            m.d.comb += packed.v.eq(self.i.oz)
        return m
1528
1529
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod, registers its packed value
        and identifier, and moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # both records are FPPackData: the value lives at .z.v, and the
        # module publishes its result as "o" (it has no "out_z")
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action for this state: always move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1553
1554
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z.z and waits for it
        to be acknowledged before moving to to_state ("get_ops" unless
        told otherwise).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy value (and id, if any), then handshake
        """
        port = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z.v)
        # strobe and ack both high: transfer done, drop strobe, move on
        with m.If(port.stb & port.ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        # otherwise raise (or keep raising) the strobe
        with m.Else():
            m.d.sync += port.stb.eq(1)
1578
1579
class FPPutZIdx(FPState):
    """ FSM state that presents a result on the output selected from
        out_zs by in_mid, and waits for that output to be acknowledged
        before moving to to_state ("get_ops" unless told otherwise).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy value to the selected output, then handshake
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        selected = self.out_zs[self.in_mid]
        # local copies of the selected output's handshake signals
        m.d.comb += outz_stb.eq(selected.stb)
        m.d.comb += outz_ack.eq(selected.ack)
        m.d.sync += selected.v.eq(self.in_z.v)
        # strobe and ack both high: transfer done, drop strobe, move on
        with m.If(outz_stb & outz_ack):
            m.d.sync += selected.stb.eq(0)
            m.next = self.to_state
        # otherwise raise (or keep raising) the strobe
        with m.Else():
            m.d.sync += selected.stb.eq(1)
1605
class FPADDBaseData:
    """ Input record of the adder: two width-bit operands a and b,
        plus the id_wid-bit identifier mid.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a  = Signal(width)
        self.b  = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("a", "b", "mid")]
1617
1618
class FPOpData:
    """ Output record of the adder: z (an FPOp) plus the id_wid-bit
        identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
1626
1627
class FPADDBaseMod:
    """ The FSM-based adder itself: a list of FPState objects, each of
        which contributes one state to a single FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input record format: the two operands plus identifier
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record format: the result plus identifier
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with the chosen variant of the pipeline
        if self.compact:
            build_states = self.get_compact_fragment
        else:
            build_states = self.get_longer_fragment
        build_states(m, platform)

        # one FSM state per registered FPState, named by its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the long (one stage per state) sequence of states

            NOTE(review): several calls below pass argument lists that do
            not match the setup() signatures visible in this file (e.g.
            FPAddStage0.setup takes (m, i)), and self.in_mid, self.out_z
            and self.out_mid are not assigned in __init__ - this variant
            looks out of step with the stage classes; confirm before use.
        """
        w, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", w))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(w, idw))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(w, idw))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle, wired up identically
        if self.single_cycle:
            align = FPAddAlignSingle(w, idw)
        else:
            align = FPAddAlignMulti(w, idw)
        alm = self.add_state(align)
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(w, idw))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(w, idw))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle version also takes the strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(w, idw))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(w, idw))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(w, idw))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(w, idw))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(w, idw))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the compact (reduced number of stages) sequence
        """
        w, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", w, idw))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(w, idw))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(w, idw))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(w, idw))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1747
1748
class FPADDBase(FPState):
    """ FSM state "fpadd": owns an FPADDBaseMod submodule (self.mod),
        feeds operands into it and, once the submodule signals that its
        result is available (z_done), moves the FSM on to "put_z".

        Attributes created here:

        * i, o: input / output data objects, built from the submodule's
          own ispec() / ospec()
        * in_t: Trigger used to start the submodule
        * z_done, in_accept, add_stb, add_ack: handshake helper signals
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: whatever the submodule's ispec() returns
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: whatever the submodule's ospec() returns
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires this state up to its FPADDBaseMod submodule

            * m: Module receiving the statements and the "fpadd" submodule
            * i: incoming data; copied (comb) into self.i, then on into
              the submodule's input
            * add_stb: incoming strobe; registered (sync) into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ statements executed while the FSM is in the "fpadd" state

            * not done, operands being offered (in_accept): acknowledge
              them and raise the submodule's input strobe
            * not done, nothing being offered: drop ack and strobe, and
              raise the ack on the output
            * done (z_done): acknowledge, drop the strobe, go to "put_z"

            The statements that used to follow a bare "return" in the
            "done" branch could never execute (and referred to in_mid /
            out_mid / out_z, which this class does not create), so they
            have been removed.
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1838
1839
class FPADDStageIn:
    """ input data of an add stage: operands a and b, plus an id (mid)
    """
    def __init__(self, width, id_wid):
        # a and b are width bits wide, mid is id_wid bits wide
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of .eq() results copying a, b and mid
            (in that order) from the same-named attributes of i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("a", "b", "mid")]
1848
1849
class FPADDStageOut:
    """ output data of an add stage: result z, plus an id (mid)
    """
    def __init__(self, width, id_wid):
        # z is width bits wide, mid is id_wid bits wide
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of .eq() results copying z and mid (in that
            order) from the same-named attributes of i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("z", "mid")]
1857
1858
class PlaceHolder:
    """ empty attribute container: given z and mid attributes it has the
        same format as FPADDStageOut, which lets an eq function do the
        assignments from it
    """
1861
1862
class FPAddBaseStage:
    """ stage description: FPADDStageIn in, FPADDStageOut out, with a
        process function that adds the two operands using "+"
    """
    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ creates a fresh input data object
        """
        return FPADDStageIn(self.width, self.id_wid)

    def ospec(self):
        """ creates a fresh output data object
        """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ returns a PlaceHolder carrying z = i.a + i.b and mid = i.mid
        """
        total = i.a + i.b
        result = PlaceHolder()
        result.z = total
        result.mid = i.mid
        return result
1879
1880
class FPADDBasePipe(ControlBase):
    """ pipeline wrapper around a single FPAddBaseStage

        * width: bit-width handed to the stage
        * id_wid: id bit-width handed to the stage
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        # keep the parameters: elaborate() needs them to build the stage
        # (they were previously referenced there as undefined bare names)
        self.width = width
        self.id_wid = id_wid

    def elaborate(self, platform):
        """ builds the Module: creates the stage and adds (comb) whatever
            self.connect returns for it
        """
        m = Module()
        stage1 = FPAddBaseStage(self.width, self.id_wid)
        m.d.comb += self.connect([stage1])
        return m
1890
1891
class PriorityCombPipeline(CombMultiInPipeline):
    """ multi-input combinatorial pipeline whose input multiplexer is an
        InputPriorityArbiter
    """
    def __init__(self, stage, p_len):
        # the arbiter is built first (it is handed this pipeline and the
        # number of inputs), then passed to the base class as p_mux
        arbiter = InputPriorityArbiter(self, p_len)
        super().__init__(stage, p_len=p_len, p_mux=arbiter)

    def ports(self):
        """ returns the ports of the input multiplexer
        """
        return self.p_mux.ports()
1899
1900
class FPAddInPassThruStage:
    """ pass-through stage for the input side: data leaves in the same
        format (FPADDStageIn) in which it arrives, unmodified
    """
    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ creates a fresh input data object
        """
        return FPADDStageIn(self.width, self.id_wid)

    def ospec(self):
        """ output format is identical to the input format

            (this used to call self.ospec(), i.e. itself, which can only
            end in a RecursionError)
        """
        return self.ispec()

    def process(self, i):
        """ returns i untouched
        """
        return i
1907
1908
class FPADDInMuxPipe(PriorityCombPipeline):
    """ fan-in pipeline: num_rows inputs into one output, using an
        FPAddInPassThruStage as the stage
    """
    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        passthru = FPAddInPassThruStage(width, id_width)
        PriorityCombPipeline.__init__(self, passthru, p_len=self.num_rows)

    def ports(self):
        """ returns, for every input in turn, its i_valid, o_ready and
            data ports, followed by the output's i_ready, o_valid and
            data ports
        """
        res = []
        for prev in self.p:
            res.extend([prev.i_valid, prev.o_ready] + prev.i_data.ports())
        res.extend([self.n.i_ready, self.n.o_valid] + self.n.o_data.ports())
        return res
1923
1924
class MuxCombPipeline(CombMultiOutPipeline):
    """ multi-output combinatorial pipeline in which one object acts as
        both the stage and the n-way output multiplexer
    """
    def __init__(self, stage, n_len):
        # HACK: the same object is handed over as stage and as n_mux
        super().__init__(stage, n_len=n_len, n_mux=stage)

        # HACK: because the n-mux is the stage, its muxid (m_id) is
        # pointed at the mid field of the incoming data
        stage.m_id = self.p.i_data.mid

    def ports(self):
        """ returns the ports of self.p_mux
        """
        # NOTE(review): the constructor passes n_mux, not p_mux - confirm
        # that the base class really provides a p_mux attribute
        return self.p_mux.ports()
1935
1936
class FPAddOutPassThruStage:
    """ pass-through stage for the output side: data leaves in the same
        format (FPADDStageOut) in which it arrives, unmodified
    """
    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ creates a fresh input data object
        """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ output format is identical to the input format

            (this used to call self.ospec(), i.e. itself, which can only
            end in a RecursionError)
        """
        return self.ispec()

    def process(self, i):
        """ returns i untouched
        """
        return i
1943
1944
class FPADDMuxOutPipe(MuxCombPipeline):
    """ fan-out pipeline: one input out to num_rows outputs, using an
        FPAddOutPassThruStage as the stage
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        MuxCombPipeline.__init__(self, passthru, n_len=self.num_rows)

    def ports(self):
        """ returns the input's i_valid, o_ready and data ports, followed
            for every output in turn by its i_ready, o_valid and data
            ports
        """
        res = [self.p.i_valid, self.p.o_ready] + self.p.i_data.ports()
        for nxt in self.n:
            res.extend([nxt.i_ready, nxt.o_valid] + nxt.o_data.ports())
        return res
1958
1959
1960
1961
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        Three pipelines are chained: a fan-in (num_rows inputs), the add
        stage, and a fan-out (num_rows outputs).  The inputs of the
        fan-in and the outputs of the fan-out double as the inputs and
        outputs of this class.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        fan_in = FPADDInMuxPipe(width, id_wid, num_rows)
        adder = FPADDBasePipe(width, id_wid)
        fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)
        self.inpipe, self.fpadd, self.outpipe = fan_in, adder, fan_out

        # expose the outer ends of the chain as this class's own p / n
        self.p = fan_in.p
        self.n = fan_out.n
        self._ports = fan_in.ports() + fan_out.ports()

    def elaborate(self, platform):
        """ registers the three pipelines as submodules (inpipe, fpadd,
            outpipe) and connects them in that order
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> add stage
        in_to_add = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += in_to_add
        # add stage -> fan-out
        add_to_out = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += add_to_out

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
1990
1991
class ResArray:
    """ array of rs_sz result operands (res), together with one incoming
        operand (in_z) and its id (in_mid)
    """
    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width passed to every FPOp
            * id_wid: bit-width of in_mid
            * rs_sz: number of entries in res.  This used to be read from
              an undefined bare name; it is now a parameter, defaulting
              to 2 as FPADD's rs_sz does.
        """
        self.width = width
        self.id_wid = id_wid
        self.rs_sz = rs_sz
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ connects (comb) the given in_z / in_mid to this object's own
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for ResArray: a Module with
            in_z and the res entries added as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every entry in res
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2023
2024
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs in rs, and of
              results in res
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs: one (in_a, in_b) tuple per row
        pairs = []
        for row in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = f"in_a_{row}"
            in_b.name = f"in_b_{row}"
            pairs.append((in_a, in_b))
        self.rs = Array(pairs)

        # results: one out_z per row
        results = []
        for row in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = f"out_z_{row}"
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ records state (in call order) and hands it straight back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            States are recorded in the order get_a, get_b, fpadd, put_z;
            an FSM is then built with one State per recorded state, named
            after its state_from and filled in by its action().
        """
        m = Module()
        m.submodules += self.rs

        # only row 0 of the operand array is wired up here
        row0 = self.rs[0]
        in_a, in_b = row0[0], row0[1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        addbase = self.add_state(FPADDBase(self.width, self.id_wid,
                                           self.single_cycle))
        # input spec object for FPADDBase, loaded (sync) with both fetched
        # operands and the incoming id
        abd = addbase.ispec()
        m.d.sync += [abd.a.eq(geta.out_op),
                     abd.b.eq(getb.out_op),
                     abd.mid.eq(self.ids.in_mid)]
        addbase.setup(m, abd, getb.out_decode, self.ids.in_mid)
        result = addbase.o

        self.add_state(FPPutZIdx("put_z", result.z, self.res,
                                 result.mid, "get_a"))

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2114
2115
if __name__ == "__main__":
    # hard-wired switch: the first branch (FPADD) is the one that runs
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        # row 0 operands, row 0 result, then the id signals
        port_list = (alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=port_list)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = ([alu.in_a, alu.in_b] +
                     alu.in_t.ports() +
                     alu.out_z.ports() +
                     [alu.in_mid, alu.out_mid])
        main(alu, ports=port_list)
2129
2130
2131     # works... but don't use, just do "python fname.py convert -t v"
2132     #print (verilog.convert(alu, ports=[
2133     #                        ports=alu.in_a.ports() + \
2134     #                              alu.in_b.ports() + \
2135     #                              alu.out_z.ports())