src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
  13 from multipipe import CombMuxOutPipe
  14 from multipipe import PriorityCombMuxInPipe
  15
  16 #from fpbase import FPNumShiftMultiRight
  17
  18
  19 class FPState(FPBase):
  20     def __init__(self, state_from):
  21         self.state_from = state_from
  22
  23     def set_inputs(self, inputs):
  24         self.inputs = inputs
  25         for k,v in inputs.items():
  26             setattr(self, k, v)
  27
  28     def set_outputs(self, outputs):
  29         self.outputs = outputs
  30         for k,v in outputs.items():
  31             setattr(self, k, v)
  32
  33
  34 class FPGetSyncOpsMod:
  35     def __init__(self, width, num_ops=2):
  36         self.width = width
  37         self.num_ops = num_ops
  38         inops = []
  39         outops = []
  40         for i in range(num_ops):
  41             inops.append(Signal(width, reset_less=True))
  42             outops.append(Signal(width, reset_less=True))
  43         self.in_op = inops
  44         self.out_op = outops
  45         self.stb = Signal(num_ops)
  46         self.ack = Signal()
  47         self.ready = Signal(reset_less=True)
  48         self.out_decode = Signal(reset_less=True)
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  53         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  54         with m.If(self.out_decode):
  55             for i in range(self.num_ops):
  56                 m.d.comb += [
  57                         self.out_op[i].eq(self.in_op[i]),
  58                 ]
  59         return m
  60
  61     def ports(self):
  62         return self.in_op + self.out_op + [self.stb, self.ack]
  63
  64
  65 class FPOps(Trigger):
  66     def __init__(self, width, num_ops):
  67         Trigger.__init__(self)
  68         self.width = width
  69         self.num_ops = num_ops
  70
  71         res = []
  72         for i in range(num_ops):
  73             res.append(Signal(width))
  74         self.v  = Array(res)
  75
  76     def ports(self):
  77         res = []
  78         for i in range(self.num_ops):
  79             res.append(self.v[i])
  80         res.append(self.ack)
  81         res.append(self.stb)
  82         return res
  83
  84
  85 class InputGroup:
  86     def __init__(self, width, num_ops=2, num_rows=4):
  87         self.width = width
  88         self.num_ops = num_ops
  89         self.num_rows = num_rows
  90         self.mmax = int(log(self.num_rows) / log(2))
  91         self.rs = []
  92         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  93         for i in range(num_rows):
  94             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  95         self.rs = Array(self.rs)
  96
  97         self.out_op = FPOps(width, num_ops)
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101
 102         pe = PriorityEncoder(self.num_rows)
 103         m.submodules.selector = pe
 104         m.submodules.out_op = self.out_op
 105         m.submodules += self.rs
 106
 107         # connect priority encoder
 108         in_ready = []
 109         for i in range(self.num_rows):
 110             in_ready.append(self.rs[i].ready)
 111         m.d.comb += pe.i.eq(Cat(*in_ready))
 112
 113         active = Signal(reset_less=True)
 114         out_en = Signal(reset_less=True)
 115         m.d.comb += active.eq(~pe.n) # encoder active
 116         m.d.comb += out_en.eq(active & self.out_op.trigger)
 117
 118         # encoder active: ack relevant input, record MID, pass output
 119         with m.If(out_en):
 120             rs = self.rs[pe.o]
 121             m.d.sync += self.mid.eq(pe.o)
 122             m.d.sync += rs.ack.eq(0)
 123             m.d.sync += self.out_op.stb.eq(0)
 124             for j in range(self.num_ops):
 125                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 126         with m.Else():
 127             m.d.sync += self.out_op.stb.eq(1)
 128             # acks all default to zero
 129             for i in range(self.num_rows):
 130                 m.d.sync += self.rs[i].ack.eq(1)
 131
 132         return m
 133
 134     def ports(self):
 135         res = []
 136         for i in range(self.num_rows):
 137             inop = self.rs[i]
 138             res += inop.in_op + [inop.stb]
 139         return self.out_op.ports() + res + [self.mid]
 140
 141
 142 class FPGetOpMod:
 143     def __init__(self, width):
 144         self.in_op = FPOp(width)
 145         self.out_op = Signal(width)
 146         self.out_decode = Signal(reset_less=True)
 147
 148     def elaborate(self, platform):
 149         m = Module()
 150         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 151         m.submodules.get_op_in = self.in_op
 152         #m.submodules.get_op_out = self.out_op
 153         with m.If(self.out_decode):
 154             m.d.comb += [
 155                 self.out_op.eq(self.in_op.v),
 156             ]
 157         return m
 158
 159
 160 class FPGetOp(FPState):
 161     """ gets operand
 162     """
 163
 164     def __init__(self, in_state, out_state, in_op, width):
 165         FPState.__init__(self, in_state)
 166         self.out_state = out_state
 167         self.mod = FPGetOpMod(width)
 168         self.in_op = in_op
 169         self.out_op = Signal(width)
 170         self.out_decode = Signal(reset_less=True)
 171
 172     def setup(self, m, in_op):
 173         """ links module to inputs and outputs
 174         """
 175         setattr(m.submodules, self.state_from, self.mod)
 176         m.d.comb += self.mod.in_op.eq(in_op)
 177         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 178
 179     def action(self, m):
 180         with m.If(self.out_decode):
 181             m.next = self.out_state
 182             m.d.sync += [
 183                 self.in_op.ack.eq(0),
 184                 self.out_op.eq(self.mod.out_op)
 185             ]
 186         with m.Else():
 187             m.d.sync += self.in_op.ack.eq(1)
 188
 189
 190 class FPNumBase2Ops:
 191
 192     def __init__(self, width, id_wid, m_extra=True):
 193         self.a = FPNumBase(width, m_extra)
 194         self.b = FPNumBase(width, m_extra)
 195         self.mid = Signal(id_wid, reset_less=True)
 196
 197     def eq(self, i):
 198         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 199
 200     def ports(self):
 201         return [self.a, self.b, self.mid]
 202
 203
 204 class FPADDBaseData:
 205
 206     def __init__(self, width, id_wid):
 207         self.width = width
 208         self.id_wid = id_wid
 209         self.a  = Signal(width)
 210         self.b  = Signal(width)
 211         self.mid = Signal(id_wid, reset_less=True)
 212
 213     def eq(self, i):
 214         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 215
 216     def ports(self):
 217         return [self.a, self.b, self.mid]
 218
 219
 220 class FPGet2OpMod(Trigger):
 221     def __init__(self, width, id_wid):
 222         Trigger.__init__(self)
 223         self.width = width
 224         self.id_wid = id_wid
 225         self.i = self.ispec()
 226         self.o = self.ospec()
 227
 228     def ispec(self):
 229         return FPADDBaseData(self.width, self.id_wid)
 230
 231     def ospec(self):
 232         return FPADDBaseData(self.width, self.id_wid)
 233
 234     def process(self, i):
 235         return self.o
 236
 237     def elaborate(self, platform):
 238         m = Trigger.elaborate(self, platform)
 239         with m.If(self.trigger):
 240             m.d.comb += [
 241                 self.o.eq(self.i),
 242             ]
 243         return m
 244
 245
 246 class FPGet2Op(FPState):
 247     """ gets operands
 248     """
 249
 250     def __init__(self, in_state, out_state, width, id_wid):
 251         FPState.__init__(self, in_state)
 252         self.out_state = out_state
 253         self.mod = FPGet2OpMod(width, id_wid)
 254         self.o = self.mod.ospec()
 255         self.in_stb = Signal(reset_less=True)
 256         self.out_ack = Signal(reset_less=True)
 257         self.out_decode = Signal(reset_less=True)
 258
 259     def setup(self, m, i, in_stb, in_ack):
 260         """ links module to inputs and outputs
 261         """
 262         m.submodules.get_ops = self.mod
 263         m.d.comb += self.mod.i.eq(i)
 264         m.d.comb += self.mod.stb.eq(in_stb)
 265         m.d.comb += self.out_ack.eq(self.mod.ack)
 266         m.d.comb += self.out_decode.eq(self.mod.trigger)
 267         m.d.comb += in_ack.eq(self.mod.ack)
 268
 269     def action(self, m):
 270         with m.If(self.out_decode):
 271             m.next = self.out_state
 272             m.d.sync += [
 273                 self.mod.ack.eq(0),
 274                 self.o.eq(self.mod.o),
 275             ]
 276         with m.Else():
 277             m.d.sync += self.mod.ack.eq(1)
 278
 279
 280 class FPSCData:
 281
 282     def __init__(self, width, id_wid):
 283         self.a = FPNumBase(width, True)
 284         self.b = FPNumBase(width, True)
 285         self.z = FPNumOut(width, False)
 286         self.oz = Signal(width, reset_less=True)
 287         self.out_do_z = Signal(reset_less=True)
 288         self.mid = Signal(id_wid, reset_less=True)
 289
 290     def eq(self, i):
 291         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 292                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 293
 294
 295 class FPAddSpecialCasesMod:
 296     """ special cases: NaNs, infs, zeros, denormalised
 297         NOTE: some of these are unique to add.  see "Special Operations"
 298         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 299     """
 300
 301     def __init__(self, width, id_wid):
 302         self.width = width
 303         self.id_wid = id_wid
 304         self.i = self.ispec()
 305         self.o = self.ospec()
 306
 307     def ispec(self):
 308         return FPADDBaseData(self.width, self.id_wid)
 309
 310     def ospec(self):
 311         return FPSCData(self.width, self.id_wid)
 312
 313     def setup(self, m, i):
 314         """ links module to inputs and outputs
 315         """
 316         m.submodules.specialcases = self
 317         m.d.comb += self.i.eq(i)
 318
 319     def process(self, i):
 320         return self.o
 321
 322     def elaborate(self, platform):
 323         m = Module()
 324
 325         m.submodules.sc_out_z = self.o.z
 326
 327         # decode: XXX really should move to separate stage
 328         a1 = FPNumIn(None, self.width)
 329         b1 = FPNumIn(None, self.width)
 330         m.submodules.sc_decode_a = a1
 331         m.submodules.sc_decode_b = b1
 332         m.d.comb += [a1.decode(self.i.a),
 333                      b1.decode(self.i.b),
 334                     ]
 335
 336         s_nomatch = Signal()
 337         m.d.comb += s_nomatch.eq(a1.s != b1.s)
 338
 339         m_match = Signal()
 340         m.d.comb += m_match.eq(a1.m == b1.m)
 341
 342         # if a is NaN or b is NaN return NaN
 343         with m.If(a1.is_nan | b1.is_nan):
 344             m.d.comb += self.o.out_do_z.eq(1)
 345             m.d.comb += self.o.z.nan(0)
 346
 347         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 348         # under review
 349
 350         ## if a is zero and b is NaN return -b
 351         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 352         #    m.d.comb += self.o.out_do_z.eq(1)
 353         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 354
 355         ## if b is zero and a is NaN return -a
 356         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 357         #    m.d.comb += self.o.out_do_z.eq(1)
 358         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 359
 360         ## if a is -zero and b is NaN return -b
 361         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 362         #    m.d.comb += self.o.out_do_z.eq(1)
 363         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 364
 365         ## if b is -zero and a is NaN return -a
 366         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 367         #    m.d.comb += self.o.out_do_z.eq(1)
 368         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 369
 370         # if a is inf return inf (or NaN)
 371         with m.Elif(a1.is_inf):
 372             m.d.comb += self.o.out_do_z.eq(1)
 373             m.d.comb += self.o.z.inf(a1.s)
 374             # if a is inf and signs don't match return NaN
 375             with m.If(b1.exp_128 & s_nomatch):
 376                 m.d.comb += self.o.z.nan(0)
 377
 378         # if b is inf return inf
 379         with m.Elif(b1.is_inf):
 380             m.d.comb += self.o.out_do_z.eq(1)
 381             m.d.comb += self.o.z.inf(b1.s)
 382
 383         # if a is zero and b zero return signed-a/b
 384         with m.Elif(a1.is_zero & b1.is_zero):
 385             m.d.comb += self.o.out_do_z.eq(1)
 386             m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
 387
 388         # if a is zero return b
 389         with m.Elif(a1.is_zero):
 390             m.d.comb += self.o.out_do_z.eq(1)
 391             m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
 392
 393         # if b is zero return a
 394         with m.Elif(b1.is_zero):
 395             m.d.comb += self.o.out_do_z.eq(1)
 396             m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
 397
 398         # if a equal to -b return zero (+ve zero)
 399         with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
 400             m.d.comb += self.o.out_do_z.eq(1)
 401             m.d.comb += self.o.z.zero(0)
 402
 403         # Denormalised Number checks next, so pass a/b data through
 404         with m.Else():
 405             m.d.comb += self.o.out_do_z.eq(0)
 406             m.d.comb += self.o.a.eq(a1)
 407             m.d.comb += self.o.b.eq(b1)
 408
 409         m.d.comb += self.o.oz.eq(self.o.z.v)
 410         m.d.comb += self.o.mid.eq(self.i.mid)
 411
 412         return m
 413
 414
 415 class FPID:
 416     def __init__(self, id_wid):
 417         self.id_wid = id_wid
 418         if self.id_wid:
 419             self.in_mid = Signal(id_wid, reset_less=True)
 420             self.out_mid = Signal(id_wid, reset_less=True)
 421         else:
 422             self.in_mid = None
 423             self.out_mid = None
 424
 425     def idsync(self, m):
 426         if self.id_wid is not None:
 427             m.d.sync += self.out_mid.eq(self.in_mid)
 428
 429
 430 class FPAddSpecialCases(FPState):
 431     """ special cases: NaNs, infs, zeros, denormalised
 432         NOTE: some of these are unique to add.  see "Special Operations"
 433         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 434     """
 435
 436     def __init__(self, width, id_wid):
 437         FPState.__init__(self, "special_cases")
 438         self.mod = FPAddSpecialCasesMod(width)
 439         self.out_z = self.mod.ospec()
 440         self.out_do_z = Signal(reset_less=True)
 441
 442     def setup(self, m, i):
 443         """ links module to inputs and outputs
 444         """
 445         self.mod.setup(m, i, self.out_do_z)
 446         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 447         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 448
 449     def action(self, m):
 450         self.idsync(m)
 451         with m.If(self.out_do_z):
 452             m.next = "put_z"
 453         with m.Else():
 454             m.next = "denormalise"
 455
 456
 457 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 458     """ special cases: NaNs, infs, zeros, denormalised
 459         NOTE: some of these are unique to add.  see "Special Operations"
 460         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 461     """
 462
 463     def __init__(self, width, id_wid):
 464         FPState.__init__(self, "special_cases")
 465         self.smod = FPAddSpecialCasesMod(width, id_wid)
 466         self.dmod = FPAddDeNormMod(width, id_wid)
 467         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 468         self.o = self.ospec()
 469
 470     def ispec(self):
 471         return self.smod.ispec()
 472
 473     def ospec(self):
 474         return self.dmod.ospec()
 475
 476     def setup(self, m, i):
 477         """ links module to inputs and outputs
 478         """
 479         # these only needed for break-out (early-out)
 480         # out_z = self.smod.ospec()
 481         # out_do_z = Signal(reset_less=True)
 482         self.smod.setup(m, i)
 483         self.dmod.setup(m, self.smod.o)
 484         #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
 485
 486         # out_do_z=True, only needed for early-out (split pipeline)
 487         #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
 488         #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
 489
 490         # out_do_z=False
 491         m.d.comb += self.o.eq(self.dmod.o)
 492
 493     def process(self, i):
 494         return self.o
 495
 496     def action(self, m):
 497         #with m.If(self.out_do_z):
 498         #    m.next = "put_z"
 499         #with m.Else():
 500             m.next = "align"
 501
 502
 503 class FPAddDeNormMod(FPState):
 504
 505     def __init__(self, width, id_wid):
 506         self.width = width
 507         self.id_wid = id_wid
 508         self.i = self.ispec()
 509         self.o = self.ospec()
 510
 511     def ispec(self):
 512         return FPSCData(self.width, self.id_wid)
 513
 514     def ospec(self):
 515         return FPSCData(self.width, self.id_wid)
 516
 517     def setup(self, m, i):
 518         """ links module to inputs and outputs
 519         """
 520         m.submodules.denormalise = self
 521         m.d.comb += self.i.eq(i)
 522
 523     def elaborate(self, platform):
 524         m = Module()
 525         m.submodules.denorm_in_a = self.i.a
 526         m.submodules.denorm_in_b = self.i.b
 527         m.submodules.denorm_out_a = self.o.a
 528         m.submodules.denorm_out_b = self.o.b
 529
 530         with m.If(~self.i.out_do_z):
 531             # XXX hmmm, don't like repeating identical code
 532             m.d.comb += self.o.a.eq(self.i.a)
 533             with m.If(self.i.a.exp_n127):
 534                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 535             with m.Else():
 536                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 537
 538             m.d.comb += self.o.b.eq(self.i.b)
 539             with m.If(self.i.b.exp_n127):
 540                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 541             with m.Else():
 542                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 543
 544         m.d.comb += self.o.mid.eq(self.i.mid)
 545         m.d.comb += self.o.z.eq(self.i.z)
 546         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 547         m.d.comb += self.o.oz.eq(self.i.oz)
 548
 549         return m
 550
 551
 552 class FPAddDeNorm(FPState):
 553
 554     def __init__(self, width, id_wid):
 555         FPState.__init__(self, "denormalise")
 556         self.mod = FPAddDeNormMod(width)
 557         self.out_a = FPNumBase(width)
 558         self.out_b = FPNumBase(width)
 559
 560     def setup(self, m, i):
 561         """ links module to inputs and outputs
 562         """
 563         self.mod.setup(m, i)
 564
 565         m.d.sync += self.out_a.eq(self.mod.out_a)
 566         m.d.sync += self.out_b.eq(self.mod.out_b)
 567
 568     def action(self, m):
 569         # Denormalised Number checks
 570         m.next = "align"
 571
 572
 573 class FPAddAlignMultiMod(FPState):
 574
 575     def __init__(self, width):
 576         self.in_a = FPNumBase(width)
 577         self.in_b = FPNumBase(width)
 578         self.out_a = FPNumIn(None, width)
 579         self.out_b = FPNumIn(None, width)
 580         self.exp_eq = Signal(reset_less=True)
 581
 582     def elaborate(self, platform):
 583         # This one however (single-cycle) will do the shift
 584         # in one go.
 585
 586         m = Module()
 587
 588         m.submodules.align_in_a = self.in_a
 589         m.submodules.align_in_b = self.in_b
 590         m.submodules.align_out_a = self.out_a
 591         m.submodules.align_out_b = self.out_b
 592
 593         # NOTE: this does *not* do single-cycle multi-shifting,
 594         #       it *STAYS* in the align state until exponents match
 595
 596         # exponent of a greater than b: shift b down
 597         m.d.comb += self.exp_eq.eq(0)
 598         m.d.comb += self.out_a.eq(self.in_a)
 599         m.d.comb += self.out_b.eq(self.in_b)
 600         agtb = Signal(reset_less=True)
 601         altb = Signal(reset_less=True)
 602         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 603         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 604         with m.If(agtb):
 605             m.d.comb += self.out_b.shift_down(self.in_b)
 606         # exponent of b greater than a: shift a down
 607         with m.Elif(altb):
 608             m.d.comb += self.out_a.shift_down(self.in_a)
 609         # exponents equal: move to next stage.
 610         with m.Else():
 611             m.d.comb += self.exp_eq.eq(1)
 612         return m
 613
 614
 615 class FPAddAlignMulti(FPState):
 616
 617     def __init__(self, width, id_wid):
 618         FPState.__init__(self, "align")
 619         self.mod = FPAddAlignMultiMod(width)
 620         self.out_a = FPNumIn(None, width)
 621         self.out_b = FPNumIn(None, width)
 622         self.exp_eq = Signal(reset_less=True)
 623
 624     def setup(self, m, in_a, in_b):
 625         """ links module to inputs and outputs
 626         """
 627         m.submodules.align = self.mod
 628         m.d.comb += self.mod.in_a.eq(in_a)
 629         m.d.comb += self.mod.in_b.eq(in_b)
 630         #m.d.comb += self.out_a.eq(self.mod.out_a)
 631         #m.d.comb += self.out_b.eq(self.mod.out_b)
 632         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 633         m.d.sync += self.out_a.eq(self.mod.out_a)
 634         m.d.sync += self.out_b.eq(self.mod.out_b)
 635
 636     def action(self, m):
 637         with m.If(self.exp_eq):
 638             m.next = "add_0"
 639
 640
 641 class FPNumIn2Ops:
 642
 643     def __init__(self, width, id_wid):
 644         self.a = FPNumIn(None, width)
 645         self.b = FPNumIn(None, width)
 646         self.z = FPNumOut(width, False)
 647         self.out_do_z = Signal(reset_less=True)
 648         self.oz = Signal(width, reset_less=True)
 649         self.mid = Signal(id_wid, reset_less=True)
 650
 651     def eq(self, i):
 652         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 653                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 654
 655
 656 class FPAddAlignSingleMod:
 657
 658     def __init__(self, width, id_wid):
 659         self.width = width
 660         self.id_wid = id_wid
 661         self.i = self.ispec()
 662         self.o = self.ospec()
 663
 664     def ispec(self):
 665         return FPSCData(self.width, self.id_wid)
 666
 667     def ospec(self):
 668         return FPNumIn2Ops(self.width, self.id_wid)
 669
 670     def process(self, i):
 671         return self.o
 672
 673     def setup(self, m, i):
 674         """ links module to inputs and outputs
 675         """
 676         m.submodules.align = self
 677         m.d.comb += self.i.eq(i)
 678
 679     def elaborate(self, platform):
 680         """ Aligns A against B or B against A, depending on which has the
 681             greater exponent.  This is done in a *single* cycle using
 682             variable-width bit-shift
 683
 684             the shifter used here is quite expensive in terms of gates.
 685             Mux A or B in (and out) into temporaries, as only one of them
 686             needs to be aligned against the other
 687         """
 688         m = Module()
 689
 690         m.submodules.align_in_a = self.i.a
 691         m.submodules.align_in_b = self.i.b
 692         m.submodules.align_out_a = self.o.a
 693         m.submodules.align_out_b = self.o.b
 694
 695         # temporary (muxed) input and output to be shifted
 696         t_inp = FPNumBase(self.width)
 697         t_out = FPNumIn(None, self.width)
 698         espec = (len(self.i.a.e), True)
 699         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 700         m.submodules.align_t_in = t_inp
 701         m.submodules.align_t_out = t_out
 702         m.submodules.multishift_r = msr
 703
 704         ediff = Signal(espec, reset_less=True)
 705         ediffr = Signal(espec, reset_less=True)
 706         tdiff = Signal(espec, reset_less=True)
 707         elz = Signal(reset_less=True)
 708         egz = Signal(reset_less=True)
 709
 710         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 711         m.d.comb += msr.inp.eq(t_inp.m)
 712         m.d.comb += msr.diff.eq(tdiff)
 713         m.d.comb += t_out.m.eq(msr.m)
 714         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 715         m.d.comb += t_out.s.eq(t_inp.s)
 716
 717         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 718         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 719         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 720         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 721
 722         # default: A-exp == B-exp, A and B untouched (fall through)
 723         m.d.comb += self.o.a.eq(self.i.a)
 724         m.d.comb += self.o.b.eq(self.i.b)
 725         # only one shifter (muxed)
 726         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 727         # exponent of a greater than b: shift b down
 728         with m.If(~self.i.out_do_z):
 729             with m.If(egz):
 730                 m.d.comb += [t_inp.eq(self.i.b),
 731                              tdiff.eq(ediff),
 732                              self.o.b.eq(t_out),
 733                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 734                             ]
 735             # exponent of b greater than a: shift a down
 736             with m.Elif(elz):
 737                 m.d.comb += [t_inp.eq(self.i.a),
 738                              tdiff.eq(ediffr),
 739                              self.o.a.eq(t_out),
 740                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 741                             ]
 742
 743         m.d.comb += self.o.mid.eq(self.i.mid)
 744         m.d.comb += self.o.z.eq(self.i.z)
 745         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 746         m.d.comb += self.o.oz.eq(self.i.oz)
 747
 748         return m
 749
 750
 751 class FPAddAlignSingle(FPState):
 752
 753     def __init__(self, width, id_wid):
 754         FPState.__init__(self, "align")
 755         self.mod = FPAddAlignSingleMod(width, id_wid)
 756         self.out_a = FPNumIn(None, width)
 757         self.out_b = FPNumIn(None, width)
 758
 759     def setup(self, m, i):
 760         """ links module to inputs and outputs
 761         """
 762         self.mod.setup(m, i)
 763
 764         # NOTE: could be done as comb
 765         m.d.sync += self.out_a.eq(self.mod.out_a)
 766         m.d.sync += self.out_b.eq(self.mod.out_b)
 767
 768     def action(self, m):
 769         m.next = "add_0"
 770
 771
 772 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 773
 774     def __init__(self, width, id_wid):
 775         FPState.__init__(self, "align")
 776         self.width = width
 777         self.id_wid = id_wid
 778         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 779         self.a1o = self.ospec()
 780
 781     def ispec(self):
 782         return FPSCData(self.width, self.id_wid)
 783         #return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
 784
 785     def ospec(self):
 786         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 787
 788     def setup(self, m, i):
 789         """ links module to inputs and outputs
 790         """
 791
 792         # chain AddAlignSingle, AddStage0 and AddStage1
 793         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 794         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 795         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 796
 797         chain = StageChain([mod, a0mod, a1mod])
 798         chain.setup(m, i)
 799
 800         m.d.comb += self.a1o.eq(a1mod.o)
 801
 802     def process(self, i):
 803         return self.a1o
 804
 805     def action(self, m):
 806         m.next = "normalise_1"
 807
 808
 809 class FPAddStage0Data:
 810
 811     def __init__(self, width, id_wid):
 812         self.z = FPNumBase(width, False)
 813         self.out_do_z = Signal(reset_less=True)
 814         self.oz = Signal(width, reset_less=True)
 815         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 816         self.mid = Signal(id_wid, reset_less=True)
 817
 818     def eq(self, i):
 819         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 820                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 821
 822
 823 class FPAddStage0Mod:
 824
 825     def __init__(self, width, id_wid):
 826         self.width = width
 827         self.id_wid = id_wid
 828         self.i = self.ispec()
 829         self.o = self.ospec()
 830
 831     def ispec(self):
 832         return FPSCData(self.width, self.id_wid)
 833
 834     def ospec(self):
 835         return FPAddStage0Data(self.width, self.id_wid)
 836
 837     def process(self, i):
 838         return self.o
 839
 840     def setup(self, m, i):
 841         """ links module to inputs and outputs
 842         """
 843         m.submodules.add0 = self
 844         m.d.comb += self.i.eq(i)
 845
    def elaborate(self, platform):
        """ builds the combinatorial logic of the first add stage.

            unless i.out_do_z is set, z.e is copied from a.e and tot/z.s
            are derived from the two mantissas: summed when the signs
            match, otherwise the smaller is subtracted from the larger.
            oz, out_do_z and mid are always copied from input to output.
        """
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs of a and b are equal
        mge = Signal(reset_less=True)  # a mantissa >= b mantissa
        # am0/bm0 are one bit wider than the input mantissas
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
                     mge.eq(self.i.a.m >= self.i.b.m),
                     am0.eq(Cat(self.i.a.m, 0)),
                     bm0.eq(Cat(self.i.b.m, 0))
                    ]
        # everything in this block is skipped when out_do_z is set
        with m.If(~self.i.out_do_z):
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [
                    self.o.tot.eq(am0 + bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # a mantissa greater than or equal to b: a - b, use a's sign
            with m.Elif(mge):
                m.d.comb += [
                    self.o.tot.eq(am0 - bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # b mantissa greater than a: b - a, use b's sign
            with m.Else():
                m.d.comb += [
                    self.o.tot.eq(bm0 - am0),
                    self.o.z.s.eq(self.i.b.s)
            ]

        # unconditional pass-through of the bypass value, flag and id
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        return m
 887
 888
 889 class FPAddStage0(FPState):
 890     """ First stage of add.  covers same-sign (add) and subtract
 891         special-casing when mantissas are greater or equal, to
 892         give greatest accuracy.
 893     """
 894
 895     def __init__(self, width, id_wid):
 896         FPState.__init__(self, "add_0")
 897         self.mod = FPAddStage0Mod(width)
 898         self.o = self.mod.ospec()
 899
 900     def setup(self, m, i):
 901         """ links module to inputs and outputs
 902         """
 903         self.mod.setup(m, i)
 904
 905         # NOTE: these could be done as combinatorial (merge add0+add1)
 906         m.d.sync += self.o.eq(self.mod.o)
 907
 908     def action(self, m):
 909         m.next = "add_1"
 910
 911
 912 class FPAddStage1Data:
 913
 914     def __init__(self, width, id_wid):
 915         self.z = FPNumBase(width, False)
 916         self.out_do_z = Signal(reset_less=True)
 917         self.oz = Signal(width, reset_less=True)
 918         self.of = Overflow()
 919         self.mid = Signal(id_wid, reset_less=True)
 920
 921     def eq(self, i):
 922         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 923                 self.of.eq(i.of), self.mid.eq(i.mid)]
 924
 925
 926
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)

        the code tests the top bit of tot (tot[-1]) and splits tot into
        the output mantissa plus the m0/guard/round/sticky overflow bits.
    """

    def __init__(self, width, id_wid):
        """ * width: width handed to the input/output records
            * id_wid: width of the id field in those records
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: a new FPAddStage0Data
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output record: a new FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; the argument is not used
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        # the output overflow object is registered on the parent module m
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial logic: z is copied from the input,
            then (unless out_do_z is set) z.m, z.e and the overflow bits
            are overridden from tot.
        """
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        # default: pass z through (the assignments below come later and
        # are intended to override parts of it)
        m.d.comb += self.o.z.eq(self.i.z)
        # tot[-1] (MSB) gets set when the sum overflows. shift result down
        with m.If(~self.i.out_do_z):
            with m.If(self.i.tot[-1]):
                # mantissa taken from bit 4 upwards; bits 0 and 1 are
                # OR-ed into sticky, and the exponent is incremented
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[4:]),
                    self.o.of.m0.eq(self.i.tot[4]),
                    self.o.of.guard.eq(self.i.tot[3]),
                    self.o.of.round_bit.eq(self.i.tot[2]),
                    self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
                    self.o.z.e.eq(self.i.z.e + 1)
            ]
            # tot[-1] (MSB) zero case
            with m.Else():
                # mantissa taken from bit 3 upwards; exponent left alone
                m.d.comb += [
                    self.o.z.m.eq(self.i.tot[3:]),
                    self.o.of.m0.eq(self.i.tot[3]),
                    self.o.of.guard.eq(self.i.tot[2]),
                    self.o.of.round_bit.eq(self.i.tot[1]),
                    self.o.of.sticky.eq(self.i.tot[0])
            ]

        # unconditional pass-through of the bypass flag, value and id
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
 988
 989
 990 class FPAddStage1(FPState):
 991
 992     def __init__(self, width, id_wid):
 993         FPState.__init__(self, "add_1")
 994         self.mod = FPAddStage1Mod(width)
 995         self.out_z = FPNumBase(width, False)
 996         self.out_of = Overflow()
 997         self.norm_stb = Signal()
 998
 999     def setup(self, m, i):
1000         """ links module to inputs and outputs
1001         """
1002         self.mod.setup(m, i)
1003
1004         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
1005
1006         m.d.sync += self.out_of.eq(self.mod.out_of)
1007         m.d.sync += self.out_z.eq(self.mod.out_z)
1008         m.d.sync += self.norm_stb.eq(1)
1009
1010     def action(self, m):
1011         m.next = "normalise_1"
1012
1013
class FPNormaliseModSingle:
    """ normalises a single number by shifting the mantissa up by its
        count of leading zeros and reducing the exponent to match.

        NOTE(review): as written, setup() and elaborate() read attributes
        (self.i, self.in_of, self.out_of) that __init__ never assigns -
        this class looks unfinished; confirm before use.
    """

    def __init__(self, width):
        """ * width: width handed to FPNumBase for in_z and out_z
        """
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: a new FPNumBase
        """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output format: a new FPNumBase
        """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # NOTE(review): __init__ stores the input as self.in_z; there is
        # no self.i on this class
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the normalisation logic: out is initialised from in,
            then z.e and z.m are overridden when the mantissa MSB is zero
        """
        m = Module()

        # mantissa plus the round and guard bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): ediff_n126 and msr are created (and msr attached
        # as a submodule) but nothing below connects to them
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        # NOTE(review): self.in_of is not assigned anywhere in this class
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        # NOTE(review): self.out_of is not assigned anywhere in this class
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1080
class FPNorm1Data:
    """ record produced by the normalisation module: number z, the
        round-up flag (roundz), bypass flag/value and id
    """

    def __init__(self, width, id_wid):
        """ * width: width passed to FPNumBase and used for oz
            * id_wid: width of the mid signal
        """
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into the matching field of this record
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1093
1094
class FPNorm1ModSingle:
    """ single-pass normalisation module.

        takes an FPAddStage1Data record and produces an FPNorm1Data
        record: the mantissa is shifted up (exponent decreased) or
        shifted down through a multi-shifter (exponent increased), and
        the round-up flag is taken from the resulting overflow bits.
    """

    def __init__(self, width, id_wid):
        """ * width: width handed to the input/output records
            * id_wid: width of the id field in those records
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: a new FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record: a new FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record; the argument is not used
        """
        return self.o

    def elaborate(self, platform):
        """ creates the normalisation logic.  outputs are initialised
            from the inputs and then overridden in the decrease/increase
            branches, which are skipped when out_do_z is set.
        """
        m = Module()

        # mantissa plus the round and guard bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits must come from the shifter output:
                    # temp_s is only driven in the decrease branch above,
                    # so reading it here never saw the shifted value
                    of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too
                    of.guard.eq(msr.m[2]),     # guard
                    of.round_bit.eq(msr.m[1]), # round
                    of.sticky.eq(msr.m[0]),    # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # unconditional pass-through of the id, bypass flag and value
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1201
1202
class FPNorm1ModMulti:
    """ one-bit-per-step normalisation module.

        each evaluation shifts the selected input (in_z/in_of when
        in_select is set, otherwise temp_z/temp_of) by at most one bit
        and adjusts the exponent by one; out_norm reports whether a
        shift was applied.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: width handed to FPNumBase
            * single_cycle: accepted but not used by this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven in elaborate(): previously never created, which made
        # elaborate() fail on the missing attribute
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ creates the select-then-shift-by-one logic
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1268
1269
class FPNorm1Single(FPState):
    """ FSM wrapper (state "normalise_1") around FPNorm1ModSingle
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width passed to FPNorm1ModSingle and FPNumBase
            * id_wid: width of the id field, passed to FPNorm1ModSingle
            * single_cycle: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle.__init__ requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input record format, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action for this state: move on to "round"
        """
        m.next = "round"
1292
1293
class FPNorm1Multi(FPState):
    """ FSM wrapper (state "normalise_1") around FPNorm1ModMulti: stays
        in this state while out_norm is set, then moves on to "round".

        NOTE(review): setup() calls self.mod.setup(...), but the
        FPNorm1ModMulti class in this file defines no setup method -
        looks stale; confirm before use.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to FPNorm1ModMulti and FPNumBase
            * id_wid: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ FSM action: latches the module's outputs into temp_z/temp_of
            and drives the ack handshake; leaves for "round" once
            out_norm is clear.
        """
        # accept new input only when strobed and not already acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1334
1335
class FPNormToPack(FPState, UnbufferedPipeline):
    """ normalise, round, corrections and pack chained together as one
        stage (FSM state "normalise_1"); the object is also used as the
        stage of its own UnbufferedPipeline.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to every module in the chain
            * id_wid: width of the id field passed to every module
        """
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        """ input record: same as the FPNorm1ModSingle ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record: same as the FPPackMod ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        # the packer is the final stage: expose a copy of its output
        packer = stages[-1]
        self.out_z = packer.ospec()
        m.d.comb += self.out_z.mid.eq(packer.o.mid)
        m.d.comb += self.out_z.z.eq(packer.o.z) # outputs packed result

    def process(self, i):
        """ returns the record created in setup(); argument is not used
        """
        return self.out_z

    def action(self, m):
        """ FSM action for this state: move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1371
1372
class FPRoundData:
    """ record carrying a number z together with the bypass flag/value
        (out_do_z, oz) and id (mid)
    """

    def __init__(self, width, id_wid):
        """ * width: width passed to FPNumBase and used for oz
            * id_wid: width of the mid signal
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into the matching field of this record
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1384
1385
class FPRoundMod:
    """ rounding module: adds one to the mantissa when the input's
        roundz flag is set (and bumps the exponent when the mantissa
        was all ones), unless out_do_z is set.
    """

    def __init__(self, width, id_wid):
        """ * width: width handed to the input/output records
            * id_wid: width of the id field in those records
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record: a new FPNorm1Data
        """
        spec = FPNorm1Data(self.width, self.id_wid)
        return spec

    def ospec(self):
        """ output record: a new FPRoundData
        """
        spec = FPRoundData(self.width, self.id_wid)
        return spec

    def process(self, i):
        """ returns the output record; the argument is not used
        """
        result = self.out_z
        return result

    def setup(self, m, i):
        """ registers this module as submodule "roundz" of m and copies
            i into the input record
        """
        m.submodules.roundz = self
        link = self.i.eq(i)
        m.d.comb += link

    def elaborate(self, platform):
        """ creates the rounding logic: output defaults to a copy of the
            input, with z.m / z.e overridden when rounding up
        """
        m = Module()
        src, dst = self.i, self.out_z
        m.d.comb += dst.eq(src) # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                m.d.comb += dst.z.m.eq(src.z.m + 1) # mantissa up
                with m.If(src.z.m == src.z.m1s): # all 1s
                    m.d.comb += dst.z.e.eq(src.z.e + 1) # exponent up

        return m
1417
1418
class FPRound(FPState):
    """ FSM wrapper (state "round") around FPRoundMod: registers the
        module's output record on the sync domain.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to FPRoundMod
            * id_wid: width of the id field, passed to FPRoundMod
        """
        FPState.__init__(self, "round")
        # FPRoundMod.__init__ requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined on FPState in this file;
        # presumably inherited from FPBase - confirm it exists.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod keeps its output record in out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action for this state: move on to "corrections"
        """
        m.next = "corrections"
1443
1444
class FPCorrectionsMod:
    """ corrections module: copies the input record to the output and,
        when the number is flagged as denormalised (and out_do_z is not
        set), replaces the exponent with the number's N127 value.
    """

    def __init__(self, width, id_wid):
        """ * width: width handed to the input/output records
            * id_wid: width of the id field in those records
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record: a new FPRoundData
        """
        spec = FPRoundData(self.width, self.id_wid)
        return spec

    def ospec(self):
        """ output record: a new FPRoundData
        """
        spec = FPRoundData(self.width, self.id_wid)
        return spec

    def process(self, i):
        """ returns the output record; the argument is not used
        """
        result = self.out_z
        return result

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        link = self.i.eq(i)
        m.d.comb += link

    def elaborate(self, platform):
        """ creates the correction logic: output defaults to a copy of
            the input, with z.e overridden for denormalised numbers
        """
        m = Module()
        src, dst = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        m.d.comb += dst.eq(src) # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1477
1478
class FPCorrections(FPState):
    """ FSM wrapper (state "corrections") around FPCorrectionsMod:
        registers the module's output record on the sync domain.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to FPCorrectionsMod
            * id_wid: width of the id field, passed to FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod keeps its output record in out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action for this state: move on to "pack"
        """
        m.next = "pack"
1502
1503
class FPPackData:
    """ record holding a packed result (z) and its id (mid)
    """

    def __init__(self, width, id_wid):
        """ * width: width of the z signal
            * id_wid: width of the mid signal
        """
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        fields = ("z", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]

    def ports(self):
        """ returns the record's fields as a list: z then mid
        """
        return [getattr(self, name) for name in ("z", "mid")]
1515
1516
class FPPackMod:
    """ packing module: turns the input number into the output value z,
        or passes the bypass value oz straight through when out_do_z
        is set.
    """

    def __init__(self, width, id_wid):
        """ * width: width handed to the input/output records
            * id_wid: width of the id field in those records
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: a new FPRoundData
        """
        spec = FPRoundData(self.width, self.id_wid)
        return spec

    def ospec(self):
        """ output record: a new FPPackData
        """
        spec = FPPackData(self.width, self.id_wid)
        return spec

    def process(self, i):
        """ returns the output record; the argument is not used
        """
        result = self.o
        return result

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        link = self.i.eq(in_z)
        m.d.comb += link

    def elaborate(self, platform):
        """ creates the packing logic around a temporary FPNumOut, whose
            value (v) drives the output z
        """
        m = Module()
        src = self.i
        num = src.z
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            # regular result: infinity on overflow, otherwise s/e/m
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            # bypass: use the value supplied in oz
            m.d.comb += z.v.eq(src.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1555
1556
class FPPack(FPState):
    """ FSM wrapper (state "pack") around FPPackMod: registers the
        module's packed value and id on the sync domain.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to FPPackMod
            * id_wid: width of the id field, passed to FPPackMod
        """
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # both self.out_z and the module's output (mod.o) are FPPackData
        # records, whose fields are z and mid (there is no "v" field and
        # the module has no "out_z")
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action for this state: move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1580
1581
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and waits for the
        stb/ack handshake before moving to the next state
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value copied to out_z.z.v
            * out_z: destination; its z member provides v, stb and ack
            * in_mid, out_mid: id copied alongside (skipped if in_mid
              is None)
            * to_state: state entered after the handshake; "get_ops"
              when not given
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy id and value, then handshake on stb/ack
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        dest = self.out_z.z
        m.d.sync += [
          dest.v.eq(self.in_z)
        ]
        handshake_done = dest.stb & dest.ack
        with m.If(handshake_done):
            # accepted: drop the strobe and leave this state
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # not accepted yet: keep the strobe raised
            m.d.sync += dest.stb.eq(1)
1605
1606
class FPPutZIdx(FPState):
    """ like FPPutZ, but the destination is picked out of out_zs by
        indexing with in_mid
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its v member is copied to the destination
            * out_zs: indexable collection of destinations, each
              providing v, stb and ack
            * in_mid: index into out_zs
            * to_state: state entered after the handshake; "get_ops"
              when not given
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy the value to the selected destination,
            then handshake on its stb/ack
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        dest = self.out_zs[self.in_mid]
        m.d.comb += [outz_stb.eq(dest.stb),
                     outz_ack.eq(dest.ack),
                    ]
        m.d.sync += [
          dest.v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # accepted: drop the strobe and leave this state
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # not accepted yet: keep the strobe raised
            m.d.sync += dest.stb.eq(1)
1632
class FPOpData:
    """ record holding an FPOp (z) and its id (mid)
    """

    def __init__(self, width, id_wid):
        """ * width: width passed to FPOp
            * id_wid: width of the mid signal
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        fields = ("z", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]

    def ports(self):
        """ returns the record's fields as a list: z then mid
        """
        return [getattr(self, name) for name in ("z", "mid")]
1643
1644
class FPADDBaseMod:
    """ FSM-based FP adder: collects a list of state objects and emits
        each one's action() inside an FSM state named after it.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # filled in by add_state(), consumed by get_fragment()
        self.states = []

    def ispec(self):
        """ input record: a new FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record: a new FPOpData
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it, so
            that creation and registration can be written as one call
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states (and wire the stages up) first
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            # one FSM state per registered state object, in list order
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the one-stage-per-state version of the adder.

            NOTE(review): this method reads self.in_mid, self.out_z and
            self.out_mid, none of which __init__ assigns, and it calls
            several setup() methods with argument lists that differ from
            the setup(m, i) signatures visible in this file - it looks
            stale; confirm before using compact=False.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or multi-cycle variant
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from pack, one from special-cases
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced-stage version of the adder: get ops,
            special-cases+denorm, align+add, normalise-to-pack, put z.
            each stage's setup() is given the previous stage's output.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        # final state: hand the packed value and id to the output record
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1764
1765
class FPADDBase(FPState):
    """ FSM state "fpadd" wrapping an FPADDBaseMod submodule.

        Holds the submodule (self.mod), an input Trigger (self.in_t) and
        input/output data objects (self.i, self.o) obtained from the
        submodule's ispec() / ospec().  setup() wires these to the
        submodule; action() describes what happens while the FSM is in
        this state.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        # driven from self.mod.o.z.trigger in setup()
        self.z_done = Signal(reset_less=True)
        # computed in action(): add_stb high while add_ack is low
        self.in_accept = Signal(reset_less=True)
        # registered copy of the add_stb argument given to setup()
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: whatever the wrapped module's ispec() gives
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: whatever the wrapped module's ospec() gives
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ connects this state to its submodule and registers the submodule

            * m: Module that receives the statements and the submodule
              (added as m.submodules.fpadd)
            * i: input data; copied (comb) into self.i and from there
              into self.mod.i
            * add_stb: incoming strobe; clocked (sync) into self.add_stb
            * in_mid: not used by this method; kept so that existing
              callers do not need to change
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ behaviour while the FSM is in the "fpadd" state

            * z_done low and in_accept high: set add_ack and in_t.stb
            * z_done low and in_accept low: clear add_ack and in_t.stb,
              set o.z.ack
            * z_done high: set add_ack, clear in_t.stb and make "put_z"
              the next FSM state

            All of the above assignments are in the sync domain.
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and hand over to the "put_z" state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1855
1856
class FPADDBasePipe(ControlBase):
    """ Three-stage FP add pipeline built on ControlBase.

        Stages, in order: FPAddSpecialCasesDeNorm (pipe1),
        FPAddAlignSingleAdd (pipe2) and FPNormToPack (pipe3).  They are
        chained with self.connect() at construction time; the result is
        kept in self._eqs and added to the comb domain in elaborate().
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        chain = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ returns a Module holding the three stages as named submodules
            (scnorm, addalign, normpack) plus the connection statements
        """
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for name, stage in named_stages:
            # same as "m.submodules.<name> = stage"
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1873
1874
class FPAddInPassThruStage:
    """ Pass-through stage: input and output formats are both
        FPADDBaseData, and process() returns its argument untouched.
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input format: a new FPADDBaseData(width, id_wid) """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: identical to the input format """
        return self.ispec()

    def process(self, i):
        """ no processing: the input is handed straight back """
        return i
1881
1882
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe with num_rows inputs, using
        FPAddInPassThruStage as its stage.
    """
    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        passthru = FPAddInPassThruStage(width, id_width)
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=num_rows)

    def ports(self):
        """ returns a flat list: for each entry of self.p its i_valid,
            o_ready and i_data ports, followed by self.n's i_ready,
            o_valid and o_data ports
        """
        signals = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            signals.extend([prev.i_valid, prev.o_ready])
            signals.extend(prev.i_data.ports())
        nxt = self.n
        signals.extend([nxt.i_ready, nxt.o_valid])
        signals.extend(nxt.o_data.ports())
        return signals
1897
1898
1899
1900
class FPAddOutPassThruStage:
    """ Pass-through stage: input and output formats are both
        FPPackData, and process() returns its argument untouched.
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input format: a new FPPackData(width, id_wid) """
        return FPPackData(self.width, self.id_wid)

    def ospec(self):
        """ output format: identical to the input format """
        return self.ispec()

    def process(self, i):
        """ no processing: the input is handed straight back """
        return i
1907
1908
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe with num_rows outputs, using
        FPAddOutPassThruStage as its stage.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        CombMuxOutPipe.__init__(self, passthru, n_len=num_rows)

    def ports(self):
        """ returns a flat list: self.p's i_valid, o_ready and i_data
            ports, followed by - for each entry of self.n - its i_ready,
            o_valid and o_data ports
        """
        prev = self.p
        signals = [prev.i_valid, prev.o_ready]
        signals.extend(prev.i_data.ports())
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            signals.extend([nxt.i_ready, nxt.o_valid])
            signals.extend(nxt.o_data.ports())
        return signals
1922
1923
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        Three pipes connected in sequence:

        * inpipe  (FPADDInMuxPipe):  fan-in from num_rows inputs
        * fpadd   (FPADDBasePipe):   the add stage
        * outpipe (FPADDMuxOutPipe): fan-out to num_rows outputs

        The fan-in pipe's "p" and the fan-out pipe's "n" are re-exported
        as this object's own p and n.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's in/out are simply the end pipes' in/out
        self.p = self.inpipe.p
        self.n = self.outpipe.n

        # port list is computed once, here, and returned by ports()
        in_ports = self.inpipe.ports()
        out_ports = self.outpipe.ports()
        self._ports = in_ports + out_ports

    def elaborate(self, platform):
        """ returns a Module with the three pipes as submodules (named
            inpipe, fpadd, outpipe) and the comb statements joining
            inpipe -> fpadd -> outpipe
        """
        m = Module()
        for name in ("inpipe", "fpadd", "outpipe"):
            # same as "m.submodules.<name> = self.<name>"
            setattr(m.submodules, name, getattr(self, name))

        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list built in the constructor """
        return self._ports
1952
1953
class ResArray:
    """ Holds an Array of rs_sz FPOp results plus one incoming FPOp and id.

        * self.res: Array of FPOp objects named "out_z_0", "out_z_1", ...
        * self.in_z: FPOp that setup() drives from the given in_z
        * self.in_mid: Signal (id_wid wide) that setup() drives from the
          given in_mid
    """
    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: passed to every FPOp that is created
            * id_wid: width of the in_mid Signal
            * rs_sz: number of result FPOps to create.  This used to be
              read from an undefined name; it is now a parameter whose
              default matches FPADD's rs_sz default.
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially copies in_z and in_mid into self.in_z and
            self.in_mid, adding the statements to module m
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            returns a new Module with self.in_z added as the submodule
            "res_in_z" and self.res added to the anonymous submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports() of every entry in self.res
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1985
1986
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.

        The states are collected in self.states (see add_state) and
        turned into one FSM by get_fragment().
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: how many (in_a, in_b) operand pairs go into self.rs
              and how many out_z results go into self.res
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs: all of these are created before any result
        operand_rows = []
        for row in range(rs_sz):
            op_a = FPOp(width)
            op_b = FPOp(width)
            op_a.name = "in_a_%d" % row
            op_b.name = "in_b_%d" % row
            operand_rows.append((op_a, op_b))
        self.rs = Array(operand_rows)

        # results, one per row
        result_rows = []
        for row in range(rs_sz):
            z = FPOp(width)
            z.name = "out_z_%d" % row
            result_rows.append(z)
        self.res = Array(result_rows)

        self.states = []

    def add_state(self, state):
        """ appends state to self.states and returns it, so that creation
            and registration can be written as one expression
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            Only the first operand pair, self.rs[0], is connected.
            States are registered in the order get_a, get_b, fpadd, put_z
            and then each one's action() is emitted inside the FSM state
            named by its state_from.
        """
        m = Module()
        m.submodules += self.rs

        first_row = self.rs[0]
        op_in_a = first_row[0]
        op_in_b = first_row[1]

        get_a = self.add_state(FPGetOp("get_a", "get_b",
                                       op_in_a, self.width))
        get_a.setup(m, op_in_a)
        val_a = get_a.out_op

        get_b = self.add_state(FPGetOp("get_b", "fpadd",
                                       op_in_b, self.width))
        get_b.setup(m, op_in_b)
        val_b = get_b.out_op

        adder = self.add_state(FPADDBase(self.width, self.id_wid,
                                         self.single_cycle))
        adder_in = adder.ispec() # input spec object for FPADDBase
        m.d.sync += [adder_in.a.eq(val_a),
                     adder_in.b.eq(val_b),
                     adder_in.mid.eq(self.ids.in_mid)]
        adder.setup(m, adder_in, get_b.out_decode, self.ids.in_mid)
        adder_out = adder.o

        # output state: registered only, its return value is not needed
        self.add_state(FPPutZIdx("put_z", adder_out.z, self.res,
                                 adder_out.mid, "get_a"))

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2076
2077
if __name__ == "__main__":
    # Command-line entry point: builds a 32-bit, single-cycle FPADD with
    # a 5-bit id and hands it to the nmigen CLI main().  The exported
    # ports are the first operand pair (rs[0]), the first result (res[0])
    # and the id in/out signals.
    #
    # To generate verilog just do "python fname.py convert -t v".
    #
    # NOTE: the former alternative of running FPADDBase on its own was
    # unreachable and read attributes (in_a, in_b, out_z, in_mid,
    # out_mid) that FPADDBase does not create, so it has been removed.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=(alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid]))