src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
  13 from multipipe import CombMultiOutPipeline
  14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
  15
  16 #from fpbase import FPNumShiftMultiRight
  17
  18
  19 class FPState(FPBase):
  20     def __init__(self, state_from):
  21         self.state_from = state_from
  22
  23     def set_inputs(self, inputs):
  24         self.inputs = inputs
  25         for k,v in inputs.items():
  26             setattr(self, k, v)
  27
  28     def set_outputs(self, outputs):
  29         self.outputs = outputs
  30         for k,v in outputs.items():
  31             setattr(self, k, v)
  32
  33
  34 class FPGetSyncOpsMod:
  35     def __init__(self, width, num_ops=2):
  36         self.width = width
  37         self.num_ops = num_ops
  38         inops = []
  39         outops = []
  40         for i in range(num_ops):
  41             inops.append(Signal(width, reset_less=True))
  42             outops.append(Signal(width, reset_less=True))
  43         self.in_op = inops
  44         self.out_op = outops
  45         self.stb = Signal(num_ops)
  46         self.ack = Signal()
  47         self.ready = Signal(reset_less=True)
  48         self.out_decode = Signal(reset_less=True)
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  53         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  54         with m.If(self.out_decode):
  55             for i in range(self.num_ops):
  56                 m.d.comb += [
  57                         self.out_op[i].eq(self.in_op[i]),
  58                 ]
  59         return m
  60
  61     def ports(self):
  62         return self.in_op + self.out_op + [self.stb, self.ack]
  63
  64
  65 class FPOps(Trigger):
  66     def __init__(self, width, num_ops):
  67         Trigger.__init__(self)
  68         self.width = width
  69         self.num_ops = num_ops
  70
  71         res = []
  72         for i in range(num_ops):
  73             res.append(Signal(width))
  74         self.v  = Array(res)
  75
  76     def ports(self):
  77         res = []
  78         for i in range(self.num_ops):
  79             res.append(self.v[i])
  80         res.append(self.ack)
  81         res.append(self.stb)
  82         return res
  83
  84
  85 class InputGroup:
  86     def __init__(self, width, num_ops=2, num_rows=4):
  87         self.width = width
  88         self.num_ops = num_ops
  89         self.num_rows = num_rows
  90         self.mmax = int(log(self.num_rows) / log(2))
  91         self.rs = []
  92         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  93         for i in range(num_rows):
  94             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  95         self.rs = Array(self.rs)
  96
  97         self.out_op = FPOps(width, num_ops)
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101
 102         pe = PriorityEncoder(self.num_rows)
 103         m.submodules.selector = pe
 104         m.submodules.out_op = self.out_op
 105         m.submodules += self.rs
 106
 107         # connect priority encoder
 108         in_ready = []
 109         for i in range(self.num_rows):
 110             in_ready.append(self.rs[i].ready)
 111         m.d.comb += pe.i.eq(Cat(*in_ready))
 112
 113         active = Signal(reset_less=True)
 114         out_en = Signal(reset_less=True)
 115         m.d.comb += active.eq(~pe.n) # encoder active
 116         m.d.comb += out_en.eq(active & self.out_op.trigger)
 117
 118         # encoder active: ack relevant input, record MID, pass output
 119         with m.If(out_en):
 120             rs = self.rs[pe.o]
 121             m.d.sync += self.mid.eq(pe.o)
 122             m.d.sync += rs.ack.eq(0)
 123             m.d.sync += self.out_op.stb.eq(0)
 124             for j in range(self.num_ops):
 125                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 126         with m.Else():
 127             m.d.sync += self.out_op.stb.eq(1)
 128             # acks all default to zero
 129             for i in range(self.num_rows):
 130                 m.d.sync += self.rs[i].ack.eq(1)
 131
 132         return m
 133
 134     def ports(self):
 135         res = []
 136         for i in range(self.num_rows):
 137             inop = self.rs[i]
 138             res += inop.in_op + [inop.stb]
 139         return self.out_op.ports() + res + [self.mid]
 140
 141
 142 class FPGetOpMod:
 143     def __init__(self, width):
 144         self.in_op = FPOp(width)
 145         self.out_op = Signal(width)
 146         self.out_decode = Signal(reset_less=True)
 147
 148     def elaborate(self, platform):
 149         m = Module()
 150         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 151         m.submodules.get_op_in = self.in_op
 152         #m.submodules.get_op_out = self.out_op
 153         with m.If(self.out_decode):
 154             m.d.comb += [
 155                 self.out_op.eq(self.in_op.v),
 156             ]
 157         return m
 158
 159
 160 class FPGetOp(FPState):
 161     """ gets operand
 162     """
 163
 164     def __init__(self, in_state, out_state, in_op, width):
 165         FPState.__init__(self, in_state)
 166         self.out_state = out_state
 167         self.mod = FPGetOpMod(width)
 168         self.in_op = in_op
 169         self.out_op = Signal(width)
 170         self.out_decode = Signal(reset_less=True)
 171
 172     def setup(self, m, in_op):
 173         """ links module to inputs and outputs
 174         """
 175         setattr(m.submodules, self.state_from, self.mod)
 176         m.d.comb += self.mod.in_op.eq(in_op)
 177         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 178
 179     def action(self, m):
 180         with m.If(self.out_decode):
 181             m.next = self.out_state
 182             m.d.sync += [
 183                 self.in_op.ack.eq(0),
 184                 self.out_op.eq(self.mod.out_op)
 185             ]
 186         with m.Else():
 187             m.d.sync += self.in_op.ack.eq(1)
 188
 189
 190 class FPNumBase2Ops:
 191
 192     def __init__(self, width, id_wid, m_extra=True):
 193         self.a = FPNumBase(width, m_extra)
 194         self.b = FPNumBase(width, m_extra)
 195         self.mid = Signal(id_wid, reset_less=True)
 196
 197     def eq(self, i):
 198         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 199
 200     def ports(self):
 201         return [self.a, self.b, self.mid]
 202
 203
 204 class FPADDBaseData:
 205
 206     def __init__(self, width, id_wid):
 207         self.width = width
 208         self.id_wid = id_wid
 209         self.a  = Signal(width)
 210         self.b  = Signal(width)
 211         self.mid = Signal(id_wid, reset_less=True)
 212
 213     def eq(self, i):
 214         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 215
 216     def ports(self):
 217         return [self.a, self.b, self.mid]
 218
 219
 220 class FPGet2OpMod(Trigger):
 221     def __init__(self, width, id_wid):
 222         Trigger.__init__(self)
 223         self.width = width
 224         self.id_wid = id_wid
 225         self.i = self.ispec()
 226         self.o = self.ospec()
 227
 228     def ispec(self):
 229         return FPADDBaseData(self.width, self.id_wid)
 230
 231     def ospec(self):
 232         return FPNumBase2Ops(self.width, self.id_wid)
 233
 234     def process(self, i):
 235         return self.o
 236
 237     def elaborate(self, platform):
 238         m = Trigger.elaborate(self, platform)
 239         m.submodules.get_op1_out = self.o.a
 240         m.submodules.get_op2_out = self.o.b
 241         out_op1 = FPNumIn(None, self.width)
 242         out_op2 = FPNumIn(None, self.width)
 243         with m.If(self.trigger):
 244             m.d.comb += [
 245                 out_op1.decode(self.i.a),
 246                 out_op2.decode(self.i.b),
 247                 self.o.a.eq(out_op1),
 248                 self.o.b.eq(out_op2),
 249                 self.o.mid.eq(self.i.mid)
 250             ]
 251         return m
 252
 253
 254 class FPGet2Op(FPState):
 255     """ gets operands
 256     """
 257
 258     def __init__(self, in_state, out_state, width, id_wid):
 259         FPState.__init__(self, in_state)
 260         self.out_state = out_state
 261         self.mod = FPGet2OpMod(width, id_wid)
 262         self.o = self.mod.ospec()
 263         self.in_stb = Signal(reset_less=True)
 264         self.out_ack = Signal(reset_less=True)
 265         self.out_decode = Signal(reset_less=True)
 266
 267     def setup(self, m, i, in_stb, in_ack):
 268         """ links module to inputs and outputs
 269         """
 270         m.submodules.get_ops = self.mod
 271         m.d.comb += self.mod.i.eq(i)
 272         m.d.comb += self.mod.stb.eq(in_stb)
 273         m.d.comb += self.out_ack.eq(self.mod.ack)
 274         m.d.comb += self.out_decode.eq(self.mod.trigger)
 275         m.d.comb += in_ack.eq(self.mod.ack)
 276
 277     def action(self, m):
 278         with m.If(self.out_decode):
 279             m.next = self.out_state
 280             m.d.sync += [
 281                 self.mod.ack.eq(0),
 282                 self.o.eq(self.mod.o),
 283             ]
 284         with m.Else():
 285             m.d.sync += self.mod.ack.eq(1)
 286
 287
 288 class FPSCData:
 289
 290     def __init__(self, width, id_wid):
 291         self.a = FPNumBase(width, True)
 292         self.b = FPNumBase(width, True)
 293         self.z = FPNumOut(width, False)
 294         self.oz = Signal(width, reset_less=True)
 295         self.out_do_z = Signal(reset_less=True)
 296         self.mid = Signal(id_wid, reset_less=True)
 297
 298     def eq(self, i):
 299         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 300                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 301
 302
 303 class FPAddSpecialCasesMod:
 304     """ special cases: NaNs, infs, zeros, denormalised
 305         NOTE: some of these are unique to add.  see "Special Operations"
 306         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 307     """
 308
 309     def __init__(self, width, id_wid):
 310         self.width = width
 311         self.id_wid = id_wid
 312         self.i = self.ispec()
 313         self.o = self.ospec()
 314
 315     def ispec(self):
 316         return FPNumBase2Ops(self.width, self.id_wid)
 317
 318     def ospec(self):
 319         return FPSCData(self.width, self.id_wid)
 320
 321     def setup(self, m, i):
 322         """ links module to inputs and outputs
 323         """
 324         m.submodules.specialcases = self
 325         m.d.comb += self.i.eq(i)
 326
 327     def process(self, i):
 328         return self.o
 329
 330     def elaborate(self, platform):
 331         m = Module()
 332
 333         m.submodules.sc_in_a = self.i.a
 334         m.submodules.sc_in_b = self.i.b
 335         m.submodules.sc_out_z = self.o.z
 336
 337         s_nomatch = Signal()
 338         m.d.comb += s_nomatch.eq(self.i.a.s != self.i.b.s)
 339
 340         m_match = Signal()
 341         m.d.comb += m_match.eq(self.i.a.m == self.i.b.m)
 342
 343         # if a is NaN or b is NaN return NaN
 344         with m.If(self.i.a.is_nan | self.i.b.is_nan):
 345             m.d.comb += self.o.out_do_z.eq(1)
 346             m.d.comb += self.o.z.nan(0)
 347
 348         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 349         # under review
 350
 351         ## if a is zero and b is NaN return -b
 352         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 353         #    m.d.comb += self.o.out_do_z.eq(1)
 354         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 355
 356         ## if b is zero and a is NaN return -a
 357         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 358         #    m.d.comb += self.o.out_do_z.eq(1)
 359         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 360
 361         ## if a is -zero and b is NaN return -b
 362         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 363         #    m.d.comb += self.o.out_do_z.eq(1)
 364         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 365
 366         ## if b is -zero and a is NaN return -a
 367         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 368         #    m.d.comb += self.o.out_do_z.eq(1)
 369         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 370
 371         # if a is inf return inf (or NaN)
 372         with m.Elif(self.i.a.is_inf):
 373             m.d.comb += self.o.out_do_z.eq(1)
 374             m.d.comb += self.o.z.inf(self.i.a.s)
 375             # if a is inf and signs don't match return NaN
 376             with m.If(self.i.b.exp_128 & s_nomatch):
 377                 m.d.comb += self.o.z.nan(0)
 378
 379         # if b is inf return inf
 380         with m.Elif(self.i.b.is_inf):
 381             m.d.comb += self.o.out_do_z.eq(1)
 382             m.d.comb += self.o.z.inf(self.i.b.s)
 383
 384         # if a is zero and b zero return signed-a/b
 385         with m.Elif(self.i.a.is_zero & self.i.b.is_zero):
 386             m.d.comb += self.o.out_do_z.eq(1)
 387             m.d.comb += self.o.z.create(self.i.a.s & self.i.b.s,
 388                                           self.i.b.e,
 389                                           self.i.b.m[3:-1])
 390
 391         # if a is zero return b
 392         with m.Elif(self.i.a.is_zero):
 393             m.d.comb += self.o.out_do_z.eq(1)
 394             m.d.comb += self.o.z.create(self.i.b.s, self.i.b.e,
 395                                       self.i.b.m[3:-1])
 396
 397         # if b is zero return a
 398         with m.Elif(self.i.b.is_zero):
 399             m.d.comb += self.o.out_do_z.eq(1)
 400             m.d.comb += self.o.z.create(self.i.a.s, self.i.a.e,
 401                                       self.i.a.m[3:-1])
 402
 403         # if a equal to -b return zero (+ve zero)
 404         with m.Elif(s_nomatch & m_match & (self.i.a.e == self.i.b.e)):
 405             m.d.comb += self.o.out_do_z.eq(1)
 406             m.d.comb += self.o.z.zero(0)
 407
 408         # Denormalised Number checks next, so pass a/b data through
 409         with m.Else():
 410             m.d.comb += self.o.out_do_z.eq(0)
 411             m.d.comb += self.o.a.eq(self.i.a)
 412             m.d.comb += self.o.b.eq(self.i.b)
 413
 414         m.d.comb += self.o.oz.eq(self.o.z.v)
 415         m.d.comb += self.o.mid.eq(self.i.mid)
 416
 417         return m
 418
 419
 420 class FPID:
 421     def __init__(self, id_wid):
 422         self.id_wid = id_wid
 423         if self.id_wid:
 424             self.in_mid = Signal(id_wid, reset_less=True)
 425             self.out_mid = Signal(id_wid, reset_less=True)
 426         else:
 427             self.in_mid = None
 428             self.out_mid = None
 429
 430     def idsync(self, m):
 431         if self.id_wid is not None:
 432             m.d.sync += self.out_mid.eq(self.in_mid)
 433
 434
 435 class FPAddSpecialCases(FPState):
 436     """ special cases: NaNs, infs, zeros, denormalised
 437         NOTE: some of these are unique to add.  see "Special Operations"
 438         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 439     """
 440
 441     def __init__(self, width, id_wid):
 442         FPState.__init__(self, "special_cases")
 443         self.mod = FPAddSpecialCasesMod(width)
 444         self.out_z = self.mod.ospec()
 445         self.out_do_z = Signal(reset_less=True)
 446
 447     def setup(self, m, i):
 448         """ links module to inputs and outputs
 449         """
 450         self.mod.setup(m, i, self.out_do_z)
 451         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 452         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 453
 454     def action(self, m):
 455         self.idsync(m)
 456         with m.If(self.out_do_z):
 457             m.next = "put_z"
 458         with m.Else():
 459             m.next = "denormalise"
 460
 461
 462 class FPAddSpecialCasesDeNorm(FPState):
 463     """ special cases: NaNs, infs, zeros, denormalised
 464         NOTE: some of these are unique to add.  see "Special Operations"
 465         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 466     """
 467
 468     def __init__(self, width, id_wid):
 469         FPState.__init__(self, "special_cases")
 470         self.smod = FPAddSpecialCasesMod(width, id_wid)
 471         self.dmod = FPAddDeNormMod(width, id_wid)
 472         self.o = self.ospec()
 473
 474     def ispec(self):
 475         return self.smod.ispec()
 476
 477     def ospec(self):
 478         return self.dmod.ospec()
 479
 480     def setup(self, m, i):
 481         """ links module to inputs and outputs
 482         """
 483         # these only needed for break-out (early-out)
 484         # out_z = self.smod.ospec()
 485         # out_do_z = Signal(reset_less=True)
 486         self.smod.setup(m, i)
 487         self.dmod.setup(m, self.smod.o)
 488         #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
 489
 490         # out_do_z=True, only needed for early-out (split pipeline)
 491         #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
 492         #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
 493
 494         # out_do_z=False
 495         m.d.sync += self.o.eq(self.dmod.o)
 496
 497     def process(self, i):
 498         return self.o
 499
 500     def action(self, m):
 501         #with m.If(self.out_do_z):
 502         #    m.next = "put_z"
 503         #with m.Else():
 504             m.next = "align"
 505
 506
 507 class FPAddDeNormMod(FPState):
 508
 509     def __init__(self, width, id_wid):
 510         self.width = width
 511         self.id_wid = id_wid
 512         self.i = self.ispec()
 513         self.o = self.ospec()
 514
 515     def ispec(self):
 516         return FPSCData(self.width, self.id_wid)
 517
 518     def ospec(self):
 519         return FPSCData(self.width, self.id_wid)
 520
 521     def setup(self, m, i):
 522         """ links module to inputs and outputs
 523         """
 524         m.submodules.denormalise = self
 525         m.d.comb += self.i.eq(i)
 526
 527     def elaborate(self, platform):
 528         m = Module()
 529         m.submodules.denorm_in_a = self.i.a
 530         m.submodules.denorm_in_b = self.i.b
 531         m.submodules.denorm_out_a = self.o.a
 532         m.submodules.denorm_out_b = self.o.b
 533
 534         with m.If(~self.i.out_do_z):
 535             # XXX hmmm, don't like repeating identical code
 536             m.d.comb += self.o.a.eq(self.i.a)
 537             with m.If(self.i.a.exp_n127):
 538                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 539             with m.Else():
 540                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 541
 542             m.d.comb += self.o.b.eq(self.i.b)
 543             with m.If(self.i.b.exp_n127):
 544                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 545             with m.Else():
 546                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 547
 548         m.d.comb += self.o.mid.eq(self.i.mid)
 549         m.d.comb += self.o.z.eq(self.i.z)
 550         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 551         m.d.comb += self.o.oz.eq(self.i.oz)
 552
 553         return m
 554
 555
 556 class FPAddDeNorm(FPState):
 557
 558     def __init__(self, width, id_wid):
 559         FPState.__init__(self, "denormalise")
 560         self.mod = FPAddDeNormMod(width)
 561         self.out_a = FPNumBase(width)
 562         self.out_b = FPNumBase(width)
 563
 564     def setup(self, m, i):
 565         """ links module to inputs and outputs
 566         """
 567         self.mod.setup(m, i)
 568
 569         m.d.sync += self.out_a.eq(self.mod.out_a)
 570         m.d.sync += self.out_b.eq(self.mod.out_b)
 571
 572     def action(self, m):
 573         # Denormalised Number checks
 574         m.next = "align"
 575
 576
 577 class FPAddAlignMultiMod(FPState):
 578
 579     def __init__(self, width):
 580         self.in_a = FPNumBase(width)
 581         self.in_b = FPNumBase(width)
 582         self.out_a = FPNumIn(None, width)
 583         self.out_b = FPNumIn(None, width)
 584         self.exp_eq = Signal(reset_less=True)
 585
 586     def elaborate(self, platform):
 587         # This one however (single-cycle) will do the shift
 588         # in one go.
 589
 590         m = Module()
 591
 592         m.submodules.align_in_a = self.in_a
 593         m.submodules.align_in_b = self.in_b
 594         m.submodules.align_out_a = self.out_a
 595         m.submodules.align_out_b = self.out_b
 596
 597         # NOTE: this does *not* do single-cycle multi-shifting,
 598         #       it *STAYS* in the align state until exponents match
 599
 600         # exponent of a greater than b: shift b down
 601         m.d.comb += self.exp_eq.eq(0)
 602         m.d.comb += self.out_a.eq(self.in_a)
 603         m.d.comb += self.out_b.eq(self.in_b)
 604         agtb = Signal(reset_less=True)
 605         altb = Signal(reset_less=True)
 606         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 607         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 608         with m.If(agtb):
 609             m.d.comb += self.out_b.shift_down(self.in_b)
 610         # exponent of b greater than a: shift a down
 611         with m.Elif(altb):
 612             m.d.comb += self.out_a.shift_down(self.in_a)
 613         # exponents equal: move to next stage.
 614         with m.Else():
 615             m.d.comb += self.exp_eq.eq(1)
 616         return m
 617
 618
 619 class FPAddAlignMulti(FPState):
 620
 621     def __init__(self, width, id_wid):
 622         FPState.__init__(self, "align")
 623         self.mod = FPAddAlignMultiMod(width)
 624         self.out_a = FPNumIn(None, width)
 625         self.out_b = FPNumIn(None, width)
 626         self.exp_eq = Signal(reset_less=True)
 627
 628     def setup(self, m, in_a, in_b):
 629         """ links module to inputs and outputs
 630         """
 631         m.submodules.align = self.mod
 632         m.d.comb += self.mod.in_a.eq(in_a)
 633         m.d.comb += self.mod.in_b.eq(in_b)
 634         #m.d.comb += self.out_a.eq(self.mod.out_a)
 635         #m.d.comb += self.out_b.eq(self.mod.out_b)
 636         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 637         m.d.sync += self.out_a.eq(self.mod.out_a)
 638         m.d.sync += self.out_b.eq(self.mod.out_b)
 639
 640     def action(self, m):
 641         with m.If(self.exp_eq):
 642             m.next = "add_0"
 643
 644
 645 class FPNumIn2Ops:
 646
 647     def __init__(self, width, id_wid):
 648         self.a = FPNumIn(None, width)
 649         self.b = FPNumIn(None, width)
 650         self.z = FPNumOut(width, False)
 651         self.out_do_z = Signal(reset_less=True)
 652         self.oz = Signal(width, reset_less=True)
 653         self.mid = Signal(id_wid, reset_less=True)
 654
 655     def eq(self, i):
 656         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 657                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 658
 659
 660 class FPAddAlignSingleMod:
 661
 662     def __init__(self, width, id_wid):
 663         self.width = width
 664         self.id_wid = id_wid
 665         self.i = self.ispec()
 666         self.o = self.ospec()
 667
 668     def ispec(self):
 669         return FPSCData(self.width, self.id_wid)
 670
 671     def ospec(self):
 672         return FPNumIn2Ops(self.width, self.id_wid)
 673
 674     def process(self, i):
 675         return self.o
 676
 677     def setup(self, m, i):
 678         """ links module to inputs and outputs
 679         """
 680         m.submodules.align = self
 681         m.d.comb += self.i.eq(i)
 682
 683     def elaborate(self, platform):
 684         """ Aligns A against B or B against A, depending on which has the
 685             greater exponent.  This is done in a *single* cycle using
 686             variable-width bit-shift
 687
 688             the shifter used here is quite expensive in terms of gates.
 689             Mux A or B in (and out) into temporaries, as only one of them
 690             needs to be aligned against the other
 691         """
 692         m = Module()
 693
 694         m.submodules.align_in_a = self.i.a
 695         m.submodules.align_in_b = self.i.b
 696         m.submodules.align_out_a = self.o.a
 697         m.submodules.align_out_b = self.o.b
 698
 699         # temporary (muxed) input and output to be shifted
 700         t_inp = FPNumBase(self.width)
 701         t_out = FPNumIn(None, self.width)
 702         espec = (len(self.i.a.e), True)
 703         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 704         m.submodules.align_t_in = t_inp
 705         m.submodules.align_t_out = t_out
 706         m.submodules.multishift_r = msr
 707
 708         ediff = Signal(espec, reset_less=True)
 709         ediffr = Signal(espec, reset_less=True)
 710         tdiff = Signal(espec, reset_less=True)
 711         elz = Signal(reset_less=True)
 712         egz = Signal(reset_less=True)
 713
 714         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 715         m.d.comb += msr.inp.eq(t_inp.m)
 716         m.d.comb += msr.diff.eq(tdiff)
 717         m.d.comb += t_out.m.eq(msr.m)
 718         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 719         m.d.comb += t_out.s.eq(t_inp.s)
 720
 721         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 722         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 723         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 724         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 725
 726         # default: A-exp == B-exp, A and B untouched (fall through)
 727         m.d.comb += self.o.a.eq(self.i.a)
 728         m.d.comb += self.o.b.eq(self.i.b)
 729         # only one shifter (muxed)
 730         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 731         # exponent of a greater than b: shift b down
 732         with m.If(~self.i.out_do_z):
 733             with m.If(egz):
 734                 m.d.comb += [t_inp.eq(self.i.b),
 735                              tdiff.eq(ediff),
 736                              self.o.b.eq(t_out),
 737                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 738                             ]
 739             # exponent of b greater than a: shift a down
 740             with m.Elif(elz):
 741                 m.d.comb += [t_inp.eq(self.i.a),
 742                              tdiff.eq(ediffr),
 743                              self.o.a.eq(t_out),
 744                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 745                             ]
 746
 747         m.d.comb += self.o.mid.eq(self.i.mid)
 748         m.d.comb += self.o.z.eq(self.i.z)
 749         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 750         m.d.comb += self.o.oz.eq(self.i.oz)
 751
 752         return m
 753
 754
 755 class FPAddAlignSingle(FPState):
 756
 757     def __init__(self, width, id_wid):
 758         FPState.__init__(self, "align")
 759         self.mod = FPAddAlignSingleMod(width, id_wid)
 760         self.out_a = FPNumIn(None, width)
 761         self.out_b = FPNumIn(None, width)
 762
 763     def setup(self, m, i):
 764         """ links module to inputs and outputs
 765         """
 766         self.mod.setup(m, i)
 767
 768         # NOTE: could be done as comb
 769         m.d.sync += self.out_a.eq(self.mod.out_a)
 770         m.d.sync += self.out_b.eq(self.mod.out_b)
 771
 772     def action(self, m):
 773         m.next = "add_0"
 774
 775
 776 class FPAddAlignSingleAdd(FPState):
 777
 778     def __init__(self, width, id_wid):
 779         FPState.__init__(self, "align")
 780         self.width = width
 781         self.id_wid = id_wid
 782         self.a1o = self.ospec()
 783
 784     def ispec(self):
 785         return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
 786
 787     def ospec(self):
 788         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 789
 790     def setup(self, m, i):
 791         """ links module to inputs and outputs
 792         """
 793
 794         # chain AddAlignSingle, AddStage0 and AddStage1
 795         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 796         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 797         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 798
 799         chain = StageChain([mod, a0mod, a1mod])
 800         chain.setup(m, i)
 801
 802         m.d.sync += self.a1o.eq(a1mod.o)
 803
 804     def process(self, i):
 805         return self.a1o
 806
 807     def action(self, m):
 808         m.next = "normalise_1"
 809
 810
 811 class FPAddStage0Data:
 812
 813     def __init__(self, width, id_wid):
 814         self.z = FPNumBase(width, False)
 815         self.out_do_z = Signal(reset_less=True)
 816         self.oz = Signal(width, reset_less=True)
 817         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 818         self.mid = Signal(id_wid, reset_less=True)
 819
 820     def eq(self, i):
 821         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 822                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 823
 824
 825 class FPAddStage0Mod:
 826
 827     def __init__(self, width, id_wid):
 828         self.width = width
 829         self.id_wid = id_wid
 830         self.i = self.ispec()
 831         self.o = self.ospec()
 832
 833     def ispec(self):
 834         return FPSCData(self.width, self.id_wid)
 835
 836     def ospec(self):
 837         return FPAddStage0Data(self.width, self.id_wid)
 838
 839     def process(self, i):
 840         return self.o
 841
 842     def setup(self, m, i):
 843         """ links module to inputs and outputs
 844         """
 845         m.submodules.add0 = self
 846         m.d.comb += self.i.eq(i)
 847
 848     def elaborate(self, platform):
 849         m = Module()
 850         m.submodules.add0_in_a = self.i.a
 851         m.submodules.add0_in_b = self.i.b
 852         m.submodules.add0_out_z = self.o.z
 853
 854         # store intermediate tests (and zero-extended mantissas)
 855         seq = Signal(reset_less=True)
 856         mge = Signal(reset_less=True)
 857         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 858         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 859         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 860                      mge.eq(self.i.a.m >= self.i.b.m),
 861                      am0.eq(Cat(self.i.a.m, 0)),
 862                      bm0.eq(Cat(self.i.b.m, 0))
 863                     ]
 864         # same-sign (both negative or both positive) add mantissas
 865         with m.If(~self.i.out_do_z):
 866             m.d.comb += self.o.z.e.eq(self.i.a.e)
 867             with m.If(seq):
 868                 m.d.comb += [
 869                     self.o.tot.eq(am0 + bm0),
 870                     self.o.z.s.eq(self.i.a.s)
 871                 ]
 872             # a mantissa greater than b, use a
 873             with m.Elif(mge):
 874                 m.d.comb += [
 875                     self.o.tot.eq(am0 - bm0),
 876                     self.o.z.s.eq(self.i.a.s)
 877                 ]
 878             # b mantissa greater than a, use b
 879             with m.Else():
 880                 m.d.comb += [
 881                     self.o.tot.eq(bm0 - am0),
 882                     self.o.z.s.eq(self.i.b.s)
 883             ]
 884
 885         m.d.comb += self.o.oz.eq(self.i.oz)
 886         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 887         m.d.comb += self.o.mid.eq(self.i.mid)
 888         return m
 889
 890
 891 class FPAddStage0(FPState):
 892     """ First stage of add.  covers same-sign (add) and subtract
 893         special-casing when mantissas are greater or equal, to
 894         give greatest accuracy.
 895     """
 896
 897     def __init__(self, width, id_wid):
 898         FPState.__init__(self, "add_0")
 899         self.mod = FPAddStage0Mod(width)
 900         self.o = self.mod.ospec()
 901
 902     def setup(self, m, i):
 903         """ links module to inputs and outputs
 904         """
 905         self.mod.setup(m, i)
 906
 907         # NOTE: these could be done as combinatorial (merge add0+add1)
 908         m.d.sync += self.o.eq(self.mod.o)
 909
 910     def action(self, m):
 911         m.next = "add_1"
 912
 913
 914 class FPAddStage1Data:
 915
 916     def __init__(self, width, id_wid):
 917         self.z = FPNumBase(width, False)
 918         self.out_do_z = Signal(reset_less=True)
 919         self.oz = Signal(width, reset_less=True)
 920         self.of = Overflow()
 921         self.mid = Signal(id_wid, reset_less=True)
 922
 923     def eq(self, i):
 924         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 925                 self.of.eq(i.of), self.mid.eq(i.mid)]
 926
 927
 928
 929 class FPAddStage1Mod(FPState):
 930     """ Second stage of add: preparation for normalisation.
 931         detects when tot sum is too big (tot[27] is kinda a carry bit)
 932     """
 933
 934     def __init__(self, width, id_wid):
 935         self.width = width
 936         self.id_wid = id_wid
 937         self.i = self.ispec()
 938         self.o = self.ospec()
 939
 940     def ispec(self):
 941         return FPAddStage0Data(self.width, self.id_wid)
 942
 943     def ospec(self):
 944         return FPAddStage1Data(self.width, self.id_wid)
 945
 946     def process(self, i):
 947         return self.o
 948
 949     def setup(self, m, i):
 950         """ links module to inputs and outputs
 951         """
 952         m.submodules.add1 = self
 953         m.submodules.add1_out_overflow = self.o.of
 954
 955         m.d.comb += self.i.eq(i)
 956
 957     def elaborate(self, platform):
 958         m = Module()
 959         #m.submodules.norm1_in_overflow = self.in_of
 960         #m.submodules.norm1_out_overflow = self.out_of
 961         #m.submodules.norm1_in_z = self.in_z
 962         #m.submodules.norm1_out_z = self.out_z
 963         m.d.comb += self.o.z.eq(self.i.z)
 964         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 965         with m.If(~self.i.out_do_z):
 966             with m.If(self.i.tot[-1]):
 967                 m.d.comb += [
 968                     self.o.z.m.eq(self.i.tot[4:]),
 969                     self.o.of.m0.eq(self.i.tot[4]),
 970                     self.o.of.guard.eq(self.i.tot[3]),
 971                     self.o.of.round_bit.eq(self.i.tot[2]),
 972                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 973                     self.o.z.e.eq(self.i.z.e + 1)
 974             ]
 975             # tot[-1] (MSB) zero case
 976             with m.Else():
 977                 m.d.comb += [
 978                     self.o.z.m.eq(self.i.tot[3:]),
 979                     self.o.of.m0.eq(self.i.tot[3]),
 980                     self.o.of.guard.eq(self.i.tot[2]),
 981                     self.o.of.round_bit.eq(self.i.tot[1]),
 982                     self.o.of.sticky.eq(self.i.tot[0])
 983             ]
 984
 985         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 986         m.d.comb += self.o.oz.eq(self.i.oz)
 987         m.d.comb += self.o.mid.eq(self.i.mid)
 988
 989         return m
 990
 991
 992 class FPAddStage1(FPState):
 993
 994     def __init__(self, width, id_wid):
 995         FPState.__init__(self, "add_1")
 996         self.mod = FPAddStage1Mod(width)
 997         self.out_z = FPNumBase(width, False)
 998         self.out_of = Overflow()
 999         self.norm_stb = Signal()
1000
1001     def setup(self, m, i):
1002         """ links module to inputs and outputs
1003         """
1004         self.mod.setup(m, i)
1005
1006         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
1007
1008         m.d.sync += self.out_of.eq(self.mod.out_of)
1009         m.d.sync += self.out_z.eq(self.mod.out_z)
1010         m.d.sync += self.norm_stb.eq(1)
1011
1012     def action(self, m):
1013         m.next = "normalise_1"
1014
1015
class FPNormaliseModSingle:
    """ Single-cycle "shift up" normaliser.

        When the mantissa MSB of in_z is zero, the leading zeros are
        counted, the mantissa (with guard and round bits appended) is
        shifted up by that count and the exponent is reduced by the same
        amount.  Otherwise in_z is copied to out_z unchanged.
    """

    def __init__(self, width):
        """ * width: passed to FPNumBase for in_z and out_z
        """
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow (guard/round/sticky) input and output: elaborate()
        # reads in_of and drives out_of, so both must exist
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input data layout (FPNumBase)
        """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output data layout (FPNumBase)
        """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input number of this module is in_z (there is no "i" here)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of
        m.submodules.norm1_out_overflow = self.out_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): the multi-shifter is instantiated but never
        # connected in this module - confirm whether it is still needed.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1082
class FPNorm1Data:
    """ Output record of normalisation: the number z, the roundz flag,
        the out_do_z / oz pair and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field of i
            into the matching field of this record
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.roundz, i.roundz),
            (self.mid, i.mid),
        )
        return [dst.eq(src) for dst, src in pairs]
1095
1096
class FPNorm1ModSingle:
    """ Single-cycle normalisation.

        * "decrease": mantissa MSB is zero and the exponent is above the
          minimum - shift the mantissa up by the number of leading zeros
          (limited by exp_sub_n126) and reduce the exponent to match.
        * "increase": exponent is below the minimum - shift the mantissa
          down through MultiShiftRMerge by (N126 - e) and raise the
          exponent by the same amount.

        The roundz flag of the resulting overflow bits is published in
        o.roundz.  Nothing is changed while i.out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data layout (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data layout (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output object (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        # working copy of the overflow bits; only roundz leaves the module
        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits must come from the shifter output:
                    # temp_s is only driven in the "decrease" branch above,
                    # so reading it here would always give zero
                    of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),      # guard
                    of.round_bit.eq(msr.m[1]),  # round
                    of.sticky.eq(msr.m[0]),     # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1203
1204
class FPNorm1ModMulti:
    """ One step of multi-cycle normalisation.

        Each evaluation moves the mantissa by a single bit position (up
        for "decrease", down for "increase") and adjusts the exponent by
        one.  out_norm is set while either condition holds.  The number
        to work on is taken from in_z/in_of when in_select is set, and
        from temp_z/temp_of otherwise.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: passed to FPNumBase for all numbers
            * single_cycle: accepted but not used by this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven in elaborate(): set while another step is required
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                # must follow the shift above: overrides bit 0 only
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1270
1271
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" (single-cycle variant): wraps
        FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: passed to FPNorm1ModSingle and FPNumBase
            * id_wid: passed to FPNorm1ModSingle
            * single_cycle: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle.__init__ requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data layout: same as the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data layout: same as the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action for this state: always move on to "round"
        """
        m.next = "round"
1294
1295
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" (multi-cycle variant): wraps
        FPNorm1ModMulti and stays in this state while out_norm is set,
        moving on to "round" once it clears.

        NOTE(review): id_wid is accepted but not used here.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as defined in this file, has no
        # setup() method, so this call presumably fails - confirm which
        # signature was intended before relying on this state.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ FSM action for this state: runs the stb/ack handshake and
            captures the module output into temp_z / temp_of each clock
        """
        # accept new input only when strobed and not already acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            # normalisation still in progress: remain in this state
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1336
1337
class FPNormToPack(FPState):
    """ FSM state "normalise_1" for the compact pipeline: normalisation,
        rounding, corrections and packing chained together, with the
        packed result registered into out_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        """ input data layout: that of FPNorm1ModSingle
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data layout: that of FPPackMod
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the four-module chain, feeds it from i and registers
            the final (packed) output
        """
        wid, idw = self.width, self.id_wid

        # created in chain order: normalise -> round -> correct -> pack
        norm = FPNorm1ModSingle(wid, idw)
        rnd = FPRoundMod(wid, idw)
        corr = FPCorrectionsMod(wid, idw)
        packer = FPPackMod(wid, idw)
        StageChain([norm, rnd, corr, packer]).setup(m, i)

        self.out_z = packer.ospec()
        m.d.sync += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z),    # the packed result
        ]

    def process(self, i):
        """ returns the registered output (the argument is not used)
        """
        return self.out_z

    def action(self, m):
        """ FSM action for this state: always move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1372
1373
class FPRoundData:
    """ Record passed between the round, corrections and pack modules:
        the number z, the out_do_z / oz pair and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field of i
            into the matching field of this record
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.mid, i.mid),
        )
        return [dst.eq(src) for dst, src in pairs]
1385
1386
class FPRoundMod:
    """ Rounding: when i.roundz is set the mantissa is incremented, and
        if the mantissa was all 1s the exponent is incremented as well.
        Nothing is changed while i.out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data layout (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data layout (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as submodule "roundz" of m and drives
            its input from i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.out_z

        # default: every field of the output record is copied from the input
        m.d.comb += dst.eq(src)

        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # round up: mantissa plus one
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                # mantissa was all 1s: exponent plus one too
                with m.If(src.z.m == src.z.m1s):
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1418
1419
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its output
        into out_z.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPRoundMod
            * id_wid: passed to FPRoundMod
        """
        FPState.__init__(self, "round")
        # FPRoundMod.__init__ requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data layout: same as the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data layout: same as the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined by FPState in this file;
        # presumably inherited from FPBase - verify.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod publishes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action for this state: always move on to "corrections"
        """
        m.next = "corrections"
1444
1445
class FPCorrectionsMod:
    """ Exponent correction: when the input number reports
        is_denormalised the output exponent is replaced by N127.
        Nothing is changed while i.out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data layout (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data layout (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as submodule "corrections" of m and
            drives its input from i
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        zin, zout = self.i.z, self.out_z.z
        m.submodules.corr_in_z = zin
        m.submodules.corr_out_z = zout

        # default: every field of the output record is copied from the input
        m.d.comb += self.out_z.eq(self.i)

        with m.If(~self.i.out_do_z):
            with m.If(zin.is_denormalised):
                m.d.comb += zout.e.eq(zin.N127)
        return m
1478
1479
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output into out_z.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPCorrectionsMod
            * id_wid: passed to FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data layout: same as the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data layout: same as the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod publishes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action for this state: always move on to "pack"
        """
        m.next = "pack"
1503
1504
class FPPackData:
    """ Packed result record: the value z (width bits) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy z and mid from i
        """
        pairs = ((self.z, i.z), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
1513
1514
class FPPackMod:
    """ Packing: produces the final value in o.z.

        * i.out_do_z set: the value is taken directly from i.oz
        * otherwise, i.z overflowed: infinity with the sign of i.z
        * otherwise: built from sign, exponent and mantissa of i.z
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data layout (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data layout (FPPackData)
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output object (the argument is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ registers this module as submodule "pack" of m and drives
            its input from in_z
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        num = self.i.z
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z

        # same three-way choice as a nested if/else, written as a chain
        with m.If(self.i.out_do_z):
            m.d.comb += z.v.eq(self.i.oz)
        with m.Elif(num.is_overflowed):
            m.d.comb += z.inf(num.s)
        with m.Else():
            m.d.comb += z.create(num.s, num.e, num.m)

        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(z.v),
        ]
        return m
1553
1554
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its packed
        output (z and mid) into out_z.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPPackMod
            * id_wid: passed to FPPackMod
        """
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data layout: same as the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data layout: same as the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod publishes an FPPackData record as "o"; that record
        # holds the packed value in "z" (it has no "v" field)
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action for this state: always move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1578
1579
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and holds stb high
        until it is acknowledged, then moves to to_state
        ("get_ops" unless given).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z, self.out_z = in_z, out_z
        self.in_mid, self.out_mid = in_mid, out_mid

    def action(self, m):
        """ FSM action for this state
        """
        # the id is only forwarded when one was supplied
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        port = self.out_z.z
        m.d.sync += port.v.eq(self.in_z)

        # strobe and acknowledge both seen: drop the strobe and leave
        with m.If(port.stb & port.ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        # otherwise keep (or start) strobing
        with m.Else():
            m.d.sync += port.stb.eq(1)
1603
1604
class FPPutZIdx(FPState):
    """ FSM state that presents in_z.v on the output selected from
        out_zs by in_mid, holding that output's stb high until it is
        acknowledged, then moves to to_state ("get_ops" unless given).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z, self.out_zs, self.in_mid = in_z, out_zs, in_mid

    def action(self, m):
        """ FSM action for this state
        """
        # the output addressed by in_mid
        port = self.out_zs[self.in_mid]

        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += outz_stb.eq(port.stb)
        m.d.comb += outz_ack.eq(port.ack)

        m.d.sync += port.v.eq(self.in_z.v)

        # strobe and acknowledge both seen: drop the strobe and leave
        with m.If(outz_stb & outz_ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        # otherwise keep (or start) strobing
        with m.Else():
            m.d.sync += port.stb.eq(1)
1630
class FPOpData:
    """ Output record of the adder: an FPOp (z) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy z and mid from i
        """
        pairs = ((self.z, i.z), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]

    def ports(self):
        """ returns z and mid, in that order, as a list
        """
        return [getattr(self, field) for field in ("z", "mid")]
1641
1642
class FPADDBaseMod:
    """ Builds the floating-point adder as an FSM: a list of state
        objects is assembled (compact or longer variant) and each one
        contributes its action() to the FSM state named by its
        state_from.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # state objects, in the order they were added
        self.states = []

    def ispec(self):
        """ input data layout (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data layout (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it, so
            that creation and registration can be written in one line
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states (and wire the stages up) first ...
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # ... then emit one FSM state per registered state object
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ one FSM state per processing step.

            NOTE(review): this path looks stale.  It reads self.in_mid,
            self.out_z and self.out_mid, none of which __init__ sets, and
            several setup() calls pass more arguments than the setup()
            methods visible in this file accept (e.g. FPAddStage0.setup
            takes (m, i)).  Confirm before using compact=False.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or multi-cycle variant
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from pack, one from special-cases
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ reduced number of FSM states: get operands, special-cases
            plus denormalisation, align plus add, normalise-to-pack,
            then output.  Each state is fed from the previous one's
            output.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1762
1763
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod submodule and drives
        its input strobe and output ack.

        setup() wires self.i / self.o / self.in_t to the submodule and
        registers it as m.submodules.fpadd.  action() holds the logic
        for the "fpadd" state, which moves on to "put_z" once z_done
        is set.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ connects this state to the wrapped module and adds the
            module to m as submodule "fpadd".

            * m: the Module to add statements and the submodule to
            * i: input data, combinatorially copied into self.i and
                 from there into self.mod.i
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not referenced in this function
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ logic for the "fpadd" state.

            while z_done is low: if in_accept is set, raise add_ack and
            the module's input strobe; otherwise lower both and raise
            the output ack.  once z_done is high: raise add_ack, lower
            the input strobe and move to state "put_z".
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge and hand over to put_z.  nothing is copied
            # here: setup() already connects self.o to the module output.
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1853
1854
class FPADDStageOut:
    """ output record: a width-bit z Signal plus an id_wid-bit mid Signal
    """
    def __init__(self, width, id_wid):
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid
            into this record (z first, then mid)
        """
        assignments = [self.z.eq(i.z)]
        assignments.append(self.mid.eq(i.mid))
        return assignments

    def ports(self):
        """ returns the record's members as a list: z, then mid
        """
        return list((self.z, self.mid))
1865
1866
# empty attribute holder: given the same members as FPADDStageOut (z, mid)
# so that an eq function can assign from it
class PlaceHolder:
    pass
1869
1870
class FPAddBaseStage:
    """ stage description: provides input and output specs and a
        process function that maps an input record to an output one
    """
    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ returns a new FPADDBaseData for this width / id width
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new FPADDStageOut for this width / id width
        """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ returns a PlaceHolder whose z is i.a + i.b and whose mid
            is i.mid
        """
        result = PlaceHolder()
        total = i.a + i.b
        result.z, result.mid = total, i.mid
        return result
1887
1888
class FPADDBasePipe1(UnbufferedPipeline):
    """ UnbufferedPipeline built around a single FPAddBaseStage
    """
    def __init__(self, width, id_wid):
        UnbufferedPipeline.__init__(self, FPAddBaseStage(width, id_wid))
1893
1894
class FPADDBasePipe(ControlBase):
    """ ControlBase containing one FPADDBasePipe1.  the statements
        returned by connect() are kept and added in elaborate().
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPADDBasePipe1(width, id_wid)
        chain = [self.pipe1]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ returns a Module holding pipe1 as a submodule plus the
            stored connection statements (combinatorial)
        """
        mod = Module()
        mod.submodules.pipe1 = self.pipe1
        mod.d.comb += self._eqs
        return mod
1906
1907
class PriorityCombPipeline(CombMultiInPipeline):
    """ CombMultiInPipeline that uses an InputPriorityArbiter as p_mux
    """
    def __init__(self, stage, p_len):
        # the arbiter is created first, as it is an argument of the base init
        arbiter = InputPriorityArbiter(self, p_len)
        CombMultiInPipeline.__init__(self, stage, p_mux=arbiter, p_len=p_len)

    def ports(self):
        """ returns the ports of the input multiplexer (self.p_mux)
        """
        mux = self.p_mux
        return mux.ports()
1915
1916
class FPAddInPassThruStage:
    """ pass-through stage: input and output specs are both
        FPADDBaseData, and process returns its input unchanged
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new FPADDBaseData for this width / id width
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output spec is the same as the input spec
        """
        return self.ispec()

    def process(self, i):
        """ returns i itself
        """
        return i
1923
1924
class FPADDInMuxPipe(PriorityCombPipeline):
    """ PriorityCombPipeline with num_rows inputs and a
        FPAddInPassThruStage as its stage
    """
    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        passthru = FPAddInPassThruStage(width, id_width)
        PriorityCombPipeline.__init__(self, passthru, p_len=self.num_rows)
        #self.p.i_data = stage.ispec()
        #self.n.o_data = stage.ospec()

    def ports(self):
        """ returns a flat list: for every input, its i_valid, o_ready
            and data ports; then the output's i_ready, o_valid and
            data ports
        """
        res = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            res.extend([prev.i_valid, prev.o_ready])
            res.extend(prev.i_data.ports())
        nxt = self.n
        res.extend([nxt.i_ready, nxt.o_valid])
        res.extend(nxt.o_data.ports())
        return res
1941
1942
class MuxCombPipeline(CombMultiOutPipeline):
    """ CombMultiOutPipeline in which one object serves as both the
        stage and the n-way output multiplexer
    """
    def __init__(self, stage, n_len):
        # HACK: the stage object is passed a second time, as n_mux
        CombMultiOutPipeline.__init__(self, stage, n_mux=stage, n_len=n_len)

        # HACK: as the n-mux is the stage, its m_id is pointed at the
        # mid of the incoming data
        stage.m_id = self.p.i_data.mid

    def ports(self):
        """ returns the ports of self.p_mux
        """
        # NOTE(review): __init__ only hands an n_mux to the base class;
        # confirm that the base actually provides a p_mux attribute
        mux = self.p_mux
        return mux.ports()
1953
1954
class FPAddOutPassThruStage:
    """ pass-through stage: input and output specs are both
        FPADDStageOut, and process returns its input unchanged
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new FPADDStageOut for this width / id width
        """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ output spec is the same as the input spec
        """
        return self.ispec()

    def process(self, i):
        """ returns i itself
        """
        return i
1961
1962
class FPADDMuxOutPipe(MuxCombPipeline):
    """ MuxCombPipeline with num_rows outputs and a
        FPAddOutPassThruStage as its stage
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        MuxCombPipeline.__init__(self, passthru, n_len=self.num_rows)
        #self.p.i_data = stage.ispec()
        #self.n.o_data = stage.ospec()

    def ports(self):
        """ returns a flat list: the input's i_valid, o_ready and data
            ports; then for every output, its i_ready, o_valid and
            data ports
        """
        res = [self.p.i_valid, self.p.o_ready]
        res.extend(self.p.i_data.ports())
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            res.extend([nxt.i_ready, nxt.o_valid])
            res.extend(nxt.o_data.ports())
        return res
1978
1979
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        three pipes are chained: a fan-in pipe (FPADDInMuxPipe), the
        add stage (FPADDBasePipe) and a fan-out pipe (FPADDMuxOutPipe).
        the fan-in pipe's inputs and the fan-out pipe's outputs are
        exposed as this object's p and n.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class has no in/out of its own: it borrows the pipes'
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        in_ports = self.inpipe.ports()
        self._ports = in_ports + self.outpipe.ports()

    def elaborate(self, platform):
        """ returns a Module holding the three pipes as submodules,
            with inpipe connected to fpadd and fpadd to outpipe
        """
        mod = Module()
        mod.submodules.inpipe = self.inpipe
        mod.submodules.fpadd = self.fpadd
        mod.submodules.outpipe = self.outpipe

        mod.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        mod.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return mod

    def ports(self):
        """ returns the port list assembled in __init__
        """
        return self._ports
2008
2009
class ResArray:
    """ Array of rs_sz result operands (FPOp), together with a single
        input operand (in_z) and input id (in_mid).
    """
    def __init__(self, width, id_wid, rs_sz=2):
        """ creates the result array.

            * width: bit-width given to every FPOp
            * id_wid: bit-width of the in_mid Signal
            * rs_sz: number of out_z entries in the array (default 2)
        """
        self.width = width
        self.id_wid = id_wid
        self.rs_sz = rs_sz
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially copies in_z and in_mid into this object's
            in_z and in_mid.

            * m: the Module to add the assignments to
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for the result array: a
            Module with in_z and every array entry as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every array entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2041
2042
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs, and of out_z
                     results, that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # input side: one named (in_a, in_b) pair per row
        operand_pairs = []
        for row in range(rs_sz):
            op_a, op_b = FPOp(width), FPOp(width)
            op_a.name = "in_a_%d" % row
            op_b.name = "in_b_%d" % row
            operand_pairs.append((op_a, op_b))
        self.rs = Array(operand_pairs)

        # output side: one named out_z per row
        results = []
        for row in range(rs_sz):
            result = FPOp(width)
            result.name = "out_z_%d" % row
            results.append(result)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            states are registered in the order get_a, get_b, fpadd,
            put_z, then each state's action() is placed inside the FSM
            state named by its state_from.  only row 0 of the operand
            array is used as input.
        """
        m = Module()
        m.submodules += self.rs

        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(geta.out_op),
                     abd.b.eq(getb.out_op),
                     abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        # result state: given the adder's z/mid and the result array,
        # with "get_a" as its final argument
        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2132
2133
if __name__ == "__main__":
    # hard-wired switch: the first branch builds the full FPADD, the
    # second (never taken) the bare FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = (alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=port_list)
    else:
        # NOTE(review): FPADDBase.__init__ does not appear to create
        # in_a, in_b, out_z, in_mid or out_mid - confirm before enabling
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = ([alu.in_a, alu.in_b] +
                     alu.in_t.ports() +
                     alu.out_z.ports() +
                     [alu.in_mid, alu.out_mid])
        main(alu, ports=port_list)


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())