src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
  13 from multipipe import CombMultiOutPipeline
  14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
  15
  16 #from fpbase import FPNumShiftMultiRight
  17
  18
  19 class FPState(FPBase):
  20     def __init__(self, state_from):
  21         self.state_from = state_from
  22
  23     def set_inputs(self, inputs):
  24         self.inputs = inputs
  25         for k,v in inputs.items():
  26             setattr(self, k, v)
  27
  28     def set_outputs(self, outputs):
  29         self.outputs = outputs
  30         for k,v in outputs.items():
  31             setattr(self, k, v)
  32
  33
  34 class FPGetSyncOpsMod:
  35     def __init__(self, width, num_ops=2):
  36         self.width = width
  37         self.num_ops = num_ops
  38         inops = []
  39         outops = []
  40         for i in range(num_ops):
  41             inops.append(Signal(width, reset_less=True))
  42             outops.append(Signal(width, reset_less=True))
  43         self.in_op = inops
  44         self.out_op = outops
  45         self.stb = Signal(num_ops)
  46         self.ack = Signal()
  47         self.ready = Signal(reset_less=True)
  48         self.out_decode = Signal(reset_less=True)
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  53         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  54         with m.If(self.out_decode):
  55             for i in range(self.num_ops):
  56                 m.d.comb += [
  57                         self.out_op[i].eq(self.in_op[i]),
  58                 ]
  59         return m
  60
  61     def ports(self):
  62         return self.in_op + self.out_op + [self.stb, self.ack]
  63
  64
  65 class FPOps(Trigger):
  66     def __init__(self, width, num_ops):
  67         Trigger.__init__(self)
  68         self.width = width
  69         self.num_ops = num_ops
  70
  71         res = []
  72         for i in range(num_ops):
  73             res.append(Signal(width))
  74         self.v  = Array(res)
  75
  76     def ports(self):
  77         res = []
  78         for i in range(self.num_ops):
  79             res.append(self.v[i])
  80         res.append(self.ack)
  81         res.append(self.stb)
  82         return res
  83
  84
  85 class InputGroup:
  86     def __init__(self, width, num_ops=2, num_rows=4):
  87         self.width = width
  88         self.num_ops = num_ops
  89         self.num_rows = num_rows
  90         self.mmax = int(log(self.num_rows) / log(2))
  91         self.rs = []
  92         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  93         for i in range(num_rows):
  94             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  95         self.rs = Array(self.rs)
  96
  97         self.out_op = FPOps(width, num_ops)
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101
 102         pe = PriorityEncoder(self.num_rows)
 103         m.submodules.selector = pe
 104         m.submodules.out_op = self.out_op
 105         m.submodules += self.rs
 106
 107         # connect priority encoder
 108         in_ready = []
 109         for i in range(self.num_rows):
 110             in_ready.append(self.rs[i].ready)
 111         m.d.comb += pe.i.eq(Cat(*in_ready))
 112
 113         active = Signal(reset_less=True)
 114         out_en = Signal(reset_less=True)
 115         m.d.comb += active.eq(~pe.n) # encoder active
 116         m.d.comb += out_en.eq(active & self.out_op.trigger)
 117
 118         # encoder active: ack relevant input, record MID, pass output
 119         with m.If(out_en):
 120             rs = self.rs[pe.o]
 121             m.d.sync += self.mid.eq(pe.o)
 122             m.d.sync += rs.ack.eq(0)
 123             m.d.sync += self.out_op.stb.eq(0)
 124             for j in range(self.num_ops):
 125                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 126         with m.Else():
 127             m.d.sync += self.out_op.stb.eq(1)
 128             # acks all default to zero
 129             for i in range(self.num_rows):
 130                 m.d.sync += self.rs[i].ack.eq(1)
 131
 132         return m
 133
 134     def ports(self):
 135         res = []
 136         for i in range(self.num_rows):
 137             inop = self.rs[i]
 138             res += inop.in_op + [inop.stb]
 139         return self.out_op.ports() + res + [self.mid]
 140
 141
 142 class FPGetOpMod:
 143     def __init__(self, width):
 144         self.in_op = FPOp(width)
 145         self.out_op = Signal(width)
 146         self.out_decode = Signal(reset_less=True)
 147
 148     def elaborate(self, platform):
 149         m = Module()
 150         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 151         m.submodules.get_op_in = self.in_op
 152         #m.submodules.get_op_out = self.out_op
 153         with m.If(self.out_decode):
 154             m.d.comb += [
 155                 self.out_op.eq(self.in_op.v),
 156             ]
 157         return m
 158
 159
 160 class FPGetOp(FPState):
 161     """ gets operand
 162     """
 163
 164     def __init__(self, in_state, out_state, in_op, width):
 165         FPState.__init__(self, in_state)
 166         self.out_state = out_state
 167         self.mod = FPGetOpMod(width)
 168         self.in_op = in_op
 169         self.out_op = Signal(width)
 170         self.out_decode = Signal(reset_less=True)
 171
 172     def setup(self, m, in_op):
 173         """ links module to inputs and outputs
 174         """
 175         setattr(m.submodules, self.state_from, self.mod)
 176         m.d.comb += self.mod.in_op.eq(in_op)
 177         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 178
 179     def action(self, m):
 180         with m.If(self.out_decode):
 181             m.next = self.out_state
 182             m.d.sync += [
 183                 self.in_op.ack.eq(0),
 184                 self.out_op.eq(self.mod.out_op)
 185             ]
 186         with m.Else():
 187             m.d.sync += self.in_op.ack.eq(1)
 188
 189
 190 class FPNumBase2Ops:
 191
 192     def __init__(self, width, id_wid, m_extra=True):
 193         self.a = FPNumBase(width, m_extra)
 194         self.b = FPNumBase(width, m_extra)
 195         self.mid = Signal(id_wid, reset_less=True)
 196
 197     def eq(self, i):
 198         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 199
 200     def ports(self):
 201         return [self.a, self.b, self.mid]
 202
 203
 204 class FPADDBaseData:
 205
 206     def __init__(self, width, id_wid):
 207         self.width = width
 208         self.id_wid = id_wid
 209         self.a  = Signal(width)
 210         self.b  = Signal(width)
 211         self.mid = Signal(id_wid, reset_less=True)
 212
 213     def eq(self, i):
 214         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 215
 216     def ports(self):
 217         return [self.a, self.b, self.mid]
 218
 219
 220 class FPGet2OpMod(Trigger):
 221     def __init__(self, width, id_wid):
 222         Trigger.__init__(self)
 223         self.width = width
 224         self.id_wid = id_wid
 225         self.i = self.ispec()
 226         self.o = self.ospec()
 227
 228     def ispec(self):
 229         return FPADDBaseData(self.width, self.id_wid)
 230
 231     def ospec(self):
 232         return FPADDBaseData(self.width, self.id_wid)
 233
 234     def process(self, i):
 235         return self.o
 236
 237     def elaborate(self, platform):
 238         m = Trigger.elaborate(self, platform)
 239         with m.If(self.trigger):
 240             m.d.comb += [
 241                 self.o.eq(self.i),
 242             ]
 243         return m
 244
 245
 246 class FPGet2Op(FPState):
 247     """ gets operands
 248     """
 249
 250     def __init__(self, in_state, out_state, width, id_wid):
 251         FPState.__init__(self, in_state)
 252         self.out_state = out_state
 253         self.mod = FPGet2OpMod(width, id_wid)
 254         self.o = self.mod.ospec()
 255         self.in_stb = Signal(reset_less=True)
 256         self.out_ack = Signal(reset_less=True)
 257         self.out_decode = Signal(reset_less=True)
 258
 259     def setup(self, m, i, in_stb, in_ack):
 260         """ links module to inputs and outputs
 261         """
 262         m.submodules.get_ops = self.mod
 263         m.d.comb += self.mod.i.eq(i)
 264         m.d.comb += self.mod.stb.eq(in_stb)
 265         m.d.comb += self.out_ack.eq(self.mod.ack)
 266         m.d.comb += self.out_decode.eq(self.mod.trigger)
 267         m.d.comb += in_ack.eq(self.mod.ack)
 268
 269     def action(self, m):
 270         with m.If(self.out_decode):
 271             m.next = self.out_state
 272             m.d.sync += [
 273                 self.mod.ack.eq(0),
 274                 self.o.eq(self.mod.o),
 275             ]
 276         with m.Else():
 277             m.d.sync += self.mod.ack.eq(1)
 278
 279
 280 class FPSCData:
 281
 282     def __init__(self, width, id_wid):
 283         self.a = FPNumBase(width, True)
 284         self.b = FPNumBase(width, True)
 285         self.z = FPNumOut(width, False)
 286         self.oz = Signal(width, reset_less=True)
 287         self.out_do_z = Signal(reset_less=True)
 288         self.mid = Signal(id_wid, reset_less=True)
 289
 290     def eq(self, i):
 291         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 292                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 293
 294
 295 class FPAddSpecialCasesMod:
 296     """ special cases: NaNs, infs, zeros, denormalised
 297         NOTE: some of these are unique to add.  see "Special Operations"
 298         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 299     """
 300
 301     def __init__(self, width, id_wid):
 302         self.width = width
 303         self.id_wid = id_wid
 304         self.i = self.ispec()
 305         self.o = self.ospec()
 306
 307     def ispec(self):
 308         return FPADDBaseData(self.width, self.id_wid)
 309
 310     def ospec(self):
 311         return FPSCData(self.width, self.id_wid)
 312
 313     def setup(self, m, i):
 314         """ links module to inputs and outputs
 315         """
 316         m.submodules.specialcases = self
 317         m.d.comb += self.i.eq(i)
 318
 319     def process(self, i):
 320         return self.o
 321
 322     def elaborate(self, platform):
 323         m = Module()
 324
 325         m.submodules.sc_out_z = self.o.z
 326
 327         # decode: XXX really should move to separate stage
 328         a1 = FPNumIn(None, self.width)
 329         b1 = FPNumIn(None, self.width)
 330         m.submodules.sc_decode_a = a1
 331         m.submodules.sc_decode_b = b1
 332         m.d.comb += [a1.decode(self.i.a),
 333                      b1.decode(self.i.b),
 334                     ]
 335
 336         s_nomatch = Signal()
 337         m.d.comb += s_nomatch.eq(a1.s != b1.s)
 338
 339         m_match = Signal()
 340         m.d.comb += m_match.eq(a1.m == b1.m)
 341
 342         # if a is NaN or b is NaN return NaN
 343         with m.If(a1.is_nan | b1.is_nan):
 344             m.d.comb += self.o.out_do_z.eq(1)
 345             m.d.comb += self.o.z.nan(0)
 346
 347         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 348         # under review
 349
 350         ## if a is zero and b is NaN return -b
 351         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 352         #    m.d.comb += self.o.out_do_z.eq(1)
 353         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 354
 355         ## if b is zero and a is NaN return -a
 356         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 357         #    m.d.comb += self.o.out_do_z.eq(1)
 358         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 359
 360         ## if a is -zero and b is NaN return -b
 361         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 362         #    m.d.comb += self.o.out_do_z.eq(1)
 363         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 364
 365         ## if b is -zero and a is NaN return -a
 366         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 367         #    m.d.comb += self.o.out_do_z.eq(1)
 368         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 369
 370         # if a is inf return inf (or NaN)
 371         with m.Elif(a1.is_inf):
 372             m.d.comb += self.o.out_do_z.eq(1)
 373             m.d.comb += self.o.z.inf(a1.s)
 374             # if a is inf and signs don't match return NaN
 375             with m.If(b1.exp_128 & s_nomatch):
 376                 m.d.comb += self.o.z.nan(0)
 377
 378         # if b is inf return inf
 379         with m.Elif(b1.is_inf):
 380             m.d.comb += self.o.out_do_z.eq(1)
 381             m.d.comb += self.o.z.inf(b1.s)
 382
 383         # if a is zero and b zero return signed-a/b
 384         with m.Elif(a1.is_zero & b1.is_zero):
 385             m.d.comb += self.o.out_do_z.eq(1)
 386             m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
 387
 388         # if a is zero return b
 389         with m.Elif(a1.is_zero):
 390             m.d.comb += self.o.out_do_z.eq(1)
 391             m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
 392
 393         # if b is zero return a
 394         with m.Elif(b1.is_zero):
 395             m.d.comb += self.o.out_do_z.eq(1)
 396             m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
 397
 398         # if a equal to -b return zero (+ve zero)
 399         with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
 400             m.d.comb += self.o.out_do_z.eq(1)
 401             m.d.comb += self.o.z.zero(0)
 402
 403         # Denormalised Number checks next, so pass a/b data through
 404         with m.Else():
 405             m.d.comb += self.o.out_do_z.eq(0)
 406             m.d.comb += self.o.a.eq(a1)
 407             m.d.comb += self.o.b.eq(b1)
 408
 409         m.d.comb += self.o.oz.eq(self.o.z.v)
 410         m.d.comb += self.o.mid.eq(self.i.mid)
 411
 412         return m
 413
 414
 415 class FPID:
 416     def __init__(self, id_wid):
 417         self.id_wid = id_wid
 418         if self.id_wid:
 419             self.in_mid = Signal(id_wid, reset_less=True)
 420             self.out_mid = Signal(id_wid, reset_less=True)
 421         else:
 422             self.in_mid = None
 423             self.out_mid = None
 424
 425     def idsync(self, m):
 426         if self.id_wid is not None:
 427             m.d.sync += self.out_mid.eq(self.in_mid)
 428
 429
 430 class FPAddSpecialCases(FPState):
 431     """ special cases: NaNs, infs, zeros, denormalised
 432         NOTE: some of these are unique to add.  see "Special Operations"
 433         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 434     """
 435
 436     def __init__(self, width, id_wid):
 437         FPState.__init__(self, "special_cases")
 438         self.mod = FPAddSpecialCasesMod(width)
 439         self.out_z = self.mod.ospec()
 440         self.out_do_z = Signal(reset_less=True)
 441
 442     def setup(self, m, i):
 443         """ links module to inputs and outputs
 444         """
 445         self.mod.setup(m, i, self.out_do_z)
 446         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 447         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 448
 449     def action(self, m):
 450         self.idsync(m)
 451         with m.If(self.out_do_z):
 452             m.next = "put_z"
 453         with m.Else():
 454             m.next = "denormalise"
 455
 456
 457 class FPAddSpecialCasesDeNorm(FPState):
 458     """ special cases: NaNs, infs, zeros, denormalised
 459         NOTE: some of these are unique to add.  see "Special Operations"
 460         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 461     """
 462
 463     def __init__(self, width, id_wid):
 464         FPState.__init__(self, "special_cases")
 465         self.smod = FPAddSpecialCasesMod(width, id_wid)
 466         self.dmod = FPAddDeNormMod(width, id_wid)
 467         self.o = self.ospec()
 468
 469     def ispec(self):
 470         return self.smod.ispec()
 471
 472     def ospec(self):
 473         return self.dmod.ospec()
 474
 475     def setup(self, m, i):
 476         """ links module to inputs and outputs
 477         """
 478         # these only needed for break-out (early-out)
 479         # out_z = self.smod.ospec()
 480         # out_do_z = Signal(reset_less=True)
 481         self.smod.setup(m, i)
 482         self.dmod.setup(m, self.smod.o)
 483         #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
 484
 485         # out_do_z=True, only needed for early-out (split pipeline)
 486         #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
 487         #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
 488
 489         # out_do_z=False
 490         m.d.sync += self.o.eq(self.dmod.o)
 491
 492     def process(self, i):
 493         return self.o
 494
 495     def action(self, m):
 496         #with m.If(self.out_do_z):
 497         #    m.next = "put_z"
 498         #with m.Else():
 499             m.next = "align"
 500
 501
 502 class FPAddDeNormMod(FPState):
 503
 504     def __init__(self, width, id_wid):
 505         self.width = width
 506         self.id_wid = id_wid
 507         self.i = self.ispec()
 508         self.o = self.ospec()
 509
 510     def ispec(self):
 511         return FPSCData(self.width, self.id_wid)
 512
 513     def ospec(self):
 514         return FPSCData(self.width, self.id_wid)
 515
 516     def setup(self, m, i):
 517         """ links module to inputs and outputs
 518         """
 519         m.submodules.denormalise = self
 520         m.d.comb += self.i.eq(i)
 521
 522     def elaborate(self, platform):
 523         m = Module()
 524         m.submodules.denorm_in_a = self.i.a
 525         m.submodules.denorm_in_b = self.i.b
 526         m.submodules.denorm_out_a = self.o.a
 527         m.submodules.denorm_out_b = self.o.b
 528
 529         with m.If(~self.i.out_do_z):
 530             # XXX hmmm, don't like repeating identical code
 531             m.d.comb += self.o.a.eq(self.i.a)
 532             with m.If(self.i.a.exp_n127):
 533                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 534             with m.Else():
 535                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 536
 537             m.d.comb += self.o.b.eq(self.i.b)
 538             with m.If(self.i.b.exp_n127):
 539                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 540             with m.Else():
 541                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 542
 543         m.d.comb += self.o.mid.eq(self.i.mid)
 544         m.d.comb += self.o.z.eq(self.i.z)
 545         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 546         m.d.comb += self.o.oz.eq(self.i.oz)
 547
 548         return m
 549
 550
 551 class FPAddDeNorm(FPState):
 552
 553     def __init__(self, width, id_wid):
 554         FPState.__init__(self, "denormalise")
 555         self.mod = FPAddDeNormMod(width)
 556         self.out_a = FPNumBase(width)
 557         self.out_b = FPNumBase(width)
 558
 559     def setup(self, m, i):
 560         """ links module to inputs and outputs
 561         """
 562         self.mod.setup(m, i)
 563
 564         m.d.sync += self.out_a.eq(self.mod.out_a)
 565         m.d.sync += self.out_b.eq(self.mod.out_b)
 566
 567     def action(self, m):
 568         # Denormalised Number checks
 569         m.next = "align"
 570
 571
 572 class FPAddAlignMultiMod(FPState):
 573
 574     def __init__(self, width):
 575         self.in_a = FPNumBase(width)
 576         self.in_b = FPNumBase(width)
 577         self.out_a = FPNumIn(None, width)
 578         self.out_b = FPNumIn(None, width)
 579         self.exp_eq = Signal(reset_less=True)
 580
 581     def elaborate(self, platform):
 582         # This one however (single-cycle) will do the shift
 583         # in one go.
 584
 585         m = Module()
 586
 587         m.submodules.align_in_a = self.in_a
 588         m.submodules.align_in_b = self.in_b
 589         m.submodules.align_out_a = self.out_a
 590         m.submodules.align_out_b = self.out_b
 591
 592         # NOTE: this does *not* do single-cycle multi-shifting,
 593         #       it *STAYS* in the align state until exponents match
 594
 595         # exponent of a greater than b: shift b down
 596         m.d.comb += self.exp_eq.eq(0)
 597         m.d.comb += self.out_a.eq(self.in_a)
 598         m.d.comb += self.out_b.eq(self.in_b)
 599         agtb = Signal(reset_less=True)
 600         altb = Signal(reset_less=True)
 601         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 602         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 603         with m.If(agtb):
 604             m.d.comb += self.out_b.shift_down(self.in_b)
 605         # exponent of b greater than a: shift a down
 606         with m.Elif(altb):
 607             m.d.comb += self.out_a.shift_down(self.in_a)
 608         # exponents equal: move to next stage.
 609         with m.Else():
 610             m.d.comb += self.exp_eq.eq(1)
 611         return m
 612
 613
 614 class FPAddAlignMulti(FPState):
 615
 616     def __init__(self, width, id_wid):
 617         FPState.__init__(self, "align")
 618         self.mod = FPAddAlignMultiMod(width)
 619         self.out_a = FPNumIn(None, width)
 620         self.out_b = FPNumIn(None, width)
 621         self.exp_eq = Signal(reset_less=True)
 622
 623     def setup(self, m, in_a, in_b):
 624         """ links module to inputs and outputs
 625         """
 626         m.submodules.align = self.mod
 627         m.d.comb += self.mod.in_a.eq(in_a)
 628         m.d.comb += self.mod.in_b.eq(in_b)
 629         #m.d.comb += self.out_a.eq(self.mod.out_a)
 630         #m.d.comb += self.out_b.eq(self.mod.out_b)
 631         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 632         m.d.sync += self.out_a.eq(self.mod.out_a)
 633         m.d.sync += self.out_b.eq(self.mod.out_b)
 634
 635     def action(self, m):
 636         with m.If(self.exp_eq):
 637             m.next = "add_0"
 638
 639
 640 class FPNumIn2Ops:
 641
 642     def __init__(self, width, id_wid):
 643         self.a = FPNumIn(None, width)
 644         self.b = FPNumIn(None, width)
 645         self.z = FPNumOut(width, False)
 646         self.out_do_z = Signal(reset_less=True)
 647         self.oz = Signal(width, reset_less=True)
 648         self.mid = Signal(id_wid, reset_less=True)
 649
 650     def eq(self, i):
 651         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 652                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 653
 654
 655 class FPAddAlignSingleMod:
 656
 657     def __init__(self, width, id_wid):
 658         self.width = width
 659         self.id_wid = id_wid
 660         self.i = self.ispec()
 661         self.o = self.ospec()
 662
 663     def ispec(self):
 664         return FPSCData(self.width, self.id_wid)
 665
 666     def ospec(self):
 667         return FPNumIn2Ops(self.width, self.id_wid)
 668
 669     def process(self, i):
 670         return self.o
 671
 672     def setup(self, m, i):
 673         """ links module to inputs and outputs
 674         """
 675         m.submodules.align = self
 676         m.d.comb += self.i.eq(i)
 677
 678     def elaborate(self, platform):
 679         """ Aligns A against B or B against A, depending on which has the
 680             greater exponent.  This is done in a *single* cycle using
 681             variable-width bit-shift
 682
 683             the shifter used here is quite expensive in terms of gates.
 684             Mux A or B in (and out) into temporaries, as only one of them
 685             needs to be aligned against the other
 686         """
 687         m = Module()
 688
 689         m.submodules.align_in_a = self.i.a
 690         m.submodules.align_in_b = self.i.b
 691         m.submodules.align_out_a = self.o.a
 692         m.submodules.align_out_b = self.o.b
 693
 694         # temporary (muxed) input and output to be shifted
 695         t_inp = FPNumBase(self.width)
 696         t_out = FPNumIn(None, self.width)
 697         espec = (len(self.i.a.e), True)
 698         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 699         m.submodules.align_t_in = t_inp
 700         m.submodules.align_t_out = t_out
 701         m.submodules.multishift_r = msr
 702
 703         ediff = Signal(espec, reset_less=True)
 704         ediffr = Signal(espec, reset_less=True)
 705         tdiff = Signal(espec, reset_less=True)
 706         elz = Signal(reset_less=True)
 707         egz = Signal(reset_less=True)
 708
 709         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 710         m.d.comb += msr.inp.eq(t_inp.m)
 711         m.d.comb += msr.diff.eq(tdiff)
 712         m.d.comb += t_out.m.eq(msr.m)
 713         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 714         m.d.comb += t_out.s.eq(t_inp.s)
 715
 716         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 717         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 718         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 719         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 720
 721         # default: A-exp == B-exp, A and B untouched (fall through)
 722         m.d.comb += self.o.a.eq(self.i.a)
 723         m.d.comb += self.o.b.eq(self.i.b)
 724         # only one shifter (muxed)
 725         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 726         # exponent of a greater than b: shift b down
 727         with m.If(~self.i.out_do_z):
 728             with m.If(egz):
 729                 m.d.comb += [t_inp.eq(self.i.b),
 730                              tdiff.eq(ediff),
 731                              self.o.b.eq(t_out),
 732                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 733                             ]
 734             # exponent of b greater than a: shift a down
 735             with m.Elif(elz):
 736                 m.d.comb += [t_inp.eq(self.i.a),
 737                              tdiff.eq(ediffr),
 738                              self.o.a.eq(t_out),
 739                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 740                             ]
 741
 742         m.d.comb += self.o.mid.eq(self.i.mid)
 743         m.d.comb += self.o.z.eq(self.i.z)
 744         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 745         m.d.comb += self.o.oz.eq(self.i.oz)
 746
 747         return m
 748
 749
 750 class FPAddAlignSingle(FPState):
 751
 752     def __init__(self, width, id_wid):
 753         FPState.__init__(self, "align")
 754         self.mod = FPAddAlignSingleMod(width, id_wid)
 755         self.out_a = FPNumIn(None, width)
 756         self.out_b = FPNumIn(None, width)
 757
 758     def setup(self, m, i):
 759         """ links module to inputs and outputs
 760         """
 761         self.mod.setup(m, i)
 762
 763         # NOTE: could be done as comb
 764         m.d.sync += self.out_a.eq(self.mod.out_a)
 765         m.d.sync += self.out_b.eq(self.mod.out_b)
 766
 767     def action(self, m):
 768         m.next = "add_0"
 769
 770
 771 class FPAddAlignSingleAdd(FPState):
 772
 773     def __init__(self, width, id_wid):
 774         FPState.__init__(self, "align")
 775         self.width = width
 776         self.id_wid = id_wid
 777         self.a1o = self.ospec()
 778
 779     def ispec(self):
 780         return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
 781
 782     def ospec(self):
 783         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 784
 785     def setup(self, m, i):
 786         """ links module to inputs and outputs
 787         """
 788
 789         # chain AddAlignSingle, AddStage0 and AddStage1
 790         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 791         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 792         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 793
 794         chain = StageChain([mod, a0mod, a1mod])
 795         chain.setup(m, i)
 796
 797         m.d.sync += self.a1o.eq(a1mod.o)
 798
 799     def process(self, i):
 800         return self.a1o
 801
 802     def action(self, m):
 803         m.next = "normalise_1"
 804
 805
 806 class FPAddStage0Data:
 807
 808     def __init__(self, width, id_wid):
 809         self.z = FPNumBase(width, False)
 810         self.out_do_z = Signal(reset_less=True)
 811         self.oz = Signal(width, reset_less=True)
 812         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 813         self.mid = Signal(id_wid, reset_less=True)
 814
 815     def eq(self, i):
 816         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 817                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 818
 819
 820 class FPAddStage0Mod:
 821
 822     def __init__(self, width, id_wid):
 823         self.width = width
 824         self.id_wid = id_wid
 825         self.i = self.ispec()
 826         self.o = self.ospec()
 827
 828     def ispec(self):
 829         return FPSCData(self.width, self.id_wid)
 830
 831     def ospec(self):
 832         return FPAddStage0Data(self.width, self.id_wid)
 833
 834     def process(self, i):
 835         return self.o
 836
 837     def setup(self, m, i):
 838         """ links module to inputs and outputs
 839         """
 840         m.submodules.add0 = self
 841         m.d.comb += self.i.eq(i)
 842
 843     def elaborate(self, platform):
 844         m = Module()
 845         m.submodules.add0_in_a = self.i.a
 846         m.submodules.add0_in_b = self.i.b
 847         m.submodules.add0_out_z = self.o.z
 848
 849         # store intermediate tests (and zero-extended mantissas)
 850         seq = Signal(reset_less=True)
 851         mge = Signal(reset_less=True)
 852         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 853         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 854         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 855                      mge.eq(self.i.a.m >= self.i.b.m),
 856                      am0.eq(Cat(self.i.a.m, 0)),
 857                      bm0.eq(Cat(self.i.b.m, 0))
 858                     ]
 859         # same-sign (both negative or both positive) add mantissas
 860         with m.If(~self.i.out_do_z):
 861             m.d.comb += self.o.z.e.eq(self.i.a.e)
 862             with m.If(seq):
 863                 m.d.comb += [
 864                     self.o.tot.eq(am0 + bm0),
 865                     self.o.z.s.eq(self.i.a.s)
 866                 ]
 867             # a mantissa greater than b, use a
 868             with m.Elif(mge):
 869                 m.d.comb += [
 870                     self.o.tot.eq(am0 - bm0),
 871                     self.o.z.s.eq(self.i.a.s)
 872                 ]
 873             # b mantissa greater than a, use b
 874             with m.Else():
 875                 m.d.comb += [
 876                     self.o.tot.eq(bm0 - am0),
 877                     self.o.z.s.eq(self.i.b.s)
 878             ]
 879
 880         m.d.comb += self.o.oz.eq(self.i.oz)
 881         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 882         m.d.comb += self.o.mid.eq(self.i.mid)
 883         return m
 884
 885
 886 class FPAddStage0(FPState):
 887     """ First stage of add.  covers same-sign (add) and subtract
 888         special-casing when mantissas are greater or equal, to
 889         give greatest accuracy.
 890     """
 891
 892     def __init__(self, width, id_wid):
 893         FPState.__init__(self, "add_0")
 894         self.mod = FPAddStage0Mod(width)
 895         self.o = self.mod.ospec()
 896
 897     def setup(self, m, i):
 898         """ links module to inputs and outputs
 899         """
 900         self.mod.setup(m, i)
 901
 902         # NOTE: these could be done as combinatorial (merge add0+add1)
 903         m.d.sync += self.o.eq(self.mod.o)
 904
 905     def action(self, m):
 906         m.next = "add_1"
 907
 908
 909 class FPAddStage1Data:
 910
 911     def __init__(self, width, id_wid):
 912         self.z = FPNumBase(width, False)
 913         self.out_do_z = Signal(reset_less=True)
 914         self.oz = Signal(width, reset_less=True)
 915         self.of = Overflow()
 916         self.mid = Signal(id_wid, reset_less=True)
 917
 918     def eq(self, i):
 919         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 920                 self.of.eq(i.of), self.mid.eq(i.mid)]
 921
 922
 923
 924 class FPAddStage1Mod(FPState):
 925     """ Second stage of add: preparation for normalisation.
 926         detects when tot sum is too big (tot[27] is kinda a carry bit)
 927     """
 928
 929     def __init__(self, width, id_wid):
 930         self.width = width
 931         self.id_wid = id_wid
 932         self.i = self.ispec()
 933         self.o = self.ospec()
 934
 935     def ispec(self):
 936         return FPAddStage0Data(self.width, self.id_wid)
 937
 938     def ospec(self):
 939         return FPAddStage1Data(self.width, self.id_wid)
 940
 941     def process(self, i):
 942         return self.o
 943
 944     def setup(self, m, i):
 945         """ links module to inputs and outputs
 946         """
 947         m.submodules.add1 = self
 948         m.submodules.add1_out_overflow = self.o.of
 949
 950         m.d.comb += self.i.eq(i)
 951
 952     def elaborate(self, platform):
 953         m = Module()
 954         #m.submodules.norm1_in_overflow = self.in_of
 955         #m.submodules.norm1_out_overflow = self.out_of
 956         #m.submodules.norm1_in_z = self.in_z
 957         #m.submodules.norm1_out_z = self.out_z
 958         m.d.comb += self.o.z.eq(self.i.z)
 959         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 960         with m.If(~self.i.out_do_z):
 961             with m.If(self.i.tot[-1]):
 962                 m.d.comb += [
 963                     self.o.z.m.eq(self.i.tot[4:]),
 964                     self.o.of.m0.eq(self.i.tot[4]),
 965                     self.o.of.guard.eq(self.i.tot[3]),
 966                     self.o.of.round_bit.eq(self.i.tot[2]),
 967                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 968                     self.o.z.e.eq(self.i.z.e + 1)
 969             ]
 970             # tot[-1] (MSB) zero case
 971             with m.Else():
 972                 m.d.comb += [
 973                     self.o.z.m.eq(self.i.tot[3:]),
 974                     self.o.of.m0.eq(self.i.tot[3]),
 975                     self.o.of.guard.eq(self.i.tot[2]),
 976                     self.o.of.round_bit.eq(self.i.tot[1]),
 977                     self.o.of.sticky.eq(self.i.tot[0])
 978             ]
 979
 980         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 981         m.d.comb += self.o.oz.eq(self.i.oz)
 982         m.d.comb += self.o.mid.eq(self.i.mid)
 983
 984         return m
 985
 986
 987 class FPAddStage1(FPState):
 988
 989     def __init__(self, width, id_wid):
 990         FPState.__init__(self, "add_1")
 991         self.mod = FPAddStage1Mod(width)
 992         self.out_z = FPNumBase(width, False)
 993         self.out_of = Overflow()
 994         self.norm_stb = Signal()
 995
 996     def setup(self, m, i):
 997         """ links module to inputs and outputs
 998         """
 999         self.mod.setup(m, i)
1000
1001         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
1002
1003         m.d.sync += self.out_of.eq(self.mod.out_of)
1004         m.d.sync += self.out_z.eq(self.mod.out_z)
1005         m.d.sync += self.norm_stb.eq(1)
1006
1007     def action(self, m):
1008         m.next = "normalise_1"
1009
1010
class FPNormaliseModSingle:
    """ Single-cycle normalisation of one number.

        When in_z.m_msbzero is set, the mantissa (with the guard and
        round bits appended) is shifted up by its leading-zero count and
        the exponent is decreased by the same amount.  Otherwise the
        number is passed through unchanged.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow bits read (in_of) and driven (out_of) by elaborate()
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        return FPNumBase(self.width, False)

    def ospec(self):
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input number of this module is held in in_z
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): the multi-shifter is instantiated but none of its
        # ports are connected in this module - confirm whether it is needed
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1077
class FPNorm1Data:
    """ Record holding the output of the normalisation stage: the
        number z, the roundz flag, the out_do_z / oz pair and the
        id (mid).
    """

    def __init__(self, width, id_wid):
        # tells the rounding stage whether to increment the mantissa
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        # when out_do_z is set, later stages use oz instead of z
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # identifier carried alongside the data
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field from i
        """
        field_names = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name))
                for name in field_names]
1090
1091
class FPNorm1ModSingle:
    """ Single-cycle normalisation stage.

        Takes an FPAddStage1Data record (z plus overflow bits) and, when
        out_do_z is clear, either shifts the mantissa up and decreases
        the exponent ("decrease") or shifts it down through the
        multi-shifter and increases the exponent ("increase").  The
        result, with the roundz flag, is an FPNorm1Data record.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # mantissa width plus the guard and round bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        # local overflow object: its roundz is the stage's roundz output
        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # private copy of the input record
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        with m.If(~self.i.out_do_z):
            # decrease exponent
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the low bits come from the shifter output, laid out
                    # as temp_m is: sticky, round, guard, then mantissa.
                    # (temp_s is only driven in the decrease branch.)
                    of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),     # guard
                    of.round_bit.eq(msr.m[1]), # round
                    of.sticky.eq(msr.m[0]),    # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # id and the out_do_z / oz pair are passed straight through
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1198
1199
class FPNorm1ModMulti:
    """ One step of multi-cycle normalisation.

        Selects either in_z/in_of (in_select set) or temp_z/temp_of as
        the working value, then moves it one place: mantissa up and
        exponent down ("decrease"), or mantissa down and exponent up
        ("increase").  out_norm is set while either condition holds.

        NOTE(review): FPNorm1Multi calls self.mod.setup(...), but no
        setup method is defined on this class - confirm intended wiring.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven by elaborate(): set while a further step is required
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1265
1266
class FPNorm1Single(FPState):
    """ FSM state "normalise_1", single-cycle variant: wraps
        FPNorm1ModSingle and moves on to the "round" state.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both the width and the id width
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        # NOTE(review): out_z / out_roundz are not driven anywhere in
        # this class - presumably kept for older callers; confirm.
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM transition: always moves on to "round"
        """
        m.next = "round"
1289
1290
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1", multi-cycle variant.

        Stays in this state while out_norm is set, feeding the previous
        result back through temp_z / temp_of, and moves on to "round"
        (latching out_roundz) once out_norm is clear.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        # stb follows the norm_stb input; ack is registered
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        # registered copies of the previous step's result
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as defined in this file, has no
        # setup method - this call looks stale; confirm before use.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ FSM action for the "normalise_1" state
        """
        # accept new input when strobed and not already acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # latch this step's result for use by the next step
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            # more normalisation needed: remain in this state
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1331
1332
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact form): normalisation, rounding,
        corrections and packing set up as one StageChain, with the
        packed result registered in out_z.
    """

    def __init__(self, width, id_wid):
        super().__init__("normalise_1")
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input format: same as FPNorm1ModSingle's ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: same as FPPackMod's ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()
        m.d.sync += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z), # outputs packed result
        ]

    def process(self, i):
        """ returns the registered output (only available after setup)
        """
        return self.out_z

    def action(self, m):
        """ FSM transition: always moves on to "pack_put_z"
        """
        m.next = "pack_put_z"
1367
1368
class FPRoundData:
    """ Record used between the rounding, corrections and pack stages:
        the number z, the out_do_z / oz pair and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        # when out_do_z is set, the pack stage outputs oz instead of z
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # identifier carried alongside the data
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field from i
        """
        field_names = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name))
                for name in field_names]
1380
1381
class FPRoundMod:
    """ Rounding stage: when roundz is set (and out_do_z is clear) the
        mantissa is incremented, and the exponent as well if the
        mantissa was all 1s.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: an FPNorm1Data record
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: an FPRoundData record
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module with m and drives its input from i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.out_z
        z_in = src.z

        # default: copies mid, z, out_do_z (and oz)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                m.d.comb += dst.z.m.eq(z_in.m + 1) # mantissa up
                with m.If(z_in.m == z_in.m1s): # all 1s
                    m.d.comb += dst.z.e.eq(z_in.e + 1) # exponent up

        return m
1413
1414
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod, registers its output in
        out_z and moves on to the "corrections" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both the width and the id width
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's output record is out_z; FPRoundData.eq copies
        # z, out_do_z, oz and mid, so the id needs no separate transfer
        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        """ FSM transition: always moves on to "corrections"
        """
        m.next = "corrections"
1439
1440
class FPCorrectionsMod:
    """ Corrections stage: when z.is_denormalised is set (and out_do_z
        is clear) the exponent is replaced by z.N127.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: an FPRoundData record
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output format: an FPRoundData record
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module with m and drives its input from i
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z

        # default: copies mid, z, out_do_z (and oz)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1473
1474
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod, registers its
        output in out_z and moves on to the "pack" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both the width and the id width
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module's output record is out_z; FPRoundData.eq copies
        # z, out_do_z, oz and mid, so the id needs no separate transfer
        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        """ FSM transition: always moves on to "pack"
        """
        m.next = "pack"
1498
1499
class FPPackData:
    """ Record produced by the pack stage: the packed value z and the
        id (mid).
    """

    def __init__(self, width, id_wid):
        # packed result, width bits wide
        self.z = Signal(width, reset_less=True)
        # identifier carried alongside the data
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
1508
1509
class FPPackMod:
    """ Pack stage: produces the final value.  When out_do_z is set the
        output is oz; otherwise it is built from z, via inf() if
        z.is_overflowed is set and via create() if not.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: an FPRoundData record
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output format: an FPPackData record
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ registers this module with m and drives its input from in_z
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = src.z
        m.submodules.pack_out_z = z

        with m.If(src.out_do_z):
            # use the alternative value as-is
            m.d.comb += z.v.eq(src.oz)
        with m.Elif(src.z.is_overflowed):
            m.d.comb += z.inf(src.z.s)
        with m.Else():
            m.d.comb += z.create(src.z.s, src.z.e, src.z.m)

        m.d.comb += [
            self.o.mid.eq(src.mid),
            self.o.z.eq(z.v),
        ]
        return m
1548
1549
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod, registers its output record
        in out_z and moves on to the "pack_put_z" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both the width and the id width
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod publishes an FPPackData record on "o"; its fields
        # are z and mid, both of which FPPackData.eq copies
        m.d.sync += self.out_z.eq(self.mod.o)

    def action(self, m):
        """ FSM transition: always moves on to "pack_put_z"
        """
        m.next = "pack_put_z"
1573
1574
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and holds its strobe
        until it is acknowledged, then moves on to to_state
        ("get_ops" unless told otherwise).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy value (and id, if any) and run the handshake
        """
        port = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z)

        handshake_done = port.stb & port.ack
        with m.If(handshake_done):
            # accepted: drop the strobe and leave this state
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1598
1599
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on the output selected from out_zs
        by in_mid, holding that output's strobe until it is acknowledged
        and then moving on to to_state ("get_ops" unless told otherwise).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy the value and run the handshake
        """
        chosen = self.out_zs[self.in_mid]
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [
            outz_stb.eq(chosen.stb),
            outz_ack.eq(chosen.ack),
        ]
        m.d.sync += chosen.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # accepted: drop the strobe and leave this state
            m.d.sync += chosen.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += chosen.stb.eq(1)
1625
class FPOpData:
    """ Output record of the adder: an FPOp (z) plus the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        # identifier carried alongside the data
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the members of this record, z first
        """
        members = [self.z]
        members.append(self.mid)
        return members
1636
1637
class FPADDBaseMod:
    """ FSM-based IEEE754 FP adder.

        States are collected with add_state() while the stages are set
        up; get_fragment() then emits one FSM state per collected
        entry, in the order in which they were added.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        # input trigger (stb/ack) plus the input and output records
        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FPState objects, in the order they were registered
        self.states = []

    def ispec(self):
        """ input format: an FPADDBaseData record
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: an FPOpData record
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ records state for inclusion in the FSM and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # set up the stages first: this fills self.states
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered stage, named by its state_from
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ sets up the non-compact form, with one state per stage

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__, and
            calls setup() with argument lists that differ from the
            setup(m, i) signatures of the stage classes visible in this
            file (e.g. FPAddStage0, FPAddStage1, FPRound).  It looks
            stale: confirm before using compact=False.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or multi-cycle variant
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed from pack, one from special-cases
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ sets up the compact form: get operands, special-cases with
            denormalisation, align with add, normalise-to-pack, and a
            final state that puts the result on the output
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        # the packed value and id registered by FPNormToPack go to self.o
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1756         #                            sc.o.mid, self.o.mid))
1757
1758
class FPADDBase(FPState):
    """ FSM state "fpadd": owns an FPADDBaseMod and drives its in/out
        strobe-acknowledge handshakes.

        attributes created here:

        * mod: the wrapped FPADDBaseMod
        * i / o: input and output spec objects (same specs as mod's)
        * in_t: Trigger whose stb is copied to mod.in_t.stb and whose ack
          is copied back from mod.in_t.ack (see setup)
        * z_done: follows mod.o.z.trigger
        * in_accept: add_stb high while add_ack is low
        * add_stb / add_ack: incoming strobe (registered) and its ack
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links this state to its wrapped module inside Module m

            * m: Module to add the assignments and the submodule to
            * i: input data, copied (comb) into self.i and on to mod.i
            * add_stb: incoming strobe, registered (sync) into self.add_stb
            * in_mid: accepted for interface compatibility; not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        # defaults: action() adds further assignments to these same signals
        # after the ones below, for when this state is active
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ statements executed while the FSM is in this state

            * not done, operands offered (in_accept): raise add_ack and
              raise in_t.stb to start the add
            * not done, nothing offered: drop add_ack and in_t.stb,
              raise o.z.ack
            * done (z_done): raise add_ack, drop in_t.stb, go to "put_z"

            the output id and value are not copied here: setup() already
            connects them combinatorially from the wrapped module.
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge and hand over to the output state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1848
1849
class FPADDStageOut:
    """ output data of an add stage: a value (z) and its identifier (mid)

        * width: bit-width of z
        * id_wid: bit-width of mid
    """
    def __init__(self, width, id_wid):
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the two assignments that copy i.z and i.mid into this
            object's z and mid
        """
        z_assign = self.z.eq(i.z)
        mid_assign = self.mid.eq(i.mid)
        return [z_assign, mid_assign]

    def ports(self):
        """ returns z and mid, in that order, as a list
        """
        fields = (self.z, self.mid)
        return list(fields)
1860
1861
# plain attribute holder laid out like FPADDStageOut (z, mid), so that an
# eq function can perform its assignments from one of these
class PlaceHolder:
    pass
1864
1865
class FPAddBaseStage:
    """ pipeline stage whose output z is the input's a + b, with the
        input's mid passed straight through

        * width, id_wid: forwarded to the input and output spec objects
    """
    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ a fresh input data object """
        spec = FPADDBaseData(self.width, self.id_wid)
        return spec

    def ospec(self):
        """ a fresh output data object """
        spec = FPADDStageOut(self.width, self.id_wid)
        return spec

    def process(self, i):
        """ returns a PlaceHolder carrying z = i.a + i.b and mid = i.mid
        """
        result = PlaceHolder()
        result.z, result.mid = i.a + i.b, i.mid
        return result
1882
1883
class FPADDBasePipe1(UnbufferedPipeline):
    """ an UnbufferedPipeline built around a single FPAddBaseStage
    """
    def __init__(self, width, id_wid):
        UnbufferedPipeline.__init__(self, FPAddBaseStage(width, id_wid))
1888
1889
class FPADDBasePipe(ControlBase):
    """ ControlBase containing one FPADDBasePipe1 as its only pipe
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPADDBasePipe1(width, id_wid)
        # connect() is handed the chain of pipes; its result is kept so
        # that elaborate() can add it to the comb domain
        chain = [self.pipe1]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ registers pipe1 as a submodule and applies the stored
            connection assignments
        """
        m = Module()
        m.submodules.pipe1 = self.pipe1
        m.d.comb += self._eqs
        return m
1901
1902
class PriorityCombPipeline(CombMultiInPipeline):
    """ CombMultiInPipeline that uses an InputPriorityArbiter as its p_mux

        * stage: the stage passed on to CombMultiInPipeline
        * p_len: passed to both the arbiter and CombMultiInPipeline
    """
    def __init__(self, stage, p_len):
        arbiter = InputPriorityArbiter(self, p_len)
        CombMultiInPipeline.__init__(self, stage, p_mux=arbiter, p_len=p_len)

    def ports(self):
        """ the ports of the input multiplexer """
        mux = self.p_mux
        return mux.ports()
1910
1911
class FPAddInPassThruStage:
    """ pass-through stage: input and output are both FPADDBaseData and
        process() hands its argument back untouched
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ a fresh FPADDBaseData """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ same format as the input spec """
        return self.ispec()

    def process(self, i):
        """ identity: returns i itself """
        return i
1918
1919
class FPADDInMuxPipe(PriorityCombPipeline):
    """ priority-arbitrated fan-in of num_rows inputs through a
        pass-through stage

        * width, id_width: given to the FPAddInPassThruStage
        * num_rows: number of inputs (used as p_len)
    """
    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        passthru = FPAddInPassThruStage(width, id_width)
        PriorityCombPipeline.__init__(self, passthru, p_len=self.num_rows)

    def ports(self):
        """ valid/ready/data of every input, then ready/valid/data of
            the single output
        """
        res = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            res += [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        nxt = self.n
        res += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return res
1936
1937
class MuxCombPipeline(CombMultiOutPipeline):
    """ CombMultiOutPipeline in which the stage doubles as the n-way
        multiplexer

        * stage: used both as the stage and as n_mux
        * n_len: passed to CombMultiOutPipeline
    """
    def __init__(self, stage, n_len):
        # HACK: stage is also the n-way multiplexer
        CombMultiOutPipeline.__init__(self, stage, n_mux=stage, n_len=n_len)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        in_data = self.p.i_data
        stage.m_id = in_data.mid

    def ports(self):
        # NOTE(review): this reads self.p_mux although only n_mux is passed
        # to the base class above -- confirm that CombMultiOutPipeline
        # provides a p_mux attribute.
        mux = self.p_mux
        return mux.ports()
1948
1949
class FPAddOutPassThruStage:
    """ pass-through stage: input and output are both FPADDStageOut and
        process() hands its argument back untouched
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ a fresh FPADDStageOut """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ same format as the input spec """
        return self.ispec()

    def process(self, i):
        """ identity: returns i itself """
        return i
1956
1957
class FPADDMuxOutPipe(MuxCombPipeline):
    """ fan-out of one input to num_rows outputs through a pass-through
        stage

        * width, id_wid: given to the FPAddOutPassThruStage
        * num_rows: number of outputs (used as n_len)
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        MuxCombPipeline.__init__(self, passthru, n_len=self.num_rows)

    def ports(self):
        """ valid/ready/data of the single input, then ready/valid/data
            of every output
        """
        prev = self.p
        res = [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            res += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return res
1973
1974
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        fan-in on the input side (inpipe), one add pipeline in the
        middle (fpadd), fan-out on the output side (outpipe).

        * width, id_wid: passed to all three pipes
        * num_rows: number of inputs of inpipe and outputs of outpipe
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's own in/out are simply those of the outer two pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        in_ports = self.inpipe.ports()
        out_ports = self.outpipe.ports()
        self._ports = in_ports + out_ports

    def elaborate(self, platform):
        """ registers the three pipes as submodules and chains them:
            inpipe -> fpadd -> outpipe
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        in_to_add = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += in_to_add
        add_to_out = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += add_to_out

        return m

    def ports(self):
        """ the port list gathered at construction time """
        return self._ports
2003
2004
class ResArray:
    """ array of result operands (out_z_0 .. out_z_<rs_sz-1>) plus one
        incoming operand (in_z) and its identifier (in_mid)
    """
    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width given to every FPOp created here
            * id_wid: bit-width of in_mid
            * rs_sz: number of result entries.  this used to be read from
              an undefined name; it is now a keyword with the same
              default (2) that FPADD uses for its own rs_sz
        """
        self.width = width
        self.id_wid = id_wid
        self.rs_sz = rs_sz
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ copies (comb) the given in_z and in_mid into this object's
            in_z and in_mid
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for the result array: in_z and
            every result entry are registered as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ the concatenated ports of every result entry, in index order
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2036
2037
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.

        NOTE(review): the class derives from FPID, yet __init__ does not
        call FPID.__init__ and instead holds a separate FPID instance in
        self.ids -- confirm which of the two is intended.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs created in
              self.rs, and of result operands created in self.res
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs, one per row, named in_a_<i> / in_b_<i>
        rs = []
        for i in range(rs_sz):
            in_a  = FPOp(width)
            in_b  = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # result operands, one per row, named out_z_<i>
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # filled by add_state; walked in order by get_fragment
        self.states = []

    def add_state(self, state):
        """ appends state to self.states and returns that same state
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            builds the states (get_a, get_b, the FPADDBase state, put_z),
            then emits one FSM state per registered state, in
            registration order.  returns the Module.
        """
        m = Module()
        m.submodules += self.rs

        # only row 0 of the operand array is wired up here
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        # the two strings are presumably this state's name and the next
        # state's name -- confirm against FPGetOp
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        # a, b and the incoming id are registered (sync) into the spec
        # object, which is then handed to FPADDBase.setup as its input;
        # get_b's out_decode is passed as the add strobe
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # output state: given the adder's z and mid, the result array and
        # "get_a" (presumably the state to go to afterwards -- confirm
        # against FPPutZIdx).  pz itself is not used again here.
        pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                    o.mid, "get_a"))

        with m.FSM() as fsm:

            # each state contributes its action() under its own
            # state_from name
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2127
2128
if __name__ == "__main__":
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports() +
                         alu.rs[0][1].ports() +
                         alu.res[0].ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        # NOTE(review): disabled alternative.  the attributes read below
        # (in_a, in_b, out_z, in_mid, out_mid) are not among those that
        # FPADDBase.__init__ assigns -- looks stale, confirm before
        # re-enabling.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))
2142
2143
2144     # works... but don't use, just do "python fname.py convert -t v"
2145     #print (verilog.convert(alu, ports=[
2146     #                        ports=alu.in_a.ports() + \
2147     #                              alu.in_b.ports() + \
2148     #                              alu.out_z.ports())