src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
  13 from multipipe import CombMultiOutPipeline
  14 from multipipe import PriorityCombMuxInPipe
  15
  16 #from fpbase import FPNumShiftMultiRight
  17
  18
  19 class FPState(FPBase):
  20     def __init__(self, state_from):
  21         self.state_from = state_from
  22
  23     def set_inputs(self, inputs):
  24         self.inputs = inputs
  25         for k,v in inputs.items():
  26             setattr(self, k, v)
  27
  28     def set_outputs(self, outputs):
  29         self.outputs = outputs
  30         for k,v in outputs.items():
  31             setattr(self, k, v)
  32
  33
  34 class FPGetSyncOpsMod:
  35     def __init__(self, width, num_ops=2):
  36         self.width = width
  37         self.num_ops = num_ops
  38         inops = []
  39         outops = []
  40         for i in range(num_ops):
  41             inops.append(Signal(width, reset_less=True))
  42             outops.append(Signal(width, reset_less=True))
  43         self.in_op = inops
  44         self.out_op = outops
  45         self.stb = Signal(num_ops)
  46         self.ack = Signal()
  47         self.ready = Signal(reset_less=True)
  48         self.out_decode = Signal(reset_less=True)
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  53         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  54         with m.If(self.out_decode):
  55             for i in range(self.num_ops):
  56                 m.d.comb += [
  57                         self.out_op[i].eq(self.in_op[i]),
  58                 ]
  59         return m
  60
  61     def ports(self):
  62         return self.in_op + self.out_op + [self.stb, self.ack]
  63
  64
  65 class FPOps(Trigger):
  66     def __init__(self, width, num_ops):
  67         Trigger.__init__(self)
  68         self.width = width
  69         self.num_ops = num_ops
  70
  71         res = []
  72         for i in range(num_ops):
  73             res.append(Signal(width))
  74         self.v  = Array(res)
  75
  76     def ports(self):
  77         res = []
  78         for i in range(self.num_ops):
  79             res.append(self.v[i])
  80         res.append(self.ack)
  81         res.append(self.stb)
  82         return res
  83
  84
  85 class InputGroup:
  86     def __init__(self, width, num_ops=2, num_rows=4):
  87         self.width = width
  88         self.num_ops = num_ops
  89         self.num_rows = num_rows
  90         self.mmax = int(log(self.num_rows) / log(2))
  91         self.rs = []
  92         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  93         for i in range(num_rows):
  94             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  95         self.rs = Array(self.rs)
  96
  97         self.out_op = FPOps(width, num_ops)
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101
 102         pe = PriorityEncoder(self.num_rows)
 103         m.submodules.selector = pe
 104         m.submodules.out_op = self.out_op
 105         m.submodules += self.rs
 106
 107         # connect priority encoder
 108         in_ready = []
 109         for i in range(self.num_rows):
 110             in_ready.append(self.rs[i].ready)
 111         m.d.comb += pe.i.eq(Cat(*in_ready))
 112
 113         active = Signal(reset_less=True)
 114         out_en = Signal(reset_less=True)
 115         m.d.comb += active.eq(~pe.n) # encoder active
 116         m.d.comb += out_en.eq(active & self.out_op.trigger)
 117
 118         # encoder active: ack relevant input, record MID, pass output
 119         with m.If(out_en):
 120             rs = self.rs[pe.o]
 121             m.d.sync += self.mid.eq(pe.o)
 122             m.d.sync += rs.ack.eq(0)
 123             m.d.sync += self.out_op.stb.eq(0)
 124             for j in range(self.num_ops):
 125                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 126         with m.Else():
 127             m.d.sync += self.out_op.stb.eq(1)
 128             # acks all default to zero
 129             for i in range(self.num_rows):
 130                 m.d.sync += self.rs[i].ack.eq(1)
 131
 132         return m
 133
 134     def ports(self):
 135         res = []
 136         for i in range(self.num_rows):
 137             inop = self.rs[i]
 138             res += inop.in_op + [inop.stb]
 139         return self.out_op.ports() + res + [self.mid]
 140
 141
 142 class FPGetOpMod:
 143     def __init__(self, width):
 144         self.in_op = FPOp(width)
 145         self.out_op = Signal(width)
 146         self.out_decode = Signal(reset_less=True)
 147
 148     def elaborate(self, platform):
 149         m = Module()
 150         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 151         m.submodules.get_op_in = self.in_op
 152         #m.submodules.get_op_out = self.out_op
 153         with m.If(self.out_decode):
 154             m.d.comb += [
 155                 self.out_op.eq(self.in_op.v),
 156             ]
 157         return m
 158
 159
 160 class FPGetOp(FPState):
 161     """ gets operand
 162     """
 163
 164     def __init__(self, in_state, out_state, in_op, width):
 165         FPState.__init__(self, in_state)
 166         self.out_state = out_state
 167         self.mod = FPGetOpMod(width)
 168         self.in_op = in_op
 169         self.out_op = Signal(width)
 170         self.out_decode = Signal(reset_less=True)
 171
 172     def setup(self, m, in_op):
 173         """ links module to inputs and outputs
 174         """
 175         setattr(m.submodules, self.state_from, self.mod)
 176         m.d.comb += self.mod.in_op.eq(in_op)
 177         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 178
 179     def action(self, m):
 180         with m.If(self.out_decode):
 181             m.next = self.out_state
 182             m.d.sync += [
 183                 self.in_op.ack.eq(0),
 184                 self.out_op.eq(self.mod.out_op)
 185             ]
 186         with m.Else():
 187             m.d.sync += self.in_op.ack.eq(1)
 188
 189
 190 class FPNumBase2Ops:
 191
 192     def __init__(self, width, id_wid, m_extra=True):
 193         self.a = FPNumBase(width, m_extra)
 194         self.b = FPNumBase(width, m_extra)
 195         self.mid = Signal(id_wid, reset_less=True)
 196
 197     def eq(self, i):
 198         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 199
 200     def ports(self):
 201         return [self.a, self.b, self.mid]
 202
 203
 204 class FPADDBaseData:
 205
 206     def __init__(self, width, id_wid):
 207         self.width = width
 208         self.id_wid = id_wid
 209         self.a  = Signal(width)
 210         self.b  = Signal(width)
 211         self.mid = Signal(id_wid, reset_less=True)
 212
 213     def eq(self, i):
 214         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 215
 216     def ports(self):
 217         return [self.a, self.b, self.mid]
 218
 219
 220 class FPGet2OpMod(Trigger):
 221     def __init__(self, width, id_wid):
 222         Trigger.__init__(self)
 223         self.width = width
 224         self.id_wid = id_wid
 225         self.i = self.ispec()
 226         self.o = self.ospec()
 227
 228     def ispec(self):
 229         return FPADDBaseData(self.width, self.id_wid)
 230
 231     def ospec(self):
 232         return FPADDBaseData(self.width, self.id_wid)
 233
 234     def process(self, i):
 235         return self.o
 236
 237     def elaborate(self, platform):
 238         m = Trigger.elaborate(self, platform)
 239         with m.If(self.trigger):
 240             m.d.comb += [
 241                 self.o.eq(self.i),
 242             ]
 243         return m
 244
 245
 246 class FPGet2Op(FPState):
 247     """ gets operands
 248     """
 249
 250     def __init__(self, in_state, out_state, width, id_wid):
 251         FPState.__init__(self, in_state)
 252         self.out_state = out_state
 253         self.mod = FPGet2OpMod(width, id_wid)
 254         self.o = self.mod.ospec()
 255         self.in_stb = Signal(reset_less=True)
 256         self.out_ack = Signal(reset_less=True)
 257         self.out_decode = Signal(reset_less=True)
 258
 259     def setup(self, m, i, in_stb, in_ack):
 260         """ links module to inputs and outputs
 261         """
 262         m.submodules.get_ops = self.mod
 263         m.d.comb += self.mod.i.eq(i)
 264         m.d.comb += self.mod.stb.eq(in_stb)
 265         m.d.comb += self.out_ack.eq(self.mod.ack)
 266         m.d.comb += self.out_decode.eq(self.mod.trigger)
 267         m.d.comb += in_ack.eq(self.mod.ack)
 268
 269     def action(self, m):
 270         with m.If(self.out_decode):
 271             m.next = self.out_state
 272             m.d.sync += [
 273                 self.mod.ack.eq(0),
 274                 self.o.eq(self.mod.o),
 275             ]
 276         with m.Else():
 277             m.d.sync += self.mod.ack.eq(1)
 278
 279
 280 class FPSCData:
 281
 282     def __init__(self, width, id_wid):
 283         self.a = FPNumBase(width, True)
 284         self.b = FPNumBase(width, True)
 285         self.z = FPNumOut(width, False)
 286         self.oz = Signal(width, reset_less=True)
 287         self.out_do_z = Signal(reset_less=True)
 288         self.mid = Signal(id_wid, reset_less=True)
 289
 290     def eq(self, i):
 291         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 292                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 293
 294
 295 class FPAddSpecialCasesMod:
 296     """ special cases: NaNs, infs, zeros, denormalised
 297         NOTE: some of these are unique to add.  see "Special Operations"
 298         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 299     """
 300
 301     def __init__(self, width, id_wid):
 302         self.width = width
 303         self.id_wid = id_wid
 304         self.i = self.ispec()
 305         self.o = self.ospec()
 306
 307     def ispec(self):
 308         return FPADDBaseData(self.width, self.id_wid)
 309
 310     def ospec(self):
 311         return FPSCData(self.width, self.id_wid)
 312
 313     def setup(self, m, i):
 314         """ links module to inputs and outputs
 315         """
 316         m.submodules.specialcases = self
 317         m.d.comb += self.i.eq(i)
 318
 319     def process(self, i):
 320         return self.o
 321
 322     def elaborate(self, platform):
 323         m = Module()
 324
 325         m.submodules.sc_out_z = self.o.z
 326
 327         # decode: XXX really should move to separate stage
 328         a1 = FPNumIn(None, self.width)
 329         b1 = FPNumIn(None, self.width)
 330         m.submodules.sc_decode_a = a1
 331         m.submodules.sc_decode_b = b1
 332         m.d.comb += [a1.decode(self.i.a),
 333                      b1.decode(self.i.b),
 334                     ]
 335
 336         s_nomatch = Signal()
 337         m.d.comb += s_nomatch.eq(a1.s != b1.s)
 338
 339         m_match = Signal()
 340         m.d.comb += m_match.eq(a1.m == b1.m)
 341
 342         # if a is NaN or b is NaN return NaN
 343         with m.If(a1.is_nan | b1.is_nan):
 344             m.d.comb += self.o.out_do_z.eq(1)
 345             m.d.comb += self.o.z.nan(0)
 346
 347         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 348         # under review
 349
 350         ## if a is zero and b is NaN return -b
 351         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 352         #    m.d.comb += self.o.out_do_z.eq(1)
 353         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 354
 355         ## if b is zero and a is NaN return -a
 356         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 357         #    m.d.comb += self.o.out_do_z.eq(1)
 358         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 359
 360         ## if a is -zero and b is NaN return -b
 361         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 362         #    m.d.comb += self.o.out_do_z.eq(1)
 363         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 364
 365         ## if b is -zero and a is NaN return -a
 366         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 367         #    m.d.comb += self.o.out_do_z.eq(1)
 368         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 369
 370         # if a is inf return inf (or NaN)
 371         with m.Elif(a1.is_inf):
 372             m.d.comb += self.o.out_do_z.eq(1)
 373             m.d.comb += self.o.z.inf(a1.s)
 374             # if a is inf and signs don't match return NaN
 375             with m.If(b1.exp_128 & s_nomatch):
 376                 m.d.comb += self.o.z.nan(0)
 377
 378         # if b is inf return inf
 379         with m.Elif(b1.is_inf):
 380             m.d.comb += self.o.out_do_z.eq(1)
 381             m.d.comb += self.o.z.inf(b1.s)
 382
 383         # if a is zero and b zero return signed-a/b
 384         with m.Elif(a1.is_zero & b1.is_zero):
 385             m.d.comb += self.o.out_do_z.eq(1)
 386             m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])
 387
 388         # if a is zero return b
 389         with m.Elif(a1.is_zero):
 390             m.d.comb += self.o.out_do_z.eq(1)
 391             m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])
 392
 393         # if b is zero return a
 394         with m.Elif(b1.is_zero):
 395             m.d.comb += self.o.out_do_z.eq(1)
 396             m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])
 397
 398         # if a equal to -b return zero (+ve zero)
 399         with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
 400             m.d.comb += self.o.out_do_z.eq(1)
 401             m.d.comb += self.o.z.zero(0)
 402
 403         # Denormalised Number checks next, so pass a/b data through
 404         with m.Else():
 405             m.d.comb += self.o.out_do_z.eq(0)
 406             m.d.comb += self.o.a.eq(a1)
 407             m.d.comb += self.o.b.eq(b1)
 408
 409         m.d.comb += self.o.oz.eq(self.o.z.v)
 410         m.d.comb += self.o.mid.eq(self.i.mid)
 411
 412         return m
 413
 414
 415 class FPID:
 416     def __init__(self, id_wid):
 417         self.id_wid = id_wid
 418         if self.id_wid:
 419             self.in_mid = Signal(id_wid, reset_less=True)
 420             self.out_mid = Signal(id_wid, reset_less=True)
 421         else:
 422             self.in_mid = None
 423             self.out_mid = None
 424
 425     def idsync(self, m):
 426         if self.id_wid is not None:
 427             m.d.sync += self.out_mid.eq(self.in_mid)
 428
 429
 430 class FPAddSpecialCases(FPState):
 431     """ special cases: NaNs, infs, zeros, denormalised
 432         NOTE: some of these are unique to add.  see "Special Operations"
 433         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 434     """
 435
 436     def __init__(self, width, id_wid):
 437         FPState.__init__(self, "special_cases")
 438         self.mod = FPAddSpecialCasesMod(width)
 439         self.out_z = self.mod.ospec()
 440         self.out_do_z = Signal(reset_less=True)
 441
 442     def setup(self, m, i):
 443         """ links module to inputs and outputs
 444         """
 445         self.mod.setup(m, i, self.out_do_z)
 446         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 447         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 448
 449     def action(self, m):
 450         self.idsync(m)
 451         with m.If(self.out_do_z):
 452             m.next = "put_z"
 453         with m.Else():
 454             m.next = "denormalise"
 455
 456
 457 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 458     """ special cases: NaNs, infs, zeros, denormalised
 459         NOTE: some of these are unique to add.  see "Special Operations"
 460         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 461     """
 462
 463     def __init__(self, width, id_wid):
 464         FPState.__init__(self, "special_cases")
 465         self.smod = FPAddSpecialCasesMod(width, id_wid)
 466         self.dmod = FPAddDeNormMod(width, id_wid)
 467         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 468         self.o = self.ospec()
 469
 470     def ispec(self):
 471         return self.smod.ispec()
 472
 473     def ospec(self):
 474         return self.dmod.ospec()
 475
 476     def setup(self, m, i):
 477         """ links module to inputs and outputs
 478         """
 479         # these only needed for break-out (early-out)
 480         # out_z = self.smod.ospec()
 481         # out_do_z = Signal(reset_less=True)
 482         self.smod.setup(m, i)
 483         self.dmod.setup(m, self.smod.o)
 484         #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
 485
 486         # out_do_z=True, only needed for early-out (split pipeline)
 487         #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
 488         #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
 489
 490         # out_do_z=False
 491         m.d.comb += self.o.eq(self.dmod.o)
 492
 493     def process(self, i):
 494         return self.o
 495
 496     def action(self, m):
 497         #with m.If(self.out_do_z):
 498         #    m.next = "put_z"
 499         #with m.Else():
 500             m.next = "align"
 501
 502
 503 class FPAddDeNormMod(FPState):
 504
 505     def __init__(self, width, id_wid):
 506         self.width = width
 507         self.id_wid = id_wid
 508         self.i = self.ispec()
 509         self.o = self.ospec()
 510
 511     def ispec(self):
 512         return FPSCData(self.width, self.id_wid)
 513
 514     def ospec(self):
 515         return FPSCData(self.width, self.id_wid)
 516
 517     def setup(self, m, i):
 518         """ links module to inputs and outputs
 519         """
 520         m.submodules.denormalise = self
 521         m.d.comb += self.i.eq(i)
 522
 523     def elaborate(self, platform):
 524         m = Module()
 525         m.submodules.denorm_in_a = self.i.a
 526         m.submodules.denorm_in_b = self.i.b
 527         m.submodules.denorm_out_a = self.o.a
 528         m.submodules.denorm_out_b = self.o.b
 529
 530         with m.If(~self.i.out_do_z):
 531             # XXX hmmm, don't like repeating identical code
 532             m.d.comb += self.o.a.eq(self.i.a)
 533             with m.If(self.i.a.exp_n127):
 534                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 535             with m.Else():
 536                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 537
 538             m.d.comb += self.o.b.eq(self.i.b)
 539             with m.If(self.i.b.exp_n127):
 540                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 541             with m.Else():
 542                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 543
 544         m.d.comb += self.o.mid.eq(self.i.mid)
 545         m.d.comb += self.o.z.eq(self.i.z)
 546         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 547         m.d.comb += self.o.oz.eq(self.i.oz)
 548
 549         return m
 550
 551
 552 class FPAddDeNorm(FPState):
 553
 554     def __init__(self, width, id_wid):
 555         FPState.__init__(self, "denormalise")
 556         self.mod = FPAddDeNormMod(width)
 557         self.out_a = FPNumBase(width)
 558         self.out_b = FPNumBase(width)
 559
 560     def setup(self, m, i):
 561         """ links module to inputs and outputs
 562         """
 563         self.mod.setup(m, i)
 564
 565         m.d.sync += self.out_a.eq(self.mod.out_a)
 566         m.d.sync += self.out_b.eq(self.mod.out_b)
 567
 568     def action(self, m):
 569         # Denormalised Number checks
 570         m.next = "align"
 571
 572
 573 class FPAddAlignMultiMod(FPState):
 574
 575     def __init__(self, width):
 576         self.in_a = FPNumBase(width)
 577         self.in_b = FPNumBase(width)
 578         self.out_a = FPNumIn(None, width)
 579         self.out_b = FPNumIn(None, width)
 580         self.exp_eq = Signal(reset_less=True)
 581
 582     def elaborate(self, platform):
 583         # This one however (single-cycle) will do the shift
 584         # in one go.
 585
 586         m = Module()
 587
 588         m.submodules.align_in_a = self.in_a
 589         m.submodules.align_in_b = self.in_b
 590         m.submodules.align_out_a = self.out_a
 591         m.submodules.align_out_b = self.out_b
 592
 593         # NOTE: this does *not* do single-cycle multi-shifting,
 594         #       it *STAYS* in the align state until exponents match
 595
 596         # exponent of a greater than b: shift b down
 597         m.d.comb += self.exp_eq.eq(0)
 598         m.d.comb += self.out_a.eq(self.in_a)
 599         m.d.comb += self.out_b.eq(self.in_b)
 600         agtb = Signal(reset_less=True)
 601         altb = Signal(reset_less=True)
 602         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 603         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 604         with m.If(agtb):
 605             m.d.comb += self.out_b.shift_down(self.in_b)
 606         # exponent of b greater than a: shift a down
 607         with m.Elif(altb):
 608             m.d.comb += self.out_a.shift_down(self.in_a)
 609         # exponents equal: move to next stage.
 610         with m.Else():
 611             m.d.comb += self.exp_eq.eq(1)
 612         return m
 613
 614
 615 class FPAddAlignMulti(FPState):
 616
 617     def __init__(self, width, id_wid):
 618         FPState.__init__(self, "align")
 619         self.mod = FPAddAlignMultiMod(width)
 620         self.out_a = FPNumIn(None, width)
 621         self.out_b = FPNumIn(None, width)
 622         self.exp_eq = Signal(reset_less=True)
 623
 624     def setup(self, m, in_a, in_b):
 625         """ links module to inputs and outputs
 626         """
 627         m.submodules.align = self.mod
 628         m.d.comb += self.mod.in_a.eq(in_a)
 629         m.d.comb += self.mod.in_b.eq(in_b)
 630         #m.d.comb += self.out_a.eq(self.mod.out_a)
 631         #m.d.comb += self.out_b.eq(self.mod.out_b)
 632         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 633         m.d.sync += self.out_a.eq(self.mod.out_a)
 634         m.d.sync += self.out_b.eq(self.mod.out_b)
 635
 636     def action(self, m):
 637         with m.If(self.exp_eq):
 638             m.next = "add_0"
 639
 640
 641 class FPNumIn2Ops:
 642
 643     def __init__(self, width, id_wid):
 644         self.a = FPNumIn(None, width)
 645         self.b = FPNumIn(None, width)
 646         self.z = FPNumOut(width, False)
 647         self.out_do_z = Signal(reset_less=True)
 648         self.oz = Signal(width, reset_less=True)
 649         self.mid = Signal(id_wid, reset_less=True)
 650
 651     def eq(self, i):
 652         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 653                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 654
 655
 656 class FPAddAlignSingleMod:
 657
 658     def __init__(self, width, id_wid):
 659         self.width = width
 660         self.id_wid = id_wid
 661         self.i = self.ispec()
 662         self.o = self.ospec()
 663
 664     def ispec(self):
 665         return FPSCData(self.width, self.id_wid)
 666
 667     def ospec(self):
 668         return FPNumIn2Ops(self.width, self.id_wid)
 669
 670     def process(self, i):
 671         return self.o
 672
 673     def setup(self, m, i):
 674         """ links module to inputs and outputs
 675         """
 676         m.submodules.align = self
 677         m.d.comb += self.i.eq(i)
 678
 679     def elaborate(self, platform):
 680         """ Aligns A against B or B against A, depending on which has the
 681             greater exponent.  This is done in a *single* cycle using
 682             variable-width bit-shift
 683
 684             the shifter used here is quite expensive in terms of gates.
 685             Mux A or B in (and out) into temporaries, as only one of them
 686             needs to be aligned against the other
 687         """
 688         m = Module()
 689
 690         m.submodules.align_in_a = self.i.a
 691         m.submodules.align_in_b = self.i.b
 692         m.submodules.align_out_a = self.o.a
 693         m.submodules.align_out_b = self.o.b
 694
 695         # temporary (muxed) input and output to be shifted
 696         t_inp = FPNumBase(self.width)
 697         t_out = FPNumIn(None, self.width)
 698         espec = (len(self.i.a.e), True)
 699         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 700         m.submodules.align_t_in = t_inp
 701         m.submodules.align_t_out = t_out
 702         m.submodules.multishift_r = msr
 703
 704         ediff = Signal(espec, reset_less=True)
 705         ediffr = Signal(espec, reset_less=True)
 706         tdiff = Signal(espec, reset_less=True)
 707         elz = Signal(reset_less=True)
 708         egz = Signal(reset_less=True)
 709
 710         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 711         m.d.comb += msr.inp.eq(t_inp.m)
 712         m.d.comb += msr.diff.eq(tdiff)
 713         m.d.comb += t_out.m.eq(msr.m)
 714         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 715         m.d.comb += t_out.s.eq(t_inp.s)
 716
 717         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 718         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 719         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 720         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 721
 722         # default: A-exp == B-exp, A and B untouched (fall through)
 723         m.d.comb += self.o.a.eq(self.i.a)
 724         m.d.comb += self.o.b.eq(self.i.b)
 725         # only one shifter (muxed)
 726         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 727         # exponent of a greater than b: shift b down
 728         with m.If(~self.i.out_do_z):
 729             with m.If(egz):
 730                 m.d.comb += [t_inp.eq(self.i.b),
 731                              tdiff.eq(ediff),
 732                              self.o.b.eq(t_out),
 733                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 734                             ]
 735             # exponent of b greater than a: shift a down
 736             with m.Elif(elz):
 737                 m.d.comb += [t_inp.eq(self.i.a),
 738                              tdiff.eq(ediffr),
 739                              self.o.a.eq(t_out),
 740                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 741                             ]
 742
 743         m.d.comb += self.o.mid.eq(self.i.mid)
 744         m.d.comb += self.o.z.eq(self.i.z)
 745         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 746         m.d.comb += self.o.oz.eq(self.i.oz)
 747
 748         return m
 749
 750
 751 class FPAddAlignSingle(FPState):
 752
 753     def __init__(self, width, id_wid):
 754         FPState.__init__(self, "align")
 755         self.mod = FPAddAlignSingleMod(width, id_wid)
 756         self.out_a = FPNumIn(None, width)
 757         self.out_b = FPNumIn(None, width)
 758
 759     def setup(self, m, i):
 760         """ links module to inputs and outputs
 761         """
 762         self.mod.setup(m, i)
 763
 764         # NOTE: could be done as comb
 765         m.d.sync += self.out_a.eq(self.mod.out_a)
 766         m.d.sync += self.out_b.eq(self.mod.out_b)
 767
 768     def action(self, m):
 769         m.next = "add_0"
 770
 771
 772 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 773
 774     def __init__(self, width, id_wid):
 775         FPState.__init__(self, "align")
 776         self.width = width
 777         self.id_wid = id_wid
 778         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 779         self.a1o = self.ospec()
 780
 781     def ispec(self):
 782         return FPSCData(self.width, self.id_wid)
 783         #return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
 784
 785     def ospec(self):
 786         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 787
 788     def setup(self, m, i):
 789         """ links module to inputs and outputs
 790         """
 791
 792         # chain AddAlignSingle, AddStage0 and AddStage1
 793         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 794         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 795         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 796
 797         chain = StageChain([mod, a0mod, a1mod])
 798         chain.setup(m, i)
 799
 800         m.d.comb += self.a1o.eq(a1mod.o)
 801
 802     def process(self, i):
 803         return self.a1o
 804
 805     def action(self, m):
 806         m.next = "normalise_1"
 807
 808
 809 class FPAddStage0Data:
 810
 811     def __init__(self, width, id_wid):
 812         self.z = FPNumBase(width, False)
 813         self.out_do_z = Signal(reset_less=True)
 814         self.oz = Signal(width, reset_less=True)
 815         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 816         self.mid = Signal(id_wid, reset_less=True)
 817
 818     def eq(self, i):
 819         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 820                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 821
 822
 823 class FPAddStage0Mod:
 824
 825     def __init__(self, width, id_wid):
 826         self.width = width
 827         self.id_wid = id_wid
 828         self.i = self.ispec()
 829         self.o = self.ospec()
 830
 831     def ispec(self):
 832         return FPSCData(self.width, self.id_wid)
 833
 834     def ospec(self):
 835         return FPAddStage0Data(self.width, self.id_wid)
 836
 837     def process(self, i):
 838         return self.o
 839
 840     def setup(self, m, i):
 841         """ links module to inputs and outputs
 842         """
 843         m.submodules.add0 = self
 844         m.d.comb += self.i.eq(i)
 845
 846     def elaborate(self, platform):
 847         m = Module()
 848         m.submodules.add0_in_a = self.i.a
 849         m.submodules.add0_in_b = self.i.b
 850         m.submodules.add0_out_z = self.o.z
 851
 852         # store intermediate tests (and zero-extended mantissas)
 853         seq = Signal(reset_less=True)
 854         mge = Signal(reset_less=True)
 855         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 856         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 857         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 858                      mge.eq(self.i.a.m >= self.i.b.m),
 859                      am0.eq(Cat(self.i.a.m, 0)),
 860                      bm0.eq(Cat(self.i.b.m, 0))
 861                     ]
 862         # same-sign (both negative or both positive) add mantissas
 863         with m.If(~self.i.out_do_z):
 864             m.d.comb += self.o.z.e.eq(self.i.a.e)
 865             with m.If(seq):
 866                 m.d.comb += [
 867                     self.o.tot.eq(am0 + bm0),
 868                     self.o.z.s.eq(self.i.a.s)
 869                 ]
 870             # a mantissa greater than b, use a
 871             with m.Elif(mge):
 872                 m.d.comb += [
 873                     self.o.tot.eq(am0 - bm0),
 874                     self.o.z.s.eq(self.i.a.s)
 875                 ]
 876             # b mantissa greater than a, use b
 877             with m.Else():
 878                 m.d.comb += [
 879                     self.o.tot.eq(bm0 - am0),
 880                     self.o.z.s.eq(self.i.b.s)
 881             ]
 882
 883         m.d.comb += self.o.oz.eq(self.i.oz)
 884         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 885         m.d.comb += self.o.mid.eq(self.i.mid)
 886         return m
 887
 888
 889 class FPAddStage0(FPState):
 890     """ First stage of add.  covers same-sign (add) and subtract
 891         special-casing when mantissas are greater or equal, to
 892         give greatest accuracy.
 893     """
 894
 895     def __init__(self, width, id_wid):
 896         FPState.__init__(self, "add_0")
 897         self.mod = FPAddStage0Mod(width)
 898         self.o = self.mod.ospec()
 899
 900     def setup(self, m, i):
 901         """ links module to inputs and outputs
 902         """
 903         self.mod.setup(m, i)
 904
 905         # NOTE: these could be done as combinatorial (merge add0+add1)
 906         m.d.sync += self.o.eq(self.mod.o)
 907
 908     def action(self, m):
 909         m.next = "add_1"
 910
 911
 912 class FPAddStage1Data:
 913
 914     def __init__(self, width, id_wid):
 915         self.z = FPNumBase(width, False)
 916         self.out_do_z = Signal(reset_less=True)
 917         self.oz = Signal(width, reset_less=True)
 918         self.of = Overflow()
 919         self.mid = Signal(id_wid, reset_less=True)
 920
 921     def eq(self, i):
 922         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 923                 self.of.eq(i.of), self.mid.eq(i.mid)]
 924
 925
 926
 927 class FPAddStage1Mod(FPState):
 928     """ Second stage of add: preparation for normalisation.
 929         detects when tot sum is too big (tot[27] is kinda a carry bit)
 930     """
 931
 932     def __init__(self, width, id_wid):
 933         self.width = width
 934         self.id_wid = id_wid
 935         self.i = self.ispec()
 936         self.o = self.ospec()
 937
 938     def ispec(self):
 939         return FPAddStage0Data(self.width, self.id_wid)
 940
 941     def ospec(self):
 942         return FPAddStage1Data(self.width, self.id_wid)
 943
 944     def process(self, i):
 945         return self.o
 946
 947     def setup(self, m, i):
 948         """ links module to inputs and outputs
 949         """
 950         m.submodules.add1 = self
 951         m.submodules.add1_out_overflow = self.o.of
 952
 953         m.d.comb += self.i.eq(i)
 954
 955     def elaborate(self, platform):
 956         m = Module()
 957         #m.submodules.norm1_in_overflow = self.in_of
 958         #m.submodules.norm1_out_overflow = self.out_of
 959         #m.submodules.norm1_in_z = self.in_z
 960         #m.submodules.norm1_out_z = self.out_z
 961         m.d.comb += self.o.z.eq(self.i.z)
 962         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 963         with m.If(~self.i.out_do_z):
 964             with m.If(self.i.tot[-1]):
 965                 m.d.comb += [
 966                     self.o.z.m.eq(self.i.tot[4:]),
 967                     self.o.of.m0.eq(self.i.tot[4]),
 968                     self.o.of.guard.eq(self.i.tot[3]),
 969                     self.o.of.round_bit.eq(self.i.tot[2]),
 970                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 971                     self.o.z.e.eq(self.i.z.e + 1)
 972             ]
 973             # tot[-1] (MSB) zero case
 974             with m.Else():
 975                 m.d.comb += [
 976                     self.o.z.m.eq(self.i.tot[3:]),
 977                     self.o.of.m0.eq(self.i.tot[3]),
 978                     self.o.of.guard.eq(self.i.tot[2]),
 979                     self.o.of.round_bit.eq(self.i.tot[1]),
 980                     self.o.of.sticky.eq(self.i.tot[0])
 981             ]
 982
 983         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 984         m.d.comb += self.o.oz.eq(self.i.oz)
 985         m.d.comb += self.o.mid.eq(self.i.mid)
 986
 987         return m
 988
 989
 990 class FPAddStage1(FPState):
 991
 992     def __init__(self, width, id_wid):
 993         FPState.__init__(self, "add_1")
 994         self.mod = FPAddStage1Mod(width)
 995         self.out_z = FPNumBase(width, False)
 996         self.out_of = Overflow()
 997         self.norm_stb = Signal()
 998
 999     def setup(self, m, i):
1000         """ links module to inputs and outputs
1001         """
1002         self.mod.setup(m, i)
1003
1004         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
1005
1006         m.d.sync += self.out_of.eq(self.mod.out_of)
1007         m.d.sync += self.out_z.eq(self.mod.out_z)
1008         m.d.sync += self.norm_stb.eq(1)
1009
1010     def action(self, m):
1011         m.next = "normalise_1"
1012
1013
class FPNormaliseModSingle:
    """ Normalisation by shifting the mantissa up: when the mantissa
        MSB is zero, the leading zeros are counted with a
        PriorityEncoder, the mantissa (with guard and round bits
        appended) is shifted up by that amount and the exponent is
        decreased by the same amount.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow records read (in_of) and driven (out_of) by
        # elaborate(); they have to exist before elaboration
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input specification: FPNumBase
        """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output specification: FPNumBase
        """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input number of this module is in_z (there is no self.i)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial normalisation logic
        """
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # NOTE(review): the multi-shifter is registered as a submodule
        # but nothing in this module connects to it - confirm whether
        # it is still needed.
        espec = (len(in_z.e), True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1080
class FPNorm1Data:
    """ data record produced by normalisation: the roundz flag, the
        number z, the out_do_z / oz pair and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into this record
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.roundz, i.roundz),
            (self.mid, i.mid),
        )
        return [dst.eq(src) for dst, src in pairs]
1093
1094
class FPNorm1ModSingle:
    """ Normalisation in one combinatorial pass.

        * "decrease" (m_msbzero and exp_gt_n126): the mantissa is
          shifted up by a limited count-leading-zeros and the exponent
          is decreased by the same amount.
        * "increase" (exp_lt_n126): the mantissa is shifted down through
          a MultiShiftRMerge and the exponent is increased.

        nothing is modified while out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input specification: FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output specification: FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record; i is not used here
        """
        return self.o

    def elaborate(self, platform):
        """ creates the combinatorial normalisation logic
        """
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input record, used by all the logic below
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come from the right-shifted
                    # result (msr.m), the same value the mantissa is
                    # taken from; temp_s belongs to the decrease branch
                    # and is not driven here.
                    of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),      # guard
                    of.round_bit.eq(msr.m[1]),  # round
                    of.sticky.eq(msr.m[0]),     # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1201
1202
class FPNorm1ModMulti:
    """ Normalisation one bit at a time: each evaluation shifts the
        mantissa by one position (up for "decrease", down for
        "increase") and adjusts the exponent by one.  out_norm is set
        while either condition still holds.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: bit-width used for the FPNumBase records
            * single_cycle: accepted for interface compatibility; it is
              not used by this module
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven in elaborate(): set while normalisation is still needed
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """ creates the combinatorial single-step normalisation logic
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1268
1269
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping the single-pass
        normalisation module FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width passed on to FPNorm1ModSingle
            * id_wid: identifier width passed on to FPNorm1ModSingle
            * single_cycle: accepted for interface compatibility; it is
              not used here
        """
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input specification, taken from the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification, taken from the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: moves on to the "round" state
        """
        m.next = "round"
1292
1293
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" wrapping the one-bit-at-a-time
        normalisation module FPNorm1ModMulti.  the state is left (for
        "round") once out_norm is clear.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs

            NOTE(review): relies on self.mod providing a setup() that
            takes these nine arguments - confirm against the module.
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: keeps the intermediate result in temp_z/temp_of
            and stays in this state while out_norm is set.
        """
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            # still normalising: ack follows acceptance of the input
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1334
1335
class FPNormToPack(FPState, UnbufferedPipeline):
    """ FSM state "normalise_1" that is also an UnbufferedPipeline
        using itself as the stage: normalisation, rounding, corrections
        and packing are chained together combinatorially.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        """ input specification (same as FPNorm1ModSingle's ispec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output specification (same as FPPackMod's ospec)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()

        m.d.comb += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z),    # outputs packed result
        ]

    def process(self, i):
        """ returns the packed output record; i is not used here
        """
        return self.out_z

    def action(self, m):
        """ FSM action: moves on to the "pack_put_z" state
        """
        m.next = "pack_put_z"
1371
1372
class FPRoundData:
    """ data record used by the round, corrections and pack stages:
        the number z, the out_do_z / oz pair and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field of i
            into this record
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.mid, i.mid),
        )
        return [dst.eq(src) for dst, src in pairs]
1384
1385
class FPRoundMod:
    """ Rounding: when roundz is set (and out_do_z is clear) the
        mantissa is incremented; if the mantissa was all 1s the
        exponent is incremented as well.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification: FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output specification: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; i is not used here
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module ("roundz") in m and connects the
            incoming data to self.i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial rounding logic
        """
        m = Module()
        inp = self.i
        out = self.out_z

        # default: straight copy of mid, z, out_do_z (and oz)
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.roundz):
                m.d.comb += out.z.m.eq(inp.z.m + 1) # mantissa up
                with m.If(inp.z.m == inp.z.m1s): # all 1s
                    m.d.comb += out.z.e.eq(inp.z.e + 1) # exponent up

        return m
1417
1418
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its output
        on the sync domain.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed on to FPRoundMod
            * id_wid: identifier width passed on to FPRoundMod
        """
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification, taken from the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification, taken from the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined on FPState itself;
        # presumably inherited from FPBase - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod keeps its output record in out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: moves on to the "corrections" state
        """
        m.next = "corrections"
1443
1444
class FPCorrectionsMod:
    """ Corrections: copies the input to the output and, when the
        number is flagged as denormalised (and out_do_z is clear),
        replaces the exponent with N127.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output specification: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; i is not used here
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial corrections logic
        """
        m = Module()
        inp = self.i
        out = self.out_z

        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = out.z
        # default: straight copy of mid, z, out_do_z (and oz)
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                m.d.comb += out.z.e.eq(inp.z.N127)
        return m
1477
1478
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output on the sync domain.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed on to FPCorrectionsMod
            * id_wid: identifier width passed on to FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification, taken from the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification, taken from the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod keeps its output record in out_z
        # (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: moves on to the "pack" state
        """
        m.next = "pack"
1502
1503
class FPPackData:
    """ data record holding the packed result (z) and the id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        pairs = ((self.z, i.z), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]
1512
1513
class FPPackMod:
    """ Packing: produces the output value either from the number z
        (infinity when it is flagged as overflowed, otherwise created
        from sign, exponent and mantissa) or, when out_do_z is set,
        directly from oz.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input specification: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output specification: FPPackData
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; i is not used here
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ creates the combinatorial packing logic
        """
        m = Module()
        z = FPNumOut(self.width, False)
        num = self.i.z

        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            # regular result: built from the (rounded, corrected) number
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            # already-decided result: use oz as-is
            m.d.comb += z.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1552
1553
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its packed
        value and id on the sync domain.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed on to FPPackMod
            * id_wid: identifier width passed on to FPPackMod
        """
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification, taken from the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification, taken from the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module's output record is mod.o (an FPPackData), whose
        # packed value is the plain signal "z"
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: moves on to the "pack_put_z" state
        """
        m.next = "pack_put_z"
1577
1578
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and waits for the
        stb/ack handshake before moving on to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value to output
            * out_z: record whose z member has v, stb and ack
            * in_mid, out_mid: id to copy across (skipped if in_mid is None)
            * to_state: next state, "get_ops" when not given
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: drives the output value and the strobe
        """
        op = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += op.v.eq(self.in_z)
        with m.If(op.stb & op.ack):
            # handshake completed: drop the strobe and move on
            m.d.sync += op.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += op.stb.eq(1)
1602
1603
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on the output selected by in_mid
        from out_zs, and waits for that output's stb/ack handshake
        before moving on to to_state.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: record whose v member is output
            * out_zs: outputs, indexed by in_mid
            * in_mid: index of the output to use
            * to_state: next state, "get_ops" when not given
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: drives the selected output's value and strobe
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        selected = self.out_zs[self.in_mid]
        m.d.comb += outz_stb.eq(selected.stb)
        m.d.comb += outz_ack.eq(selected.ack)
        m.d.sync += selected.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # handshake completed: drop the strobe and move on
            m.d.sync += selected.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += selected.stb.eq(1)
1629
class FPOpData:
    """ output record: an FPOp (z) together with the id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        pairs = ((self.z, i.z), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]

    def ports(self):
        """ returns the members of this record as a list: z, then mid
        """
        members = [self.z]
        members.append(self.mid)
        return members
1640
1641
class FPADDBaseMod:
    """ FP add as a finite state machine: a list of states is built
        (compact or longer variant) and each state's action is placed
        in an FSM state named after its state_from.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input specification: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output specification: FPOpData
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with the chosen variant
        build = (self.get_compact_fragment if self.compact
                 else self.get_longer_fragment)
        build(m, platform)

        # one FSM state per registered state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers one state per processing step: get operands,
            special cases, denormalise, align, add (two stages),
            normalise, round, corrections, pack and the two put-z
            states.

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, which are not assigned in __init__ - confirm
            that this variant is still expected to work.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle implementation
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or multi-cycle implementation
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the packed result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output of the special-cases result
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced set of states: get operands, special
            cases with denormalisation, align with add, normalise
            through pack, and the put-z state.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      width, id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(width, id_wid))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1761
1762
class FPADDBase(FPState):
    """ FSM state "fpadd" wrapped around an FPADDBaseMod submodule

        setup() connects this object's i / o / in_t to the corresponding
        members of the FPADDBaseMod instance (self.mod); action() contains
        the stb/ack handshaking performed whilst in this state.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links this state to its wrapped module inside Module m

            * m: Module receiving the assignments and the submodule
            * i: input data, combinatorially copied into self.i
            * add_stb: strobe, registered (sync) into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ handshaking performed whilst the FSM is in this state

            Whilst z_done is low: if in_accept is high, add_ack and
            in_t.stb are set to 1, otherwise both are set to 0 and o.z.ack
            is set to 1.  Once z_done is high: add_ack is set to 1,
            in_t.stb to 0, and the FSM moves on to "put_z".
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, drop the strobe and hand over to "put_z"
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1852
1853
class FPADDStageOut:
    """ output data record: a value `z` together with an identifier `mid`
    """
    def __init__(self, width, id_wid):
        """
            * width: bit-width of the z Signal
            * id_wid: bit-width of the mid Signal
        """
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid into self
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.mid.eq(i.mid))
        return assigns

    def ports(self):
        """ returns both members, z first, as a list
        """
        members = (self.z, self.mid)
        return list(members)
1864
1865
1866 # matches the format of FPADDStageOut, allows eq function to do assignments
class PlaceHolder:
    """ empty attribute container: attributes are attached after creation
    """
1868
1869
class FPAddBaseStage:
    """ stage whose process() returns a + b alongside the unmodified mid

        ispec() and ospec() create the input and output data formats.
    """
    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ input format: FPADDBaseData of the configured widths
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPADDStageOut of the configured widths
        """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ returns an object with z = i.a + i.b and mid = i.mid
        """
        res = PlaceHolder()
        res.z, res.mid = i.a + i.b, i.mid
        return res
1886
1887
class FPADDBasePipe1(UnbufferedPipeline):
    """ UnbufferedPipeline constructed around a single FPAddBaseStage
    """
    def __init__(self, width, id_wid):
        # the stage is only needed by the base-class constructor
        UnbufferedPipeline.__init__(self, FPAddBaseStage(width, id_wid))
1892
1893
class FPADDBasePipe(ControlBase):
    """ three pipes connected in sequence: FPAddSpecialCasesDeNorm,
        FPAddAlignSingleAdd, then FPNormToPack
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        chain = [FPAddSpecialCasesDeNorm(width, id_wid),
                 FPAddAlignSingleAdd(width, id_wid),
                 FPNormToPack(width, id_wid)]
        self.pipe1, self.pipe2, self.pipe3 = chain

        # connect() result is kept for elaborate() to add to the comb domain
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ registers the three pipes as submodules and adds the connections
        """
        m = Module()
        subs = m.submodules
        subs.scnorm = self.pipe1
        subs.addalign = self.pipe2
        subs.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
1911
1912
class FPAddInPassThruStage:
    """ pass-through stage: output format equals input format (an
        FPADDBaseData) and process() returns its argument untouched
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        # same format as the input: a fresh ispec() object
        return self.ispec()

    def process(self, i):
        return i
1919
1920
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe with num_rows inputs, using a
        FPAddInPassThruStage as its stage
    """
    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        PriorityCombMuxInPipe.__init__(
            self, FPAddInPassThruStage(width, id_width), p_len=num_rows)

    def ports(self):
        """ lists, per input: i_valid, o_ready and the i_data ports;
            followed by the output's i_ready, o_valid and o_data ports
        """
        res = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            res += [prev.i_valid, prev.o_ready, *prev.i_data.ports()]
        nxt = self.n
        res += [nxt.i_ready, nxt.o_valid, *nxt.o_data.ports()]
        return res
1935
1936
class MuxCombPipeline(CombMultiOutPipeline):
    """ CombMultiOutPipeline where one object serves as stage and as n-mux
    """
    def __init__(self, stage, n_len):
        # HACK: stage is also the n-way multiplexer
        CombMultiOutPipeline.__init__(self, stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        in_data = self.p.i_data
        stage.m_id = in_data.mid

    def ports(self):
        # NOTE(review): the constructor hands the mux over as n_mux, yet
        # p_mux is read here - confirm the base class provides p_mux
        mux = self.p_mux
        return mux.ports()
1947
1948
class FPAddOutPassThruStage:
    """ pass-through stage: output format equals input format (an
        FPADDStageOut) and process() returns its argument untouched
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        # same format as the input: a fresh ispec() object
        return self.ispec()

    def process(self, i):
        return i
1955
1956
class FPADDMuxOutPipe(MuxCombPipeline):
    """ MuxCombPipeline with num_rows outputs, using a
        FPAddOutPassThruStage as its stage
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        MuxCombPipeline.__init__(
            self, FPAddOutPassThruStage(width, id_wid), n_len=num_rows)

    def ports(self):
        """ lists the input's i_valid, o_ready and i_data ports; followed,
            per output, by i_ready, o_valid and the o_data ports
        """
        prev = self.p
        res = [prev.i_valid, prev.o_ready, *prev.i_data.ports()]
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            res += [nxt.i_ready, nxt.o_valid, *nxt.o_data.ports()]
        return res
1972
1973
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        Three parts, connected in elaborate(): a fan-in pipe
        (FPADDInMuxPipe), the add pipe (FPADDBasePipe) and a fan-out pipe
        (FPADDMuxOutPipe).  The fan-in pipe's p and the fan-out pipe's n
        are re-exported as this object's own p and n.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        fan_in = FPADDInMuxPipe(width, id_wid, num_rows)
        adder = FPADDBasePipe(width, id_wid)
        fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)
        self.inpipe, self.fpadd, self.outpipe = fan_in, adder, fan_out

        # use pipe in/out as this class in/out
        self.p = fan_in.p
        self.n = fan_out.n
        self._ports = fan_in.ports() + fan_out.ports()

    def elaborate(self, platform):
        """ registers the three pipes as submodules and links them up
        """
        m = Module()
        subs = m.submodules
        subs.inpipe = self.inpipe
        subs.fpadd = self.fpadd
        subs.outpipe = self.outpipe

        # fan-in -> add, then add -> fan-out
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list computed at construction time
        """
        return self._ports
2002
2003
class ResArray:
    """ Array of result operands (FPOp), plus an input operand and id
    """
    def __init__(self, width, id_wid, rs_sz=2):
        """
            * width: bit-width given to every FPOp created here
            * id_wid: bit-width of the in_mid Signal
            * rs_sz: number of result entries to create (default 2)
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        # rs_sz is a parameter: previously an undefined name was used here
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially copies in_z and in_mid into this object's own
            in_z and in_mid
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates a Module with in_z and the result Array as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports() of every result entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2035
2036
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) pairs and of out_z results
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # input operands: one named (in_a, in_b) pair per row
        pairs = []
        for row in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % row
            in_b.name = "in_b_%d" % row
            pairs.append((in_a, in_b))
        self.rs = Array(pairs)

        # results: one named out_z per row
        results = []
        for row in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % row
            results.append(out_z)
        self.res = Array(results)

        # filled in by add_state(), consumed by get_fragment()
        self.states = []

    def add_state(self, state):
        """ records state in self.states and hands it straight back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            Builds the get_a -> get_b -> fpadd -> put_z states from the
            first (in_a, in_b) pair only, then emits an FSM with one
            State per recorded state.
        """
        m = Module()
        m.submodules += self.rs

        # only row 0 of the input pairs is used
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # final state: recorded only, nothing further is wired to it here
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2126
2127
if __name__ == "__main__":
    # command-line entry point: hands an FPADD to nmigen's cli main(),
    # exposing the first operand pair, the first result and the ids
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    ports = (alu.rs[0][0].ports() +
             alu.rs[0][1].ports() +
             alu.res[0].ports() +
             [alu.ids.in_mid, alu.ids.out_mid])
    main(alu, ports=ports)
2141
2142
2143     # works... but don't use, just do "python fname.py convert -t v"
2144     #print (verilog.convert(alu, ports=[
2145     #                        ports=alu.in_a.ports() + \
2146     #                              alu.in_b.ports() + \
2147     #                              alu.out_z.ports())