src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
  13                         PassThroughStage)
  14 from multipipe import CombMuxOutPipe
  15 from multipipe import PriorityCombMuxInPipe
  16
  17 #from fpbase import FPNumShiftMultiRight
  18
  19
  20 class FPState(FPBase):
  21     def __init__(self, state_from):
  22         self.state_from = state_from
  23
  24     def set_inputs(self, inputs):
  25         self.inputs = inputs
  26         for k,v in inputs.items():
  27             setattr(self, k, v)
  28
  29     def set_outputs(self, outputs):
  30         self.outputs = outputs
  31         for k,v in outputs.items():
  32             setattr(self, k, v)
  33
  34
  35 class FPGetSyncOpsMod:
  36     def __init__(self, width, num_ops=2):
  37         self.width = width
  38         self.num_ops = num_ops
  39         inops = []
  40         outops = []
  41         for i in range(num_ops):
  42             inops.append(Signal(width, reset_less=True))
  43             outops.append(Signal(width, reset_less=True))
  44         self.in_op = inops
  45         self.out_op = outops
  46         self.stb = Signal(num_ops)
  47         self.ack = Signal()
  48         self.ready = Signal(reset_less=True)
  49         self.out_decode = Signal(reset_less=True)
  50
  51     def elaborate(self, platform):
  52         m = Module()
  53         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  54         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  55         with m.If(self.out_decode):
  56             for i in range(self.num_ops):
  57                 m.d.comb += [
  58                         self.out_op[i].eq(self.in_op[i]),
  59                 ]
  60         return m
  61
  62     def ports(self):
  63         return self.in_op + self.out_op + [self.stb, self.ack]
  64
  65
  66 class FPOps(Trigger):
  67     def __init__(self, width, num_ops):
  68         Trigger.__init__(self)
  69         self.width = width
  70         self.num_ops = num_ops
  71
  72         res = []
  73         for i in range(num_ops):
  74             res.append(Signal(width))
  75         self.v  = Array(res)
  76
  77     def ports(self):
  78         res = []
  79         for i in range(self.num_ops):
  80             res.append(self.v[i])
  81         res.append(self.ack)
  82         res.append(self.stb)
  83         return res
  84
  85
  86 class InputGroup:
  87     def __init__(self, width, num_ops=2, num_rows=4):
  88         self.width = width
  89         self.num_ops = num_ops
  90         self.num_rows = num_rows
  91         self.mmax = int(log(self.num_rows) / log(2))
  92         self.rs = []
  93         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  94         for i in range(num_rows):
  95             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  96         self.rs = Array(self.rs)
  97
  98         self.out_op = FPOps(width, num_ops)
  99
 100     def elaborate(self, platform):
 101         m = Module()
 102
 103         pe = PriorityEncoder(self.num_rows)
 104         m.submodules.selector = pe
 105         m.submodules.out_op = self.out_op
 106         m.submodules += self.rs
 107
 108         # connect priority encoder
 109         in_ready = []
 110         for i in range(self.num_rows):
 111             in_ready.append(self.rs[i].ready)
 112         m.d.comb += pe.i.eq(Cat(*in_ready))
 113
 114         active = Signal(reset_less=True)
 115         out_en = Signal(reset_less=True)
 116         m.d.comb += active.eq(~pe.n) # encoder active
 117         m.d.comb += out_en.eq(active & self.out_op.trigger)
 118
 119         # encoder active: ack relevant input, record MID, pass output
 120         with m.If(out_en):
 121             rs = self.rs[pe.o]
 122             m.d.sync += self.mid.eq(pe.o)
 123             m.d.sync += rs.ack.eq(0)
 124             m.d.sync += self.out_op.stb.eq(0)
 125             for j in range(self.num_ops):
 126                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 127         with m.Else():
 128             m.d.sync += self.out_op.stb.eq(1)
 129             # acks all default to zero
 130             for i in range(self.num_rows):
 131                 m.d.sync += self.rs[i].ack.eq(1)
 132
 133         return m
 134
 135     def ports(self):
 136         res = []
 137         for i in range(self.num_rows):
 138             inop = self.rs[i]
 139             res += inop.in_op + [inop.stb]
 140         return self.out_op.ports() + res + [self.mid]
 141
 142
 143 class FPGetOpMod:
 144     def __init__(self, width):
 145         self.in_op = FPOp(width)
 146         self.out_op = Signal(width)
 147         self.out_decode = Signal(reset_less=True)
 148
 149     def elaborate(self, platform):
 150         m = Module()
 151         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 152         m.submodules.get_op_in = self.in_op
 153         #m.submodules.get_op_out = self.out_op
 154         with m.If(self.out_decode):
 155             m.d.comb += [
 156                 self.out_op.eq(self.in_op.v),
 157             ]
 158         return m
 159
 160
 161 class FPGetOp(FPState):
 162     """ gets operand
 163     """
 164
 165     def __init__(self, in_state, out_state, in_op, width):
 166         FPState.__init__(self, in_state)
 167         self.out_state = out_state
 168         self.mod = FPGetOpMod(width)
 169         self.in_op = in_op
 170         self.out_op = Signal(width)
 171         self.out_decode = Signal(reset_less=True)
 172
 173     def setup(self, m, in_op):
 174         """ links module to inputs and outputs
 175         """
 176         setattr(m.submodules, self.state_from, self.mod)
 177         m.d.comb += self.mod.in_op.eq(in_op)
 178         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 179
 180     def action(self, m):
 181         with m.If(self.out_decode):
 182             m.next = self.out_state
 183             m.d.sync += [
 184                 self.in_op.ack.eq(0),
 185                 self.out_op.eq(self.mod.out_op)
 186             ]
 187         with m.Else():
 188             m.d.sync += self.in_op.ack.eq(1)
 189
 190
 191 class FPNumBase2Ops:
 192
 193     def __init__(self, width, id_wid, m_extra=True):
 194         self.a = FPNumBase(width, m_extra)
 195         self.b = FPNumBase(width, m_extra)
 196         self.mid = Signal(id_wid, reset_less=True)
 197
 198     def eq(self, i):
 199         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 200
 201     def ports(self):
 202         return [self.a, self.b, self.mid]
 203
 204
 205 class FPADDBaseData:
 206
 207     def __init__(self, width, id_wid):
 208         self.width = width
 209         self.id_wid = id_wid
 210         self.a  = Signal(width)
 211         self.b  = Signal(width)
 212         self.mid = Signal(id_wid, reset_less=True)
 213
 214     def eq(self, i):
 215         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 216
 217     def ports(self):
 218         return [self.a, self.b, self.mid]
 219
 220
 221 class FPGet2OpMod(Trigger):
 222     def __init__(self, width, id_wid):
 223         Trigger.__init__(self)
 224         self.width = width
 225         self.id_wid = id_wid
 226         self.i = self.ispec()
 227         self.o = self.ospec()
 228
 229     def ispec(self):
 230         return FPADDBaseData(self.width, self.id_wid)
 231
 232     def ospec(self):
 233         return FPADDBaseData(self.width, self.id_wid)
 234
 235     def process(self, i):
 236         return self.o
 237
 238     def elaborate(self, platform):
 239         m = Trigger.elaborate(self, platform)
 240         with m.If(self.trigger):
 241             m.d.comb += [
 242                 self.o.eq(self.i),
 243             ]
 244         return m
 245
 246
 247 class FPGet2Op(FPState):
 248     """ gets operands
 249     """
 250
 251     def __init__(self, in_state, out_state, width, id_wid):
 252         FPState.__init__(self, in_state)
 253         self.out_state = out_state
 254         self.mod = FPGet2OpMod(width, id_wid)
 255         self.o = self.mod.ospec()
 256         self.in_stb = Signal(reset_less=True)
 257         self.out_ack = Signal(reset_less=True)
 258         self.out_decode = Signal(reset_less=True)
 259
 260     def setup(self, m, i, in_stb, in_ack):
 261         """ links module to inputs and outputs
 262         """
 263         m.submodules.get_ops = self.mod
 264         m.d.comb += self.mod.i.eq(i)
 265         m.d.comb += self.mod.stb.eq(in_stb)
 266         m.d.comb += self.out_ack.eq(self.mod.ack)
 267         m.d.comb += self.out_decode.eq(self.mod.trigger)
 268         m.d.comb += in_ack.eq(self.mod.ack)
 269
 270     def action(self, m):
 271         with m.If(self.out_decode):
 272             m.next = self.out_state
 273             m.d.sync += [
 274                 self.mod.ack.eq(0),
 275                 self.o.eq(self.mod.o),
 276             ]
 277         with m.Else():
 278             m.d.sync += self.mod.ack.eq(1)
 279
 280
 281 class FPSCData:
 282
 283     def __init__(self, width, id_wid):
 284         self.a = FPNumBase(width, True)
 285         self.b = FPNumBase(width, True)
 286         self.z = FPNumOut(width, False)
 287         self.oz = Signal(width, reset_less=True)
 288         self.out_do_z = Signal(reset_less=True)
 289         self.mid = Signal(id_wid, reset_less=True)
 290
 291     def eq(self, i):
 292         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 293                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 294
 295
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        input (i) is an FPADDBaseData, output (o) an FPSCData.  when a
        special case is detected o.out_do_z is set and the result is
        placed in o.z (and, packed, in o.oz); otherwise the decoded
        operands are passed on in o.a and o.b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the module's own output (the argument is unused)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a),
                     b1.decode(self.i.b),
                    ]

        # signs of a and b differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # mantissas of a and b are identical
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # the If/Elif chain below is a priority list: the first matching
        # case wins, so the order of the tests matters.

        # if a is NaN or b is NaN return NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(a1.s)
            # if a is inf and signs don't match return NaN
            # (this later assignment overrides the inf written just above)
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(b1.s)

        # if a is zero and b zero return signed-a/b
        # (sign is the AND of both signs: negative only if both are)
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(b1.s, b1.e, b1.m[3:-1])

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(a1.s, a1.e, a1.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)
            m.d.comb += self.o.a.eq(a1)
            m.d.comb += self.o.b.eq(b1)

        # packed result and multiplex id are always passed on
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
 414
 415
 416 class FPID:
 417     def __init__(self, id_wid):
 418         self.id_wid = id_wid
 419         if self.id_wid:
 420             self.in_mid = Signal(id_wid, reset_less=True)
 421             self.out_mid = Signal(id_wid, reset_less=True)
 422         else:
 423             self.in_mid = None
 424             self.out_mid = None
 425
 426     def idsync(self, m):
 427         if self.id_wid is not None:
 428             m.d.sync += self.out_mid.eq(self.in_mid)
 429
 430
 431 class FPAddSpecialCases(FPState):
 432     """ special cases: NaNs, infs, zeros, denormalised
 433         NOTE: some of these are unique to add.  see "Special Operations"
 434         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 435     """
 436
 437     def __init__(self, width, id_wid):
 438         FPState.__init__(self, "special_cases")
 439         self.mod = FPAddSpecialCasesMod(width)
 440         self.out_z = self.mod.ospec()
 441         self.out_do_z = Signal(reset_less=True)
 442
 443     def setup(self, m, i):
 444         """ links module to inputs and outputs
 445         """
 446         self.mod.setup(m, i, self.out_do_z)
 447         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 448         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 449
 450     def action(self, m):
 451         self.idsync(m)
 452         with m.If(self.out_do_z):
 453             m.next = "put_z"
 454         with m.Else():
 455             m.next = "denormalise"
 456
 457
 458 class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
 459     """ special cases: NaNs, infs, zeros, denormalised
 460         NOTE: some of these are unique to add.  see "Special Operations"
 461         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 462     """
 463
 464     def __init__(self, width, id_wid):
 465         FPState.__init__(self, "special_cases")
 466         self.smod = FPAddSpecialCasesMod(width, id_wid)
 467         self.dmod = FPAddDeNormMod(width, id_wid)
 468         UnbufferedPipeline.__init__(self, self) # pipe is its own stage
 469         self.o = self.ospec()
 470
 471     def ispec(self):
 472         return self.smod.ispec()
 473
 474     def ospec(self):
 475         return self.dmod.ospec()
 476
 477     def setup(self, m, i):
 478         """ links module to inputs and outputs
 479         """
 480         # these only needed for break-out (early-out)
 481         # out_z = self.smod.ospec()
 482         # out_do_z = Signal(reset_less=True)
 483         self.smod.setup(m, i)
 484         self.dmod.setup(m, self.smod.o)
 485         #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
 486
 487         # out_do_z=True, only needed for early-out (split pipeline)
 488         #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
 489         #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
 490
 491         # out_do_z=False
 492         m.d.comb += self.o.eq(self.dmod.o)
 493
 494     def process(self, i):
 495         return self.o
 496
 497     def action(self, m):
 498         #with m.If(self.out_do_z):
 499         #    m.next = "put_z"
 500         #with m.Else():
 501             m.next = "align"
 502
 503
 504 class FPAddDeNormMod(FPState):
 505
 506     def __init__(self, width, id_wid):
 507         self.width = width
 508         self.id_wid = id_wid
 509         self.i = self.ispec()
 510         self.o = self.ospec()
 511
 512     def ispec(self):
 513         return FPSCData(self.width, self.id_wid)
 514
 515     def ospec(self):
 516         return FPSCData(self.width, self.id_wid)
 517
 518     def setup(self, m, i):
 519         """ links module to inputs and outputs
 520         """
 521         m.submodules.denormalise = self
 522         m.d.comb += self.i.eq(i)
 523
 524     def elaborate(self, platform):
 525         m = Module()
 526         m.submodules.denorm_in_a = self.i.a
 527         m.submodules.denorm_in_b = self.i.b
 528         m.submodules.denorm_out_a = self.o.a
 529         m.submodules.denorm_out_b = self.o.b
 530
 531         with m.If(~self.i.out_do_z):
 532             # XXX hmmm, don't like repeating identical code
 533             m.d.comb += self.o.a.eq(self.i.a)
 534             with m.If(self.i.a.exp_n127):
 535                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 536             with m.Else():
 537                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 538
 539             m.d.comb += self.o.b.eq(self.i.b)
 540             with m.If(self.i.b.exp_n127):
 541                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 542             with m.Else():
 543                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 544
 545         m.d.comb += self.o.mid.eq(self.i.mid)
 546         m.d.comb += self.o.z.eq(self.i.z)
 547         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 548         m.d.comb += self.o.oz.eq(self.i.oz)
 549
 550         return m
 551
 552
 553 class FPAddDeNorm(FPState):
 554
 555     def __init__(self, width, id_wid):
 556         FPState.__init__(self, "denormalise")
 557         self.mod = FPAddDeNormMod(width)
 558         self.out_a = FPNumBase(width)
 559         self.out_b = FPNumBase(width)
 560
 561     def setup(self, m, i):
 562         """ links module to inputs and outputs
 563         """
 564         self.mod.setup(m, i)
 565
 566         m.d.sync += self.out_a.eq(self.mod.out_a)
 567         m.d.sync += self.out_b.eq(self.mod.out_b)
 568
 569     def action(self, m):
 570         # Denormalised Number checks
 571         m.next = "align"
 572
 573
 574 class FPAddAlignMultiMod(FPState):
 575
 576     def __init__(self, width):
 577         self.in_a = FPNumBase(width)
 578         self.in_b = FPNumBase(width)
 579         self.out_a = FPNumIn(None, width)
 580         self.out_b = FPNumIn(None, width)
 581         self.exp_eq = Signal(reset_less=True)
 582
 583     def elaborate(self, platform):
 584         # This one however (single-cycle) will do the shift
 585         # in one go.
 586
 587         m = Module()
 588
 589         m.submodules.align_in_a = self.in_a
 590         m.submodules.align_in_b = self.in_b
 591         m.submodules.align_out_a = self.out_a
 592         m.submodules.align_out_b = self.out_b
 593
 594         # NOTE: this does *not* do single-cycle multi-shifting,
 595         #       it *STAYS* in the align state until exponents match
 596
 597         # exponent of a greater than b: shift b down
 598         m.d.comb += self.exp_eq.eq(0)
 599         m.d.comb += self.out_a.eq(self.in_a)
 600         m.d.comb += self.out_b.eq(self.in_b)
 601         agtb = Signal(reset_less=True)
 602         altb = Signal(reset_less=True)
 603         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 604         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 605         with m.If(agtb):
 606             m.d.comb += self.out_b.shift_down(self.in_b)
 607         # exponent of b greater than a: shift a down
 608         with m.Elif(altb):
 609             m.d.comb += self.out_a.shift_down(self.in_a)
 610         # exponents equal: move to next stage.
 611         with m.Else():
 612             m.d.comb += self.exp_eq.eq(1)
 613         return m
 614
 615
 616 class FPAddAlignMulti(FPState):
 617
 618     def __init__(self, width, id_wid):
 619         FPState.__init__(self, "align")
 620         self.mod = FPAddAlignMultiMod(width)
 621         self.out_a = FPNumIn(None, width)
 622         self.out_b = FPNumIn(None, width)
 623         self.exp_eq = Signal(reset_less=True)
 624
 625     def setup(self, m, in_a, in_b):
 626         """ links module to inputs and outputs
 627         """
 628         m.submodules.align = self.mod
 629         m.d.comb += self.mod.in_a.eq(in_a)
 630         m.d.comb += self.mod.in_b.eq(in_b)
 631         #m.d.comb += self.out_a.eq(self.mod.out_a)
 632         #m.d.comb += self.out_b.eq(self.mod.out_b)
 633         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 634         m.d.sync += self.out_a.eq(self.mod.out_a)
 635         m.d.sync += self.out_b.eq(self.mod.out_b)
 636
 637     def action(self, m):
 638         with m.If(self.exp_eq):
 639             m.next = "add_0"
 640
 641
 642 class FPNumIn2Ops:
 643
 644     def __init__(self, width, id_wid):
 645         self.a = FPNumIn(None, width)
 646         self.b = FPNumIn(None, width)
 647         self.z = FPNumOut(width, False)
 648         self.out_do_z = Signal(reset_less=True)
 649         self.oz = Signal(width, reset_less=True)
 650         self.mid = Signal(id_wid, reset_less=True)
 651
 652     def eq(self, i):
 653         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 654                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 655
 656
class FPAddAlignSingleMod:
    """ single-cycle alignment stage: input is an FPSCData, output an
        FPNumIn2Ops.  see elaborate() for details.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPNumIn2Ops
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the module's own output (the argument is unused)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        # signed shape, same width as the exponent
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)  # a.e - b.e
        ediffr = Signal(espec, reset_less=True) # b.e - a.e
        tdiff = Signal(espec, reset_less=True)  # shift amount in use
        elz = Signal(reset_less=True)           # a.e < b.e
        egz = Signal(reset_less=True)           # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        # exponent goes up by the amount the mantissa was shifted
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
        m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
        m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
        m.d.comb += egz.eq(self.i.a.e > self.i.b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.o.a.eq(self.i.a)
        m.d.comb += self.o.b.eq(self.i.b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # alignment is skipped entirely when a special-case result is
        # already present (out_do_z set)
        # exponent of a greater than b: shift b down
        with m.If(~self.i.out_do_z):
            with m.If(egz):
                m.d.comb += [t_inp.eq(self.i.b),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(self.i.b.s), # whoops forgot sign
                            ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(self.i.a),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(self.i.a.s), # whoops forgot sign
                            ]

        # everything else is passed straight through
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
 750
 751
 752 class FPAddAlignSingle(FPState):
 753
 754     def __init__(self, width, id_wid):
 755         FPState.__init__(self, "align")
 756         self.mod = FPAddAlignSingleMod(width, id_wid)
 757         self.out_a = FPNumIn(None, width)
 758         self.out_b = FPNumIn(None, width)
 759
 760     def setup(self, m, i):
 761         """ links module to inputs and outputs
 762         """
 763         self.mod.setup(m, i)
 764
 765         # NOTE: could be done as comb
 766         m.d.sync += self.out_a.eq(self.mod.out_a)
 767         m.d.sync += self.out_b.eq(self.mod.out_b)
 768
 769     def action(self, m):
 770         m.next = "add_0"
 771
 772
 773 class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
 774
 775     def __init__(self, width, id_wid):
 776         FPState.__init__(self, "align")
 777         self.width = width
 778         self.id_wid = id_wid
 779         UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
 780         self.a1o = self.ospec()
 781
 782     def ispec(self):
 783         return FPSCData(self.width, self.id_wid)
 784         #return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
 785
 786     def ospec(self):
 787         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 788
 789     def setup(self, m, i):
 790         """ links module to inputs and outputs
 791         """
 792
 793         # chain AddAlignSingle, AddStage0 and AddStage1
 794         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 795         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 796         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 797
 798         chain = StageChain([mod, a0mod, a1mod])
 799         chain.setup(m, i)
 800
 801         m.d.comb += self.a1o.eq(a1mod.o)
 802
 803     def process(self, i):
 804         return self.a1o
 805
 806     def action(self, m):
 807         m.next = "normalise_1"
 808
 809
 810 class FPAddStage0Data:
 811
 812     def __init__(self, width, id_wid):
 813         self.z = FPNumBase(width, False)
 814         self.out_do_z = Signal(reset_less=True)
 815         self.oz = Signal(width, reset_less=True)
 816         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 817         self.mid = Signal(id_wid, reset_less=True)
 818
 819     def eq(self, i):
 820         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 821                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 822
 823
 824 class FPAddStage0Mod:
 825
 826     def __init__(self, width, id_wid):
 827         self.width = width
 828         self.id_wid = id_wid
 829         self.i = self.ispec()
 830         self.o = self.ospec()
 831
 832     def ispec(self):
 833         return FPSCData(self.width, self.id_wid)
 834
 835     def ospec(self):
 836         return FPAddStage0Data(self.width, self.id_wid)
 837
 838     def process(self, i):
 839         return self.o
 840
 841     def setup(self, m, i):
 842         """ links module to inputs and outputs
 843         """
 844         m.submodules.add0 = self
 845         m.d.comb += self.i.eq(i)
 846
 847     def elaborate(self, platform):
 848         m = Module()
 849         m.submodules.add0_in_a = self.i.a
 850         m.submodules.add0_in_b = self.i.b
 851         m.submodules.add0_out_z = self.o.z
 852
 853         # store intermediate tests (and zero-extended mantissas)
 854         seq = Signal(reset_less=True)
 855         mge = Signal(reset_less=True)
 856         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 857         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 858         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 859                      mge.eq(self.i.a.m >= self.i.b.m),
 860                      am0.eq(Cat(self.i.a.m, 0)),
 861                      bm0.eq(Cat(self.i.b.m, 0))
 862                     ]
 863         # same-sign (both negative or both positive) add mantissas
 864         with m.If(~self.i.out_do_z):
 865             m.d.comb += self.o.z.e.eq(self.i.a.e)
 866             with m.If(seq):
 867                 m.d.comb += [
 868                     self.o.tot.eq(am0 + bm0),
 869                     self.o.z.s.eq(self.i.a.s)
 870                 ]
 871             # a mantissa greater than b, use a
 872             with m.Elif(mge):
 873                 m.d.comb += [
 874                     self.o.tot.eq(am0 - bm0),
 875                     self.o.z.s.eq(self.i.a.s)
 876                 ]
 877             # b mantissa greater than a, use b
 878             with m.Else():
 879                 m.d.comb += [
 880                     self.o.tot.eq(bm0 - am0),
 881                     self.o.z.s.eq(self.i.b.s)
 882             ]
 883
 884         m.d.comb += self.o.oz.eq(self.i.oz)
 885         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 886         m.d.comb += self.o.mid.eq(self.i.mid)
 887         return m
 888
 889
 890 class FPAddStage0(FPState):
 891     """ First stage of add.  covers same-sign (add) and subtract
 892         special-casing when mantissas are greater or equal, to
 893         give greatest accuracy.
 894     """
 895
 896     def __init__(self, width, id_wid):
 897         FPState.__init__(self, "add_0")
 898         self.mod = FPAddStage0Mod(width)
 899         self.o = self.mod.ospec()
 900
 901     def setup(self, m, i):
 902         """ links module to inputs and outputs
 903         """
 904         self.mod.setup(m, i)
 905
 906         # NOTE: these could be done as combinatorial (merge add0+add1)
 907         m.d.sync += self.o.eq(self.mod.o)
 908
 909     def action(self, m):
 910         m.next = "add_1"
 911
 912
 913 class FPAddStage1Data:
 914
 915     def __init__(self, width, id_wid):
 916         self.z = FPNumBase(width, False)
 917         self.out_do_z = Signal(reset_less=True)
 918         self.oz = Signal(width, reset_less=True)
 919         self.of = Overflow()
 920         self.mid = Signal(id_wid, reset_less=True)
 921
 922     def eq(self, i):
 923         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 924                 self.of.eq(i.of), self.mid.eq(i.mid)]
 925
 926
 927
 928 class FPAddStage1Mod(FPState):
 929     """ Second stage of add: preparation for normalisation.
 930         detects when tot sum is too big (tot[27] is kinda a carry bit)
 931     """
 932
 933     def __init__(self, width, id_wid):
 934         self.width = width
 935         self.id_wid = id_wid
 936         self.i = self.ispec()
 937         self.o = self.ospec()
 938
 939     def ispec(self):
 940         return FPAddStage0Data(self.width, self.id_wid)
 941
 942     def ospec(self):
 943         return FPAddStage1Data(self.width, self.id_wid)
 944
 945     def process(self, i):
 946         return self.o
 947
 948     def setup(self, m, i):
 949         """ links module to inputs and outputs
 950         """
 951         m.submodules.add1 = self
 952         m.submodules.add1_out_overflow = self.o.of
 953
 954         m.d.comb += self.i.eq(i)
 955
 956     def elaborate(self, platform):
 957         m = Module()
 958         #m.submodules.norm1_in_overflow = self.in_of
 959         #m.submodules.norm1_out_overflow = self.out_of
 960         #m.submodules.norm1_in_z = self.in_z
 961         #m.submodules.norm1_out_z = self.out_z
 962         m.d.comb += self.o.z.eq(self.i.z)
 963         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 964         with m.If(~self.i.out_do_z):
 965             with m.If(self.i.tot[-1]):
 966                 m.d.comb += [
 967                     self.o.z.m.eq(self.i.tot[4:]),
 968                     self.o.of.m0.eq(self.i.tot[4]),
 969                     self.o.of.guard.eq(self.i.tot[3]),
 970                     self.o.of.round_bit.eq(self.i.tot[2]),
 971                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 972                     self.o.z.e.eq(self.i.z.e + 1)
 973             ]
 974             # tot[-1] (MSB) zero case
 975             with m.Else():
 976                 m.d.comb += [
 977                     self.o.z.m.eq(self.i.tot[3:]),
 978                     self.o.of.m0.eq(self.i.tot[3]),
 979                     self.o.of.guard.eq(self.i.tot[2]),
 980                     self.o.of.round_bit.eq(self.i.tot[1]),
 981                     self.o.of.sticky.eq(self.i.tot[0])
 982             ]
 983
 984         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 985         m.d.comb += self.o.oz.eq(self.i.oz)
 986         m.d.comb += self.o.mid.eq(self.i.mid)
 987
 988         return m
 989
 990
 991 class FPAddStage1(FPState):
 992
 993     def __init__(self, width, id_wid):
 994         FPState.__init__(self, "add_1")
 995         self.mod = FPAddStage1Mod(width)
 996         self.out_z = FPNumBase(width, False)
 997         self.out_of = Overflow()
 998         self.norm_stb = Signal()
 999
1000     def setup(self, m, i):
1001         """ links module to inputs and outputs
1002         """
1003         self.mod.setup(m, i)
1004
1005         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
1006
1007         m.d.sync += self.out_of.eq(self.mod.out_of)
1008         m.d.sync += self.out_z.eq(self.mod.out_z)
1009         m.d.sync += self.norm_stb.eq(1)
1010
1011     def action(self, m):
1012         m.next = "normalise_1"
1013
1014
class FPNormaliseModSingle:
    """ upward-only normaliser, done in one combinatorial step: when
        the mantissa MSB is zero the mantissa (with guard and round
        bits appended below it) is shifted up by its number of leading
        zeros and the exponent is reduced by the same amount.
    """

    def __init__(self, width):
        """ * width: bit-width passed to FPNumBase for in_z / out_z
        """
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow bits in and out: elaborate() reads in_of and drives
        # out_of, so both have to exist on the instance
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input specification: a new FPNumBase
        """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output specification: a new FPNumBase
        """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs: registers this module
            as submodule "normalise" and copies i into in_z
        """
        m.submodules.normalise = self
        # the input record of this module is in_z (there is no self.i)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        """ builds the combinatorial normalisation logic
        """
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): this shifter is instantiated but none of its
        # ports are connected in this module - confirm it is needed
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1081
class FPNorm1Data:
    """ record produced by normalisation: the roundz flag, the number
        z, the out_do_z / oz bypass pair and the mid identifier
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copies every field
            of i into the same-named field of this record
        """
        names = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in names]
1094
1095
class FPNorm1ModSingle:
    """ single-step normalisation module.

        while out_do_z is clear, the mantissa is either shifted up
        (exponent decreased, limited by exp_sub_n126) or shifted down
        through a MultiShiftRMerge (exponent increased), and the
        m0/guard/round/sticky overflow bits are updated to match.
        the resulting overflow's roundz is published as o.roundz.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width used for the input/output records
            * id_wid: width of the mid identifier in those records
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input specification: a new FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output specification: a new FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage hook: the argument is unused, self.o is returned
        """
        return self.o

    def elaborate(self, platform):
        """ builds the combinatorial normalisation logic
        """
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # internal copy of the input record
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the low bits come from the shifter output as well:
                    # temp_s belongs to the decrease branch and is not
                    # driven in this branch, so it must not be read here
                    of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too
                    of.guard.eq(msr.m[2]),     # guard
                    of.round_bit.eq(msr.m[1]), # round
                    of.sticky.eq(msr.m[0]),    # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1202
1203
class FPNorm1ModMulti:
    """ one-bit-per-step normalisation module.

        each evaluation shifts the selected input (in_z/in_of when
        in_select is set, temp_z/temp_of otherwise) by a single bit up
        or down and adjusts the exponent by one; out_norm is raised
        while a further step is still required.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: bit-width passed to FPNumBase
            * single_cycle: accepted but not used by this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven in elaborate() (decrease | increase), so it has to
        # be created here
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ builds the combinatorial single-step normalisation logic
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1269
1270
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" built around FPNorm1ModSingle; moves
        on to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width handed to FPNorm1ModSingle / FPNumBase
            * id_wid: width of the mid identifier, handed to the module
            * single_cycle: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input specification, delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification, delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action for this state: always proceed to "round"
        """
        m.next = "round"
1293
1294
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" built around FPNorm1ModMulti: stays
        in this state while out_norm is set and moves on to "round"
        once it is clear.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        # handshake and loop-control signals
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        # intermediate and final results
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti as written in this file has no
        # setup() method - confirm where this call is meant to resolve
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: accept new input when strobed and not yet
            acknowledged, latch the module's intermediate results, and
            leave for "round" when no more normalisation is flagged
        """
        accept = self.in_accept
        m.d.comb += accept.eq((~self.ack) & (self.stb))
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1335
1336
class FPNormToPack(FPState, UnbufferedPipeline):
    """ normalisation, rounding, corrections and packing chained
        together as one stage; usable both as FSM state "normalise_1"
        and as an UnbufferedPipeline that is its own stage
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        # pipeline is its own stage
        UnbufferedPipeline.__init__(self, self)

    def ispec(self):
        """ input specification (same as Norm1ModSingle ispec)
        """
        width, id_wid = self.width, self.id_wid
        return FPAddStage1Data(width, id_wid)

    def ospec(self):
        """ output specification (same as FPPackMod ospec)
        """
        width, id_wid = self.width, self.id_wid
        return FPPackData(width, id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        pack = stages[-1]
        self.out_z = pack.ospec()

        m.d.comb += [
            self.out_z.mid.eq(pack.o.mid),
            self.out_z.z.eq(pack.o.z), # outputs packed result
        ]

    def process(self, i):
        """ stage hook: the argument is unused, the record created
            by setup() (self.out_z) is returned
        """
        result = self.out_z
        return result

    def action(self, m):
        """ FSM action for this state: always proceed to "pack_put_z"
        """
        m.next = "pack_put_z"
1372
1373
class FPRoundData:
    """ record used from rounding onwards: the number z, the
        out_do_z / oz bypass pair and the mid identifier
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copies z, out_do_z,
            oz and mid of i into this record
        """
        assigns = []
        for name in ("z", "out_do_z", "oz", "mid"):
            assigns.append(getattr(self, name).eq(getattr(i, name)))
        return assigns
1385
1386
class FPRoundMod:
    """ rounding module: copies the input to the output and, while
        out_do_z is clear and roundz is set, increments the mantissa
        (and the exponent too when the mantissa was all ones)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification: a new FPNorm1Data
        """
        width, id_wid = self.width, self.id_wid
        return FPNorm1Data(width, id_wid)

    def ospec(self):
        """ output specification: a new FPRoundData
        """
        width, id_wid = self.width, self.id_wid
        return FPRoundData(width, id_wid)

    def process(self, i):
        """ stage hook: the argument is unused, self.out_z is returned
        """
        result = self.out_z
        return result

    def setup(self, m, i):
        """ registers this module as submodule "roundz" of m and
            copies i into the input record combinatorially
        """
        m.submodules.roundz = self
        copy_in = self.i.eq(i)
        m.d.comb += copy_in

    def elaborate(self, platform):
        """ builds the combinatorial rounding logic
        """
        m = Module()
        src, dst = self.i, self.out_z
        # copies mid, z, out_do_z
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # mantissa up
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                # all 1s: exponent up
                with m.If(src.z.m == src.z.m1s):
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1418
1419
class FPRound(FPState):
    """ FSM state "round": wraps an FPRoundMod and registers the
        module's result into self.out_z on the sync domain, then
        moves on to "corrections".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPRoundMod
            * id_wid: width of the mid identifier, handed on likewise
        """
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification, delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification, delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in the visible part of
        # this file; presumably inherited - confirm it exists
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod keeps its output record in out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action for this state: always proceed to "corrections"
        """
        m.next = "corrections"
1444
1445
class FPCorrectionsMod:
    """ corrections module: copies the input to the output and, while
        out_do_z is clear and the input number reports
        is_denormalised, replaces the exponent with N127
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification: a new FPRoundData
        """
        width, id_wid = self.width, self.id_wid
        return FPRoundData(width, id_wid)

    def ospec(self):
        """ output specification: a new FPRoundData
        """
        width, id_wid = self.width, self.id_wid
        return FPRoundData(width, id_wid)

    def process(self, i):
        """ stage hook: the argument is unused, self.out_z is returned
        """
        result = self.out_z
        return result

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        copy_in = self.i.eq(i)
        m.d.comb += copy_in

    def elaborate(self, platform):
        """ builds the combinatorial corrections logic
        """
        m = Module()
        src, dst = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        # copies mid, z, out_do_z
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1478
1479
class FPCorrections(FPState):
    """ FSM state "corrections": wraps an FPCorrectionsMod and
        registers the module's result into self.out_z on the sync
        domain, then moves on to "pack".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPCorrectionsMod
            * id_wid: width of the mid identifier, handed on likewise
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification, delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification, delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod keeps its output record in out_z (no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action for this state: always proceed to "pack"
        """
        m.next = "pack"
1503
1504
class FPPackData:
    """ record holding a packed result (z, width bits) together with
        its mid identifier
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid of i into this
            record
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z, then mid
        """
        members = [self.z]
        members.append(self.mid)
        return members
1516
1517
class FPPackMod:
    """ packing module: when out_do_z is set the bypass value oz is
        output as-is; otherwise the number is packed through an
        FPNumOut, as infinity when it reports is_overflowed and from
        sign/exponent/mantissa when it does not
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input specification: a new FPRoundData
        """
        width, id_wid = self.width, self.id_wid
        return FPRoundData(width, id_wid)

    def ospec(self):
        """ output specification: a new FPPackData
        """
        width, id_wid = self.width, self.id_wid
        return FPPackData(width, id_wid)

    def process(self, i):
        """ stage hook: the argument is unused, self.o is returned
        """
        result = self.o
        return result

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        copy_in = self.i.eq(in_z)
        m.d.comb += copy_in

    def elaborate(self, platform):
        """ builds the combinatorial packing logic
        """
        m = Module()
        src = self.i
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = src.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(src.mid)
        # bypass requested: hand the pre-computed value straight on
        with m.If(src.out_do_z):
            m.d.comb += z.v.eq(src.oz)
        with m.Else():
            with m.If(src.z.is_overflowed):
                m.d.comb += z.inf(src.z.s)
            with m.Else():
                m.d.comb += z.create(src.z.s, src.z.e, src.z.m)
        m.d.comb += self.o.z.eq(z.v)
        return m
1556
1557
class FPPack(FPState):
    """ FSM state "pack": wraps an FPPackMod and registers the packed
        value and mid into self.out_z on the sync domain, then moves
        on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPPackMod
            * id_wid: width of the mid identifier, handed on likewise
        """
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification, delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification, delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # both sides are FPPackData records: the packed value lives in
        # their "z" member and the module's output record is mod.o
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action for this state: always proceed to "pack_put_z"
        """
        m.next = "pack_put_z"
1581
1582
class FPPutZ(FPState):
    """ FSM state that places a result on an output port and waits
        for the stb/ack handshake before moving to the next state
        ("get_ops" unless to_state is given)
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z, self.out_z = in_z, out_z
        self.in_mid, self.out_mid = in_mid, out_mid

    def action(self, m):
        """ FSM action: copy the id (when one was supplied) and the
            value to the output; raise stb until stb and ack are both
            seen, then drop stb and move on to to_state
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        port = self.out_z.z
        m.d.sync += port.v.eq(self.in_z)
        with m.If(port.stb & port.ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1606
1607
class FPPutZIdx(FPState):
    """ like FPPutZ, but the output port is picked out of out_zs by
        indexing it with in_mid
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z, self.out_zs = in_z, out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: drive the selected output's value; raise its
            stb until stb and ack are both seen, then drop stb and
            move on to to_state
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        port = self.out_zs[self.in_mid]
        m.d.comb += outz_stb.eq(port.stb)
        m.d.comb += outz_ack.eq(port.ack)
        m.d.sync += port.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1633
class FPOpData:
    """ record pairing an FPOp (z) with its mid identifier
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid of i into this
            record
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z, then mid
        """
        members = [self.z]
        members.append(self.mid)
        return members
1644
1645
class FPADDBaseMod:
    """ FSM-based IEEE754 FP adder: collects a list of FPState stages
        and turns each of them into one state of an nmigen FSM
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width, self.id_wid = width, id_wid
        self.single_cycle, self.compact = single_cycle, compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FSM stages, in the order they were added
        self.states = []

    def ispec(self):
        """ input specification: a new FPADDBaseData
        """
        width, id_wid = self.width, self.id_wid
        return FPADDBaseData(width, id_wid)

    def ospec(self):
        """ output specification: a new FPOpData
        """
        width, id_wid = self.width, self.id_wid
        return FPOpData(width, id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM stages and returns it
        """
        self.states += [state]
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        build = (self.get_compact_fragment if self.compact
                 else self.get_longer_fragment)
        build(m, platform)

        # one FSM state per stage, named after the stage's state_from
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the long (one state per step) version of the adder.

            NOTE(review): several attributes used here (self.in_mid,
            self.out_z, self.out_mid) are not set by __init__, and the
            setup() calls pass more arguments than the stage classes
            visible in this file accept - this path looks stale;
            confirm before relying on it.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle flavour
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-cycle or multi-cycle flavour
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output states: normal result, and special-cases result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the compact version of the adder: get operands,
            special-cases plus denormalisation, align plus add,
            normalise-to-pack, then put the result on the output
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      width, id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(width, id_wid))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        # a separate "put_z" state for the special-cases result is
        # not registered in the compact version
1765
1766
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod and drives its handshake

        The wrapped module (self.mod) is added as a submodule by setup().
        action() raises the module's input strobe when an operand pair is
        accepted, and moves the FSM to "put_z" once the module's output
        trigger (z_done) is seen.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data specification: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data specification: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links this state and its wrapped module into module m

            * m: Module receiving the comb/sync statements and the submodule
            * i: incoming operand data, copied combinatorially into self.i
            * add_stb: strobe from the preceding state; registered (sync)
                       into self.add_stb
            * in_mid: kept in the signature for callers; not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ statements active while the FSM is in the "fpadd" state

            * m: Module (inside an m.State block) to add statements to
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                # nothing to accept: drop ack/strobe, ack the module output
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, drop the input strobe, go to the put_z state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1856
1857
class FPADDBasePipe(ControlBase):
    """ FP add pipeline built from three chained stages

        pipe1: FPAddSpecialCasesDeNorm, pipe2: FPAddAlignSingleAdd,
        pipe3: FPNormToPack.  The stages are linked with
        ControlBase.connect(); the resulting statements are kept in
        self._eqs and added to the comb domain in elaborate().
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        stage_args = (width, id_wid)
        self.pipe1 = FPAddSpecialCasesDeNorm(*stage_args)
        self.pipe2 = FPAddAlignSingleAdd(*stage_args)
        self.pipe3 = FPNormToPack(*stage_args)

        chain = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ creates the Module: three named submodules plus the connections
        """
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for sub_name, stage in named_stages:
            # same as "m.submodules.<sub_name> = stage"
            setattr(m.submodules, sub_name, stage)
        m.d.comb += self._eqs
        return m
1874
1875
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ fan-in pipe: num_rows inputs carrying FPADDBaseData

        Uses a PassThroughStage whose data spec is FPADDBaseData(width,
        id_wid), and hands p_len=num_rows to PriorityCombMuxInPipe.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        stage = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)

    def ports(self):
        """ returns a flat list: per-input valid/ready/data ports, followed
            by the ready/valid/data ports of the single output
        """
        collected = []
        for idx in range(len(self.p)):
            src = self.p[idx]
            collected.extend([src.i_valid, src.o_ready] + src.i_data.ports())
        dst = self.n
        collected.extend([dst.i_ready, dst.o_valid] + dst.o_data.ports())
        return collected
1891
1892
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ fan-out pipe: num_rows outputs carrying FPPackData

        Uses a PassThroughStage whose data spec is FPPackData(width,
        id_wid), and hands n_len=num_rows to CombMuxOutPipe.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        stage = PassThroughStage(lambda: FPPackData(width, id_wid))
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
1899
1900
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        fan-in on the input side (FPADDInMuxPipe, num_rows inputs), one
        FPADDBasePipe in the middle, and fan-out on the output side
        (FPADDMuxOutPipe, num_rows outputs).
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # expose the fan-in inputs / fan-out outputs as this class's own
        self.p = self.inpipe.p
        self.n = self.outpipe.n

        in_ports = self.inpipe.ports()
        out_ports = self.outpipe.ports()
        self._ports = in_ports + out_ports

    def elaborate(self, platform):
        """ creates the Module: inpipe -> fpadd -> outpipe
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in output feeds the adder, adder output feeds the fan-out
        to_adder = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += to_adder
        to_fanout = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += to_fanout

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
1929
1930
class ResArray:
    """ array of FPOp result slots, plus one incoming result and its id
    """
    def __init__(self, width, id_wid, rs_sz=2):
        """ creates the result slots

            * width: bit-width passed to each FPOp
            * id_wid: bit-width of the in_mid identifier Signal
            * rs_sz: number of out_z result slots to create (default 2)
        """
        self.width = width
        self.id_wid = id_wid
        # rs_sz used to be read here without being defined in this scope;
        # it is now an explicit, defaulted parameter
        self.rs_sz = rs_sz
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially copies in_z and in_mid into this object

            * m: Module receiving the comb statements
            * in_z: source for self.in_z
            * in_mid: source for self.in_mid
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment: a Module with in_z and the
            result slots added as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every result slot
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1962
1963
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of out_z
                     result slots to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        # NOTE(review): FPID.__init__ is not called on self here; the id
        # signals are held in a separate FPID instance instead.
        self.ids = FPID(id_wid)

        # operand pairs: one (in_a, in_b) tuple of FPOp per row
        rs = []
        for i in range(rs_sz):
            in_a  = FPOp(width)
            in_b  = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # result slots: one out_z FPOp per row
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # FSM states, in registration order (filled in by add_state)
        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair, rs[0], is connected below
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        # presumably FPGetOp takes (state_from, state_to, ...) - confirm
        # against its definition
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        # operands and id are registered (sync) into abd before the adder
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                    o.mid, "get_a"))

        with m.FSM() as fsm:

            # one FSM state per registered state object, declared in
            # registration order
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2053
2054
if __name__ == "__main__":
    # Build the FSM-based adder and hand it to the nmigen command-line
    # driver.  The former "else" alternative (running FPADDBase on its own)
    # sat behind "if True:" and could never execute; it also used attribute
    # names FPADDBase does not define, so it has been dropped.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=alu.rs[0][0].ports() +
                    alu.rs[0][1].ports() +
                    alu.res[0].ports() +
                    [alu.ids.in_mid, alu.ids.out_mid])
2068
2069
2070     # works... but don't use, just do "python fname.py convert -t v"
2071     #print (verilog.convert(alu, ports=[
2072     #                        ports=alu.in_a.ports() + \
2073     #                              alu.in_b.ports() + \
2074     #                              alu.out_z.ports())