src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
  13 from multipipe import CombMultiOutPipeline
  14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
  15
  16 #from fpbase import FPNumShiftMultiRight
  17
  18
  19 class FPState(FPBase):
  20     def __init__(self, state_from):
  21         self.state_from = state_from
  22
  23     def set_inputs(self, inputs):
  24         self.inputs = inputs
  25         for k,v in inputs.items():
  26             setattr(self, k, v)
  27
  28     def set_outputs(self, outputs):
  29         self.outputs = outputs
  30         for k,v in outputs.items():
  31             setattr(self, k, v)
  32
  33
  34 class FPGetSyncOpsMod:
  35     def __init__(self, width, num_ops=2):
  36         self.width = width
  37         self.num_ops = num_ops
  38         inops = []
  39         outops = []
  40         for i in range(num_ops):
  41             inops.append(Signal(width, reset_less=True))
  42             outops.append(Signal(width, reset_less=True))
  43         self.in_op = inops
  44         self.out_op = outops
  45         self.stb = Signal(num_ops)
  46         self.ack = Signal()
  47         self.ready = Signal(reset_less=True)
  48         self.out_decode = Signal(reset_less=True)
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  53         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  54         with m.If(self.out_decode):
  55             for i in range(self.num_ops):
  56                 m.d.comb += [
  57                         self.out_op[i].eq(self.in_op[i]),
  58                 ]
  59         return m
  60
  61     def ports(self):
  62         return self.in_op + self.out_op + [self.stb, self.ack]
  63
  64
  65 class FPOps(Trigger):
  66     def __init__(self, width, num_ops):
  67         Trigger.__init__(self)
  68         self.width = width
  69         self.num_ops = num_ops
  70
  71         res = []
  72         for i in range(num_ops):
  73             res.append(Signal(width))
  74         self.v  = Array(res)
  75
  76     def ports(self):
  77         res = []
  78         for i in range(self.num_ops):
  79             res.append(self.v[i])
  80         res.append(self.ack)
  81         res.append(self.stb)
  82         return res
  83
  84
  85 class InputGroup:
  86     def __init__(self, width, num_ops=2, num_rows=4):
  87         self.width = width
  88         self.num_ops = num_ops
  89         self.num_rows = num_rows
  90         self.mmax = int(log(self.num_rows) / log(2))
  91         self.rs = []
  92         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  93         for i in range(num_rows):
  94             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  95         self.rs = Array(self.rs)
  96
  97         self.out_op = FPOps(width, num_ops)
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101
 102         pe = PriorityEncoder(self.num_rows)
 103         m.submodules.selector = pe
 104         m.submodules.out_op = self.out_op
 105         m.submodules += self.rs
 106
 107         # connect priority encoder
 108         in_ready = []
 109         for i in range(self.num_rows):
 110             in_ready.append(self.rs[i].ready)
 111         m.d.comb += pe.i.eq(Cat(*in_ready))
 112
 113         active = Signal(reset_less=True)
 114         out_en = Signal(reset_less=True)
 115         m.d.comb += active.eq(~pe.n) # encoder active
 116         m.d.comb += out_en.eq(active & self.out_op.trigger)
 117
 118         # encoder active: ack relevant input, record MID, pass output
 119         with m.If(out_en):
 120             rs = self.rs[pe.o]
 121             m.d.sync += self.mid.eq(pe.o)
 122             m.d.sync += rs.ack.eq(0)
 123             m.d.sync += self.out_op.stb.eq(0)
 124             for j in range(self.num_ops):
 125                 m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
 126         with m.Else():
 127             m.d.sync += self.out_op.stb.eq(1)
 128             # acks all default to zero
 129             for i in range(self.num_rows):
 130                 m.d.sync += self.rs[i].ack.eq(1)
 131
 132         return m
 133
 134     def ports(self):
 135         res = []
 136         for i in range(self.num_rows):
 137             inop = self.rs[i]
 138             res += inop.in_op + [inop.stb]
 139         return self.out_op.ports() + res + [self.mid]
 140
 141
 142 class FPGetOpMod:
 143     def __init__(self, width):
 144         self.in_op = FPOp(width)
 145         self.out_op = Signal(width)
 146         self.out_decode = Signal(reset_less=True)
 147
 148     def elaborate(self, platform):
 149         m = Module()
 150         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 151         m.submodules.get_op_in = self.in_op
 152         #m.submodules.get_op_out = self.out_op
 153         with m.If(self.out_decode):
 154             m.d.comb += [
 155                 self.out_op.eq(self.in_op.v),
 156             ]
 157         return m
 158
 159
 160 class FPGetOp(FPState):
 161     """ gets operand
 162     """
 163
 164     def __init__(self, in_state, out_state, in_op, width):
 165         FPState.__init__(self, in_state)
 166         self.out_state = out_state
 167         self.mod = FPGetOpMod(width)
 168         self.in_op = in_op
 169         self.out_op = Signal(width)
 170         self.out_decode = Signal(reset_less=True)
 171
 172     def setup(self, m, in_op):
 173         """ links module to inputs and outputs
 174         """
 175         setattr(m.submodules, self.state_from, self.mod)
 176         m.d.comb += self.mod.in_op.eq(in_op)
 177         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 178
 179     def action(self, m):
 180         with m.If(self.out_decode):
 181             m.next = self.out_state
 182             m.d.sync += [
 183                 self.in_op.ack.eq(0),
 184                 self.out_op.eq(self.mod.out_op)
 185             ]
 186         with m.Else():
 187             m.d.sync += self.in_op.ack.eq(1)
 188
 189
 190 class FPGet2OpMod(Trigger):
 191     def __init__(self, width, id_wid):
 192         Trigger.__init__(self)
 193         self.width = width
 194         self.id_wid = id_wid
 195         self.i = self.ispec()
 196         self.o = self.ospec()
 197
 198     def ispec(self):
 199         return FPADDBaseData(self.width, self.id_wid)
 200
 201     def ospec(self):
 202         return FPNumBase2Ops(self.width, self.id_wid)
 203
 204     def process(self, i):
 205         return self.o
 206
 207     def elaborate(self, platform):
 208         m = Trigger.elaborate(self, platform)
 209         m.submodules.get_op1_out = self.o.a
 210         m.submodules.get_op2_out = self.o.b
 211         out_op1 = FPNumIn(None, self.width)
 212         out_op2 = FPNumIn(None, self.width)
 213         with m.If(self.trigger):
 214             m.d.comb += [
 215                 out_op1.decode(self.i.a),
 216                 out_op2.decode(self.i.b),
 217                 self.o.a.eq(out_op1),
 218                 self.o.b.eq(out_op2),
 219                 self.o.mid.eq(self.i.mid)
 220             ]
 221         return m
 222
 223
 224 class FPGet2Op(FPState):
 225     """ gets operands
 226     """
 227
 228     def __init__(self, in_state, out_state, width, id_wid):
 229         FPState.__init__(self, in_state)
 230         self.out_state = out_state
 231         self.mod = FPGet2OpMod(width, id_wid)
 232         self.o = self.mod.ospec()
 233         self.in_stb = Signal(reset_less=True)
 234         self.out_ack = Signal(reset_less=True)
 235         self.out_decode = Signal(reset_less=True)
 236
 237     def setup(self, m, i, in_stb, in_ack):
 238         """ links module to inputs and outputs
 239         """
 240         m.submodules.get_ops = self.mod
 241         m.d.comb += self.mod.i.eq(i)
 242         m.d.comb += self.mod.stb.eq(in_stb)
 243         m.d.comb += self.out_ack.eq(self.mod.ack)
 244         m.d.comb += self.out_decode.eq(self.mod.trigger)
 245         m.d.comb += in_ack.eq(self.mod.ack)
 246
 247     def action(self, m):
 248         with m.If(self.out_decode):
 249             m.next = self.out_state
 250             m.d.sync += [
 251                 self.mod.ack.eq(0),
 252                 self.o.eq(self.mod.o),
 253             ]
 254         with m.Else():
 255             m.d.sync += self.mod.ack.eq(1)
 256
 257
 258 class FPNumBase2Ops:
 259
 260     def __init__(self, width, id_wid, m_extra=True):
 261         self.a = FPNumBase(width, m_extra)
 262         self.b = FPNumBase(width, m_extra)
 263         self.mid = Signal(id_wid, reset_less=True)
 264
 265     def eq(self, i):
 266         return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 267
 268
 269 class FPSCData:
 270
 271     def __init__(self, width, id_wid):
 272         self.a = FPNumBase(width, True)
 273         self.b = FPNumBase(width, True)
 274         self.z = FPNumOut(width, False)
 275         self.oz = Signal(width, reset_less=True)
 276         self.out_do_z = Signal(reset_less=True)
 277         self.mid = Signal(id_wid, reset_less=True)
 278
 279     def eq(self, i):
 280         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 281                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 282
 283
 284 class FPAddSpecialCasesMod:
 285     """ special cases: NaNs, infs, zeros, denormalised
 286         NOTE: some of these are unique to add.  see "Special Operations"
 287         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 288     """
 289
 290     def __init__(self, width, id_wid):
 291         self.width = width
 292         self.id_wid = id_wid
 293         self.i = self.ispec()
 294         self.o = self.ospec()
 295
 296     def ispec(self):
 297         return FPNumBase2Ops(self.width, self.id_wid)
 298
 299     def ospec(self):
 300         return FPSCData(self.width, self.id_wid)
 301
 302     def setup(self, m, i):
 303         """ links module to inputs and outputs
 304         """
 305         m.submodules.specialcases = self
 306         m.d.comb += self.i.eq(i)
 307
 308     def process(self, i):
 309         return self.o
 310
 311     def elaborate(self, platform):
 312         m = Module()
 313
 314         m.submodules.sc_in_a = self.i.a
 315         m.submodules.sc_in_b = self.i.b
 316         m.submodules.sc_out_z = self.o.z
 317
 318         s_nomatch = Signal()
 319         m.d.comb += s_nomatch.eq(self.i.a.s != self.i.b.s)
 320
 321         m_match = Signal()
 322         m.d.comb += m_match.eq(self.i.a.m == self.i.b.m)
 323
 324         # if a is NaN or b is NaN return NaN
 325         with m.If(self.i.a.is_nan | self.i.b.is_nan):
 326             m.d.comb += self.o.out_do_z.eq(1)
 327             m.d.comb += self.o.z.nan(0)
 328
 329         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 330         # under review
 331
 332         ## if a is zero and b is NaN return -b
 333         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 334         #    m.d.comb += self.o.out_do_z.eq(1)
 335         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 336
 337         ## if b is zero and a is NaN return -a
 338         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 339         #    m.d.comb += self.o.out_do_z.eq(1)
 340         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 341
 342         ## if a is -zero and b is NaN return -b
 343         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 344         #    m.d.comb += self.o.out_do_z.eq(1)
 345         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 346
 347         ## if b is -zero and a is NaN return -a
 348         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 349         #    m.d.comb += self.o.out_do_z.eq(1)
 350         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 351
 352         # if a is inf return inf (or NaN)
 353         with m.Elif(self.i.a.is_inf):
 354             m.d.comb += self.o.out_do_z.eq(1)
 355             m.d.comb += self.o.z.inf(self.i.a.s)
 356             # if a is inf and signs don't match return NaN
 357             with m.If(self.i.b.exp_128 & s_nomatch):
 358                 m.d.comb += self.o.z.nan(0)
 359
 360         # if b is inf return inf
 361         with m.Elif(self.i.b.is_inf):
 362             m.d.comb += self.o.out_do_z.eq(1)
 363             m.d.comb += self.o.z.inf(self.i.b.s)
 364
 365         # if a is zero and b zero return signed-a/b
 366         with m.Elif(self.i.a.is_zero & self.i.b.is_zero):
 367             m.d.comb += self.o.out_do_z.eq(1)
 368             m.d.comb += self.o.z.create(self.i.a.s & self.i.b.s,
 369                                           self.i.b.e,
 370                                           self.i.b.m[3:-1])
 371
 372         # if a is zero return b
 373         with m.Elif(self.i.a.is_zero):
 374             m.d.comb += self.o.out_do_z.eq(1)
 375             m.d.comb += self.o.z.create(self.i.b.s, self.i.b.e,
 376                                       self.i.b.m[3:-1])
 377
 378         # if b is zero return a
 379         with m.Elif(self.i.b.is_zero):
 380             m.d.comb += self.o.out_do_z.eq(1)
 381             m.d.comb += self.o.z.create(self.i.a.s, self.i.a.e,
 382                                       self.i.a.m[3:-1])
 383
 384         # if a equal to -b return zero (+ve zero)
 385         with m.Elif(s_nomatch & m_match & (self.i.a.e == self.i.b.e)):
 386             m.d.comb += self.o.out_do_z.eq(1)
 387             m.d.comb += self.o.z.zero(0)
 388
 389         # Denormalised Number checks next, so pass a/b data through
 390         with m.Else():
 391             m.d.comb += self.o.out_do_z.eq(0)
 392             m.d.comb += self.o.a.eq(self.i.a)
 393             m.d.comb += self.o.b.eq(self.i.b)
 394
 395         m.d.comb += self.o.oz.eq(self.o.z.v)
 396         m.d.comb += self.o.mid.eq(self.i.mid)
 397
 398         return m
 399
 400
 401 class FPID:
 402     def __init__(self, id_wid):
 403         self.id_wid = id_wid
 404         if self.id_wid:
 405             self.in_mid = Signal(id_wid, reset_less=True)
 406             self.out_mid = Signal(id_wid, reset_less=True)
 407         else:
 408             self.in_mid = None
 409             self.out_mid = None
 410
 411     def idsync(self, m):
 412         if self.id_wid is not None:
 413             m.d.sync += self.out_mid.eq(self.in_mid)
 414
 415
 416 class FPAddSpecialCases(FPState):
 417     """ special cases: NaNs, infs, zeros, denormalised
 418         NOTE: some of these are unique to add.  see "Special Operations"
 419         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 420     """
 421
 422     def __init__(self, width, id_wid):
 423         FPState.__init__(self, "special_cases")
 424         self.mod = FPAddSpecialCasesMod(width)
 425         self.out_z = self.mod.ospec()
 426         self.out_do_z = Signal(reset_less=True)
 427
 428     def setup(self, m, i):
 429         """ links module to inputs and outputs
 430         """
 431         self.mod.setup(m, i, self.out_do_z)
 432         m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 433         m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
 434
 435     def action(self, m):
 436         self.idsync(m)
 437         with m.If(self.out_do_z):
 438             m.next = "put_z"
 439         with m.Else():
 440             m.next = "denormalise"
 441
 442
 443 class FPAddSpecialCasesDeNorm(FPState):
 444     """ special cases: NaNs, infs, zeros, denormalised
 445         NOTE: some of these are unique to add.  see "Special Operations"
 446         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 447     """
 448
 449     def __init__(self, width, id_wid):
 450         FPState.__init__(self, "special_cases")
 451         self.smod = FPAddSpecialCasesMod(width, id_wid)
 452         self.dmod = FPAddDeNormMod(width, id_wid)
 453         self.o = self.ospec()
 454
 455     def ispec(self):
 456         return self.smod.ispec()
 457
 458     def ospec(self):
 459         return self.dmod.ospec()
 460
 461     def setup(self, m, i):
 462         """ links module to inputs and outputs
 463         """
 464         # these only needed for break-out (early-out)
 465         # out_z = self.smod.ospec()
 466         # out_do_z = Signal(reset_less=True)
 467         self.smod.setup(m, i)
 468         self.dmod.setup(m, self.smod.o)
 469         #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
 470
 471         # out_do_z=True, only needed for early-out (split pipeline)
 472         #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
 473         #m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)
 474
 475         # out_do_z=False
 476         m.d.sync += self.o.eq(self.dmod.o)
 477
 478     def process(self, i):
 479         return self.o
 480
 481     def action(self, m):
 482         #with m.If(self.out_do_z):
 483         #    m.next = "put_z"
 484         #with m.Else():
 485             m.next = "align"
 486
 487
 488 class FPAddDeNormMod(FPState):
 489
 490     def __init__(self, width, id_wid):
 491         self.width = width
 492         self.id_wid = id_wid
 493         self.i = self.ispec()
 494         self.o = self.ospec()
 495
 496     def ispec(self):
 497         return FPSCData(self.width, self.id_wid)
 498
 499     def ospec(self):
 500         return FPSCData(self.width, self.id_wid)
 501
 502     def setup(self, m, i):
 503         """ links module to inputs and outputs
 504         """
 505         m.submodules.denormalise = self
 506         m.d.comb += self.i.eq(i)
 507
 508     def elaborate(self, platform):
 509         m = Module()
 510         m.submodules.denorm_in_a = self.i.a
 511         m.submodules.denorm_in_b = self.i.b
 512         m.submodules.denorm_out_a = self.o.a
 513         m.submodules.denorm_out_b = self.o.b
 514
 515         with m.If(~self.i.out_do_z):
 516             # XXX hmmm, don't like repeating identical code
 517             m.d.comb += self.o.a.eq(self.i.a)
 518             with m.If(self.i.a.exp_n127):
 519                 m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
 520             with m.Else():
 521                 m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit
 522
 523             m.d.comb += self.o.b.eq(self.i.b)
 524             with m.If(self.i.b.exp_n127):
 525                 m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit a exponent
 526             with m.Else():
 527                 m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit
 528
 529         m.d.comb += self.o.mid.eq(self.i.mid)
 530         m.d.comb += self.o.z.eq(self.i.z)
 531         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 532         m.d.comb += self.o.oz.eq(self.i.oz)
 533
 534         return m
 535
 536
 537 class FPAddDeNorm(FPState):
 538
 539     def __init__(self, width, id_wid):
 540         FPState.__init__(self, "denormalise")
 541         self.mod = FPAddDeNormMod(width)
 542         self.out_a = FPNumBase(width)
 543         self.out_b = FPNumBase(width)
 544
 545     def setup(self, m, i):
 546         """ links module to inputs and outputs
 547         """
 548         self.mod.setup(m, i)
 549
 550         m.d.sync += self.out_a.eq(self.mod.out_a)
 551         m.d.sync += self.out_b.eq(self.mod.out_b)
 552
 553     def action(self, m):
 554         # Denormalised Number checks
 555         m.next = "align"
 556
 557
 558 class FPAddAlignMultiMod(FPState):
 559
 560     def __init__(self, width):
 561         self.in_a = FPNumBase(width)
 562         self.in_b = FPNumBase(width)
 563         self.out_a = FPNumIn(None, width)
 564         self.out_b = FPNumIn(None, width)
 565         self.exp_eq = Signal(reset_less=True)
 566
 567     def elaborate(self, platform):
 568         # This one however (single-cycle) will do the shift
 569         # in one go.
 570
 571         m = Module()
 572
 573         m.submodules.align_in_a = self.in_a
 574         m.submodules.align_in_b = self.in_b
 575         m.submodules.align_out_a = self.out_a
 576         m.submodules.align_out_b = self.out_b
 577
 578         # NOTE: this does *not* do single-cycle multi-shifting,
 579         #       it *STAYS* in the align state until exponents match
 580
 581         # exponent of a greater than b: shift b down
 582         m.d.comb += self.exp_eq.eq(0)
 583         m.d.comb += self.out_a.eq(self.in_a)
 584         m.d.comb += self.out_b.eq(self.in_b)
 585         agtb = Signal(reset_less=True)
 586         altb = Signal(reset_less=True)
 587         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 588         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 589         with m.If(agtb):
 590             m.d.comb += self.out_b.shift_down(self.in_b)
 591         # exponent of b greater than a: shift a down
 592         with m.Elif(altb):
 593             m.d.comb += self.out_a.shift_down(self.in_a)
 594         # exponents equal: move to next stage.
 595         with m.Else():
 596             m.d.comb += self.exp_eq.eq(1)
 597         return m
 598
 599
 600 class FPAddAlignMulti(FPState):
 601
 602     def __init__(self, width, id_wid):
 603         FPState.__init__(self, "align")
 604         self.mod = FPAddAlignMultiMod(width)
 605         self.out_a = FPNumIn(None, width)
 606         self.out_b = FPNumIn(None, width)
 607         self.exp_eq = Signal(reset_less=True)
 608
 609     def setup(self, m, in_a, in_b):
 610         """ links module to inputs and outputs
 611         """
 612         m.submodules.align = self.mod
 613         m.d.comb += self.mod.in_a.eq(in_a)
 614         m.d.comb += self.mod.in_b.eq(in_b)
 615         #m.d.comb += self.out_a.eq(self.mod.out_a)
 616         #m.d.comb += self.out_b.eq(self.mod.out_b)
 617         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 618         m.d.sync += self.out_a.eq(self.mod.out_a)
 619         m.d.sync += self.out_b.eq(self.mod.out_b)
 620
 621     def action(self, m):
 622         with m.If(self.exp_eq):
 623             m.next = "add_0"
 624
 625
 626 class FPNumIn2Ops:
 627
 628     def __init__(self, width, id_wid):
 629         self.a = FPNumIn(None, width)
 630         self.b = FPNumIn(None, width)
 631         self.z = FPNumOut(width, False)
 632         self.out_do_z = Signal(reset_less=True)
 633         self.oz = Signal(width, reset_less=True)
 634         self.mid = Signal(id_wid, reset_less=True)
 635
 636     def eq(self, i):
 637         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 638                 self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
 639
 640
 641 class FPAddAlignSingleMod:
 642
 643     def __init__(self, width, id_wid):
 644         self.width = width
 645         self.id_wid = id_wid
 646         self.i = self.ispec()
 647         self.o = self.ospec()
 648
 649     def ispec(self):
 650         return FPSCData(self.width, self.id_wid)
 651
 652     def ospec(self):
 653         return FPNumIn2Ops(self.width, self.id_wid)
 654
 655     def process(self, i):
 656         return self.o
 657
 658     def setup(self, m, i):
 659         """ links module to inputs and outputs
 660         """
 661         m.submodules.align = self
 662         m.d.comb += self.i.eq(i)
 663
 664     def elaborate(self, platform):
 665         """ Aligns A against B or B against A, depending on which has the
 666             greater exponent.  This is done in a *single* cycle using
 667             variable-width bit-shift
 668
 669             the shifter used here is quite expensive in terms of gates.
 670             Mux A or B in (and out) into temporaries, as only one of them
 671             needs to be aligned against the other
 672         """
 673         m = Module()
 674
 675         m.submodules.align_in_a = self.i.a
 676         m.submodules.align_in_b = self.i.b
 677         m.submodules.align_out_a = self.o.a
 678         m.submodules.align_out_b = self.o.b
 679
 680         # temporary (muxed) input and output to be shifted
 681         t_inp = FPNumBase(self.width)
 682         t_out = FPNumIn(None, self.width)
 683         espec = (len(self.i.a.e), True)
 684         msr = MultiShiftRMerge(self.i.a.m_width, espec)
 685         m.submodules.align_t_in = t_inp
 686         m.submodules.align_t_out = t_out
 687         m.submodules.multishift_r = msr
 688
 689         ediff = Signal(espec, reset_less=True)
 690         ediffr = Signal(espec, reset_less=True)
 691         tdiff = Signal(espec, reset_less=True)
 692         elz = Signal(reset_less=True)
 693         egz = Signal(reset_less=True)
 694
 695         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 696         m.d.comb += msr.inp.eq(t_inp.m)
 697         m.d.comb += msr.diff.eq(tdiff)
 698         m.d.comb += t_out.m.eq(msr.m)
 699         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 700         m.d.comb += t_out.s.eq(t_inp.s)
 701
 702         m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
 703         m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
 704         m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
 705         m.d.comb += egz.eq(self.i.a.e > self.i.b.e)
 706
 707         # default: A-exp == B-exp, A and B untouched (fall through)
 708         m.d.comb += self.o.a.eq(self.i.a)
 709         m.d.comb += self.o.b.eq(self.i.b)
 710         # only one shifter (muxed)
 711         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 712         # exponent of a greater than b: shift b down
 713         with m.If(~self.i.out_do_z):
 714             with m.If(egz):
 715                 m.d.comb += [t_inp.eq(self.i.b),
 716                              tdiff.eq(ediff),
 717                              self.o.b.eq(t_out),
 718                              self.o.b.s.eq(self.i.b.s), # whoops forgot sign
 719                             ]
 720             # exponent of b greater than a: shift a down
 721             with m.Elif(elz):
 722                 m.d.comb += [t_inp.eq(self.i.a),
 723                              tdiff.eq(ediffr),
 724                              self.o.a.eq(t_out),
 725                              self.o.a.s.eq(self.i.a.s), # whoops forgot sign
 726                             ]
 727
 728         m.d.comb += self.o.mid.eq(self.i.mid)
 729         m.d.comb += self.o.z.eq(self.i.z)
 730         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 731         m.d.comb += self.o.oz.eq(self.i.oz)
 732
 733         return m
 734
 735
 736 class FPAddAlignSingle(FPState):
 737
 738     def __init__(self, width, id_wid):
 739         FPState.__init__(self, "align")
 740         self.mod = FPAddAlignSingleMod(width, id_wid)
 741         self.out_a = FPNumIn(None, width)
 742         self.out_b = FPNumIn(None, width)
 743
 744     def setup(self, m, i):
 745         """ links module to inputs and outputs
 746         """
 747         self.mod.setup(m, i)
 748
 749         # NOTE: could be done as comb
 750         m.d.sync += self.out_a.eq(self.mod.out_a)
 751         m.d.sync += self.out_b.eq(self.mod.out_b)
 752
 753     def action(self, m):
 754         m.next = "add_0"
 755
 756
 757 class FPAddAlignSingleAdd(FPState):
 758
 759     def __init__(self, width, id_wid):
 760         FPState.__init__(self, "align")
 761         self.width = width
 762         self.id_wid = id_wid
 763         self.a1o = self.ospec()
 764
 765     def ispec(self):
 766         return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec
 767
 768     def ospec(self):
 769         return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec
 770
 771     def setup(self, m, i):
 772         """ links module to inputs and outputs
 773         """
 774
 775         # chain AddAlignSingle, AddStage0 and AddStage1
 776         mod = FPAddAlignSingleMod(self.width, self.id_wid)
 777         a0mod = FPAddStage0Mod(self.width, self.id_wid)
 778         a1mod = FPAddStage1Mod(self.width, self.id_wid)
 779
 780         chain = StageChain([mod, a0mod, a1mod])
 781         chain.setup(m, i)
 782
 783         m.d.sync += self.a1o.eq(a1mod.o)
 784
 785     def process(self, i):
 786         return self.a1o
 787
 788     def action(self, m):
 789         m.next = "normalise_1"
 790
 791
 792 class FPAddStage0Data:
 793
 794     def __init__(self, width, id_wid):
 795         self.z = FPNumBase(width, False)
 796         self.out_do_z = Signal(reset_less=True)
 797         self.oz = Signal(width, reset_less=True)
 798         self.tot = Signal(self.z.m_width + 4, reset_less=True)
 799         self.mid = Signal(id_wid, reset_less=True)
 800
 801     def eq(self, i):
 802         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 803                 self.tot.eq(i.tot), self.mid.eq(i.mid)]
 804
 805
 806 class FPAddStage0Mod:
 807
 808     def __init__(self, width, id_wid):
 809         self.width = width
 810         self.id_wid = id_wid
 811         self.i = self.ispec()
 812         self.o = self.ospec()
 813
 814     def ispec(self):
 815         return FPSCData(self.width, self.id_wid)
 816
 817     def ospec(self):
 818         return FPAddStage0Data(self.width, self.id_wid)
 819
 820     def process(self, i):
 821         return self.o
 822
 823     def setup(self, m, i):
 824         """ links module to inputs and outputs
 825         """
 826         m.submodules.add0 = self
 827         m.d.comb += self.i.eq(i)
 828
 829     def elaborate(self, platform):
 830         m = Module()
 831         m.submodules.add0_in_a = self.i.a
 832         m.submodules.add0_in_b = self.i.b
 833         m.submodules.add0_out_z = self.o.z
 834
 835         # store intermediate tests (and zero-extended mantissas)
 836         seq = Signal(reset_less=True)
 837         mge = Signal(reset_less=True)
 838         am0 = Signal(len(self.i.a.m)+1, reset_less=True)
 839         bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
 840         m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
 841                      mge.eq(self.i.a.m >= self.i.b.m),
 842                      am0.eq(Cat(self.i.a.m, 0)),
 843                      bm0.eq(Cat(self.i.b.m, 0))
 844                     ]
 845         # same-sign (both negative or both positive) add mantissas
 846         with m.If(~self.i.out_do_z):
 847             m.d.comb += self.o.z.e.eq(self.i.a.e)
 848             with m.If(seq):
 849                 m.d.comb += [
 850                     self.o.tot.eq(am0 + bm0),
 851                     self.o.z.s.eq(self.i.a.s)
 852                 ]
 853             # a mantissa greater than b, use a
 854             with m.Elif(mge):
 855                 m.d.comb += [
 856                     self.o.tot.eq(am0 - bm0),
 857                     self.o.z.s.eq(self.i.a.s)
 858                 ]
 859             # b mantissa greater than a, use b
 860             with m.Else():
 861                 m.d.comb += [
 862                     self.o.tot.eq(bm0 - am0),
 863                     self.o.z.s.eq(self.i.b.s)
 864             ]
 865
 866         m.d.comb += self.o.oz.eq(self.i.oz)
 867         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 868         m.d.comb += self.o.mid.eq(self.i.mid)
 869         return m
 870
 871
 872 class FPAddStage0(FPState):
 873     """ First stage of add.  covers same-sign (add) and subtract
 874         special-casing when mantissas are greater or equal, to
 875         give greatest accuracy.
 876     """
 877
 878     def __init__(self, width, id_wid):
 879         FPState.__init__(self, "add_0")
 880         self.mod = FPAddStage0Mod(width)
 881         self.o = self.mod.ospec()
 882
 883     def setup(self, m, i):
 884         """ links module to inputs and outputs
 885         """
 886         self.mod.setup(m, i)
 887
 888         # NOTE: these could be done as combinatorial (merge add0+add1)
 889         m.d.sync += self.o.eq(self.mod.o)
 890
 891     def action(self, m):
 892         m.next = "add_1"
 893
 894
 895 class FPAddStage1Data:
 896
 897     def __init__(self, width, id_wid):
 898         self.z = FPNumBase(width, False)
 899         self.out_do_z = Signal(reset_less=True)
 900         self.oz = Signal(width, reset_less=True)
 901         self.of = Overflow()
 902         self.mid = Signal(id_wid, reset_less=True)
 903
 904     def eq(self, i):
 905         return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z), self.oz.eq(i.oz),
 906                 self.of.eq(i.of), self.mid.eq(i.mid)]
 907
 908
 909
 910 class FPAddStage1Mod(FPState):
 911     """ Second stage of add: preparation for normalisation.
 912         detects when tot sum is too big (tot[27] is kinda a carry bit)
 913     """
 914
 915     def __init__(self, width, id_wid):
 916         self.width = width
 917         self.id_wid = id_wid
 918         self.i = self.ispec()
 919         self.o = self.ospec()
 920
 921     def ispec(self):
 922         return FPAddStage0Data(self.width, self.id_wid)
 923
 924     def ospec(self):
 925         return FPAddStage1Data(self.width, self.id_wid)
 926
 927     def process(self, i):
 928         return self.o
 929
 930     def setup(self, m, i):
 931         """ links module to inputs and outputs
 932         """
 933         m.submodules.add1 = self
 934         m.submodules.add1_out_overflow = self.o.of
 935
 936         m.d.comb += self.i.eq(i)
 937
 938     def elaborate(self, platform):
 939         m = Module()
 940         #m.submodules.norm1_in_overflow = self.in_of
 941         #m.submodules.norm1_out_overflow = self.out_of
 942         #m.submodules.norm1_in_z = self.in_z
 943         #m.submodules.norm1_out_z = self.out_z
 944         m.d.comb += self.o.z.eq(self.i.z)
 945         # tot[-1] (MSB) gets set when the sum overflows. shift result down
 946         with m.If(~self.i.out_do_z):
 947             with m.If(self.i.tot[-1]):
 948                 m.d.comb += [
 949                     self.o.z.m.eq(self.i.tot[4:]),
 950                     self.o.of.m0.eq(self.i.tot[4]),
 951                     self.o.of.guard.eq(self.i.tot[3]),
 952                     self.o.of.round_bit.eq(self.i.tot[2]),
 953                     self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
 954                     self.o.z.e.eq(self.i.z.e + 1)
 955             ]
 956             # tot[-1] (MSB) zero case
 957             with m.Else():
 958                 m.d.comb += [
 959                     self.o.z.m.eq(self.i.tot[3:]),
 960                     self.o.of.m0.eq(self.i.tot[3]),
 961                     self.o.of.guard.eq(self.i.tot[2]),
 962                     self.o.of.round_bit.eq(self.i.tot[1]),
 963                     self.o.of.sticky.eq(self.i.tot[0])
 964             ]
 965
 966         m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
 967         m.d.comb += self.o.oz.eq(self.i.oz)
 968         m.d.comb += self.o.mid.eq(self.i.mid)
 969
 970         return m
 971
 972
 973 class FPAddStage1(FPState):
 974
 975     def __init__(self, width, id_wid):
 976         FPState.__init__(self, "add_1")
 977         self.mod = FPAddStage1Mod(width)
 978         self.out_z = FPNumBase(width, False)
 979         self.out_of = Overflow()
 980         self.norm_stb = Signal()
 981
 982     def setup(self, m, i):
 983         """ links module to inputs and outputs
 984         """
 985         self.mod.setup(m, i)
 986
 987         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 988
 989         m.d.sync += self.out_of.eq(self.mod.out_of)
 990         m.d.sync += self.out_z.eq(self.mod.out_z)
 991         m.d.sync += self.norm_stb.eq(1)
 992
 993     def action(self, m):
 994         m.next = "normalise_1"
 995
 996
 997 class FPNormaliseModSingle:
 998
 999     def __init__(self, width):
1000         self.width = width
1001         self.in_z = self.ispec()
1002         self.out_z = self.ospec()
1003
1004     def ispec(self):
1005         return FPNumBase(self.width, False)
1006
1007     def ospec(self):
1008         return FPNumBase(self.width, False)
1009
1010     def setup(self, m, i):
1011         """ links module to inputs and outputs
1012         """
1013         m.submodules.normalise = self
1014         m.d.comb += self.i.eq(i)
1015
1016     def elaborate(self, platform):
1017         m = Module()
1018
1019         mwid = self.out_z.m_width+2
1020         pe = PriorityEncoder(mwid)
1021         m.submodules.norm_pe = pe
1022
1023         m.submodules.norm1_out_z = self.out_z
1024         m.submodules.norm1_in_z = self.in_z
1025
1026         in_z = FPNumBase(self.width, False)
1027         in_of = Overflow()
1028         m.submodules.norm1_insel_z = in_z
1029         m.submodules.norm1_insel_overflow = in_of
1030
1031         espec = (len(in_z.e), True)
1032         ediff_n126 = Signal(espec, reset_less=True)
1033         msr = MultiShiftRMerge(mwid, espec)
1034         m.submodules.multishift_r = msr
1035
1036         m.d.comb += in_z.eq(self.in_z)
1037         m.d.comb += in_of.eq(self.in_of)
1038         # initialise out from in (overridden below)
1039         m.d.comb += self.out_z.eq(in_z)
1040         m.d.comb += self.out_of.eq(in_of)
1041         # normalisation decrease condition
1042         decrease = Signal(reset_less=True)
1043         m.d.comb += decrease.eq(in_z.m_msbzero)
1044         # decrease exponent
1045         with m.If(decrease):
1046             # *sigh* not entirely obvious: count leading zeros (clz)
1047             # with a PriorityEncoder: to find from the MSB
1048             # we reverse the order of the bits.
1049             temp_m = Signal(mwid, reset_less=True)
1050             temp_s = Signal(mwid+1, reset_less=True)
1051             clz = Signal((len(in_z.e), True), reset_less=True)
1052             m.d.comb += [
1053                 # cat round and guard bits back into the mantissa
1054                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
1055                 pe.i.eq(temp_m[::-1]),          # inverted
1056                 clz.eq(pe.o),                   # count zeros from MSB down
1057                 temp_s.eq(temp_m << clz),       # shift mantissa UP
1058                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
1059                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
1060             ]
1061
1062         return m
1063
class FPNorm1Data:
    """ Output record of the normalisation stage.

        Fields: roundz (rounding flag), z (number), out_do_z (early-out
        flag), oz (early-out value), mid (id).
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1076
1077
class FPNorm1ModSingle:
    """ Single-cycle normalisation stage.

        Input is the add-stage-1 record (number plus overflow bits);
        output is the normalised number plus a roundz flag.  Depending on
        the exponent tests of the input number, the mantissa is either
        shifted up (exponent decreased) or shifted down through a
        MultiShiftRMerge (exponent increased).  Nothing is altered when
        out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: output format of add stage 1
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record of this stage
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ the output record (i itself is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        # local overflow record: only its roundz is exported
        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # internal copy of the input record
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),    # exclude bits 0&1
                    of.m0.eq(temp_s[2]),          # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),       # guard
                    of.round_bit.eq(temp_s[0]),   # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the low bits must come from the shifter output:
                    # temp_s is only driven in the "decrease" branch
                    of.m0.eq(msr.m[3]),   # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too
                    of.guard.eq(msr.m[2]),     # guard
                    of.round_bit.eq(msr.m[1]), # round
                    of.sticky.eq(msr.m[0]),    # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # id and early-out flag/value are always passed on
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1184
1185
class FPNorm1ModMulti:
    """ One-bit-per-step normalisation module.

        Each evaluation shifts the selected input (in_z/in_of when
        in_select is set, otherwise temp_z/temp_of) by at most one bit,
        adjusting the exponent by one.  out_norm is set while a further
        step (increase or decrease) is still being applied.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven in elaborate(): must exist on the instance
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1251
1252
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" using the single-cycle normaliser
        (FPNorm1ModSingle).
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM transition: always proceed to rounding
        """
        m.next = "round"
1275
1276
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" using the multi-cycle normaliser
        (FPNorm1ModMulti): the state loops, feeding the module's output
        back in through temp_z/temp_of, until out_norm is clear.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as defined in this file, has no
        # setup() method, so this call looks like it would raise
        # AttributeError - confirm and restore the wiring if this state
        # is still meant to be usable.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ FSM action: keep looping while out_norm is set, otherwise
            latch the rounding flag and move on to "round".
        """
        # a new input is accepted when strobed and not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed the module's result back as the next iteration's input
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1317
1318
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact form): normalisation, rounding,
        corrections and packing chained together, with the packed result
        registered into self.out_z.
    """

    def __init__(self, width, id_wid):
        super().__init__("normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        """ input record: same as FPNorm1ModSingle's ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record: same as FPPackMod's ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the norm -> round -> corrections -> pack chain on m,
            fed from i, and latches the packed output
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        pmod = stages[-1]
        self.out_z = pmod.ospec()

        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.eq(pmod.o.z) # outputs packed result

    def process(self, i):
        """ the registered output record (i itself is not used)
        """
        return self.out_z

    def action(self, m):
        """ FSM transition: always proceed to output of the result
        """
        m.next = "pack_put_z"
1353
1354
class FPRoundData:
    """ Record passed out of the rounding (and corrections) stages.

        Fields: z (number), out_do_z (early-out flag), oz (early-out
        value), mid (id).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1366
1367
class FPRoundMod:
    """ Rounding stage: when the input's roundz flag is set the mantissa
        is incremented, and the exponent too if the mantissa was all 1s.
        Nothing is altered when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record: output format of the normalisation stage
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record of this stage
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ the output record (i itself is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this stage as a submodule of m and connects i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        z_in = self.i.z
        z_out = self.out_z.z

        # default: straight copy (copies mid, z, out_do_z)
        m.d.comb += self.out_z.eq(self.i)

        with m.If(~self.i.out_do_z):
            with m.If(self.i.roundz):
                # mantissa up
                m.d.comb += z_out.m.eq(z_in.m + 1)
                # all 1s: exponent up
                with m.If(z_in.m == z_in.m1s):
                    m.d.comb += z_out.e.eq(z_in.e + 1)

        return m
1399
1400
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined on FPState as visible in
        # this file; presumably inherited from FPBase - confirm.
        self.idsync(m)
        # the module's output record is out_z (it has no "o" attribute);
        # FPRoundData.eq copies mid along with the other fields
        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        """ FSM transition: always proceed to corrections
        """
        m.next = "corrections"
1425
1426
class FPCorrectionsMod:
    """ Corrections stage: when the input number tests as denormalised
        its exponent is replaced by the N127 constant.  Nothing is altered
        when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record: output format of the rounding stage
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record: same format as the input
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ the output record (i itself is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this stage as a submodule of m and connects i
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        z_in = self.i.z
        z_out = self.out_z.z
        m.submodules.corr_in_z = z_in
        m.submodules.corr_out_z = z_out

        # default: straight copy (copies mid, z, out_do_z)
        m.d.comb += self.out_z.eq(self.i)

        with m.If(~self.i.out_do_z):
            with m.If(z_in.is_denormalised):
                m.d.comb += z_out.e.eq(z_in.N127)
        return m
1459
1460
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module's output record is out_z (it has no "o" attribute);
        # FPRoundData.eq copies mid along with the other fields
        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        """ FSM transition: always proceed to packing
        """
        m.next = "pack"
1484
1485
class FPPackData:
    """ Output record of the pack stage: z (packed value) and mid (id).
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]
1494
1495
class FPPackMod:
    """ Pack stage: produces the final value, either built from the
        input number's sign/exponent/mantissa (or infinity when it tests
        as overflowed), or taken directly from oz when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: output format of the rounding/corrections stage
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record of this stage
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ the output record (i itself is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        z_in = self.i.z
        m.submodules.pack_in_z = z_in
        m.submodules.pack_out_z = z

        m.d.comb += self.o.mid.eq(self.i.mid)

        with m.If(~self.i.out_do_z):
            with m.If(z_in.is_overflowed):
                m.d.comb += z.inf(z_in.s)
            with m.Else():
                m.d.comb += z.create(z_in.s, z_in.e, z_in.m)
        with m.Else():
            # early-out: use the precomputed value as-is
            m.d.comb += z.v.eq(self.i.oz)

        m.d.comb += self.o.z.eq(z.v)
        return m
1534
1535
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module's output record is "o" (an FPPackData with z and
        # mid fields, no "v"); FPPackData.eq copies both fields
        m.d.sync += self.out_z.eq(self.mod.o)

    def action(self, m):
        """ FSM transition: always proceed to output of the result
        """
        m.next = "pack_put_z"
1559
1560
class FPPutZ(FPState):
    """ FSM state that presents in_z on the output port out_z.z and
        waits for the stb/ack handshake before moving to to_state
        ("get_ops" when not given).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: drive the value (and id, if any), raise stb until
            both stb and ack are seen, then drop stb and change state
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        port = self.out_z.z
        m.d.sync += port.v.eq(self.in_z)

        with m.If(port.stb & port.ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1584
1585
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on one of several output ports,
        selected by in_mid, and waits for that port's stb/ack handshake
        before moving to to_state ("get_ops" when not given).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: drive the selected port's value, raise its stb
            until both stb and ack are seen, then drop stb and change state
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        selected = self.out_zs[self.in_mid]

        m.d.comb += outz_stb.eq(selected.stb)
        m.d.comb += outz_ack.eq(selected.ack)
        m.d.sync += selected.v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            m.d.sync += selected.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += selected.stb.eq(1)
1611
class FPADDBaseData:
    """ Input record of the adder: operands a and b plus the id (mid).
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("a", "b", "mid")]

    def ports(self):
        """ returns the record's signals as a list: a, b, mid
        """
        return [getattr(self, name) for name in ("a", "b", "mid")]
1626
class FPOpData:
    """ Output record of the adder: z (an FPOp port) plus the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns the record's members as a list: z, mid
        """
        return [getattr(self, name) for name in ("z", "mid")]
1637
1638
class FPADDBaseMod:
    """ FSM-based IEEE754 FP adder.

        Builds a list of FPState objects (self.states), each of which
        contributes one state to an FSM in get_fragment().  Two variants
        exist: a "compact" one with few states and a "longer" one with one
        state per processing step.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FPState objects, in the order they were added
        self.states = []

    def ispec(self):
        """ input record: operands a, b and the id
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record: result port z and the id
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states (and wire each state's module into m)
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            # one FSM state per FPState, named by its state_from
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-step variant of the adder.

            NOTE(review): this path looks stale.  It reads self.in_mid,
            self.out_z and self.out_mid, none of which are assigned in
            __init__ above, and it calls FPGet2Op with three arguments
            where get_compact_fragment passes four.  Several setup() calls
            below also pass more arguments than the setup() methods of the
            state classes defined in this file accept.  Confirm before
            using compact=False.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: one clock, or iterated over several
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: one clock, or iterated over several
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one fed by the pack stage, one by the
        # special-cases stage
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced-stage variant of the adder: get operands,
            special-cases+denormalise, align+add, normalise-to-pack, and
            output of the result.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        # packed result (and its id) go to this module's output record
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1758
1759
class FPADDBase(FPState):
    """ FSM state ("fpadd") that wraps an FPADDBaseMod submodule.

        the state passes an operand record into the submodule, raises
        the submodule's input strobe when an operand pair is accepted,
        and moves the FSM on to "put_z" once the submodule's output
        trigger (z_done) fires.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links this state to the wrapped module and registers it

            * m: the Module to add the connections and the submodule to
            * i: operand record, copied (comb) into self.i and on to mod.i
            * add_stb: incoming strobe, registered (sync) into self.add_stb
            * in_mid: accepted for interface compatibility; not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM behaviour whilst in the "fpadd" state

            * not done, operands pending: ack them and strobe the adder
            * not done, nothing pending: drop ack/strobe, ack the output
            * done (z_done): ack, drop the strobe, go to "put_z"
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and hand over to the output state.
            # (the statements that used to follow an unconditional
            # "return" here could never run, and have been removed)
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1849
1850
class FPADDStageOut:
    """ output record of the add stage: a result (z) and an id (mid)
    """
    def __init__(self, width, id_wid):
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid into
            this record (z first, then mid)
        """
        z_assign = self.z.eq(i.z)
        mid_assign = self.mid.eq(i.mid)
        return [z_assign, mid_assign]

    def ports(self):
        """ returns the record's fields as a list: z, then mid
        """
        fields = [self.z]
        fields.append(self.mid)
        return fields
1861
1862
class PlaceHolder:
    """ empty attribute holder.  given the same attributes as
        FPADDStageOut, it matches that format, which allows the eq
        function to do the assignments.
    """
1865
1866
class FPAddBaseStage:
    """ pipeline stage: takes an FPADDBaseData record in, and produces
        a record in the format of FPADDStageOut
    """
    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ creates a fresh input record
        """
        in_spec = FPADDBaseData(self.width, self.id_wid)
        return in_spec

    def ospec(self):
        """ creates a fresh output record
        """
        out_spec = FPADDStageOut(self.width, self.id_wid)
        return out_spec

    def process(self, i):
        """ returns a PlaceHolder with z set to i.a + i.b (the "+"
            operator applied to the two input fields) and mid passed
            through unmodified
        """
        result = PlaceHolder()
        total = i.a + i.b
        result.z = total
        result.mid = i.mid
        return result
1883
1884
class FPADDBasePipe1(UnbufferedPipeline):
    """ an UnbufferedPipeline built around a single FPAddBaseStage
    """
    def __init__(self, width, id_wid):
        UnbufferedPipeline.__init__(self, FPAddBaseStage(width, id_wid))
1889
1890
class FPADDBasePipe(ControlBase):
    """ pipeline chain containing one stage (FPADDBasePipe1).  the
        connections returned by connect() are kept from construction
        and added to the module at elaboration.
    """
    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPADDBasePipe1(width, id_wid)
        chain = [self.pipe1]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ creates the module: pipe1 as a submodule, plus the stored
            connections as combinatorial statements
        """
        mod = Module()
        mod.submodules.pipe1 = self.pipe1
        mod.d.comb += self._eqs
        return mod
1902
1903
class PriorityCombPipeline(CombMultiInPipeline):
    """ multi-input combinatorial pipeline that uses an
        InputPriorityArbiter as its input multiplexer
    """
    def __init__(self, stage, p_len):
        # the arbiter is given this pipeline and the number of inputs,
        # and is created before the base class is initialised
        arbiter = InputPriorityArbiter(self, p_len)
        CombMultiInPipeline.__init__(self, stage, p_len=p_len,
                                     p_mux=arbiter)

    def ports(self):
        """ returns the ports of the input multiplexer
        """
        mux = self.p_mux
        return mux.ports()
1911
1912
class FPAddInPassThruStage:
    """ pass-through stage for the input side: input and output use the
        same record format (FPADDBaseData) and data is not modified
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ creates a fresh input record
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format is identical to the input format
        """
        return self.ispec()

    def process(self, i):
        """ returns the input, untouched
        """
        return i
1919
1920
class FPADDInMuxPipe(PriorityCombPipeline):
    """ fan-in pipeline: num_rows inputs, priority-arbitrated onto one
        output, with data passed through by FPAddInPassThruStage
    """
    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        PriorityCombPipeline.__init__(self,
                                      FPAddInPassThruStage(width, id_width),
                                      p_len=self.num_rows)

    def ports(self):
        """ lists, for every input, its valid/ready signals followed by
            its data ports; then the same for the single output
        """
        res = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            handshake = [prev.i_valid, prev.o_ready]
            res += handshake + prev.i_data.ports()
        nxt = self.n
        res += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return res
1937
1938
class MuxCombPipeline(CombMultiOutPipeline):
    """ multi-output combinatorial pipeline in which the stage object
        doubles up as the output multiplexer
    """
    def __init__(self, stage, n_len):
        # HACK: stage is also the n-way multiplexer
        CombMultiOutPipeline.__init__(self, stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        stage.m_id = self.p.i_data.mid

    def ports(self):
        # NOTE(review): this reads self.p_mux, yet the constructor above
        # hands the multiplexer over as n_mux.  presumably a left-over
        # from PriorityCombPipeline.ports - confirm against
        # CombMultiOutPipeline that a p_mux attribute exists at all.
        return self.p_mux.ports()
1949
1950
class FPAddOutPassThruStage:
    """ pass-through stage for the output side: input and output use
        the same record format (FPADDStageOut) and data is not modified
    """
    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ creates a fresh input record
        """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ output format is identical to the input format
        """
        return self.ispec()

    def process(self, i):
        """ returns the input, untouched
        """
        return i
1957
1958
class FPADDMuxOutPipe(MuxCombPipeline):
    """ fan-out pipeline: one input, num_rows outputs, with data passed
        through by FPAddOutPassThruStage
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        MuxCombPipeline.__init__(self, passthru, n_len=self.num_rows)

    def ports(self):
        """ lists the single input's valid/ready signals and data ports,
            followed by the same for every output
        """
        prev = self.p
        res = [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            handshake = [nxt.i_ready, nxt.o_valid]
            res += handshake + nxt.o_data.ports()
        return res
1974
1975
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        three pipelines in a row: a fan-in (FPADDInMuxPipe, num_rows
        inputs), the add stage (FPADDBasePipe), and a fan-out
        (FPADDMuxOutPipe, num_rows outputs).
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # the fan-in's inputs and the fan-out's outputs are re-exported
        # as the inputs and outputs of this class
        self.p = self.inpipe.p
        self.n = self.outpipe.n

        in_ports = self.inpipe.ports()
        out_ports = self.outpipe.ports()
        self._ports = in_ports + out_ports

    def elaborate(self, platform):
        """ creates the module: the three pipelines as submodules,
            connected fan-in -> add -> fan-out
        """
        mod = Module()
        for name in ("inpipe", "fpadd", "outpipe"):
            setattr(mod.submodules, name, getattr(self, name))

        in_to_add = self.inpipe.n.connect_to_next(self.fpadd.p)
        mod.d.comb += in_to_add
        add_to_out = self.fpadd.connect_to_next(self.outpipe)
        mod.d.comb += add_to_out

        return mod

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
2004
2005
class ResArray:
    """ an Array of result operands (FPOp), plus an incoming result
        operand (in_z) and its id (in_mid)
    """
    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width, passed to every FPOp created here
            * id_wid: bit-width of the in_mid identifier
            * rs_sz: number of result entries to create.  previously
              this name was read without ever being defined in this
              scope; it is now an argument, with the same default as
              FPADD uses
        """
        self.width = width
        self.id_wid = id_wid
        self.rs_sz = rs_sz
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ connects (comb) the given result and id to in_z and in_mid
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the ports of every result entry, concatenated in
            index order
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2037
2038
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs, and of out_z
              results, that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        # NOTE(review): the class derives from FPID, but FPID.__init__ is
        # not called here: a separate FPID instance is held in self.ids
        # instead.  confirm that this is intentional.
        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs: rs[i] is the tuple (in_a_<i>, in_b_<i>)
        rs = []
        for i in range(rs_sz):
            in_a  = FPOp(width)
            in_b  = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # results: res[i] is out_z_<i>
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # FSM states, in the order in which they were added
        self.states = []

    def add_state(self, state):
        """ appends a state to the FSM state list, and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair (index 0) is connected up here
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        # state "get_a": fetches operand a, then moves on to "get_b"
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        # state "get_b": fetches operand b, then moves on to "fpadd"
        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        # state "fpadd": the adder itself.  its input record is loaded
        # (sync) from the two fetched operands and the incoming id, and
        # getb's out_decode is passed in as the strobe
        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # state "put_z": given the adder's result and id, the result
        # array, and "get_a" as its final argument (presumably the state
        # to return to - verify against FPPutZIdx).  the local name pz
        # is not used again.
        pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                    o.mid, "get_a"))

        with m.FSM() as fsm:

            # one FSM state per registered object, named after its
            # state_from, with the body supplied by its action()
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2128
2129
if __name__ == "__main__":
    # hard-wired switch: the first branch (full FPADD) is the one that
    # runs; the else branch (FPADDBase on its own) is kept for manual
    # experimentation and is currently never executed.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        # ports: operand pair 0, result 0, and the in/out ids
        main(alu, ports=alu.rs[0][0].ports() + \
                        alu.rs[0][1].ports() + \
                        alu.res[0].ports() + \
                        [alu.ids.in_mid, alu.ids.out_mid])
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        # NOTE(review): in_a, in_b, out_z, in_mid and out_mid are not
        # assigned by FPADDBase.__init__ in this file - this branch
        # looks stale and would need updating before being enabled.
        main(alu, ports=[alu.in_a, alu.in_b] + \
                        alu.in_t.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])
2143
2144
2145     # works... but don't use, just do "python fname.py convert -t v"
2146     #print (verilog.convert(alu, ports=[
2147     #                        ports=alu.in_a.ports() + \
2148     #                              alu.in_b.ports() + \
2149     #                              alu.out_z.ports())