src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 #from fpbase import FPNumShiftMultiRight
  13
  14
  15 class FPState(FPBase):
  16     def __init__(self, state_from):
  17         self.state_from = state_from
  18
  19     def set_inputs(self, inputs):
  20         self.inputs = inputs
  21         for k,v in inputs.items():
  22             setattr(self, k, v)
  23
  24     def set_outputs(self, outputs):
  25         self.outputs = outputs
  26         for k,v in outputs.items():
  27             setattr(self, k, v)
  28
  29
  30 class FPGetSyncOpsMod:
  31     def __init__(self, width, num_ops=2):
  32         self.width = width
  33         self.num_ops = num_ops
  34         inops = []
  35         outops = []
  36         for i in range(num_ops):
  37             inops.append(Signal(width, reset_less=True))
  38             outops.append(Signal(width, reset_less=True))
  39         self.in_op = inops
  40         self.out_op = outops
  41         self.stb = Signal(num_ops)
  42         self.ack = Signal()
  43         self.ready = Signal(reset_less=True)
  44         self.out_decode = Signal(reset_less=True)
  45
  46     def elaborate(self, platform):
  47         m = Module()
  48         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  49         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  50         with m.If(self.out_decode):
  51             for i in range(self.num_ops):
  52                 m.d.comb += [
  53                         self.out_op[i].eq(self.in_op[i]),
  54                 ]
  55         return m
  56
  57     def ports(self):
  58         return self.in_op + self.out_op + [self.stb, self.ack]
  59
  60
  61 class FPOps(Trigger):
  62     def __init__(self, width, num_ops):
  63         Trigger.__init__(self)
  64         self.width = width
  65         self.num_ops = num_ops
  66
  67         res = []
  68         for i in range(num_ops):
  69             res.append(Signal(width))
  70         self.v  = Array(res)
  71
  72     def ports(self):
  73         res = []
  74         for i in range(self.num_ops):
  75             res.append(self.v[i])
  76         res.append(self.ack)
  77         res.append(self.stb)
  78         return res
  79
  80
  81 class InputGroup:
  82     def __init__(self, width, num_ops=2, num_rows=4):
  83         self.width = width
  84         self.num_ops = num_ops
  85         self.num_rows = num_rows
  86         self.mmax = int(log(self.num_rows) / log(2))
  87         self.rs = []
  88         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  89         for i in range(num_rows):
  90             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  91         self.rs = Array(self.rs)
  92
  93         self.out_op = FPOps(width, num_ops)
  94
  95     def elaborate(self, platform):
  96         m = Module()
  97
  98         pe = PriorityEncoder(self.num_rows)
  99         m.submodules.selector = pe
 100         m.submodules.out_op = self.out_op
 101         m.submodules += self.rs
 102
 103         # connect priority encoder
 104         in_ready = []
 105         for i in range(self.num_rows):
 106             in_ready.append(self.rs[i].ready)
 107         m.d.comb += pe.i.eq(Cat(*in_ready))
 108         m.d.comb += self.out_op.stb.eq(~pe.n) # strobe-out when encoder active
 109
 110         with m.If(self.out_op.trigger):
 111             m.d.sync += self.mid.eq(pe.o)
 112             for j in range(self.num_ops):
 113                 m.d.sync += self.out_op.v[j].eq(self.rs[pe.o].out_op[j])
 114         return m
 115
 116     def ports(self):
 117         res = []
 118         for i in range(self.num_rows):
 119             inop = self.rs[i]
 120             res += inop.in_op + [inop.stb]
 121         return self.out_op.ports() + res + [self.mid]
 122
 123
 124 class FPGetOpMod:
 125     def __init__(self, width):
 126         self.in_op = FPOp(width)
 127         self.out_op = Signal(width)
 128         self.out_decode = Signal(reset_less=True)
 129
 130     def elaborate(self, platform):
 131         m = Module()
 132         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 133         m.submodules.get_op_in = self.in_op
 134         #m.submodules.get_op_out = self.out_op
 135         with m.If(self.out_decode):
 136             m.d.comb += [
 137                 self.out_op.eq(self.in_op.v),
 138             ]
 139         return m
 140
 141
 142 class FPGetOp(FPState):
 143     """ gets operand
 144     """
 145
 146     def __init__(self, in_state, out_state, in_op, width):
 147         FPState.__init__(self, in_state)
 148         self.out_state = out_state
 149         self.mod = FPGetOpMod(width)
 150         self.in_op = in_op
 151         self.out_op = Signal(width)
 152         self.out_decode = Signal(reset_less=True)
 153
 154     def setup(self, m, in_op):
 155         """ links module to inputs and outputs
 156         """
 157         setattr(m.submodules, self.state_from, self.mod)
 158         m.d.comb += self.mod.in_op.copy(in_op)
 159         #m.d.comb += self.out_op.eq(self.mod.out_op)
 160         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 161
 162     def action(self, m):
 163         with m.If(self.out_decode):
 164             m.next = self.out_state
 165             m.d.sync += [
 166                 self.in_op.ack.eq(0),
 167                 self.out_op.eq(self.mod.out_op)
 168             ]
 169         with m.Else():
 170             m.d.sync += self.in_op.ack.eq(1)
 171
 172
 173 class FPGet2OpMod(Trigger):
 174     def __init__(self, width):
 175         Trigger.__init__(self)
 176         self.in_op1 = Signal(width, reset_less=True)
 177         self.in_op2 = Signal(width, reset_less=True)
 178         self.out_op1 = FPNumIn(None, width)
 179         self.out_op2 = FPNumIn(None, width)
 180
 181     def elaborate(self, platform):
 182         m = Trigger.elaborate(self, platform)
 183         #m.submodules.get_op_in = self.in_op
 184         m.submodules.get_op1_out = self.out_op1
 185         m.submodules.get_op2_out = self.out_op2
 186         with m.If(self.trigger):
 187             m.d.comb += [
 188                 self.out_op1.decode(self.in_op1),
 189                 self.out_op2.decode(self.in_op2),
 190             ]
 191         return m
 192
 193
 194 class FPGet2Op(FPState):
 195     """ gets operands
 196     """
 197
 198     def __init__(self, in_state, out_state, in_op1, in_op2, width):
 199         FPState.__init__(self, in_state)
 200         self.out_state = out_state
 201         self.mod = FPGet2OpMod(width)
 202         self.in_op1 = in_op1
 203         self.in_op2 = in_op2
 204         self.out_op1 = FPNumIn(None, width)
 205         self.out_op2 = FPNumIn(None, width)
 206         self.in_stb = Signal(reset_less=True)
 207         self.out_ack = Signal(reset_less=True)
 208         self.out_decode = Signal(reset_less=True)
 209
 210     def setup(self, m, in_op1, in_op2, in_stb, in_ack):
 211         """ links module to inputs and outputs
 212         """
 213         m.submodules.get_ops = self.mod
 214         m.d.comb += self.mod.in_op1.eq(in_op1)
 215         m.d.comb += self.mod.in_op2.eq(in_op2)
 216         m.d.comb += self.mod.stb.eq(in_stb)
 217         m.d.comb += self.out_ack.eq(self.mod.ack)
 218         m.d.comb += self.out_decode.eq(self.mod.trigger)
 219         m.d.comb += in_ack.eq(self.mod.ack)
 220
 221     def action(self, m):
 222         with m.If(self.out_decode):
 223             m.next = self.out_state
 224             m.d.sync += [
 225                 self.mod.ack.eq(0),
 226                 #self.out_op1.v.eq(self.mod.out_op1.v),
 227                 #self.out_op2.v.eq(self.mod.out_op2.v),
 228                 self.out_op1.copy(self.mod.out_op1),
 229                 self.out_op2.copy(self.mod.out_op2)
 230             ]
 231         with m.Else():
 232             m.d.sync += self.mod.ack.eq(1)
 233
 234
 235 class FPAddSpecialCasesMod:
 236     """ special cases: NaNs, infs, zeros, denormalised
 237         NOTE: some of these are unique to add.  see "Special Operations"
 238         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 239     """
 240
 241     def __init__(self, width):
 242         self.in_a = FPNumBase(width)
 243         self.in_b = FPNumBase(width)
 244         self.out_z = FPNumOut(width, False)
 245         self.out_do_z = Signal(reset_less=True)
 246
 247     def setup(self, m, in_a, in_b, out_do_z):
 248         """ links module to inputs and outputs
 249         """
 250         m.submodules.specialcases = self
 251         m.d.comb += self.in_a.copy(in_a)
 252         m.d.comb += self.in_b.copy(in_b)
 253         m.d.comb += out_do_z.eq(self.out_do_z)
 254
 255     def elaborate(self, platform):
 256         m = Module()
 257
 258         m.submodules.sc_in_a = self.in_a
 259         m.submodules.sc_in_b = self.in_b
 260         m.submodules.sc_out_z = self.out_z
 261
 262         s_nomatch = Signal()
 263         m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)
 264
 265         m_match = Signal()
 266         m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)
 267
 268         # if a is NaN or b is NaN return NaN
 269         with m.If(self.in_a.is_nan | self.in_b.is_nan):
 270             m.d.comb += self.out_do_z.eq(1)
 271             m.d.comb += self.out_z.nan(0)
 272
 273         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 274         # under review
 275
 276         ## if a is zero and b is NaN return -b
 277         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 278         #    m.d.comb += self.out_do_z.eq(1)
 279         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 280
 281         ## if b is zero and a is NaN return -a
 282         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 283         #    m.d.comb += self.out_do_z.eq(1)
 284         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 285
 286         ## if a is -zero and b is NaN return -b
 287         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 288         #    m.d.comb += self.out_do_z.eq(1)
 289         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 290
 291         ## if b is -zero and a is NaN return -a
 292         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 293         #    m.d.comb += self.out_do_z.eq(1)
 294         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 295
 296         # if a is inf return inf (or NaN)
 297         with m.Elif(self.in_a.is_inf):
 298             m.d.comb += self.out_do_z.eq(1)
 299             m.d.comb += self.out_z.inf(self.in_a.s)
 300             # if a is inf and signs don't match return NaN
 301             with m.If(self.in_b.exp_128 & s_nomatch):
 302                 m.d.comb += self.out_z.nan(0)
 303
 304         # if b is inf return inf
 305         with m.Elif(self.in_b.is_inf):
 306             m.d.comb += self.out_do_z.eq(1)
 307             m.d.comb += self.out_z.inf(self.in_b.s)
 308
 309         # if a is zero and b zero return signed-a/b
 310         with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
 311             m.d.comb += self.out_do_z.eq(1)
 312             m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
 313                                           self.in_b.e,
 314                                           self.in_b.m[3:-1])
 315
 316         # if a is zero return b
 317         with m.Elif(self.in_a.is_zero):
 318             m.d.comb += self.out_do_z.eq(1)
 319             m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
 320                                       self.in_b.m[3:-1])
 321
 322         # if b is zero return a
 323         with m.Elif(self.in_b.is_zero):
 324             m.d.comb += self.out_do_z.eq(1)
 325             m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
 326                                       self.in_a.m[3:-1])
 327
 328         # if a equal to -b return zero (+ve zero)
 329         with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
 330             m.d.comb += self.out_do_z.eq(1)
 331             m.d.comb += self.out_z.zero(0)
 332
 333         # Denormalised Number checks
 334         with m.Else():
 335             m.d.comb += self.out_do_z.eq(0)
 336
 337         return m
 338
 339
 340 class FPID:
 341     def __init__(self, id_wid):
 342         self.id_wid = id_wid
 343         if self.id_wid:
 344             self.in_mid = Signal(id_wid, reset_less=True)
 345             self.out_mid = Signal(id_wid, reset_less=True)
 346         else:
 347             self.in_mid = None
 348             self.out_mid = None
 349
 350     def idsync(self, m):
 351         if self.id_wid is not None:
 352             m.d.sync += self.out_mid.eq(self.in_mid)
 353
 354
 355 class FPAddSpecialCases(FPState, FPID):
 356     """ special cases: NaNs, infs, zeros, denormalised
 357         NOTE: some of these are unique to add.  see "Special Operations"
 358         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 359     """
 360
 361     def __init__(self, width, id_wid):
 362         FPState.__init__(self, "special_cases")
 363         FPID.__init__(self, id_wid)
 364         self.mod = FPAddSpecialCasesMod(width)
 365         self.out_z = FPNumOut(width, False)
 366         self.out_do_z = Signal(reset_less=True)
 367
 368     def setup(self, m, in_a, in_b, in_mid):
 369         """ links module to inputs and outputs
 370         """
 371         self.mod.setup(m, in_a, in_b, self.out_do_z)
 372         if self.in_mid is not None:
 373             m.d.comb += self.in_mid.eq(in_mid)
 374
 375     def action(self, m):
 376         self.idsync(m)
 377         with m.If(self.out_do_z):
 378             m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 379             m.next = "put_z"
 380         with m.Else():
 381             m.next = "denormalise"
 382
 383
 384 class FPAddSpecialCasesDeNorm(FPState, FPID):
 385     """ special cases: NaNs, infs, zeros, denormalised
 386         NOTE: some of these are unique to add.  see "Special Operations"
 387         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 388     """
 389
 390     def __init__(self, width, id_wid):
 391         FPState.__init__(self, "special_cases")
 392         FPID.__init__(self, id_wid)
 393         self.smod = FPAddSpecialCasesMod(width)
 394         self.out_z = FPNumOut(width, False)
 395         self.out_do_z = Signal(reset_less=True)
 396
 397         self.dmod = FPAddDeNormMod(width)
 398         self.out_a = FPNumBase(width)
 399         self.out_b = FPNumBase(width)
 400
 401     def setup(self, m, in_a, in_b, in_mid):
 402         """ links module to inputs and outputs
 403         """
 404         self.smod.setup(m, in_a, in_b, self.out_do_z)
 405         self.dmod.setup(m, in_a, in_b)
 406         if self.in_mid is not None:
 407             m.d.comb += self.in_mid.eq(in_mid)
 408
 409     def action(self, m):
 410         self.idsync(m)
 411         with m.If(self.out_do_z):
 412             m.d.sync += self.out_z.v.eq(self.smod.out_z.v) # only take output
 413             m.next = "put_z"
 414         with m.Else():
 415             m.next = "align"
 416             m.d.sync += self.out_a.copy(self.dmod.out_a)
 417             m.d.sync += self.out_b.copy(self.dmod.out_b)
 418
 419
 420 class FPAddDeNormMod(FPState):
 421
 422     def __init__(self, width):
 423         self.in_a = FPNumBase(width)
 424         self.in_b = FPNumBase(width)
 425         self.out_a = FPNumBase(width)
 426         self.out_b = FPNumBase(width)
 427
 428     def setup(self, m, in_a, in_b):
 429         """ links module to inputs and outputs
 430         """
 431         m.submodules.denormalise = self
 432         m.d.comb += self.in_a.copy(in_a)
 433         m.d.comb += self.in_b.copy(in_b)
 434
 435     def elaborate(self, platform):
 436         m = Module()
 437         m.submodules.denorm_in_a = self.in_a
 438         m.submodules.denorm_in_b = self.in_b
 439         m.submodules.denorm_out_a = self.out_a
 440         m.submodules.denorm_out_b = self.out_b
 441         # hmmm, don't like repeating identical code
 442         m.d.comb += self.out_a.copy(self.in_a)
 443         with m.If(self.in_a.exp_n127):
 444             m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
 445         with m.Else():
 446             m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit
 447
 448         m.d.comb += self.out_b.copy(self.in_b)
 449         with m.If(self.in_b.exp_n127):
 450             m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit a exponent
 451         with m.Else():
 452             m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit
 453
 454         return m
 455
 456
 457 class FPAddDeNorm(FPState, FPID):
 458
 459     def __init__(self, width, id_wid):
 460         FPState.__init__(self, "denormalise")
 461         FPID.__init__(self, id_wid)
 462         self.mod = FPAddDeNormMod(width)
 463         self.out_a = FPNumBase(width)
 464         self.out_b = FPNumBase(width)
 465
 466     def setup(self, m, in_a, in_b, in_mid):
 467         """ links module to inputs and outputs
 468         """
 469         self.mod.setup(m, in_a, in_b)
 470         if self.in_mid is not None:
 471             m.d.comb += self.in_mid.eq(in_mid)
 472
 473     def action(self, m):
 474         self.idsync(m)
 475         # Denormalised Number checks
 476         m.next = "align"
 477         m.d.sync += self.out_a.copy(self.mod.out_a)
 478         m.d.sync += self.out_b.copy(self.mod.out_b)
 479
 480
 481 class FPAddAlignMultiMod(FPState):
 482
 483     def __init__(self, width):
 484         self.in_a = FPNumBase(width)
 485         self.in_b = FPNumBase(width)
 486         self.out_a = FPNumIn(None, width)
 487         self.out_b = FPNumIn(None, width)
 488         self.exp_eq = Signal(reset_less=True)
 489
 490     def elaborate(self, platform):
 491         # This one however (single-cycle) will do the shift
 492         # in one go.
 493
 494         m = Module()
 495
 496         m.submodules.align_in_a = self.in_a
 497         m.submodules.align_in_b = self.in_b
 498         m.submodules.align_out_a = self.out_a
 499         m.submodules.align_out_b = self.out_b
 500
 501         # NOTE: this does *not* do single-cycle multi-shifting,
 502         #       it *STAYS* in the align state until exponents match
 503
 504         # exponent of a greater than b: shift b down
 505         m.d.comb += self.exp_eq.eq(0)
 506         m.d.comb += self.out_a.copy(self.in_a)
 507         m.d.comb += self.out_b.copy(self.in_b)
 508         agtb = Signal(reset_less=True)
 509         altb = Signal(reset_less=True)
 510         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 511         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 512         with m.If(agtb):
 513             m.d.comb += self.out_b.shift_down(self.in_b)
 514         # exponent of b greater than a: shift a down
 515         with m.Elif(altb):
 516             m.d.comb += self.out_a.shift_down(self.in_a)
 517         # exponents equal: move to next stage.
 518         with m.Else():
 519             m.d.comb += self.exp_eq.eq(1)
 520         return m
 521
 522
 523 class FPAddAlignMulti(FPState, FPID):
 524
 525     def __init__(self, width, id_wid):
 526         FPID.__init__(self, id_wid)
 527         FPState.__init__(self, "align")
 528         self.mod = FPAddAlignMultiMod(width)
 529         self.out_a = FPNumIn(None, width)
 530         self.out_b = FPNumIn(None, width)
 531         self.exp_eq = Signal(reset_less=True)
 532
 533     def setup(self, m, in_a, in_b, in_mid):
 534         """ links module to inputs and outputs
 535         """
 536         m.submodules.align = self.mod
 537         m.d.comb += self.mod.in_a.copy(in_a)
 538         m.d.comb += self.mod.in_b.copy(in_b)
 539         #m.d.comb += self.out_a.copy(self.mod.out_a)
 540         #m.d.comb += self.out_b.copy(self.mod.out_b)
 541         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 542         if self.in_mid is not None:
 543             m.d.comb += self.in_mid.eq(in_mid)
 544
 545     def action(self, m):
 546         self.idsync(m)
 547         m.d.sync += self.out_a.copy(self.mod.out_a)
 548         m.d.sync += self.out_b.copy(self.mod.out_b)
 549         with m.If(self.exp_eq):
 550             m.next = "add_0"
 551
 552
 553 class FPAddAlignSingleMod:
 554
 555     def __init__(self, width):
 556         self.width = width
 557         self.in_a = FPNumBase(width)
 558         self.in_b = FPNumBase(width)
 559         self.out_a = FPNumIn(None, width)
 560         self.out_b = FPNumIn(None, width)
 561
 562     def setup(self, m, in_a, in_b):
 563         """ links module to inputs and outputs
 564         """
 565         m.submodules.align = self
 566         m.d.comb += self.in_a.copy(in_a)
 567         m.d.comb += self.in_b.copy(in_b)
 568
 569     def elaborate(self, platform):
 570         """ Aligns A against B or B against A, depending on which has the
 571             greater exponent.  This is done in a *single* cycle using
 572             variable-width bit-shift
 573
 574             the shifter used here is quite expensive in terms of gates.
 575             Mux A or B in (and out) into temporaries, as only one of them
 576             needs to be aligned against the other
 577         """
 578         m = Module()
 579
 580         m.submodules.align_in_a = self.in_a
 581         m.submodules.align_in_b = self.in_b
 582         m.submodules.align_out_a = self.out_a
 583         m.submodules.align_out_b = self.out_b
 584
 585         # temporary (muxed) input and output to be shifted
 586         t_inp = FPNumBase(self.width)
 587         t_out = FPNumIn(None, self.width)
 588         espec = (len(self.in_a.e), True)
 589         msr = MultiShiftRMerge(self.in_a.m_width, espec)
 590         m.submodules.align_t_in = t_inp
 591         m.submodules.align_t_out = t_out
 592         m.submodules.multishift_r = msr
 593
 594         ediff = Signal(espec, reset_less=True)
 595         ediffr = Signal(espec, reset_less=True)
 596         tdiff = Signal(espec, reset_less=True)
 597         elz = Signal(reset_less=True)
 598         egz = Signal(reset_less=True)
 599
 600         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 601         m.d.comb += msr.inp.eq(t_inp.m)
 602         m.d.comb += msr.diff.eq(tdiff)
 603         m.d.comb += t_out.m.eq(msr.m)
 604         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 605         m.d.comb += t_out.s.eq(t_inp.s)
 606
 607         m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
 608         m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
 609         m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
 610         m.d.comb += egz.eq(self.in_a.e > self.in_b.e)
 611
 612         # default: A-exp == B-exp, A and B untouched (fall through)
 613         m.d.comb += self.out_a.copy(self.in_a)
 614         m.d.comb += self.out_b.copy(self.in_b)
 615         # only one shifter (muxed)
 616         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 617         # exponent of a greater than b: shift b down
 618         with m.If(egz):
 619             m.d.comb += [t_inp.copy(self.in_b),
 620                          tdiff.eq(ediff),
 621                          self.out_b.copy(t_out),
 622                          self.out_b.s.eq(self.in_b.s), # whoops forgot sign
 623                         ]
 624         # exponent of b greater than a: shift a down
 625         with m.Elif(elz):
 626             m.d.comb += [t_inp.copy(self.in_a),
 627                          tdiff.eq(ediffr),
 628                          self.out_a.copy(t_out),
 629                          self.out_a.s.eq(self.in_a.s), # whoops forgot sign
 630                         ]
 631         return m
 632
 633
 634 class FPAddAlignSingle(FPState, FPID):
 635
 636     def __init__(self, width, id_wid):
 637         FPState.__init__(self, "align")
 638         FPID.__init__(self, id_wid)
 639         self.mod = FPAddAlignSingleMod(width)
 640         self.out_a = FPNumIn(None, width)
 641         self.out_b = FPNumIn(None, width)
 642
 643     def setup(self, m, in_a, in_b, in_mid):
 644         """ links module to inputs and outputs
 645         """
 646         self.mod.setup(m, in_a, in_b)
 647         if self.in_mid is not None:
 648             m.d.comb += self.in_mid.eq(in_mid)
 649
 650     def action(self, m):
 651         self.idsync(m)
 652         # NOTE: could be done as comb
 653         m.d.sync += self.out_a.copy(self.mod.out_a)
 654         m.d.sync += self.out_b.copy(self.mod.out_b)
 655         m.next = "add_0"
 656
 657
 658 class FPAddAlignSingleAdd(FPState, FPID):
 659
 660     def __init__(self, width, id_wid):
 661         FPState.__init__(self, "align")
 662         FPID.__init__(self, id_wid)
 663         self.mod = FPAddAlignSingleMod(width)
 664         self.out_a = FPNumIn(None, width)
 665         self.out_b = FPNumIn(None, width)
 666
 667         self.a0mod = FPAddStage0Mod(width)
 668         self.a0_out_z = FPNumBase(width, False)
 669         self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)
 670         self.a0_out_z = FPNumBase(width, False)
 671
 672         self.a1mod = FPAddStage1Mod(width)
 673         self.out_z = FPNumBase(width, False)
 674         self.out_of = Overflow()
 675
 676     def setup(self, m, in_a, in_b, in_mid):
 677         """ links module to inputs and outputs
 678         """
 679         self.mod.setup(m, in_a, in_b)
 680         m.d.comb += self.out_a.copy(self.mod.out_a)
 681         m.d.comb += self.out_b.copy(self.mod.out_b)
 682
 683         self.a0mod.setup(m, self.out_a, self.out_b)
 684         m.d.comb += self.a0_out_z.copy(self.a0mod.out_z)
 685         m.d.comb += self.out_tot.eq(self.a0mod.out_tot)
 686
 687         self.a1mod.setup(m, self.out_tot, self.a0_out_z)
 688
 689         if self.in_mid is not None:
 690             m.d.comb += self.in_mid.eq(in_mid)
 691
 692     def action(self, m):
 693         self.idsync(m)
 694         m.d.sync += self.out_of.copy(self.a1mod.out_of)
 695         m.d.sync += self.out_z.copy(self.a1mod.out_z)
 696         m.next = "normalise_1"
 697
 698
 699 class FPAddStage0Mod:
 700
 701     def __init__(self, width):
 702         self.in_a = FPNumBase(width)
 703         self.in_b = FPNumBase(width)
 704         self.in_z = FPNumBase(width, False)
 705         self.out_z = FPNumBase(width, False)
 706         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 707
 708     def setup(self, m, in_a, in_b):
 709         """ links module to inputs and outputs
 710         """
 711         m.submodules.add0 = self
 712         m.d.comb += self.in_a.copy(in_a)
 713         m.d.comb += self.in_b.copy(in_b)
 714
 715     def elaborate(self, platform):
 716         m = Module()
 717         m.submodules.add0_in_a = self.in_a
 718         m.submodules.add0_in_b = self.in_b
 719         m.submodules.add0_out_z = self.out_z
 720
 721         m.d.comb += self.out_z.e.eq(self.in_a.e)
 722
 723         # store intermediate tests (and zero-extended mantissas)
 724         seq = Signal(reset_less=True)
 725         mge = Signal(reset_less=True)
 726         am0 = Signal(len(self.in_a.m)+1, reset_less=True)
 727         bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
 728         m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
 729                      mge.eq(self.in_a.m >= self.in_b.m),
 730                      am0.eq(Cat(self.in_a.m, 0)),
 731                      bm0.eq(Cat(self.in_b.m, 0))
 732                     ]
 733         # same-sign (both negative or both positive) add mantissas
 734         with m.If(seq):
 735             m.d.comb += [
 736                 self.out_tot.eq(am0 + bm0),
 737                 self.out_z.s.eq(self.in_a.s)
 738             ]
 739         # a mantissa greater than b, use a
 740         with m.Elif(mge):
 741             m.d.comb += [
 742                 self.out_tot.eq(am0 - bm0),
 743                 self.out_z.s.eq(self.in_a.s)
 744             ]
 745         # b mantissa greater than a, use b
 746         with m.Else():
 747             m.d.comb += [
 748                 self.out_tot.eq(bm0 - am0),
 749                 self.out_z.s.eq(self.in_b.s)
 750         ]
 751         return m
 752
 753
 754 class FPAddStage0(FPState, FPID):
 755     """ First stage of add.  covers same-sign (add) and subtract
 756         special-casing when mantissas are greater or equal, to
 757         give greatest accuracy.
 758     """
 759
 760     def __init__(self, width, id_wid):
 761         FPState.__init__(self, "add_0")
 762         FPID.__init__(self, id_wid)
 763         self.mod = FPAddStage0Mod(width)
 764         self.out_z = FPNumBase(width, False)
 765         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 766
 767     def setup(self, m, in_a, in_b, in_mid):
 768         """ links module to inputs and outputs
 769         """
 770         self.mod.setup(m, in_a, in_b)
 771         if self.in_mid is not None:
 772             m.d.comb += self.in_mid.eq(in_mid)
 773
 774     def action(self, m):
 775         self.idsync(m)
 776         # NOTE: these could be done as combinatorial (merge add0+add1)
 777         m.d.sync += self.out_z.copy(self.mod.out_z)
 778         m.d.sync += self.out_tot.eq(self.mod.out_tot)
 779         m.next = "add_1"
 780
 781
 782 class FPAddStage1Mod(FPState):
 783     """ Second stage of add: preparation for normalisation.
 784         detects when tot sum is too big (tot[27] is kinda a carry bit)
 785     """
 786
 787     def __init__(self, width):
 788         self.out_norm = Signal(reset_less=True)
 789         self.in_z = FPNumBase(width, False)
 790         self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
 791         self.out_z = FPNumBase(width, False)
 792         self.out_of = Overflow()
 793
 794     def setup(self, m, in_tot, in_z):
 795         """ links module to inputs and outputs
 796         """
 797         m.submodules.add1 = self
 798         m.submodules.add1_out_overflow = self.out_of
 799
 800         m.d.comb += self.in_z.copy(in_z)
 801         m.d.comb += self.in_tot.eq(in_tot)
 802
 803     def elaborate(self, platform):
 804         m = Module()
 805         #m.submodules.norm1_in_overflow = self.in_of
 806         #m.submodules.norm1_out_overflow = self.out_of
 807         #m.submodules.norm1_in_z = self.in_z
 808         #m.submodules.norm1_out_z = self.out_z
 809         m.d.comb += self.out_z.copy(self.in_z)
 810         # tot[27] gets set when the sum overflows. shift result down
 811         with m.If(self.in_tot[-1]):
 812             m.d.comb += [
 813                 self.out_z.m.eq(self.in_tot[4:]),
 814                 self.out_of.m0.eq(self.in_tot[4]),
 815                 self.out_of.guard.eq(self.in_tot[3]),
 816                 self.out_of.round_bit.eq(self.in_tot[2]),
 817                 self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
 818                 self.out_z.e.eq(self.in_z.e + 1)
 819         ]
 820         # tot[27] zero case
 821         with m.Else():
 822             m.d.comb += [
 823                 self.out_z.m.eq(self.in_tot[3:]),
 824                 self.out_of.m0.eq(self.in_tot[3]),
 825                 self.out_of.guard.eq(self.in_tot[2]),
 826                 self.out_of.round_bit.eq(self.in_tot[1]),
 827                 self.out_of.sticky.eq(self.in_tot[0])
 828         ]
 829         return m
 830
 831
 832 class FPAddStage1(FPState, FPID):
 833
 834     def __init__(self, width, id_wid):
 835         FPState.__init__(self, "add_1")
 836         FPID.__init__(self, id_wid)
 837         self.mod = FPAddStage1Mod(width)
 838         self.out_z = FPNumBase(width, False)
 839         self.out_of = Overflow()
 840         self.norm_stb = Signal()
 841
 842     def setup(self, m, in_tot, in_z, in_mid):
 843         """ links module to inputs and outputs
 844         """
 845         self.mod.setup(m, in_tot, in_z)
 846
 847         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 848
 849         if self.in_mid is not None:
 850             m.d.comb += self.in_mid.eq(in_mid)
 851
 852     def action(self, m):
 853         self.idsync(m)
 854         m.d.sync += self.out_of.copy(self.mod.out_of)
 855         m.d.sync += self.out_z.copy(self.mod.out_z)
 856         m.d.sync += self.norm_stb.eq(1)
 857         m.next = "normalise_1"
 858
 859
 860 class FPNorm1ModSingle:
 861
 862     def __init__(self, width):
 863         self.width = width
 864         self.out_norm = Signal(reset_less=True)
 865         self.in_z = FPNumBase(width, False)
 866         self.in_of = Overflow()
 867         self.out_z = FPNumBase(width, False)
 868         self.out_of = Overflow()
 869
 870     def setup(self, m, in_z, in_of, out_z):
 871         """ links module to inputs and outputs
 872         """
 873         m.submodules.normalise_1 = self
 874
 875         m.d.comb += self.in_z.copy(in_z)
 876         m.d.comb += self.in_of.copy(in_of)
 877
 878         m.d.comb += out_z.copy(self.out_z)
 879
 880     def elaborate(self, platform):
 881         m = Module()
 882
 883         mwid = self.out_z.m_width+2
 884         pe = PriorityEncoder(mwid)
 885         m.submodules.norm_pe = pe
 886
 887         m.submodules.norm1_out_z = self.out_z
 888         m.submodules.norm1_out_overflow = self.out_of
 889         m.submodules.norm1_in_z = self.in_z
 890         m.submodules.norm1_in_overflow = self.in_of
 891
 892         in_z = FPNumBase(self.width, False)
 893         in_of = Overflow()
 894         m.submodules.norm1_insel_z = in_z
 895         m.submodules.norm1_insel_overflow = in_of
 896
 897         espec = (len(in_z.e), True)
 898         ediff_n126 = Signal(espec, reset_less=True)
 899         msr = MultiShiftRMerge(mwid, espec)
 900         m.submodules.multishift_r = msr
 901
 902         m.d.comb += in_z.copy(self.in_z)
 903         m.d.comb += in_of.copy(self.in_of)
 904         # initialise out from in (overridden below)
 905         m.d.comb += self.out_z.copy(in_z)
 906         m.d.comb += self.out_of.copy(in_of)
 907         # normalisation increase/decrease conditions
 908         decrease = Signal(reset_less=True)
 909         increase = Signal(reset_less=True)
 910         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 911         m.d.comb += increase.eq(in_z.exp_lt_n126)
 912         # decrease exponent
 913         with m.If(decrease):
 914             # *sigh* not entirely obvious: count leading zeros (clz)
 915             # with a PriorityEncoder: to find from the MSB
 916             # we reverse the order of the bits.
 917             temp_m = Signal(mwid, reset_less=True)
 918             temp_s = Signal(mwid+1, reset_less=True)
 919             clz = Signal((len(in_z.e), True), reset_less=True)
 920             # make sure that the amount to decrease by does NOT
 921             # go below the minimum non-INF/NaN exponent
 922             limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
 923                          in_z.exp_sub_n126)
 924             m.d.comb += [
 925                 # cat round and guard bits back into the mantissa
 926                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
 927                 pe.i.eq(temp_m[::-1]),          # inverted
 928                 clz.eq(limclz),                 # count zeros from MSB down
 929                 temp_s.eq(temp_m << clz),       # shift mantissa UP
 930                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
 931                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
 932                 self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
 933                 # overflow in bits 0..1: got shifted too (leave sticky)
 934                 self.out_of.guard.eq(temp_s[1]),     # guard
 935                 self.out_of.round_bit.eq(temp_s[0]), # round
 936             ]
 937         # increase exponent
 938         with m.Elif(increase):
 939             temp_m = Signal(mwid+1, reset_less=True)
 940             m.d.comb += [
 941                 temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
 942                               in_z.m)),
 943                 ediff_n126.eq(in_z.N126 - in_z.e),
 944                 # connect multi-shifter to inp/out mantissa (and ediff)
 945                 msr.inp.eq(temp_m),
 946                 msr.diff.eq(ediff_n126),
 947                 self.out_z.m.eq(msr.m[3:]),
 948                 self.out_of.m0.eq(temp_s[3]),   # copy of mantissa[0]
 949                 # overflow in bits 0..1: got shifted too (leave sticky)
 950                 self.out_of.guard.eq(temp_s[2]),     # guard
 951                 self.out_of.round_bit.eq(temp_s[1]), # round
 952                 self.out_of.sticky.eq(temp_s[0]), # sticky
 953                 self.out_z.e.eq(in_z.e + ediff_n126),
 954             ]
 955
 956         return m
 957
 958
 959 class FPNorm1ModMulti:
 960
 961     def __init__(self, width, single_cycle=True):
 962         self.width = width
 963         self.in_select = Signal(reset_less=True)
 964         self.out_norm = Signal(reset_less=True)
 965         self.in_z = FPNumBase(width, False)
 966         self.in_of = Overflow()
 967         self.temp_z = FPNumBase(width, False)
 968         self.temp_of = Overflow()
 969         self.out_z = FPNumBase(width, False)
 970         self.out_of = Overflow()
 971
 972     def elaborate(self, platform):
 973         m = Module()
 974
 975         m.submodules.norm1_out_z = self.out_z
 976         m.submodules.norm1_out_overflow = self.out_of
 977         m.submodules.norm1_temp_z = self.temp_z
 978         m.submodules.norm1_temp_of = self.temp_of
 979         m.submodules.norm1_in_z = self.in_z
 980         m.submodules.norm1_in_overflow = self.in_of
 981
 982         in_z = FPNumBase(self.width, False)
 983         in_of = Overflow()
 984         m.submodules.norm1_insel_z = in_z
 985         m.submodules.norm1_insel_overflow = in_of
 986
 987         # select which of temp or in z/of to use
 988         with m.If(self.in_select):
 989             m.d.comb += in_z.copy(self.in_z)
 990             m.d.comb += in_of.copy(self.in_of)
 991         with m.Else():
 992             m.d.comb += in_z.copy(self.temp_z)
 993             m.d.comb += in_of.copy(self.temp_of)
 994         # initialise out from in (overridden below)
 995         m.d.comb += self.out_z.copy(in_z)
 996         m.d.comb += self.out_of.copy(in_of)
 997         # normalisation increase/decrease conditions
 998         decrease = Signal(reset_less=True)
 999         increase = Signal(reset_less=True)
1000         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
1001         m.d.comb += increase.eq(in_z.exp_lt_n126)
1002         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
1003         # decrease exponent
1004         with m.If(decrease):
1005             m.d.comb += [
1006                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
1007                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
1008                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
1009                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
1010                 self.out_of.round_bit.eq(0),        # reset round bit
1011                 self.out_of.m0.eq(in_of.guard),
1012             ]
1013         # increase exponent
1014         with m.Elif(increase):
1015             m.d.comb += [
1016                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
1017                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
1018                 self.out_of.guard.eq(in_z.m[0]),
1019                 self.out_of.m0.eq(in_z.m[1]),
1020                 self.out_of.round_bit.eq(in_of.guard),
1021                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
1022             ]
1023
1024         return m
1025
1026
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1" (single-cycle flavour): wraps an
        FPNorm1ModSingle, latches its roundz flag and moves on to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: handed to the module and to FPNumBase
            * id_wid: handed to FPID
            * single_cycle: accepted but not used by this class
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        # out_z is driven combinatorially by the module (see setup)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ connects the module to in_z / in_of (its result lands in
            self.out_z) and the id to in_mid
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "normalise_1"
        """
        self.idsync(m)
        roundz = self.mod.out_of.roundz
        m.d.sync += self.out_roundz.eq(roundz)
        m.next = "round"
1049
1050
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1" (multi-cycle flavour): keeps re-running an
        FPNorm1ModMulti, feeding its output back through temp_z / temp_of,
        until the module reports (out_norm low) that no further step is
        needed; then latches roundz and moves on to "round".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        # incoming strobe / outgoing acknowledge
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        # feedback registers for the intermediate result
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ connects the module, the strobe and the id
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default assignment: ack drops back to zero whenever the
        # normalise_1 state is not the one executing
        m.d.sync += self.ack.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "normalise_1"
        """
        self.idsync(m)
        # accept fresh input on strobe HIGH while ack is still LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # always capture the current result for the next iteration
        m.d.sync += [
            self.temp_of.copy(self.mod.out_of),
            self.temp_z.copy(self.out_z),
        ]
        with m.If(self.out_norm):
            # another normalisation step is required: stay in this state
            with m.If(self.in_accept):
                m.d.sync += [self.ack.eq(1)]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1096
1097
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" (compact flavour): chains normalisation,
        rounding, corrections and packing combinatorially, so that a single
        state latches the packed value and moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ builds the norm -> round -> corrections -> pack chain in m

            creates self.pmod and self.out_z as a side-effect: both are
            needed later by action().
        """
        width = self.width

        # stage 1: normalisation, fed from in_z + in_of
        nmod = FPNorm1ModSingle(width)
        n_out_z = FPNumBase(width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # stage 2: rounding, fed from the normalised value and roundz flag
        rmod = FPRoundMod(width)
        r_out_z = FPNumBase(width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += [
            n_out_roundz.eq(nmod.out_of.roundz),
            r_out_z.copy(rmod.out_z),
        ]

        # stage 3: corrections, fed from the rounded value
        cmod = FPCorrectionsMod(width)
        c_out_z = FPNumBase(width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # stage 4: pack, fed from the corrected value
        self.pmod = FPPackMod(width)
        self.out_z = FPNumBase(width)
        self.pmod.setup(m, c_out_z)

        # multiplex id
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "normalise_1"
        """
        self.idsync(m) # copies incoming ID to outgoing
        packed = self.pmod.out_z.v
        m.d.sync += self.out_z.v.eq(packed) # outputs packed result
        m.next = "pack_put_z"
1141
1142
class FPRoundMod:
    """ Rounding module: when in_roundz is set the mantissa is incremented,
        and if the mantissa was all 1s the exponent is incremented too.
        Otherwise out_z is a plain copy of in_z.
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers the module in m as "roundz" and drives its inputs
        """
        m.submodules.roundz = self

        m.d.comb += [
            self.in_z.copy(in_z),
            self.in_roundz.eq(roundz),
        ]

    def elaborate(self, platform):
        m = Module()
        z_in, z_out = self.in_z, self.out_z
        # default: pass-through (overridden below when rounding)
        m.d.comb += z_out.copy(z_in)
        with m.If(self.in_roundz):
            m.d.comb += z_out.m.eq(z_in.m + 1) # mantissa rounds up
            mantissa_all_ones = (z_in.m == z_in.m1s)
            with m.If(mantissa_all_ones):
                m.d.comb += z_out.e.eq(z_in.e + 1) # exponent up
        return m
1164
1165
class FPRound(FPState, FPID):
    """ FSM state "round": wraps an FPRoundMod, latches its result and
        moves on to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ connects the module to in_z / roundz and the id to in_mid
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "round"
        """
        self.idsync(m)
        rounded = self.mod.out_z
        m.d.sync += self.out_z.copy(rounded)
        m.next = "corrections"
1186
1187
class FPCorrectionsMod:
    """ Corrections module: out_z is a copy of in_z, except that the
        exponent is replaced by in_z.N127 when in_z.is_denormalised is set.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers the module in m as "corrections" and drives in_z
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        z_in, z_out = self.in_z, self.out_z
        m.submodules.corr_in_z = z_in
        m.submodules.corr_out_z = z_out
        # default: pass-through (exponent overridden below if needed)
        m.d.comb += z_out.copy(z_in)
        with m.If(z_in.is_denormalised):
            m.d.comb += z_out.e.eq(z_in.N127)
        return m
1208
1209
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": wraps an FPCorrectionsMod, latches its
        result and moves on to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ connects the module to in_z and the id to in_mid
        """
        self.mod.setup(m, in_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "corrections"
        """
        self.idsync(m)
        corrected = self.mod.out_z
        m.d.sync += self.out_z.copy(corrected)
        m.next = "pack"
1229
1230
class FPPackMod:
    """ Pack module: produces the final out_z from in_z.  When
        in_z.is_overflowed is set, out_z.inf() is used with the sign of
        in_z; otherwise out_z.create() is called with in_z's sign,
        exponent and mantissa.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers the module in m as "pack" and drives in_z
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        z_in, z_out = self.in_z, self.out_z
        m.submodules.pack_in_z = z_in
        with m.If(z_in.is_overflowed):
            m.d.comb += z_out.inf(z_in.s)
        with m.Else():
            m.d.comb += z_out.create(z_in.s, z_in.e, z_in.m)
        return m
1251
1252
class FPPack(FPState, FPID):
    """ FSM state "pack": wraps an FPPackMod, latches the packed value
        and moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ connects the module to in_z and the id to in_mid
        """
        self.mod.setup(m, in_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "pack"
        """
        self.idsync(m)
        packed = self.mod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)
        m.next = "pack_put_z"
1272
1273
class FPPutZ(FPState):
    """ FSM output state: copies in_z.v (and, if present, the id) to the
        output, holds out_z.stb high until stb and ack are both set, then
        clears stb and jumps to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of the FSM state this object implements
            * in_z / out_z: value source and handshaked destination
            * in_mid / out_mid: id source and destination (in_mid may be
              None, in which case no id is copied)
            * to_state: state entered after the handshake; defaults to
              "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ statements executed while the FSM is in this state
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += self.out_z.v.eq(self.in_z.v)

        handshake_done = self.out_z.stb & self.out_z.ack
        with m.If(handshake_done):
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
1297
1298
class FPPutZIdx(FPState):
    """ FSM output state writing to one of several outputs: the entry of
        out_zs indexed by in_mid receives in_z.v and is strobed until its
        stb and ack are both set; then stb is cleared and the FSM jumps
        to to_state.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of the FSM state this object implements
            * in_z: value source
            * out_zs: indexable collection of handshaked destinations
            * in_mid: index into out_zs
            * to_state: state entered after the handshake; defaults to
              "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ statements executed while the FSM is in this state
        """
        # the currently-selected destination
        target = self.out_zs[self.in_mid]

        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [
            outz_stb.eq(target.stb),
            outz_ack.eq(target.ack),
        ]
        m.d.sync += target.v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += target.stb.eq(1)
1324
1325
class FPADDBaseMod(FPID):
    """ The FP adder proper: builds a list of FSM states (either the long
        pipeline or the compact one) and turns them into a single FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        # operand trigger, raw operands and handshaked result
        self.in_t = Trigger()
        self.in_a  = Signal(width)
        self.in_b  = Signal(width)
        self.out_z = FPOp(width)

        # FSM states, in the order they were added
        self.states = []

    def add_state(self, state):
        """ records state for inclusion in the FSM and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t

        # populate self.states (and wire the stages together)
        build = (self.get_compact_fragment if self.compact
                 else self.get_longer_fragment)
        build(m, platform)

        # one FSM state per recorded stage
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the long pipeline: one FSM state per processing step
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a, b = get.out_op1, get.out_op2

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single- or multi-cycle, wired identically
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle version also needs add1's strobe.
        # NOTE(review): the id comes from add0.in_mid here rather than
        # add1.in_mid - confirm this is intended.
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        # NOTE(review): rn.in_mid (not cor.in_mid) is passed on here
        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the compact pipeline: several steps merged per state
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a, b = get.out_op1, get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(width, id_wid))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1442
1443
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd": owns an FPADDBaseMod (which has its own FSM) and
        bridges the outer FSM's strobe/ack to the inner module's trigger
        and result handshake.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        # FPADDBaseMod's "compact" argument is left at its default
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a  = Signal(width)
        self.in_b  = Signal(width)

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links this state and its inner module to inputs and outputs

            * a, b: operands, forwarded to the inner module
            * add_stb: incoming strobe (registered into self.add_stb)
            * in_mid: incoming id, forwarded to the inner module
            * out_z, out_mid: caller-provided result / id destinations;
              stored on self and driven from the inner module
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ statements executed while the FSM is in the "fpadd" state

            while the inner module has not signalled completion (z_done
            low), an accepted operand pair raises add_ack and the inner
            trigger strobe; otherwise the strobe is dropped and the inner
            result is acknowledged.  once z_done is high the FSM moves to
            "put_z".
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state.
            # (id and value are already driven combinatorially in setup)
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1535
class ResArray:
    """ An Array of rs_sz result FPOps (named "out_z_<n>"), plus a single
        incoming value (in_z) and the id (in_mid) accompanying it.
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: handed to every FPOp created here
            * id_wid: bit-width of the in_mid signal
            * rs_sz: number of result entries to create.  previously this
              name was read without being defined anywhere in the
              constructor; the default matches FPADD's rs_sz default.
        """
        self.width = width
        self.id_wid = id_wid
        self.rs_sz = rs_sz
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially drives in_z and in_mid from the arguments
        """
        m.d.comb += [self.in_z.copy(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for the result array: registers
            in_z and every result entry as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports() of every result entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1567
1568
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs, and of out_z
              result entries, to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        # the id signals live on a separate FPID object
        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        operand_pairs = []
        for idx in range(rs_sz):
            in_a  = FPOp(width)
            in_b  = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # result entries, named out_z_<n>
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        # FSM states, in the order they were added
        self.states = []

    def add_state(self, state):
        """ records state for inclusion in the FSM and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        out_z = FPOp(self.width)
        out_mid = Signal(self.id_wid, reset_less=True)
        m.submodules.out_z = out_z

        # fetch operand a, then operand b
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        # the adder itself: strobed by the decode of operand b
        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        ab.setup(m, a, b, getb.out_decode, self.ids.in_mid,
                 out_z, out_mid)

        # write the result to the entry selected by out_mid, then restart
        self.add_state(FPPutZIdx("put_z", ab.out_z, self.res,
                                 out_mid, "get_a"))

        # one FSM state per recorded stage
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1660
1661
if __name__ == "__main__":
    # script entry point: build an ALU and hand it, with its port list,
    # to nmigen's command-line main().  the constant condition selects
    # which of the two top-levels gets built.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports() +
                 alu.rs[0][1].ports() +
                 alu.res[0].ports() +
                 [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=ports)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b] +
                 alu.in_t.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
        main(alu, ports=ports)


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())