src/add/nmigen_add_experiment.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Module, Signal, Cat, Mux, Array, Const
   6 from nmigen.lib.coding import PriorityEncoder
   7 from nmigen.cli import main, verilog
   8 from math import log
   9
  10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
  11 from fpbase import MultiShiftRMerge, Trigger
  12 #from fpbase import FPNumShiftMultiRight
  13
  14
  15 class FPState(FPBase):
  16     def __init__(self, state_from):
  17         self.state_from = state_from
  18
  19     def set_inputs(self, inputs):
  20         self.inputs = inputs
  21         for k,v in inputs.items():
  22             setattr(self, k, v)
  23
  24     def set_outputs(self, outputs):
  25         self.outputs = outputs
  26         for k,v in outputs.items():
  27             setattr(self, k, v)
  28
  29
  30 class FPGetSyncOpsMod:
  31     def __init__(self, width, num_ops=2):
  32         self.width = width
  33         self.num_ops = num_ops
  34         inops = []
  35         outops = []
  36         for i in range(num_ops):
  37             inops.append(Signal(width, reset_less=True))
  38             outops.append(Signal(width, reset_less=True))
  39         self.in_op = inops
  40         self.out_op = outops
  41         self.stb = Signal(num_ops)
  42         self.ack = Signal()
  43         self.ready = Signal(reset_less=True)
  44         self.out_decode = Signal(reset_less=True)
  45
  46     def elaborate(self, platform):
  47         m = Module()
  48         m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
  49         m.d.comb += self.out_decode.eq(self.ack & self.ready)
  50         with m.If(self.out_decode):
  51             for i in range(self.num_ops):
  52                 m.d.comb += [
  53                         self.out_op[i].eq(self.in_op[i]),
  54                 ]
  55         return m
  56
  57     def ports(self):
  58         return self.in_op + self.out_op + [self.stb, self.ack]
  59
  60
  61 class FPOps(Trigger):
  62     def __init__(self, width, num_ops):
  63         Trigger.__init__(self)
  64         self.width = width
  65         self.num_ops = num_ops
  66
  67         res = []
  68         for i in range(num_ops):
  69             res.append(Signal(width))
  70         self.v  = Array(res)
  71
  72     def ports(self):
  73         res = []
  74         for i in range(self.num_ops):
  75             res.append(self.v[i])
  76         res.append(self.ack)
  77         res.append(self.stb)
  78         return res
  79
  80
  81 class InputGroup:
  82     def __init__(self, width, num_ops=2, num_rows=4):
  83         self.width = width
  84         self.num_ops = num_ops
  85         self.num_rows = num_rows
  86         self.mmax = int(log(self.num_rows) / log(2))
  87         self.rs = []
  88         self.mid = Signal(self.mmax, reset_less=True) # multiplex id
  89         for i in range(num_rows):
  90             self.rs.append(FPGetSyncOpsMod(width, num_ops))
  91
  92         self.out_op = FPOps(width, num_ops)
  93
  94     def elaborate(self, platform):
  95         m = Module()
  96
  97         pe = PriorityEncoder(self.num_rows)
  98         m.submodules.selector = pe
  99         m.submodules.out_op = self.out_op
 100         m.submodules += self.rs
 101
 102         # connect priority encoder
 103         in_ready = []
 104         for i in range(self.num_rows):
 105             in_ready.append(self.rs[i].ready)
 106         m.d.comb += pe.i.eq(Cat(*in_ready))
 107         m.d.comb += self.out_op.stb.eq(pe.n) # strobe-out when encoder active
 108
 109         with m.If(pe.n):
 110             m.d.sync += self.mid.eq(pe.o)
 111             for i in range(self.num_rows):
 112                 with m.If(pe.o == Const(i, (self.mmax, False))):
 113                     for j in range(self.num_ops):
 114                         m.d.sync += self.out_op.v[j].eq(self.rs[i].out_op[j])
 115         return m
 116
 117     def ports(self):
 118         res = []
 119         for i in range(self.num_rows):
 120             inop = self.rs[i]
 121             res += inop.in_op + [inop.stb]
 122         return self.out_op.ports() + res + [self.ack + self.stb]
 123
 124
 125 class FPGetOpMod:
 126     def __init__(self, width):
 127         self.in_op = FPOp(width)
 128         self.out_op = Signal(width)
 129         self.out_decode = Signal(reset_less=True)
 130
 131     def elaborate(self, platform):
 132         m = Module()
 133         m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
 134         m.submodules.get_op_in = self.in_op
 135         #m.submodules.get_op_out = self.out_op
 136         with m.If(self.out_decode):
 137             m.d.comb += [
 138                 self.out_op.eq(self.in_op.v),
 139             ]
 140         return m
 141
 142
 143 class FPGetOp(FPState):
 144     """ gets operand
 145     """
 146
 147     def __init__(self, in_state, out_state, in_op, width):
 148         FPState.__init__(self, in_state)
 149         self.out_state = out_state
 150         self.mod = FPGetOpMod(width)
 151         self.in_op = in_op
 152         self.out_op = Signal(width)
 153         self.out_decode = Signal(reset_less=True)
 154
 155     def setup(self, m, in_op):
 156         """ links module to inputs and outputs
 157         """
 158         setattr(m.submodules, self.state_from, self.mod)
 159         m.d.comb += self.mod.in_op.copy(in_op)
 160         #m.d.comb += self.out_op.eq(self.mod.out_op)
 161         m.d.comb += self.out_decode.eq(self.mod.out_decode)
 162
 163     def action(self, m):
 164         with m.If(self.out_decode):
 165             m.next = self.out_state
 166             m.d.sync += [
 167                 self.in_op.ack.eq(0),
 168                 self.out_op.eq(self.mod.out_op)
 169             ]
 170         with m.Else():
 171             m.d.sync += self.in_op.ack.eq(1)
 172
 173
 174 class FPGet2OpMod(Trigger):
 175     def __init__(self, width):
 176         Trigger.__init__(self)
 177         self.in_op1 = Signal(width, reset_less=True)
 178         self.in_op2 = Signal(width, reset_less=True)
 179         self.out_op1 = FPNumIn(None, width)
 180         self.out_op2 = FPNumIn(None, width)
 181
 182     def elaborate(self, platform):
 183         m = Trigger.elaborate(self, platform)
 184         #m.submodules.get_op_in = self.in_op
 185         m.submodules.get_op1_out = self.out_op1
 186         m.submodules.get_op2_out = self.out_op2
 187         with m.If(self.trigger):
 188             m.d.comb += [
 189                 self.out_op1.decode(self.in_op1),
 190                 self.out_op2.decode(self.in_op2),
 191             ]
 192         return m
 193
 194
 195 class FPGet2Op(FPState):
 196     """ gets operands
 197     """
 198
 199     def __init__(self, in_state, out_state, in_op1, in_op2, width):
 200         FPState.__init__(self, in_state)
 201         self.out_state = out_state
 202         self.mod = FPGet2OpMod(width)
 203         self.in_op1 = in_op1
 204         self.in_op2 = in_op2
 205         self.out_op1 = FPNumIn(None, width)
 206         self.out_op2 = FPNumIn(None, width)
 207         self.in_stb = Signal(reset_less=True)
 208         self.out_ack = Signal(reset_less=True)
 209         self.out_decode = Signal(reset_less=True)
 210
 211     def setup(self, m, in_op1, in_op2, in_stb, in_ack):
 212         """ links module to inputs and outputs
 213         """
 214         m.submodules.get_ops = self.mod
 215         m.d.comb += self.mod.in_op1.eq(in_op1)
 216         m.d.comb += self.mod.in_op2.eq(in_op2)
 217         m.d.comb += self.mod.stb.eq(in_stb)
 218         m.d.comb += self.out_ack.eq(self.mod.ack)
 219         m.d.comb += self.out_decode.eq(self.mod.trigger)
 220         m.d.comb += in_ack.eq(self.mod.ack)
 221
 222     def action(self, m):
 223         with m.If(self.out_decode):
 224             m.next = self.out_state
 225             m.d.sync += [
 226                 self.mod.ack.eq(0),
 227                 #self.out_op1.v.eq(self.mod.out_op1.v),
 228                 #self.out_op2.v.eq(self.mod.out_op2.v),
 229                 self.out_op1.copy(self.mod.out_op1),
 230                 self.out_op2.copy(self.mod.out_op2)
 231             ]
 232         with m.Else():
 233             m.d.sync += self.mod.ack.eq(1)
 234
 235
 236 class FPAddSpecialCasesMod:
 237     """ special cases: NaNs, infs, zeros, denormalised
 238         NOTE: some of these are unique to add.  see "Special Operations"
 239         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 240     """
 241
 242     def __init__(self, width):
 243         self.in_a = FPNumBase(width)
 244         self.in_b = FPNumBase(width)
 245         self.out_z = FPNumOut(width, False)
 246         self.out_do_z = Signal(reset_less=True)
 247
 248     def setup(self, m, in_a, in_b, out_do_z):
 249         """ links module to inputs and outputs
 250         """
 251         m.submodules.specialcases = self
 252         m.d.comb += self.in_a.copy(in_a)
 253         m.d.comb += self.in_b.copy(in_b)
 254         m.d.comb += out_do_z.eq(self.out_do_z)
 255
 256     def elaborate(self, platform):
 257         m = Module()
 258
 259         m.submodules.sc_in_a = self.in_a
 260         m.submodules.sc_in_b = self.in_b
 261         m.submodules.sc_out_z = self.out_z
 262
 263         s_nomatch = Signal()
 264         m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)
 265
 266         m_match = Signal()
 267         m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)
 268
 269         # if a is NaN or b is NaN return NaN
 270         with m.If(self.in_a.is_nan | self.in_b.is_nan):
 271             m.d.comb += self.out_do_z.eq(1)
 272             m.d.comb += self.out_z.nan(0)
 273
 274         # XXX WEIRDNESS for FP16 non-canonical NaN handling
 275         # under review
 276
 277         ## if a is zero and b is NaN return -b
 278         #with m.If(a.is_zero & (a.s==0) & b.is_nan):
 279         #    m.d.comb += self.out_do_z.eq(1)
 280         #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
 281
 282         ## if b is zero and a is NaN return -a
 283         #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
 284         #    m.d.comb += self.out_do_z.eq(1)
 285         #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
 286
 287         ## if a is -zero and b is NaN return -b
 288         #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
 289         #    m.d.comb += self.out_do_z.eq(1)
 290         #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
 291
 292         ## if b is -zero and a is NaN return -a
 293         #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
 294         #    m.d.comb += self.out_do_z.eq(1)
 295         #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
 296
 297         # if a is inf return inf (or NaN)
 298         with m.Elif(self.in_a.is_inf):
 299             m.d.comb += self.out_do_z.eq(1)
 300             m.d.comb += self.out_z.inf(self.in_a.s)
 301             # if a is inf and signs don't match return NaN
 302             with m.If(self.in_b.exp_128 & s_nomatch):
 303                 m.d.comb += self.out_z.nan(0)
 304
 305         # if b is inf return inf
 306         with m.Elif(self.in_b.is_inf):
 307             m.d.comb += self.out_do_z.eq(1)
 308             m.d.comb += self.out_z.inf(self.in_b.s)
 309
 310         # if a is zero and b zero return signed-a/b
 311         with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
 312             m.d.comb += self.out_do_z.eq(1)
 313             m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
 314                                           self.in_b.e,
 315                                           self.in_b.m[3:-1])
 316
 317         # if a is zero return b
 318         with m.Elif(self.in_a.is_zero):
 319             m.d.comb += self.out_do_z.eq(1)
 320             m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
 321                                       self.in_b.m[3:-1])
 322
 323         # if b is zero return a
 324         with m.Elif(self.in_b.is_zero):
 325             m.d.comb += self.out_do_z.eq(1)
 326             m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
 327                                       self.in_a.m[3:-1])
 328
 329         # if a equal to -b return zero (+ve zero)
 330         with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
 331             m.d.comb += self.out_do_z.eq(1)
 332             m.d.comb += self.out_z.zero(0)
 333
 334         # Denormalised Number checks
 335         with m.Else():
 336             m.d.comb += self.out_do_z.eq(0)
 337
 338         return m
 339
 340
 341 class FPID:
 342     def __init__(self, id_wid):
 343         self.id_wid = id_wid
 344         if self.id_wid:
 345             self.in_mid = Signal(id_wid, reset_less=True)
 346             self.out_mid = Signal(id_wid, reset_less=True)
 347         else:
 348             self.in_mid = None
 349             self.out_mid = None
 350
 351     def idsync(self, m):
 352         if self.id_wid is not None:
 353             m.d.sync += self.out_mid.eq(self.in_mid)
 354
 355
 356 class FPAddSpecialCases(FPState, FPID):
 357     """ special cases: NaNs, infs, zeros, denormalised
 358         NOTE: some of these are unique to add.  see "Special Operations"
 359         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 360     """
 361
 362     def __init__(self, width, id_wid):
 363         FPState.__init__(self, "special_cases")
 364         FPID.__init__(self, id_wid)
 365         self.mod = FPAddSpecialCasesMod(width)
 366         self.out_z = FPNumOut(width, False)
 367         self.out_do_z = Signal(reset_less=True)
 368
 369     def setup(self, m, in_a, in_b, in_mid):
 370         """ links module to inputs and outputs
 371         """
 372         self.mod.setup(m, in_a, in_b, self.out_do_z)
 373         if self.in_mid is not None:
 374             m.d.comb += self.in_mid.eq(in_mid)
 375
 376     def action(self, m):
 377         self.idsync(m)
 378         with m.If(self.out_do_z):
 379             m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
 380             m.next = "put_z"
 381         with m.Else():
 382             m.next = "denormalise"
 383
 384
 385 class FPAddSpecialCasesDeNorm(FPState, FPID):
 386     """ special cases: NaNs, infs, zeros, denormalised
 387         NOTE: some of these are unique to add.  see "Special Operations"
 388         https://steve.hollasch.net/cgindex/coding/ieeefloat.html
 389     """
 390
 391     def __init__(self, width, id_wid):
 392         FPState.__init__(self, "special_cases")
 393         FPID.__init__(self, id_wid)
 394         self.smod = FPAddSpecialCasesMod(width)
 395         self.out_z = FPNumOut(width, False)
 396         self.out_do_z = Signal(reset_less=True)
 397
 398         self.dmod = FPAddDeNormMod(width)
 399         self.out_a = FPNumBase(width)
 400         self.out_b = FPNumBase(width)
 401
 402     def setup(self, m, in_a, in_b, in_mid):
 403         """ links module to inputs and outputs
 404         """
 405         self.smod.setup(m, in_a, in_b, self.out_do_z)
 406         self.dmod.setup(m, in_a, in_b)
 407         if self.in_mid is not None:
 408             m.d.comb += self.in_mid.eq(in_mid)
 409
 410     def action(self, m):
 411         self.idsync(m)
 412         with m.If(self.out_do_z):
 413             m.d.sync += self.out_z.v.eq(self.smod.out_z.v) # only take output
 414             m.next = "put_z"
 415         with m.Else():
 416             m.next = "align"
 417             m.d.sync += self.out_a.copy(self.dmod.out_a)
 418             m.d.sync += self.out_b.copy(self.dmod.out_b)
 419
 420
 421 class FPAddDeNormMod(FPState):
 422
 423     def __init__(self, width):
 424         self.in_a = FPNumBase(width)
 425         self.in_b = FPNumBase(width)
 426         self.out_a = FPNumBase(width)
 427         self.out_b = FPNumBase(width)
 428
 429     def setup(self, m, in_a, in_b):
 430         """ links module to inputs and outputs
 431         """
 432         m.submodules.denormalise = self
 433         m.d.comb += self.in_a.copy(in_a)
 434         m.d.comb += self.in_b.copy(in_b)
 435
 436     def elaborate(self, platform):
 437         m = Module()
 438         m.submodules.denorm_in_a = self.in_a
 439         m.submodules.denorm_in_b = self.in_b
 440         m.submodules.denorm_out_a = self.out_a
 441         m.submodules.denorm_out_b = self.out_b
 442         # hmmm, don't like repeating identical code
 443         m.d.comb += self.out_a.copy(self.in_a)
 444         with m.If(self.in_a.exp_n127):
 445             m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
 446         with m.Else():
 447             m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit
 448
 449         m.d.comb += self.out_b.copy(self.in_b)
 450         with m.If(self.in_b.exp_n127):
 451             m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit a exponent
 452         with m.Else():
 453             m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit
 454
 455         return m
 456
 457
 458 class FPAddDeNorm(FPState, FPID):
 459
 460     def __init__(self, width, id_wid):
 461         FPState.__init__(self, "denormalise")
 462         FPID.__init__(self, id_wid)
 463         self.mod = FPAddDeNormMod(width)
 464         self.out_a = FPNumBase(width)
 465         self.out_b = FPNumBase(width)
 466
 467     def setup(self, m, in_a, in_b, in_mid):
 468         """ links module to inputs and outputs
 469         """
 470         self.mod.setup(m, in_a, in_b)
 471         if self.in_mid is not None:
 472             m.d.comb += self.in_mid.eq(in_mid)
 473
 474     def action(self, m):
 475         self.idsync(m)
 476         # Denormalised Number checks
 477         m.next = "align"
 478         m.d.sync += self.out_a.copy(self.mod.out_a)
 479         m.d.sync += self.out_b.copy(self.mod.out_b)
 480
 481
 482 class FPAddAlignMultiMod(FPState):
 483
 484     def __init__(self, width):
 485         self.in_a = FPNumBase(width)
 486         self.in_b = FPNumBase(width)
 487         self.out_a = FPNumIn(None, width)
 488         self.out_b = FPNumIn(None, width)
 489         self.exp_eq = Signal(reset_less=True)
 490
 491     def elaborate(self, platform):
 492         # This one however (single-cycle) will do the shift
 493         # in one go.
 494
 495         m = Module()
 496
 497         m.submodules.align_in_a = self.in_a
 498         m.submodules.align_in_b = self.in_b
 499         m.submodules.align_out_a = self.out_a
 500         m.submodules.align_out_b = self.out_b
 501
 502         # NOTE: this does *not* do single-cycle multi-shifting,
 503         #       it *STAYS* in the align state until exponents match
 504
 505         # exponent of a greater than b: shift b down
 506         m.d.comb += self.exp_eq.eq(0)
 507         m.d.comb += self.out_a.copy(self.in_a)
 508         m.d.comb += self.out_b.copy(self.in_b)
 509         agtb = Signal(reset_less=True)
 510         altb = Signal(reset_less=True)
 511         m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
 512         m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
 513         with m.If(agtb):
 514             m.d.comb += self.out_b.shift_down(self.in_b)
 515         # exponent of b greater than a: shift a down
 516         with m.Elif(altb):
 517             m.d.comb += self.out_a.shift_down(self.in_a)
 518         # exponents equal: move to next stage.
 519         with m.Else():
 520             m.d.comb += self.exp_eq.eq(1)
 521         return m
 522
 523
 524 class FPAddAlignMulti(FPState, FPID):
 525
 526     def __init__(self, width, id_wid):
 527         FPID.__init__(self, id_wid)
 528         FPState.__init__(self, "align")
 529         self.mod = FPAddAlignMultiMod(width)
 530         self.out_a = FPNumIn(None, width)
 531         self.out_b = FPNumIn(None, width)
 532         self.exp_eq = Signal(reset_less=True)
 533
 534     def setup(self, m, in_a, in_b, in_mid):
 535         """ links module to inputs and outputs
 536         """
 537         m.submodules.align = self.mod
 538         m.d.comb += self.mod.in_a.copy(in_a)
 539         m.d.comb += self.mod.in_b.copy(in_b)
 540         #m.d.comb += self.out_a.copy(self.mod.out_a)
 541         #m.d.comb += self.out_b.copy(self.mod.out_b)
 542         m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
 543         if self.in_mid is not None:
 544             m.d.comb += self.in_mid.eq(in_mid)
 545
 546     def action(self, m):
 547         self.idsync(m)
 548         m.d.sync += self.out_a.copy(self.mod.out_a)
 549         m.d.sync += self.out_b.copy(self.mod.out_b)
 550         with m.If(self.exp_eq):
 551             m.next = "add_0"
 552
 553
 554 class FPAddAlignSingleMod:
 555
 556     def __init__(self, width):
 557         self.width = width
 558         self.in_a = FPNumBase(width)
 559         self.in_b = FPNumBase(width)
 560         self.out_a = FPNumIn(None, width)
 561         self.out_b = FPNumIn(None, width)
 562
 563     def setup(self, m, in_a, in_b):
 564         """ links module to inputs and outputs
 565         """
 566         m.submodules.align = self
 567         m.d.comb += self.in_a.copy(in_a)
 568         m.d.comb += self.in_b.copy(in_b)
 569
 570     def elaborate(self, platform):
 571         """ Aligns A against B or B against A, depending on which has the
 572             greater exponent.  This is done in a *single* cycle using
 573             variable-width bit-shift
 574
 575             the shifter used here is quite expensive in terms of gates.
 576             Mux A or B in (and out) into temporaries, as only one of them
 577             needs to be aligned against the other
 578         """
 579         m = Module()
 580
 581         m.submodules.align_in_a = self.in_a
 582         m.submodules.align_in_b = self.in_b
 583         m.submodules.align_out_a = self.out_a
 584         m.submodules.align_out_b = self.out_b
 585
 586         # temporary (muxed) input and output to be shifted
 587         t_inp = FPNumBase(self.width)
 588         t_out = FPNumIn(None, self.width)
 589         espec = (len(self.in_a.e), True)
 590         msr = MultiShiftRMerge(self.in_a.m_width, espec)
 591         m.submodules.align_t_in = t_inp
 592         m.submodules.align_t_out = t_out
 593         m.submodules.multishift_r = msr
 594
 595         ediff = Signal(espec, reset_less=True)
 596         ediffr = Signal(espec, reset_less=True)
 597         tdiff = Signal(espec, reset_less=True)
 598         elz = Signal(reset_less=True)
 599         egz = Signal(reset_less=True)
 600
 601         # connect multi-shifter to t_inp/out mantissa (and tdiff)
 602         m.d.comb += msr.inp.eq(t_inp.m)
 603         m.d.comb += msr.diff.eq(tdiff)
 604         m.d.comb += t_out.m.eq(msr.m)
 605         m.d.comb += t_out.e.eq(t_inp.e + tdiff)
 606         m.d.comb += t_out.s.eq(t_inp.s)
 607
 608         m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
 609         m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
 610         m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
 611         m.d.comb += egz.eq(self.in_a.e > self.in_b.e)
 612
 613         # default: A-exp == B-exp, A and B untouched (fall through)
 614         m.d.comb += self.out_a.copy(self.in_a)
 615         m.d.comb += self.out_b.copy(self.in_b)
 616         # only one shifter (muxed)
 617         #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
 618         # exponent of a greater than b: shift b down
 619         with m.If(egz):
 620             m.d.comb += [t_inp.copy(self.in_b),
 621                          tdiff.eq(ediff),
 622                          self.out_b.copy(t_out),
 623                          self.out_b.s.eq(self.in_b.s), # whoops forgot sign
 624                         ]
 625         # exponent of b greater than a: shift a down
 626         with m.Elif(elz):
 627             m.d.comb += [t_inp.copy(self.in_a),
 628                          tdiff.eq(ediffr),
 629                          self.out_a.copy(t_out),
 630                          self.out_a.s.eq(self.in_a.s), # whoops forgot sign
 631                         ]
 632         return m
 633
 634
 635 class FPAddAlignSingle(FPState, FPID):
 636
 637     def __init__(self, width, id_wid):
 638         FPState.__init__(self, "align")
 639         FPID.__init__(self, id_wid)
 640         self.mod = FPAddAlignSingleMod(width)
 641         self.out_a = FPNumIn(None, width)
 642         self.out_b = FPNumIn(None, width)
 643
 644     def setup(self, m, in_a, in_b, in_mid):
 645         """ links module to inputs and outputs
 646         """
 647         self.mod.setup(m, in_a, in_b)
 648         if self.in_mid is not None:
 649             m.d.comb += self.in_mid.eq(in_mid)
 650
 651     def action(self, m):
 652         self.idsync(m)
 653         # NOTE: could be done as comb
 654         m.d.sync += self.out_a.copy(self.mod.out_a)
 655         m.d.sync += self.out_b.copy(self.mod.out_b)
 656         m.next = "add_0"
 657
 658
 659 class FPAddAlignSingleAdd(FPState, FPID):
 660
 661     def __init__(self, width, id_wid):
 662         FPState.__init__(self, "align")
 663         FPID.__init__(self, id_wid)
 664         self.mod = FPAddAlignSingleMod(width)
 665         self.out_a = FPNumIn(None, width)
 666         self.out_b = FPNumIn(None, width)
 667
 668         self.a0mod = FPAddStage0Mod(width)
 669         self.a0_out_z = FPNumBase(width, False)
 670         self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)
 671         self.a0_out_z = FPNumBase(width, False)
 672
 673         self.a1mod = FPAddStage1Mod(width)
 674         self.out_z = FPNumBase(width, False)
 675         self.out_of = Overflow()
 676
 677     def setup(self, m, in_a, in_b, in_mid):
 678         """ links module to inputs and outputs
 679         """
 680         self.mod.setup(m, in_a, in_b)
 681         m.d.comb += self.out_a.copy(self.mod.out_a)
 682         m.d.comb += self.out_b.copy(self.mod.out_b)
 683
 684         self.a0mod.setup(m, self.out_a, self.out_b)
 685         m.d.comb += self.a0_out_z.copy(self.a0mod.out_z)
 686         m.d.comb += self.out_tot.eq(self.a0mod.out_tot)
 687
 688         self.a1mod.setup(m, self.out_tot, self.a0_out_z)
 689
 690         if self.in_mid is not None:
 691             m.d.comb += self.in_mid.eq(in_mid)
 692
 693     def action(self, m):
 694         self.idsync(m)
 695         m.d.sync += self.out_of.copy(self.a1mod.out_of)
 696         m.d.sync += self.out_z.copy(self.a1mod.out_z)
 697         m.next = "normalise_1"
 698
 699
 700 class FPAddStage0Mod:
 701
 702     def __init__(self, width):
 703         self.in_a = FPNumBase(width)
 704         self.in_b = FPNumBase(width)
 705         self.in_z = FPNumBase(width, False)
 706         self.out_z = FPNumBase(width, False)
 707         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 708
 709     def setup(self, m, in_a, in_b):
 710         """ links module to inputs and outputs
 711         """
 712         m.submodules.add0 = self
 713         m.d.comb += self.in_a.copy(in_a)
 714         m.d.comb += self.in_b.copy(in_b)
 715
 716     def elaborate(self, platform):
 717         m = Module()
 718         m.submodules.add0_in_a = self.in_a
 719         m.submodules.add0_in_b = self.in_b
 720         m.submodules.add0_out_z = self.out_z
 721
 722         m.d.comb += self.out_z.e.eq(self.in_a.e)
 723
 724         # store intermediate tests (and zero-extended mantissas)
 725         seq = Signal(reset_less=True)
 726         mge = Signal(reset_less=True)
 727         am0 = Signal(len(self.in_a.m)+1, reset_less=True)
 728         bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
 729         m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
 730                      mge.eq(self.in_a.m >= self.in_b.m),
 731                      am0.eq(Cat(self.in_a.m, 0)),
 732                      bm0.eq(Cat(self.in_b.m, 0))
 733                     ]
 734         # same-sign (both negative or both positive) add mantissas
 735         with m.If(seq):
 736             m.d.comb += [
 737                 self.out_tot.eq(am0 + bm0),
 738                 self.out_z.s.eq(self.in_a.s)
 739             ]
 740         # a mantissa greater than b, use a
 741         with m.Elif(mge):
 742             m.d.comb += [
 743                 self.out_tot.eq(am0 - bm0),
 744                 self.out_z.s.eq(self.in_a.s)
 745             ]
 746         # b mantissa greater than a, use b
 747         with m.Else():
 748             m.d.comb += [
 749                 self.out_tot.eq(bm0 - am0),
 750                 self.out_z.s.eq(self.in_b.s)
 751         ]
 752         return m
 753
 754
 755 class FPAddStage0(FPState, FPID):
 756     """ First stage of add.  covers same-sign (add) and subtract
 757         special-casing when mantissas are greater or equal, to
 758         give greatest accuracy.
 759     """
 760
 761     def __init__(self, width, id_wid):
 762         FPState.__init__(self, "add_0")
 763         FPID.__init__(self, id_wid)
 764         self.mod = FPAddStage0Mod(width)
 765         self.out_z = FPNumBase(width, False)
 766         self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
 767
 768     def setup(self, m, in_a, in_b, in_mid):
 769         """ links module to inputs and outputs
 770         """
 771         self.mod.setup(m, in_a, in_b)
 772         if self.in_mid is not None:
 773             m.d.comb += self.in_mid.eq(in_mid)
 774
 775     def action(self, m):
 776         self.idsync(m)
 777         # NOTE: these could be done as combinatorial (merge add0+add1)
 778         m.d.sync += self.out_z.copy(self.mod.out_z)
 779         m.d.sync += self.out_tot.eq(self.mod.out_tot)
 780         m.next = "add_1"
 781
 782
 783 class FPAddStage1Mod(FPState):
 784     """ Second stage of add: preparation for normalisation.
 785         detects when tot sum is too big (tot[27] is kinda a carry bit)
 786     """
 787
 788     def __init__(self, width):
 789         self.out_norm = Signal(reset_less=True)
 790         self.in_z = FPNumBase(width, False)
 791         self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
 792         self.out_z = FPNumBase(width, False)
 793         self.out_of = Overflow()
 794
 795     def setup(self, m, in_tot, in_z):
 796         """ links module to inputs and outputs
 797         """
 798         m.submodules.add1 = self
 799         m.submodules.add1_out_overflow = self.out_of
 800
 801         m.d.comb += self.in_z.copy(in_z)
 802         m.d.comb += self.in_tot.eq(in_tot)
 803
 804     def elaborate(self, platform):
 805         m = Module()
 806         #m.submodules.norm1_in_overflow = self.in_of
 807         #m.submodules.norm1_out_overflow = self.out_of
 808         #m.submodules.norm1_in_z = self.in_z
 809         #m.submodules.norm1_out_z = self.out_z
 810         m.d.comb += self.out_z.copy(self.in_z)
 811         # tot[27] gets set when the sum overflows. shift result down
 812         with m.If(self.in_tot[-1]):
 813             m.d.comb += [
 814                 self.out_z.m.eq(self.in_tot[4:]),
 815                 self.out_of.m0.eq(self.in_tot[4]),
 816                 self.out_of.guard.eq(self.in_tot[3]),
 817                 self.out_of.round_bit.eq(self.in_tot[2]),
 818                 self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
 819                 self.out_z.e.eq(self.in_z.e + 1)
 820         ]
 821         # tot[27] zero case
 822         with m.Else():
 823             m.d.comb += [
 824                 self.out_z.m.eq(self.in_tot[3:]),
 825                 self.out_of.m0.eq(self.in_tot[3]),
 826                 self.out_of.guard.eq(self.in_tot[2]),
 827                 self.out_of.round_bit.eq(self.in_tot[1]),
 828                 self.out_of.sticky.eq(self.in_tot[0])
 829         ]
 830         return m
 831
 832
 833 class FPAddStage1(FPState, FPID):
 834
 835     def __init__(self, width, id_wid):
 836         FPState.__init__(self, "add_1")
 837         FPID.__init__(self, id_wid)
 838         self.mod = FPAddStage1Mod(width)
 839         self.out_z = FPNumBase(width, False)
 840         self.out_of = Overflow()
 841         self.norm_stb = Signal()
 842
 843     def setup(self, m, in_tot, in_z, in_mid):
 844         """ links module to inputs and outputs
 845         """
 846         self.mod.setup(m, in_tot, in_z)
 847
 848         m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state
 849
 850         if self.in_mid is not None:
 851             m.d.comb += self.in_mid.eq(in_mid)
 852
 853     def action(self, m):
 854         self.idsync(m)
 855         m.d.sync += self.out_of.copy(self.mod.out_of)
 856         m.d.sync += self.out_z.copy(self.mod.out_z)
 857         m.d.sync += self.norm_stb.eq(1)
 858         m.next = "normalise_1"
 859
 860
 861 class FPNorm1ModSingle:
 862
 863     def __init__(self, width):
 864         self.width = width
 865         self.out_norm = Signal(reset_less=True)
 866         self.in_z = FPNumBase(width, False)
 867         self.in_of = Overflow()
 868         self.out_z = FPNumBase(width, False)
 869         self.out_of = Overflow()
 870
 871     def setup(self, m, in_z, in_of, out_z):
 872         """ links module to inputs and outputs
 873         """
 874         m.submodules.normalise_1 = self
 875
 876         m.d.comb += self.in_z.copy(in_z)
 877         m.d.comb += self.in_of.copy(in_of)
 878
 879         m.d.comb += out_z.copy(self.out_z)
 880
 881     def elaborate(self, platform):
 882         m = Module()
 883
 884         mwid = self.out_z.m_width+2
 885         pe = PriorityEncoder(mwid)
 886         m.submodules.norm_pe = pe
 887
 888         m.submodules.norm1_out_z = self.out_z
 889         m.submodules.norm1_out_overflow = self.out_of
 890         m.submodules.norm1_in_z = self.in_z
 891         m.submodules.norm1_in_overflow = self.in_of
 892
 893         in_z = FPNumBase(self.width, False)
 894         in_of = Overflow()
 895         m.submodules.norm1_insel_z = in_z
 896         m.submodules.norm1_insel_overflow = in_of
 897
 898         espec = (len(in_z.e), True)
 899         ediff_n126 = Signal(espec, reset_less=True)
 900         msr = MultiShiftRMerge(mwid, espec)
 901         m.submodules.multishift_r = msr
 902
 903         m.d.comb += in_z.copy(self.in_z)
 904         m.d.comb += in_of.copy(self.in_of)
 905         # initialise out from in (overridden below)
 906         m.d.comb += self.out_z.copy(in_z)
 907         m.d.comb += self.out_of.copy(in_of)
 908         # normalisation increase/decrease conditions
 909         decrease = Signal(reset_less=True)
 910         increase = Signal(reset_less=True)
 911         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
 912         m.d.comb += increase.eq(in_z.exp_lt_n126)
 913         # decrease exponent
 914         with m.If(decrease):
 915             # *sigh* not entirely obvious: count leading zeros (clz)
 916             # with a PriorityEncoder: to find from the MSB
 917             # we reverse the order of the bits.
 918             temp_m = Signal(mwid, reset_less=True)
 919             temp_s = Signal(mwid+1, reset_less=True)
 920             clz = Signal((len(in_z.e), True), reset_less=True)
 921             # make sure that the amount to decrease by does NOT
 922             # go below the minimum non-INF/NaN exponent
 923             limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
 924                          in_z.exp_sub_n126)
 925             m.d.comb += [
 926                 # cat round and guard bits back into the mantissa
 927                 temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
 928                 pe.i.eq(temp_m[::-1]),          # inverted
 929                 clz.eq(limclz),                 # count zeros from MSB down
 930                 temp_s.eq(temp_m << clz),       # shift mantissa UP
 931                 self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
 932                 self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
 933                 self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
 934                 # overflow in bits 0..1: got shifted too (leave sticky)
 935                 self.out_of.guard.eq(temp_s[1]),     # guard
 936                 self.out_of.round_bit.eq(temp_s[0]), # round
 937             ]
 938         # increase exponent
 939         with m.Elif(increase):
 940             temp_m = Signal(mwid+1, reset_less=True)
 941             m.d.comb += [
 942                 temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
 943                               in_z.m)),
 944                 ediff_n126.eq(in_z.N126 - in_z.e),
 945                 # connect multi-shifter to inp/out mantissa (and ediff)
 946                 msr.inp.eq(temp_m),
 947                 msr.diff.eq(ediff_n126),
 948                 self.out_z.m.eq(msr.m[3:]),
 949                 self.out_of.m0.eq(temp_s[3]),   # copy of mantissa[0]
 950                 # overflow in bits 0..1: got shifted too (leave sticky)
 951                 self.out_of.guard.eq(temp_s[2]),     # guard
 952                 self.out_of.round_bit.eq(temp_s[1]), # round
 953                 self.out_of.sticky.eq(temp_s[0]), # sticky
 954                 self.out_z.e.eq(in_z.e + ediff_n126),
 955             ]
 956
 957         return m
 958
 959
 960 class FPNorm1ModMulti:
 961
 962     def __init__(self, width, single_cycle=True):
 963         self.width = width
 964         self.in_select = Signal(reset_less=True)
 965         self.out_norm = Signal(reset_less=True)
 966         self.in_z = FPNumBase(width, False)
 967         self.in_of = Overflow()
 968         self.temp_z = FPNumBase(width, False)
 969         self.temp_of = Overflow()
 970         self.out_z = FPNumBase(width, False)
 971         self.out_of = Overflow()
 972
 973     def elaborate(self, platform):
 974         m = Module()
 975
 976         m.submodules.norm1_out_z = self.out_z
 977         m.submodules.norm1_out_overflow = self.out_of
 978         m.submodules.norm1_temp_z = self.temp_z
 979         m.submodules.norm1_temp_of = self.temp_of
 980         m.submodules.norm1_in_z = self.in_z
 981         m.submodules.norm1_in_overflow = self.in_of
 982
 983         in_z = FPNumBase(self.width, False)
 984         in_of = Overflow()
 985         m.submodules.norm1_insel_z = in_z
 986         m.submodules.norm1_insel_overflow = in_of
 987
 988         # select which of temp or in z/of to use
 989         with m.If(self.in_select):
 990             m.d.comb += in_z.copy(self.in_z)
 991             m.d.comb += in_of.copy(self.in_of)
 992         with m.Else():
 993             m.d.comb += in_z.copy(self.temp_z)
 994             m.d.comb += in_of.copy(self.temp_of)
 995         # initialise out from in (overridden below)
 996         m.d.comb += self.out_z.copy(in_z)
 997         m.d.comb += self.out_of.copy(in_of)
 998         # normalisation increase/decrease conditions
 999         decrease = Signal(reset_less=True)
1000         increase = Signal(reset_less=True)
1001         m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
1002         m.d.comb += increase.eq(in_z.exp_lt_n126)
1003         m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
1004         # decrease exponent
1005         with m.If(decrease):
1006             m.d.comb += [
1007                 self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
1008                 self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
1009                 self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
1010                 self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
1011                 self.out_of.round_bit.eq(0),        # reset round bit
1012                 self.out_of.m0.eq(in_of.guard),
1013             ]
1014         # increase exponent
1015         with m.Elif(increase):
1016             m.d.comb += [
1017                 self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
1018                 self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
1019                 self.out_of.guard.eq(in_z.m[0]),
1020                 self.out_of.m0.eq(in_z.m[1]),
1021                 self.out_of.round_bit.eq(in_of.guard),
1022                 self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
1023             ]
1024
1025         return m
1026
1027
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1", single-cycle variant: wraps
        FPNorm1ModSingle and moves on to "round" after one clock.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ connects the normalisation module (which drives out_z) and
            the incoming multiplex id, if one is in use
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "normalise_1"
        """
        self.idsync(m)
        # remember whether the rounding stage needs to round up
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
        m.next = "round"
1050
1051
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1", multi-cycle variant: stays in this
        state, feeding the module's output back in through temp_z /
        temp_of, until the module reports (out_norm low) that no further
        normalisation step is needed.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ connects the normalisation module, the incoming strobe and
            the incoming multiplex id (if one is in use)
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default value: ack is low whenever action() is not driving it
        m.d.sync += self.ack.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "normalise_1"
        """
        self.idsync(m)
        # fresh input is accepted on strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq(self.stb & ~self.ack)
        # keep the module's latest result for the next iteration
        m.d.sync += self.temp_of.copy(self.mod.out_of)
        m.d.sync += self.temp_z.copy(self.out_z)

        with m.If(self.out_norm):
            # another normalisation step is still required
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
            m.next = "round"
1097
1098
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" for the compact pipeline: normalisation,
        rounding, corrections and packing are chained combinatorially,
        and only the packed value is registered before "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ builds the norm -> round -> corrections -> pack chain in m
            and connects it to in_z / in_of
        """
        width = self.width

        # stage 1: normalisation, fed directly from in_z + in_of
        nmod = FPNorm1ModSingle(width)
        n_out_z = FPNumBase(width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # stage 2: rounding, fed from the normalised number
        rmod = FPRoundMod(width)
        r_out_z = FPNumBase(width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(nmod.out_of.roundz)
        m.d.comb += r_out_z.copy(rmod.out_z)

        # stage 3: corrections, fed from the rounded number
        cmod = FPCorrectionsMod(width)
        c_out_z = FPNumBase(width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # stage 4: pack, fed from the corrected number
        self.pmod = FPPackMod(width)
        self.out_z = FPNumBase(width)
        self.pmod.setup(m, c_out_z)

        # multiplex id, when one is in use
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "normalise_1"
        """
        self.idsync(m) # copies incoming ID to outgoing
        # register the packed result for the output stage
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v)
        m.next = "pack_put_z"
1142
1143
class FPRoundMod:
    """ Combinatorial rounding: when in_roundz is set the mantissa is
        incremented, and the exponent too if the mantissa was all 1s.
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ adds this module to m as "roundz" and drives its inputs
        """
        m.submodules.roundz = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_roundz.eq(roundz)

    def elaborate(self, platform):
        m = Module()
        src = self.in_z
        dst = self.out_z

        # default: pass the number through unchanged
        m.d.comb += dst.copy(src)
        with m.If(self.in_roundz):
            # mantissa rounds up
            m.d.comb += dst.m.eq(src.m + 1)
            with m.If(src.m == src.m1s):
                # mantissa was all 1s: exponent goes up as well
                m.d.comb += dst.e.eq(src.e + 1)

        return m
1165
1166
class FPRound(FPState, FPID):
    """ FSM state "round": registers the output of FPRoundMod and
        moves on to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ connects the rounding module and the incoming multiplex id
            (if one is in use)
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "round"
        """
        self.idsync(m)
        # latch the rounded number
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "corrections"
1187
1188
class FPCorrectionsMod:
    """ Combinatorial exponent correction: a number flagged as
        denormalised has its exponent replaced by the N127 constant;
        anything else passes through unchanged.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ adds this module to m as "corrections" and drives its input
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        src = self.in_z
        dst = self.out_z
        m.submodules.corr_in_z = src
        m.submodules.corr_out_z = dst

        # default: straight copy, exponent overridden when denormalised
        m.d.comb += dst.copy(src)
        with m.If(src.is_denormalised):
            m.d.comb += dst.e.eq(src.N127)

        return m
1209
1210
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": registers the output of
        FPCorrectionsMod and moves on to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ connects the corrections module and the incoming multiplex
            id (if one is in use)
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "corrections"
        """
        self.idsync(m)
        # latch the corrected number
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "pack"
1230
1231
class FPPackMod:
    """ Combinatorial packing: produces infinity (with the input's sign)
        when the input is flagged as overflowed, otherwise assembles the
        output from the input's sign, exponent and mantissa.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ adds this module to m as "pack" and drives its input
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        src = self.in_z
        m.submodules.pack_in_z = src

        with m.If(src.is_overflowed):
            # too large to represent: signed infinity
            m.d.comb += self.out_z.inf(src.s)
        with m.Else():
            m.d.comb += self.out_z.create(src.s, src.e, src.m)

        return m
1252
1253
class FPPack(FPState, FPID):
    """ FSM state "pack": registers the packed value produced by
        FPPackMod and moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ connects the pack module and the incoming multiplex id
            (if one is in use)
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ statements executed while the FSM is in "pack"
        """
        self.idsync(m)
        # only the packed value (v) is carried forward
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
1273
1274
class FPPutZ(FPState):
    """ Output state: presents in_z on out_z with a stb/ack handshake,
        then moves to to_state ("get_ops" unless told otherwise).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ statements executed while the FSM is in this output state
        """
        # pass the multiplex id along, when one is in use
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        # present the value
        m.d.sync += self.out_z.v.eq(self.in_z.v)

        # hold stb high until it is seen together with ack
        with m.If(self.out_z.stb & self.out_z.ack):
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
1298
1299
class FPPutZIdx(FPState):
    """ Output state for an array of results: like FPPutZ, except that
        the destination is out_zs[in_mid] rather than one fixed output.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ statements executed while the FSM is in this output state
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # handshake signals of whichever output in_mid selects
        m.d.comb += outz_stb.eq(self.out_zs[self.in_mid].stb)
        m.d.comb += outz_ack.eq(self.out_zs[self.in_mid].ack)

        # present the value on the selected output
        m.d.sync += self.out_zs[self.in_mid].v.eq(self.in_z.v)

        # hold stb high until it is seen together with ack
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
1325
1326
class FPADDBaseMod(FPID):
    """ The FP add pipeline itself, built as a list of FSM states.

        Two layouts are available: a "compact" one with few states, and
        a "longer" one with a separate state per processing step.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        # filled in by add_state(), consumed by get_fragment()
        self.states = []

    def add_state(self, state):
        """ appends state to the FSM's state list and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t

        # populate self.states with the chosen pipeline layout
        if not self.compact:
            self.get_longer_fragment(m, platform)
        else:
            self.get_compact_fragment(m, platform)

        # one FSM state per registered stage, in registration order
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ one FSM state per step: get_ops, special cases, denorm,
            align, add0, add1, normalise, round, corrections, pack, put
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: both variants are wired up in the same way
        align_stage = FPAddAlignSingle if self.single_cycle \
                      else FPAddAlignMulti
        alm = self.add_state(align_stage(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant also needs the strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ reduced layout: get_ops, special cases + denorm,
            align + add, normalise-to-pack, put
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(width, id_wid))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1443
1444
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd": wraps an FPADDBaseMod (which has an FSM of its
        own), strobing operands into it and waiting for its result.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a  = Signal(width)
        self.in_b  = Signal(width)
        # note: out_z and out_mid are supplied by the caller, in setup()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the wrapped adder to its inputs and outputs

            * m: parent Module; the adder is added to it as "fpadd"
            * a, b: operands, passed through to the adder
            * add_stb: incoming strobe (registered into self.add_stb)
            * in_mid: incoming multiplex id
            * out_z, out_mid: caller-owned result and id, driven from
              the adder's outputs
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ statements executed while the FSM is in the "fpadd" state
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state, which
            # is responsible for writing out the id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1536
class ResArray:
    """ A bank of FPOp result ports (self.res) with one incoming result
        (in_z) and the multiplex id (in_mid) that accompanies it.
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width of each FPOp
            * id_wid: bit-width of the multiplex id
            * rs_sz: number of result ports to create.  this was
              previously read from an undefined name; the default of 2
              matches the default used by FPADD in this file
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially drives in_z and in_mid from the caller
        """
        m.d.comb += [self.in_z.copy(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every result in self.res
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1568
1569
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand pairs (and of results) created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs: rs[i] is the tuple (in_a, in_b)
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # results: one FPOp per operand pair
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # filled in by add_state(), consumed by get_fragment()
        self.states = []

    def add_state(self, state):
        """ appends state to the FSM's state list and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up here
        in_a, in_b = self.rs[0]

        out_z = FPOp(self.width)
        out_mid = Signal(self.id_wid, reset_less=True)
        m.submodules.out_z = out_z

        # fetch operand a, then operand b
        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        # the adder, which is a stage that has stages of its own
        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        ab.setup(m, a, b, getb.out_decode, self.ids.in_mid,
                 out_z, out_mid)

        # write the result to the output selected by out_mid
        self.add_state(FPPutZIdx("put_z", ab.out_z, self.res,
                                 out_mid, "get_a"))

        # one FSM state per registered stage, in registration order
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1661
1662
if __name__ == "__main__":
    if True:
        # the full adder: first operand pair, first result, and the ids
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports()
                 + alu.rs[0][1].ports()
                 + alu.res[0].ports()
                 + [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=ports)
    else:
        # alternative (currently disabled): the adder stage on its own
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b]
                 + alu.in_t.ports()
                 + alu.out_z.ports()
                 + [alu.in_mid, alu.out_mid])
        main(alu, ports=ports)
1674                         alu.out_z.ports() + \
1675                         [alu.in_mid, alu.out_mid])
1676
1677
1678     # works... but don't use, just do "python fname.py convert -t v"
1679     #print (verilog.convert(alu, ports=[
1680     #                        ports=alu.in_a.ports() + \
1681     #                              alu.in_b.ports() + \
1682     #                              alu.out_z.ports())