src/ieee754/fpcommon/fpbase.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Signal, Cat, Const, Mux, Module, Elaboratable
   6 from math import log
   7 from operator import or_
   8 from functools import reduce
   9
  10 from nmutil.singlepipe import PrevControl, NextControl
  11 from nmutil.pipeline import ObjectProxy
  12
  13
  14 class MultiShiftR:
  15
  16     def __init__(self, width):
  17         self.width = width
  18         self.smax = int(log(width) / log(2))
  19         self.i = Signal(width, reset_less=True)
  20         self.s = Signal(self.smax, reset_less=True)
  21         self.o = Signal(width, reset_less=True)
  22
  23     def elaborate(self, platform):
  24         m = Module()
  25         m.d.comb += self.o.eq(self.i >> self.s)
  26         return m
  27
  28
  29 class MultiShift:
  30     """ Generates variable-length single-cycle shifter from a series
  31         of conditional tests on each bit of the left/right shift operand.
  32         Each bit tested produces output shifted by that number of bits,
  33         in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set
  34         shifts by 2 bits, each partial result cascading to the next Mux.
  35
  36         Could be adapted to do arithmetic shift by taking copies of the
  37         MSB instead of zeros.
  38     """
  39
  40     def __init__(self, width):
  41         self.width = width
  42         self.smax = int(log(width) / log(2))
  43
  44     def lshift(self, op, s):
  45         res = op << s
  46         return res[:len(op)]
  47         res = op
  48         for i in range(self.smax):
  49             zeros = [0] * (1<<i)
  50             res = Mux(s & (1<<i), Cat(zeros, res[0:-(1<<i)]), res)
  51         return res
  52
  53     def rshift(self, op, s):
  54         res = op >> s
  55         return res[:len(op)]
  56         res = op
  57         for i in range(self.smax):
  58             zeros = [0] * (1<<i)
  59             res = Mux(s & (1<<i), Cat(res[(1<<i):], zeros), res)
  60         return res
  61
  62
  63 class FPNumBaseRecord:
  64     """ Floating-point Base Number Class
  65     """
  66     def __init__(self, width, m_extra=True):
  67         self.width = width
  68         m_width = {16: 11, 32: 24, 64: 53}[width] # 1 extra bit (overflow)
  69         e_width = {16: 7,  32: 10, 64: 13}[width] # 2 extra bits (overflow)
  70         e_max = 1<<(e_width-3)
  71         self.rmw = m_width # real mantissa width (not including extras)
  72         self.e_max = e_max
  73         if m_extra:
  74             # mantissa extra bits (top,guard,round)
  75             self.m_extra = 3
  76             m_width += self.m_extra
  77         else:
  78             self.m_extra = 0
  79         #print (m_width, e_width, e_max, self.rmw, self.m_extra)
  80         self.m_width = m_width
  81         self.e_width = e_width
  82         self.e_start = self.rmw - 1
  83         self.e_end = self.rmw + self.e_width - 3 # for decoding
  84
  85         self.v = Signal(width, reset_less=True)      # Latched copy of value
  86         self.m = Signal(m_width, reset_less=True)    # Mantissa
  87         self.e = Signal((e_width, True), reset_less=True) # exp+2 bits, signed
  88         self.s = Signal(reset_less=True)           # Sign bit
  89
  90         self.mzero = Const(0, (m_width, False))
  91         m_msb = 1<<(self.m_width-2)
  92         self.msb1 = Const(m_msb, (m_width, False))
  93         self.m1s = Const(-1, (m_width, False))
  94         self.P128 = Const(e_max, (e_width, True))
  95         self.P127 = Const(e_max-1, (e_width, True))
  96         self.N127 = Const(-(e_max-1), (e_width, True))
  97         self.N126 = Const(-(e_max-2), (e_width, True))
  98
  99     def drop_in(self, fp):
 100         fp.s = self.s
 101         fp.e = self.e
 102         fp.m = self.m
 103         fp.v = self.v
 104         fp.width = self.width
 105         fp.e_width = self.e_width
 106         fp.m_width = self.m_width
 107         fp.e_start = self.e_start
 108         fp.e_end = self.e_end
 109         fp.m_extra = self.m_extra
 110
 111     def create(self, s, e, m):
 112         """ creates a value from sign / exponent / mantissa
 113
 114             bias is added here, to the exponent
 115         """
 116         return [
 117           self.v[-1].eq(s),          # sign
 118           self.v[self.e_start:self.e_end].eq(e + self.P127), # exp (add on bias)
 119           self.v[0:self.e_start].eq(m)         # mantissa
 120         ]
 121
 122     def nan(self, s):
 123         return self.create(s, self.P128, 1<<(self.e_start-1))
 124
 125     def inf(self, s):
 126         return self.create(s, self.P128, 0)
 127
 128     def zero(self, s):
 129         return self.create(s, self.N127, 0)
 130
 131     def create2(self, s, e, m):
 132         """ creates a value from sign / exponent / mantissa
 133
 134             bias is added here, to the exponent
 135         """
 136         e = e + self.P127 # exp (add on bias)
 137         return Cat(m[0:self.e_start],
 138                    e[0:self.e_end-self.e_start],
 139                    s)
 140
 141     def nan2(self, s):
 142         return self.create2(s, self.P128, self.msb1)
 143
 144     def inf2(self, s):
 145         return self.create2(s, self.P128, self.mzero)
 146
 147     def zero2(self, s):
 148         return self.create2(s, self.N127, self.mzero)
 149
 150     def __iter__(self):
 151         yield self.s
 152         yield self.e
 153         yield self.m
 154
 155     def eq(self, inp):
 156         return [self.s.eq(inp.s), self.e.eq(inp.e), self.m.eq(inp.m)]
 157
 158
 159 class FPNumBase(FPNumBaseRecord, Elaboratable):
 160     """ Floating-point Base Number Class
 161     """
 162     def __init__(self, fp):
 163         fp.drop_in(self)
 164         self.fp = fp
 165         e_width = fp.e_width
 166
 167         self.is_nan = Signal(reset_less=True)
 168         self.is_zero = Signal(reset_less=True)
 169         self.is_inf = Signal(reset_less=True)
 170         self.is_overflowed = Signal(reset_less=True)
 171         self.is_denormalised = Signal(reset_less=True)
 172         self.exp_128 = Signal(reset_less=True)
 173         self.exp_sub_n126 = Signal((e_width, True), reset_less=True)
 174         self.exp_lt_n126 = Signal(reset_less=True)
 175         self.exp_gt_n126 = Signal(reset_less=True)
 176         self.exp_gt127 = Signal(reset_less=True)
 177         self.exp_n127 = Signal(reset_less=True)
 178         self.exp_n126 = Signal(reset_less=True)
 179         self.m_zero = Signal(reset_less=True)
 180         self.m_msbzero = Signal(reset_less=True)
 181
 182     def elaborate(self, platform):
 183         m = Module()
 184         m.d.comb += self.is_nan.eq(self._is_nan())
 185         m.d.comb += self.is_zero.eq(self._is_zero())
 186         m.d.comb += self.is_inf.eq(self._is_inf())
 187         m.d.comb += self.is_overflowed.eq(self._is_overflowed())
 188         m.d.comb += self.is_denormalised.eq(self._is_denormalised())
 189         m.d.comb += self.exp_128.eq(self.e == self.fp.P128)
 190         m.d.comb += self.exp_sub_n126.eq(self.e - self.fp.N126)
 191         m.d.comb += self.exp_gt_n126.eq(self.exp_sub_n126 > 0)
 192         m.d.comb += self.exp_lt_n126.eq(self.exp_sub_n126 < 0)
 193         m.d.comb += self.exp_gt127.eq(self.e > self.fp.P127)
 194         m.d.comb += self.exp_n127.eq(self.e == self.fp.N127)
 195         m.d.comb += self.exp_n126.eq(self.e == self.fp.N126)
 196         m.d.comb += self.m_zero.eq(self.m == self.fp.mzero)
 197         m.d.comb += self.m_msbzero.eq(self.m[self.fp.e_start] == 0)
 198
 199         return m
 200
 201     def _is_nan(self):
 202         return (self.exp_128) & (~self.m_zero)
 203
 204     def _is_inf(self):
 205         return (self.exp_128) & (self.m_zero)
 206
 207     def _is_zero(self):
 208         return (self.exp_n127) & (self.m_zero)
 209
 210     def _is_overflowed(self):
 211         return self.exp_gt127
 212
 213     def _is_denormalised(self):
 214         return (self.exp_n126) & (self.m_msbzero)
 215
 216
 217 class FPNumOut(FPNumBase):
 218     """ Floating-point Number Class
 219
 220         Contains signals for an incoming copy of the value, decoded into
 221         sign / exponent / mantissa.
 222         Also contains encoding functions, creation and recognition of
 223         zero, NaN and inf (all signed)
 224
 225         Four extra bits are included in the mantissa: the top bit
 226         (m[-1]) is effectively a carry-overflow.  The other three are
 227         guard (m[2]), round (m[1]), and sticky (m[0])
 228     """
 229     def __init__(self, fp):
 230         FPNumBase.__init__(self, fp)
 231
 232     def elaborate(self, platform):
 233         m = FPNumBase.elaborate(self, platform)
 234
 235         return m
 236
 237
 238 class MultiShiftRMerge(Elaboratable):
 239     """ shifts down (right) and merges lower bits into m[0].
 240         m[0] is the "sticky" bit, basically
 241     """
 242     def __init__(self, width, s_max=None):
 243         if s_max is None:
 244             s_max = int(log(width) / log(2))
 245         self.smax = s_max
 246         self.m = Signal(width, reset_less=True)
 247         self.inp = Signal(width, reset_less=True)
 248         self.diff = Signal(s_max, reset_less=True)
 249         self.width = width
 250
 251     def elaborate(self, platform):
 252         m = Module()
 253
 254         rs = Signal(self.width, reset_less=True)
 255         m_mask = Signal(self.width, reset_less=True)
 256         smask = Signal(self.width, reset_less=True)
 257         stickybit = Signal(reset_less=True)
 258         maxslen = Signal(self.smax, reset_less=True)
 259         maxsleni = Signal(self.smax, reset_less=True)
 260
 261         sm = MultiShift(self.width-1)
 262         m0s = Const(0, self.width-1)
 263         mw = Const(self.width-1, len(self.diff))
 264         m.d.comb += [maxslen.eq(Mux(self.diff > mw, mw, self.diff)),
 265                      maxsleni.eq(Mux(self.diff > mw, 0, mw-self.diff)),
 266                     ]
 267
 268         m.d.comb += [
 269                 # shift mantissa by maxslen, mask by inverse
 270                 rs.eq(sm.rshift(self.inp[1:], maxslen)),
 271                 m_mask.eq(sm.rshift(~m0s, maxsleni)),
 272                 smask.eq(self.inp[1:] & m_mask),
 273                 # sticky bit combines all mask (and mantissa low bit)
 274                 stickybit.eq(smask.bool() | self.inp[0]),
 275                 # mantissa result contains m[0] already.
 276                 self.m.eq(Cat(stickybit, rs))
 277            ]
 278         return m
 279
 280
 281 class FPNumShift(FPNumBase, Elaboratable):
 282     """ Floating-point Number Class for shifting
 283     """
 284     def __init__(self, mainm, op, inv, width, m_extra=True):
 285         FPNumBase.__init__(self, width, m_extra)
 286         self.latch_in = Signal()
 287         self.mainm = mainm
 288         self.inv = inv
 289         self.op = op
 290
 291     def elaborate(self, platform):
 292         m = FPNumBase.elaborate(self, platform)
 293
 294         m.d.comb += self.s.eq(op.s)
 295         m.d.comb += self.e.eq(op.e)
 296         m.d.comb += self.m.eq(op.m)
 297
 298         with self.mainm.State("align"):
 299             with m.If(self.e < self.inv.e):
 300                 m.d.sync += self.shift_down()
 301
 302         return m
 303
 304     def shift_down(self, inp):
 305         """ shifts a mantissa down by one. exponent is increased to compensate
 306
 307             accuracy is lost as a result in the mantissa however there are 3
 308             guard bits (the latter of which is the "sticky" bit)
 309         """
 310         return [self.e.eq(inp.e + 1),
 311                 self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
 312                ]
 313
 314     def shift_down_multi(self, diff):
 315         """ shifts a mantissa down. exponent is increased to compensate
 316
 317             accuracy is lost as a result in the mantissa however there are 3
 318             guard bits (the latter of which is the "sticky" bit)
 319
 320             this code works by variable-shifting the mantissa by up to
 321             its maximum bit-length: no point doing more (it'll still be
 322             zero).
 323
 324             the sticky bit is computed by shifting a batch of 1s by
 325             the same amount, which will introduce zeros.  it's then
 326             inverted and used as a mask to get the LSBs of the mantissa.
 327             those are then |'d into the sticky bit.
 328         """
 329         sm = MultiShift(self.width)
 330         mw = Const(self.m_width-1, len(diff))
 331         maxslen = Mux(diff > mw, mw, diff)
 332         rs = sm.rshift(self.m[1:], maxslen)
 333         maxsleni = mw - maxslen
 334         m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert
 335
 336         stickybits = reduce(or_, self.m[1:] & m_mask) | self.m[0]
 337         return [self.e.eq(self.e + diff),
 338                 self.m.eq(Cat(stickybits, rs))
 339                ]
 340
 341     def shift_up_multi(self, diff):
 342         """ shifts a mantissa up. exponent is decreased to compensate
 343         """
 344         sm = MultiShift(self.width)
 345         mw = Const(self.m_width, len(diff))
 346         maxslen = Mux(diff > mw, mw, diff)
 347
 348         return [self.e.eq(self.e - diff),
 349                 self.m.eq(sm.lshift(self.m, maxslen))
 350                ]
 351
 352
 353 class FPNumDecode(FPNumBase):
 354     """ Floating-point Number Class
 355
 356         Contains signals for an incoming copy of the value, decoded into
 357         sign / exponent / mantissa.
 358         Also contains encoding functions, creation and recognition of
 359         zero, NaN and inf (all signed)
 360
 361         Four extra bits are included in the mantissa: the top bit
 362         (m[-1]) is effectively a carry-overflow.  The other three are
 363         guard (m[2]), round (m[1]), and sticky (m[0])
 364     """
 365     def __init__(self, op, fp):
 366         FPNumBase.__init__(self, fp)
 367         self.op = op
 368
 369     def elaborate(self, platform):
 370         m = FPNumBase.elaborate(self, platform)
 371
 372         m.d.comb += self.decode(self.v)
 373
 374         return m
 375
 376     def decode(self, v):
 377         """ decodes a latched value into sign / exponent / mantissa
 378
 379             bias is subtracted here, from the exponent.  exponent
 380             is extended to 10 bits so that subtract 127 is done on
 381             a 10-bit number
 382         """
 383         args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
 384         #print ("decode", self.e_end)
 385         return [self.m.eq(Cat(*args)), # mantissa
 386                 self.e.eq(v[self.e_start:self.e_end] - self.fp.P127), # exp
 387                 self.s.eq(v[-1]),                 # sign
 388                 ]
 389
 390 class FPNumIn(FPNumBase):
 391     """ Floating-point Number Class
 392
 393         Contains signals for an incoming copy of the value, decoded into
 394         sign / exponent / mantissa.
 395         Also contains encoding functions, creation and recognition of
 396         zero, NaN and inf (all signed)
 397
 398         Four extra bits are included in the mantissa: the top bit
 399         (m[-1]) is effectively a carry-overflow.  The other three are
 400         guard (m[2]), round (m[1]), and sticky (m[0])
 401     """
 402     def __init__(self, op, fp):
 403         FPNumBase.__init__(self, fp)
 404         self.latch_in = Signal()
 405         self.op = op
 406
 407     def decode2(self, m):
 408         """ decodes a latched value into sign / exponent / mantissa
 409
 410             bias is subtracted here, from the exponent.  exponent
 411             is extended to 10 bits so that subtract 127 is done on
 412             a 10-bit number
 413         """
 414         v = self.v
 415         args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
 416         #print ("decode", self.e_end)
 417         res = ObjectProxy(m, pipemode=False)
 418         res.m = Cat(*args)                             # mantissa
 419         res.e = v[self.e_start:self.e_end] - self.P127 # exp
 420         res.s = v[-1]                                  # sign
 421         return res
 422
 423     def decode(self, v):
 424         """ decodes a latched value into sign / exponent / mantissa
 425
 426             bias is subtracted here, from the exponent.  exponent
 427             is extended to 10 bits so that subtract 127 is done on
 428             a 10-bit number
 429         """
 430         args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
 431         #print ("decode", self.e_end)
 432         return [self.m.eq(Cat(*args)), # mantissa
 433                 self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp
 434                 self.s.eq(v[-1]),                 # sign
 435                 ]
 436
 437     def shift_down(self, inp):
 438         """ shifts a mantissa down by one. exponent is increased to compensate
 439
 440             accuracy is lost as a result in the mantissa however there are 3
 441             guard bits (the latter of which is the "sticky" bit)
 442         """
 443         return [self.e.eq(inp.e + 1),
 444                 self.m.eq(Cat(inp.m[0] | inp.m[1], inp.m[2:], 0))
 445                ]
 446
 447     def shift_down_multi(self, diff, inp=None):
 448         """ shifts a mantissa down. exponent is increased to compensate
 449
 450             accuracy is lost as a result in the mantissa however there are 3
 451             guard bits (the latter of which is the "sticky" bit)
 452
 453             this code works by variable-shifting the mantissa by up to
 454             its maximum bit-length: no point doing more (it'll still be
 455             zero).
 456
 457             the sticky bit is computed by shifting a batch of 1s by
 458             the same amount, which will introduce zeros.  it's then
 459             inverted and used as a mask to get the LSBs of the mantissa.
 460             those are then |'d into the sticky bit.
 461         """
 462         if inp is None:
 463             inp = self
 464         sm = MultiShift(self.width)
 465         mw = Const(self.m_width-1, len(diff))
 466         maxslen = Mux(diff > mw, mw, diff)
 467         rs = sm.rshift(inp.m[1:], maxslen)
 468         maxsleni = mw - maxslen
 469         m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert
 470
 471         #stickybit = reduce(or_, inp.m[1:] & m_mask) | inp.m[0]
 472         stickybit = (inp.m[1:] & m_mask).bool() | inp.m[0]
 473         return [self.e.eq(inp.e + diff),
 474                 self.m.eq(Cat(stickybit, rs))
 475                ]
 476
 477     def shift_up_multi(self, diff):
 478         """ shifts a mantissa up. exponent is decreased to compensate
 479         """
 480         sm = MultiShift(self.width)
 481         mw = Const(self.m_width, len(diff))
 482         maxslen = Mux(diff > mw, mw, diff)
 483
 484         return [self.e.eq(self.e - diff),
 485                 self.m.eq(sm.lshift(self.m, maxslen))
 486                ]
 487
 488 class Trigger(Elaboratable):
 489     def __init__(self):
 490
 491         self.stb = Signal(reset=0)
 492         self.ack = Signal()
 493         self.trigger = Signal(reset_less=True)
 494
 495     def elaborate(self, platform):
 496         m = Module()
 497         m.d.comb += self.trigger.eq(self.stb & self.ack)
 498         return m
 499
 500     def eq(self, inp):
 501         return [self.stb.eq(inp.stb),
 502                 self.ack.eq(inp.ack)
 503                ]
 504
 505     def ports(self):
 506         return [self.stb, self.ack]
 507
 508
 509 class FPOpIn(PrevControl):
 510     def __init__(self, width):
 511         PrevControl.__init__(self)
 512         self.width = width
 513
 514     @property
 515     def v(self):
 516         return self.data_i
 517
 518     def chain_inv(self, in_op, extra=None):
 519         stb = in_op.stb
 520         if extra is not None:
 521             stb = stb & extra
 522         return [self.v.eq(in_op.v),          # receive value
 523                 self.stb.eq(stb),      # receive STB
 524                 in_op.ack.eq(~self.ack), # send ACK
 525                ]
 526
 527     def chain_from(self, in_op, extra=None):
 528         stb = in_op.stb
 529         if extra is not None:
 530             stb = stb & extra
 531         return [self.v.eq(in_op.v),          # receive value
 532                 self.stb.eq(stb),      # receive STB
 533                 in_op.ack.eq(self.ack), # send ACK
 534                ]
 535
 536
 537 class FPOpOut(NextControl):
 538     def __init__(self, width):
 539         NextControl.__init__(self)
 540         self.width = width
 541
 542     @property
 543     def v(self):
 544         return self.data_o
 545
 546     def chain_inv(self, in_op, extra=None):
 547         stb = in_op.stb
 548         if extra is not None:
 549             stb = stb & extra
 550         return [self.v.eq(in_op.v),          # receive value
 551                 self.stb.eq(stb),      # receive STB
 552                 in_op.ack.eq(~self.ack), # send ACK
 553                ]
 554
 555     def chain_from(self, in_op, extra=None):
 556         stb = in_op.stb
 557         if extra is not None:
 558             stb = stb & extra
 559         return [self.v.eq(in_op.v),          # receive value
 560                 self.stb.eq(stb),      # receive STB
 561                 in_op.ack.eq(self.ack), # send ACK
 562                ]
 563
 564
 565 class Overflow: #(Elaboratable):
 566     def __init__(self):
 567         self.guard = Signal(reset_less=True)     # tot[2]
 568         self.round_bit = Signal(reset_less=True) # tot[1]
 569         self.sticky = Signal(reset_less=True)    # tot[0]
 570         self.m0 = Signal(reset_less=True)        # mantissa zero bit
 571
 572         #self.roundz = Signal(reset_less=True)
 573
 574     def __iter__(self):
 575         yield self.guard
 576         yield self.round_bit
 577         yield self.sticky
 578         yield self.m0
 579
 580     def eq(self, inp):
 581         return [self.guard.eq(inp.guard),
 582                 self.round_bit.eq(inp.round_bit),
 583                 self.sticky.eq(inp.sticky),
 584                 self.m0.eq(inp.m0)]
 585
 586     @property
 587     def roundz(self):
 588         return self.guard & (self.round_bit | self.sticky | self.m0)
 589
 590
 591 class FPBase:
 592     """ IEEE754 Floating Point Base Class
 593
 594         contains common functions for FP manipulation, such as
 595         extracting and packing operands, normalisation, denormalisation,
 596         rounding etc.
 597     """
 598
 599     def get_op(self, m, op, v, next_state):
 600         """ this function moves to the next state and copies the operand
 601             when both stb and ack are 1.
 602             acknowledgement is sent by setting ack to ZERO.
 603         """
 604         res = v.decode2(m)
 605         ack = Signal()
 606         with m.If((op.ready_o) & (op.valid_i_test)):
 607             m.next = next_state
 608             # op is latched in from FPNumIn class on same ack/stb
 609             m.d.comb += ack.eq(0)
 610         with m.Else():
 611             m.d.comb += ack.eq(1)
 612         return [res, ack]
 613
 614     def denormalise(self, m, a):
 615         """ denormalises a number.  this is probably the wrong name for
 616             this function.  for normalised numbers (exponent != minimum)
 617             one *extra* bit (the implicit 1) is added *back in*.
 618             for denormalised numbers, the mantissa is left alone
 619             and the exponent increased by 1.
 620
 621             both cases *effectively multiply the number stored by 2*,
 622             which has to be taken into account when extracting the result.
 623         """
 624         with m.If(a.exp_n127):
 625             m.d.sync += a.e.eq(a.N126) # limit a exponent
 626         with m.Else():
 627             m.d.sync += a.m[-1].eq(1) # set top mantissa bit
 628
 629     def op_normalise(self, m, op, next_state):
 630         """ operand normalisation
 631             NOTE: just like "align", this one keeps going round every clock
 632                   until the result's exponent is within acceptable "range"
 633         """
 634         with m.If((op.m[-1] == 0)): # check last bit of mantissa
 635             m.d.sync +=[
 636                 op.e.eq(op.e - 1),  # DECREASE exponent
 637                 op.m.eq(op.m << 1), # shift mantissa UP
 638             ]
 639         with m.Else():
 640             m.next = next_state
 641
 642     def normalise_1(self, m, z, of, next_state):
 643         """ first stage normalisation
 644
 645             NOTE: just like "align", this one keeps going round every clock
 646                   until the result's exponent is within acceptable "range"
 647             NOTE: the weirdness of reassigning guard and round is due to
 648                   the extra mantissa bits coming from tot[0..2]
 649         """
 650         with m.If((z.m[-1] == 0) & (z.e > z.N126)):
 651             m.d.sync += [
 652                 z.e.eq(z.e - 1),  # DECREASE exponent
 653                 z.m.eq(z.m << 1), # shift mantissa UP
 654                 z.m[0].eq(of.guard),       # steal guard bit (was tot[2])
 655                 of.guard.eq(of.round_bit), # steal round_bit (was tot[1])
 656                 of.round_bit.eq(0),        # reset round bit
 657                 of.m0.eq(of.guard),
 658             ]
 659         with m.Else():
 660             m.next = next_state
 661
 662     def normalise_2(self, m, z, of, next_state):
 663         """ second stage normalisation
 664
 665             NOTE: just like "align", this one keeps going round every clock
 666                   until the result's exponent is within acceptable "range"
 667             NOTE: the weirdness of reassigning guard and round is due to
 668                   the extra mantissa bits coming from tot[0..2]
 669         """
 670         with m.If(z.e < z.N126):
 671             m.d.sync +=[
 672                 z.e.eq(z.e + 1),  # INCREASE exponent
 673                 z.m.eq(z.m >> 1), # shift mantissa DOWN
 674                 of.guard.eq(z.m[0]),
 675                 of.m0.eq(z.m[1]),
 676                 of.round_bit.eq(of.guard),
 677                 of.sticky.eq(of.sticky | of.round_bit)
 678             ]
 679         with m.Else():
 680             m.next = next_state
 681
 682     def roundz(self, m, z, roundz):
 683         """ performs rounding on the output.  TODO: different kinds of rounding
 684         """
 685         with m.If(roundz):
 686             m.d.sync += z.m.eq(z.m + 1) # mantissa rounds up
 687             with m.If(z.m == z.m1s): # all 1s
 688                 m.d.sync += z.e.eq(z.e + 1) # exponent rounds up
 689
 690     def corrections(self, m, z, next_state):
 691         """ denormalisation and sign-bug corrections
 692         """
 693         m.next = next_state
 694         # denormalised, correct exponent to zero
 695         with m.If(z.is_denormalised):
 696             m.d.sync += z.e.eq(z.N127)
 697
 698     def pack(self, m, z, next_state):
 699         """ packs the result into the output (detects overflow->Inf)
 700         """
 701         m.next = next_state
 702         # if overflow occurs, return inf
 703         with m.If(z.is_overflowed):
 704             m.d.sync += z.inf(z.s)
 705         with m.Else():
 706             m.d.sync += z.create(z.s, z.e, z.m)
 707
 708     def put_z(self, m, z, out_z, next_state):
 709         """ put_z: stores the result in the output.  raises stb and waits
 710             for ack to be set to 1 before moving to the next state.
 711             resets stb back to zero when that occurs, as acknowledgement.
 712         """
 713         m.d.sync += [
 714           out_z.v.eq(z.v)
 715         ]
 716         with m.If(out_z.valid_o & out_z.ready_i_test):
 717             m.d.sync += out_z.valid_o.eq(0)
 718             m.next = next_state
 719         with m.Else():
 720             m.d.sync += out_z.valid_o.eq(1)
 721
 722
 723 class FPState(FPBase):
 724     def __init__(self, state_from):
 725         self.state_from = state_from
 726
 727     def set_inputs(self, inputs):
 728         self.inputs = inputs
 729         for k,v in inputs.items():
 730             setattr(self, k, v)
 731
 732     def set_outputs(self, outputs):
 733         self.outputs = outputs
 734         for k,v in outputs.items():
 735             setattr(self, k, v)
 736
 737
 738 class FPID:
 739     def __init__(self, id_wid):
 740         self.id_wid = id_wid
 741         if self.id_wid:
 742             self.in_mid = Signal(id_wid, reset_less=True)
 743             self.out_mid = Signal(id_wid, reset_less=True)
 744         else:
 745             self.in_mid = None
 746             self.out_mid = None
 747
 748     def idsync(self, m):
 749         if self.id_wid is not None:
 750             m.d.sync += self.out_mid.eq(self.in_mid)
 751
 752