src/add/fpbase.py

   1 # IEEE Floating Point Adder (Single Precision)
   2 # Copyright (C) Jonathan P Dawson 2013
   3 # 2013-12-12
   4
   5 from nmigen import Signal, Cat, Const, Mux, Module
   6 from math import log
   7 from operator import or_
   8 from functools import reduce
   9
  10 class MultiShiftR:
  11
  12     def __init__(self, width):
  13         self.width = width
  14         self.smax = int(log(width) / log(2))
  15         self.i = Signal(width)
  16         self.s = Signal(self.smax)
  17         self.o = Signal(width)
  18
  19     def elaborate(self, platform):
  20         m = Module()
  21         m.d.comb += self.o.eq(self.i >> self.s)
  22         return m
  23
  24
  25 class MultiShift:
  26     """ Generates variable-length single-cycle shifter from a series
  27         of conditional tests on each bit of the left/right shift operand.
  28         Each bit tested produces output shifted by that number of bits,
  29         in a binary fashion: bit 1 if set shifts by 1 bit, bit 2 if set
  30         shifts by 2 bits, each partial result cascading to the next Mux.
  31
  32         Could be adapted to do arithmetic shift by taking copies of the
  33         MSB instead of zeros.
  34     """
  35
  36     def __init__(self, width):
  37         self.width = width
  38         self.smax = int(log(width) / log(2))
  39
  40     def lshift(self, op, s):
  41         res = op << s
  42         return res[:len(op)]
  43         res = op
  44         for i in range(self.smax):
  45             zeros = [0] * (1<<i)
  46             res = Mux(s & (1<<i), Cat(zeros, res[0:-(1<<i)]), res)
  47         return res
  48
  49     def rshift(self, op, s):
  50         res = op >> s
  51         return res[:len(op)]
  52         res = op
  53         for i in range(self.smax):
  54             zeros = [0] * (1<<i)
  55             res = Mux(s & (1<<i), Cat(res[(1<<i):], zeros), res)
  56         return res
  57
  58
  59 class FPNum:
  60     """ Floating-point Number Class, variable-width TODO (currently 32-bit)
  61
  62         Contains signals for an incoming copy of the value, decoded into
  63         sign / exponent / mantissa.
  64         Also contains encoding functions, creation and recognition of
  65         zero, NaN and inf (all signed)
  66
  67         Four extra bits are included in the mantissa: the top bit
  68         (m[-1]) is effectively a carry-overflow.  The other three are
  69         guard (m[2]), round (m[1]), and sticky (m[0])
  70     """
  71     def __init__(self, width, m_extra=True):
  72         self.width = width
  73         m_width = {32: 24, 64: 53}[width]
  74         e_width = {32: 10, 64: 13}[width]
  75         e_max = 1<<(e_width-3)
  76         self.rmw = m_width # real mantissa width (not including extras)
  77         self.e_max = e_max
  78         if m_extra:
  79             # mantissa extra bits (top,guard,round)
  80             self.m_extra = 3
  81             m_width += self.m_extra
  82         else:
  83             self.m_extra = 0
  84         #print (m_width, e_width, e_max, self.rmw, self.m_extra)
  85         self.m_width = m_width
  86         self.e_width = e_width
  87         self.e_start = self.rmw - 1
  88         self.e_end = self.rmw + self.e_width - 3 # for decoding
  89
  90         self.v = Signal(width)      # Latched copy of value
  91         self.m = Signal(m_width)    # Mantissa
  92         self.e = Signal((e_width, True)) # Exponent: 10 bits, signed
  93         self.s = Signal()           # Sign bit
  94
  95         self.mzero = Const(0, (m_width, False))
  96         self.m1s = Const(-1, (m_width, False))
  97         self.P128 = Const(e_max, (e_width, True))
  98         self.P127 = Const(e_max-1, (e_width, True))
  99         self.N127 = Const(-(e_max-1), (e_width, True))
 100         self.N126 = Const(-(e_max-2), (e_width, True))
 101
 102     def decode(self, v):
 103         """ decodes a latched value into sign / exponent / mantissa
 104
 105             bias is subtracted here, from the exponent.  exponent
 106             is extended to 10 bits so that subtract 127 is done on
 107             a 10-bit number
 108         """
 109         args = [0] * self.m_extra + [v[0:self.e_start]] # pad with extra zeros
 110         #print ("decode", self.e_end)
 111         return [self.m.eq(Cat(*args)), # mantissa
 112                 self.e.eq(v[self.e_start:self.e_end] - self.P127), # exp
 113                 self.s.eq(v[-1]),                 # sign
 114                 ]
 115
 116     def create(self, s, e, m):
 117         """ creates a value from sign / exponent / mantissa
 118
 119             bias is added here, to the exponent
 120         """
 121         return [
 122           self.v[-1].eq(s),          # sign
 123           self.v[self.e_start:self.e_end].eq(e + self.P127), # exp (add on bias)
 124           self.v[0:self.e_start].eq(m)         # mantissa
 125         ]
 126
 127     def shift_down(self):
 128         """ shifts a mantissa down by one. exponent is increased to compensate
 129
 130             accuracy is lost as a result in the mantissa however there are 3
 131             guard bits (the latter of which is the "sticky" bit)
 132         """
 133         return [self.e.eq(self.e + 1),
 134                 self.m.eq(Cat(self.m[0] | self.m[1], self.m[2:], 0))
 135                ]
 136
 137     def shift_down_multi(self, diff):
 138         """ shifts a mantissa down. exponent is increased to compensate
 139
 140             accuracy is lost as a result in the mantissa however there are 3
 141             guard bits (the latter of which is the "sticky" bit)
 142
 143             this code works by variable-shifting the mantissa by up to
 144             its maximum bit-length: no point doing more (it'll still be
 145             zero).
 146
 147             the sticky bit is computed by shifting a batch of 1s by
 148             the same amount, which will introduce zeros.  it's then
 149             inverted and used as a mask to get the LSBs of the mantissa.
 150             those are then |'d into the sticky bit.
 151         """
 152         sm = MultiShift(self.width)
 153         mw = Const(self.m_width-1, len(diff))
 154         maxslen = Mux(diff > mw, mw, diff)
 155         rs = sm.rshift(self.m[1:], maxslen)
 156         maxsleni = mw - maxslen
 157         m_mask = sm.rshift(self.m1s[1:], maxsleni) # shift and invert
 158
 159         stickybits = reduce(or_, self.m[1:] & m_mask) | self.m[0]
 160         return [self.e.eq(self.e + diff),
 161                 self.m.eq(Cat(stickybits, rs))
 162                ]
 163
 164     def nan(self, s):
 165         return self.create(s, self.P128, 1<<(self.e_start-1))
 166
 167     def inf(self, s):
 168         return self.create(s, self.P128, 0)
 169
 170     def zero(self, s):
 171         return self.create(s, self.N127, 0)
 172
 173     def is_nan(self):
 174         return (self.e == self.P128) & (self.m != 0)
 175
 176     def is_inf(self):
 177         return (self.e == self.P128) & (self.m == 0)
 178
 179     def is_zero(self):
 180         return (self.e == self.N127) & (self.m == self.mzero)
 181
 182     def is_overflowed(self):
 183         return (self.e > self.P127)
 184
 185     def is_denormalised(self):
 186         return (self.e == self.N126) & (self.m[self.e_start] == 0)
 187
 188
 189 class FPOp:
 190     def __init__(self, width):
 191         self.width = width
 192
 193         self.v   = Signal(width)
 194         self.stb = Signal()
 195         self.ack = Signal()
 196
 197     def ports(self):
 198         return [self.v, self.stb, self.ack]
 199
 200
 201 class Overflow:
 202     def __init__(self):
 203         self.guard = Signal()     # tot[2]
 204         self.round_bit = Signal() # tot[1]
 205         self.sticky = Signal()    # tot[0]
 206
 207
 208 class FPBase:
 209     """ IEEE754 Floating Point Base Class
 210
 211         contains common functions for FP manipulation, such as
 212         extracting and packing operands, normalisation, denormalisation,
 213         rounding etc.
 214     """
 215
 216     def get_op(self, m, op, v, next_state):
 217         """ this function moves to the next state and copies the operand
 218             when both stb and ack are 1.
 219             acknowledgement is sent by setting ack to ZERO.
 220         """
 221         with m.If((op.ack) & (op.stb)):
 222             m.next = next_state
 223             m.d.sync += [
 224                 v.decode(op.v),
 225                 op.ack.eq(0)
 226             ]
 227         with m.Else():
 228             m.d.sync += op.ack.eq(1)
 229
 230     def denormalise(self, m, a):
 231         """ denormalises a number.  this is probably the wrong name for
 232             this function.  for normalised numbers (exponent != minimum)
 233             one *extra* bit (the implicit 1) is added *back in*.
 234             for denormalised numbers, the mantissa is left alone
 235             and the exponent increased by 1.
 236
 237             both cases *effectively multiply the number stored by 2*,
 238             which has to be taken into account when extracting the result.
 239         """
 240         with m.If(a.e == a.N127):
 241             m.d.sync += a.e.eq(a.N126) # limit a exponent
 242         with m.Else():
 243             m.d.sync += a.m[-1].eq(1) # set top mantissa bit
 244
 245     def op_normalise(self, m, op, next_state):
 246         """ operand normalisation
 247             NOTE: just like "align", this one keeps going round every clock
 248                   until the result's exponent is within acceptable "range"
 249         """
 250         with m.If((op.m[-1] == 0)): # check last bit of mantissa
 251             m.d.sync +=[
 252                 op.e.eq(op.e - 1),  # DECREASE exponent
 253                 op.m.eq(op.m << 1), # shift mantissa UP
 254             ]
 255         with m.Else():
 256             m.next = next_state
 257
 258     def normalise_1(self, m, z, of, next_state):
 259         """ first stage normalisation
 260
 261             NOTE: just like "align", this one keeps going round every clock
 262                   until the result's exponent is within acceptable "range"
 263             NOTE: the weirdness of reassigning guard and round is due to
 264                   the extra mantissa bits coming from tot[0..2]
 265         """
 266         with m.If((z.m[-1] == 0) & (z.e > z.N126)):
 267             m.d.sync +=[
 268                 z.e.eq(z.e - 1),  # DECREASE exponent
 269                 z.m.eq(z.m << 1), # shift mantissa UP
 270                 z.m[0].eq(of.guard),       # steal guard bit (was tot[2])
 271                 of.guard.eq(of.round_bit), # steal round_bit (was tot[1])
 272                 of.round_bit.eq(0),        # reset round bit
 273             ]
 274         with m.Else():
 275             m.next = next_state
 276
 277     def normalise_2(self, m, z, of, next_state):
 278         """ second stage normalisation
 279
 280             NOTE: just like "align", this one keeps going round every clock
 281                   until the result's exponent is within acceptable "range"
 282             NOTE: the weirdness of reassigning guard and round is due to
 283                   the extra mantissa bits coming from tot[0..2]
 284         """
 285         with m.If(z.e < z.N126):
 286             m.d.sync +=[
 287                 z.e.eq(z.e + 1),  # INCREASE exponent
 288                 z.m.eq(z.m >> 1), # shift mantissa DOWN
 289                 of.guard.eq(z.m[0]),
 290                 of.round_bit.eq(of.guard),
 291                 of.sticky.eq(of.sticky | of.round_bit)
 292             ]
 293         with m.Else():
 294             m.next = next_state
 295
 296     def roundz(self, m, z, of, next_state):
 297         """ performs rounding on the output.  TODO: different kinds of rounding
 298         """
 299         m.next = next_state
 300         with m.If(of.guard & (of.round_bit | of.sticky | z.m[0])):
 301             m.d.sync += z.m.eq(z.m + 1) # mantissa rounds up
 302             with m.If(z.m == z.m1s): # all 1s
 303                 m.d.sync += z.e.eq(z.e + 1) # exponent rounds up
 304
 305     def corrections(self, m, z, next_state):
 306         """ denormalisation and sign-bug corrections
 307         """
 308         m.next = next_state
 309         # denormalised, correct exponent to zero
 310         with m.If(z.is_denormalised()):
 311             m.d.sync += z.e.eq(z.N127)
 312
 313     def pack(self, m, z, next_state):
 314         """ packs the result into the output (detects overflow->Inf)
 315         """
 316         m.next = next_state
 317         # if overflow occurs, return inf
 318         with m.If(z.is_overflowed()):
 319             m.d.sync += z.inf(z.s)
 320         with m.Else():
 321             m.d.sync += z.create(z.s, z.e, z.m)
 322
 323     def put_z(self, m, z, out_z, next_state):
 324         """ put_z: stores the result in the output.  raises stb and waits
 325             for ack to be set to 1 before moving to the next state.
 326             resets stb back to zero when that occurs, as acknowledgement.
 327         """
 328         m.d.sync += [
 329           out_z.stb.eq(1),
 330           out_z.v.eq(z.v)
 331         ]
 332         with m.If(out_z.stb & out_z.ack):
 333             m.d.sync += out_z.stb.eq(0)
 334             m.next = next_state
 335
 336