add extra comments
[ieee754fpu.git] / src / add / fpbase.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Signal, Cat, Const
6
7
class FPNum:
    """ Floating-point Number Class, variable-width (32-bit or 64-bit)

        Contains signals for an incoming copy of the value, decoded into
        sign / exponent / mantissa.
        Also contains encoding functions, creation and recognition of
        zero, NaN and inf (all signed)

        Compared to the stored fraction field, four extra bits are
        included in the mantissa: the top bit (m[-1]) is effectively a
        carry-overflow.  The other three, present only when m_extra is
        True, are guard (m[2]), round (m[1]), and sticky (m[0])
    """
    def __init__(self, width, m_extra=True):
        """ creates the signals and constants for a width-bit number

            width:   32 or 64 (any other value raises KeyError from the
                     lookup tables below)
            m_extra: when True, three low-order guard/round/sticky bits
                     are added to the mantissa signal
        """
        self.width = width
        m_width = {32: 24, 64: 53}[width]
        e_width = {32: 10, 64: 13}[width]
        e_max = 1 << (e_width - 3)  # 128 for 32-bit, 1024 for 64-bit
        self.rmw = m_width  # real mantissa width (not including extras)
        # mantissa extra bits (guard, round, sticky)
        self.m_extra = 3 if m_extra else 0
        m_width += self.m_extra
        self.m_width = m_width
        self.e_width = e_width
        # bit-range of the exponent field inside the packed value v
        self.e_start = self.rmw - 1
        self.e_end = self.rmw + self.e_width - 3  # for decoding

        # NOTE: the signals are assigned directly to attributes on purpose,
        # so that each one is named after its attribute.
        self.v = Signal(width)            # Latched copy of value
        self.m = Signal(m_width)          # Mantissa
        self.e = Signal((e_width, True))  # Exponent: e_width bits, signed
        self.s = Signal()                 # Sign bit

        self.mzero = Const(0, (m_width, False))   # mantissa: all zeros
        self.m1s = Const(-1, (m_width, False))    # mantissa: all ones
        # exponent constants (names reflect the 32-bit values)
        self.P128 = Const(e_max, (e_width, True))
        self.P127 = Const(e_max - 1, (e_width, True))
        self.N127 = Const(-(e_max - 1), (e_width, True))
        self.N126 = Const(-(e_max - 2), (e_width, True))

    def decode(self, v):
        """ decodes a latched value into sign / exponent / mantissa

            bias is subtracted here, from the exponent.  the exponent
            signal is wider than the exponent field and signed (10 bits
            for a 32-bit number) so that the subtraction of the bias is
            done on the wider number

            returns a list of assignments (to m, e and s)
        """
        # pad with extra zeros: the guard/round/sticky positions go below
        # the fraction bits taken from v
        args = [0] * self.m_extra + [v[0:self.e_start]]
        return [self.m.eq(Cat(*args)),                               # mantissa
                self.e.eq(v[self.e_start:self.e_end] - self.P127),   # exp
                self.s.eq(v[-1]),                                    # sign
               ]

    def create(self, s, e, m):
        """ creates a value from sign / exponent / mantissa

            bias is added here, to the exponent

            returns a list of assignments to the relevant slices of self.v
        """
        return [
            self.v[-1].eq(s),                                   # sign
            self.v[self.e_start:self.e_end].eq(e + self.P127),  # exp (add bias)
            self.v[0:self.e_start].eq(m),                       # mantissa
        ]

    def shift_down(self):
        """ shifts a mantissa down by one. exponent is increased to compensate

            accuracy is lost as a result in the mantissa however there are 3
            guard bits (the latter of which is the "sticky" bit)
        """
        # the new bottom bit is the OR of the two old bottom bits (so that
        # a 1 which is shifted out is remembered); the top bit becomes zero
        return [self.e.eq(self.e + 1),
                self.m.eq(Cat(self.m[0] | self.m[1], self.m[2:], 0))
               ]

    def nan(self, s):
        """ assignments which set v to a NaN with sign s: maximum exponent
            and the top bit of the fraction field set
        """
        return self.create(s, self.P128, 1 << (self.e_start - 1))

    def inf(self, s):
        """ assignments which set v to infinity with sign s: maximum
            exponent, zero fraction
        """
        return self.create(s, self.P128, 0)

    def zero(self, s):
        """ assignments which set v to zero with sign s: minimum exponent,
            zero fraction
        """
        return self.create(s, self.N127, 0)

    def is_nan(self):
        """ expression: exponent is P128 and the mantissa is non-zero """
        return (self.e == self.P128) & (self.m != 0)

    def is_inf(self):
        """ expression: exponent is P128 and the mantissa is zero """
        return (self.e == self.P128) & (self.m == 0)

    def is_zero(self):
        """ expression: exponent is N127 and the mantissa is zero """
        return (self.e == self.N127) & (self.m == self.mzero)

    def is_overflowed(self):
        """ expression: exponent is greater than P127 """
        return (self.e > self.P127)

    def is_denormalised(self):
        """ expression: exponent is N126 and mantissa bit e_start is clear """
        return (self.e == self.N126) & (self.m[self.e_start] == 0)
108
109
class FPOp:
    """ Operand port: a value signal together with a stb / ack pair

        v   - the value (width bits)
        stb - strobe
        ack - acknowledge
    """
    def __init__(self, width):
        """ creates the three signals for a width-bit operand """
        self.width = width

        # NOTE: assigned directly to attributes so that each signal is
        # named after its attribute
        self.v = Signal(width)
        self.stb = Signal()
        self.ack = Signal()

    def ports(self):
        """ returns the signals of this port as a list: [v, stb, ack] """
        port_list = [self.v]
        port_list.extend((self.stb, self.ack))
        return port_list
120
121
class Overflow:
    """ Holder for the three single-bit rounding signals

        guard, round_bit and sticky correspond to bits 2, 1 and 0 of
        "tot" respectively
    """
    def __init__(self):
        # NOTE: assigned directly to attributes so that each signal is
        # named after its attribute
        self.guard = Signal()      # tot[2]
        self.round_bit = Signal()  # tot[1]
        self.sticky = Signal()     # tot[0]
127
128
class FPBase:
    """ IEEE754 Floating Point Base Class

        contains common functions for FP manipulation, such as
        extracting and packing operands, normalisation, denormalisation,
        rounding etc.

        each function adds statements to the module "m" that is passed
        in (synchronous assignments through m.d.sync, state transitions
        through m.next)
    """

    def get_op(self, m, op, v, next_state):
        """ this function moves to the next state and copies the operand
            when both stb and ack are 1.
            acknowledgement is sent by setting ack to ZERO.

            m:          module to add the statements to
            op:         incoming operand port (v, stb, ack)
            v:          number that the operand is decoded into
            next_state: state to move to once the operand is captured
        """
        with m.If((op.ack) & (op.stb)):
            m.next = next_state
            m.d.sync += [
                v.decode(op.v),
                op.ack.eq(0)
            ]
        with m.Else():
            # not yet transferred: keep ack raised
            m.d.sync += op.ack.eq(1)

    def denormalise(self, m, a):
        """ denormalises a number.  this is probably the wrong name for
            this function.  for normalised numbers (exponent != minimum)
            one *extra* bit (the implicit 1) is added *back in*.
            for denormalised numbers, the mantissa is left alone
            and the exponent increased by 1.

            both cases *effectively multiply the number stored by 2*,
            which has to be taken into account when extracting the result.
        """
        with m.If(a.e == a.N127):
            m.d.sync += a.e.eq(a.N126)  # limit a exponent
        with m.Else():
            m.d.sync += a.m[-1].eq(1)   # set top mantissa bit

    def op_normalise(self, m, op, next_state):
        """ operand normalisation
            NOTE: just like "align", this one keeps going round every clock
                  until the top bit of the mantissa is set
        """
        with m.If((op.m[-1] == 0)):  # check last bit of mantissa
            m.d.sync += [
                op.e.eq(op.e - 1),   # DECREASE exponent
                op.m.eq(op.m << 1),  # shift mantissa UP
            ]
        with m.Else():
            m.next = next_state

    def normalise_1(self, m, z, of, next_state):
        """ first stage normalisation

            NOTE: just like "align", this one keeps going round every clock
                  until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
                  the extra mantissa bits coming from tot[0..2]
        """
        with m.If((z.m[-1] == 0) & (z.e > z.N126)):
            # order matters: the assignment to z.m[0] must come after the
            # assignment to the whole of z.m, so that it takes priority
            m.d.sync += [
                z.e.eq(z.e - 1),            # DECREASE exponent
                z.m.eq(z.m << 1),           # shift mantissa UP
                z.m[0].eq(of.guard),        # steal guard bit (was tot[2])
                of.guard.eq(of.round_bit),  # steal round_bit (was tot[1])
                of.round_bit.eq(0),         # reset round bit
            ]
        with m.Else():
            m.next = next_state

    def normalise_2(self, m, z, of, next_state):
        """ second stage normalisation

            NOTE: just like "align", this one keeps going round every clock
                  until the result's exponent is within acceptable "range"
            NOTE: the weirdness of reassigning guard and round is due to
                  the extra mantissa bits coming from tot[0..2]
        """
        with m.If(z.e < z.N126):
            m.d.sync += [
                z.e.eq(z.e + 1),   # INCREASE exponent
                z.m.eq(z.m >> 1),  # shift mantissa DOWN
                # the bits move down one place: m[0] -> guard -> round,
                # and the old round bit is accumulated into sticky
                of.guard.eq(z.m[0]),
                of.round_bit.eq(of.guard),
                of.sticky.eq(of.sticky | of.round_bit)
            ]
        with m.Else():
            m.next = next_state

    def roundz(self, m, z, of, next_state):
        """ performs rounding on the output.  TODO: different kinds of rounding
        """
        m.next = next_state
        with m.If(of.guard & (of.round_bit | of.sticky | z.m[0])):
            m.d.sync += z.m.eq(z.m + 1)  # mantissa rounds up
            with m.If(z.m == z.m1s):  # all 1s
                m.d.sync += z.e.eq(z.e + 1)  # exponent rounds up

    def corrections(self, m, z, next_state):
        """ denormalisation and sign-bug corrections
        """
        m.next = next_state
        # denormalised, correct exponent to zero.  it is the *exponent*
        # which is set to N127 (adding the bias in create() then gives an
        # exponent field of zero): the mantissa must be left alone
        with m.If(z.is_denormalised()):
            m.d.sync += z.e.eq(z.N127)
        # FIX SIGN BUG: -a + a = +0.
        with m.If((z.e == z.N126) & (z.m[0:] == 0)):
            m.d.sync += z.s.eq(0)

    def pack(self, m, z, next_state):
        """ packs the result into the output (detects overflow->Inf)
        """
        m.next = next_state
        # if overflow occurs, return inf.  the sign of the result is kept,
        # so that a negative result which overflows becomes -inf
        with m.If(z.is_overflowed()):
            m.d.sync += z.inf(z.s)
        with m.Else():
            m.d.sync += z.create(z.s, z.e, z.m)

    def put_z(self, m, z, out_z, next_state):
        """ put_z: stores the result in the output.  raises stb and waits
            for ack to be set to 1 before moving to the next state.
            resets stb back to zero when that occurs, as acknowledgement.
        """
        m.d.sync += [
            out_z.stb.eq(1),
            out_z.v.eq(z.v)
        ]
        with m.If(out_z.stb & out_z.ack):
            # added after stb.eq(1) above, so this takes priority
            m.d.sync += out_z.stb.eq(0)
            m.next = next_state
259
260