# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Const
6 from nmigen
.cli
import main
, verilog
""" Floating-point Number Class, variable-width TODO (currently 32-bit)

    Contains signals for an incoming copy of the value, decoded into
    sign / exponent / mantissa.
    Also contains encoding functions, creation and recognition of
    zero, NaN and inf (all signed)

    Four extra bits are included in the mantissa: the top bit
    (m[-1]) is effectively a carry-overflow.  The other three are
    guard (m[2]), round (m[1]), and sticky (m[0])
"""
def __init__(self, width, m_width=None):
    """Create the signals and constants describing one floating-point number.

    Args:
        width: bit width of the latched value ``v``.
        m_width: bit width of the mantissa signal ``m``.  When omitted it
            defaults to ``width - 5``.
    """
    # Only fall back to the default when the caller did not choose a width;
    # an unconditional overwrite would silently discard FPNum(width, 24).
    if m_width is None:
        m_width = width - 5  # mantissa extra bits (top,guard,round)

    self.v = Signal(width)       # Latched copy of value
    self.m = Signal(m_width)     # Mantissa
    self.e = Signal((10, True))  # Exponent: 10 bits, signed
    self.s = Signal()            # Sign bit

    # Constants used by the comparison / creation helpers.
    self.mzero = Const(0, (m_width, False))  # all-zeros mantissa
    self.m1s = Const(-1, (m_width, False))   # all-ones mantissa
    self.P128 = Const(128, (10, True))
    self.P127 = Const(127, (10, True))
    self.N127 = Const(-127, (10, True))
    self.N126 = Const(-126, (10, True))
38 """ decodes a latched value into sign / exponent / mantissa
40 bias is subtracted here, from the exponent. exponent
41 is extended to 10 bits so that subtract 127 is done on
44 return [self
.m
.eq(Cat(0, 0, 0, v
[0:23])), # mantissa
45 self
.e
.eq(v
[23:31] - self
.P127
), # exp (minus bias)
46 self
.s
.eq(v
[31]), # sign
def create(self, s, e, m):
    """Create a value from sign / exponent / mantissa.

    bias is added here, to the exponent

    Args:
        s: sign, assigned to bit 31 of ``v``.
        e: unbiased exponent; ``P127`` is added before it is assigned to
            bits 23..30 of ``v``.
        m: mantissa, assigned to bits 0..22 of ``v``.

    Returns:
        A list of the three assignments, suitable for ``m.d.sync += ...``.
    """
    # The assignments must be returned: callers add the result of create()
    # to a clock domain, so merely evaluating them would do nothing.
    return [
        self.v[31].eq(s),                 # sign
        self.v[23:31].eq(e + self.P127),  # exp (add on bias)
        self.v[0:23].eq(m),               # mantissa
    ]
def shift_down(self):
    """Shift the mantissa down by one; the exponent is increased to compensate.

    accuracy is lost as a result in the mantissa however there are 3
    guard bits (the latter of which is the "sticky" bit)

    Returns:
        A list of two assignments (exponent, mantissa) for the caller to
        add to a clock domain.
    """
    # New bit 0 is the OR of the old bits 0 and 1, so a 1 shifted out of the
    # bottom is remembered; bits 2.. move down by one and a 0 enters on top.
    shifted = Cat(self.m[0] | self.m[1], self.m[2:], 0)
    return [
        self.e.eq(self.e + 1),
        self.m.eq(shifted),
    ]
def nan(self, s):
    """Return the assignments that set ``v`` to a NaN with sign ``s``.

    The value is built with exponent ``P128`` and mantissa ``1 << 22``
    (only the top mantissa bit set).
    """
    return self.create(s, self.P128, 1 << 22)
def inf(self, s):
    """Return the assignments that set ``v`` to infinity with sign ``s``.

    The value is built with exponent ``P128`` and a zero mantissa.
    """
    return self.create(s, self.P128, 0)
77 return self
.create(s
, self
.N127
, 0)
def is_nan(self):
    """Return the condition "exponent equals P128 and mantissa is non-zero"."""
    max_exponent = self.e == self.P128
    has_payload = self.m != 0
    return max_exponent & has_payload
def is_inf(self):
    """Return the condition "exponent equals P128 and mantissa is zero"."""
    max_exponent = self.e == self.P128
    empty_mantissa = self.m == 0
    return max_exponent & empty_mantissa
def is_zero(self):
    """Return the condition "exponent equals N127 and mantissa equals mzero"."""
    min_exponent = self.e == self.N127
    empty_mantissa = self.m == self.mzero
    return min_exponent & empty_mantissa
def is_overflowed(self):
    """Return the condition "exponent is greater than P127"."""
    return self.e > self.P127
def is_denormalised(self):
    """Return the condition "exponent equals N126 and mantissa bit 23 is clear".

    NOTE(review): bit 23 is the top bit only for a 24-bit mantissa (as
    created by ``FPNum(width, 24)``); confirm before using this on numbers
    with a different mantissa width.
    """
    at_min_exponent = self.e == self.N126
    top_bit_clear = self.m[23] == 0
    return at_min_exponent & top_bit_clear
def __init__(self, width):
    """Create the signals of one operand/result port.

    Args:
        width: bit width of the value signal ``v``.
    """
    self.v = Signal(width)  # the value carried by this port
    # Handshake signals: ports() returns them and the get/put helpers
    # read and drive them, so they must exist on every port.
    self.stb = Signal()
    self.ack = Signal()
def ports(self):
    """Return this port's signals as a list: value, strobe, acknowledge."""
    return [self.v, self.stb, self.ack]
108 self
.guard
= Signal() # tot[2]
109 self
.round_bit
= Signal() # tot[1]
110 self
.sticky
= Signal() # tot[0]
def __init__(self, width):
    """Create the adder's two operand ports and its result port.

    Args:
        width: bit width of the operands and of the result.
    """
    # get_fragment() reads self.width when it builds its FPNum latches.
    self.width = width

    self.in_a = FPOp(width)
    self.in_b = FPOp(width)
    self.out_z = FPOp(width)
# get_op: operand-fetch handshake helper (purpose per its own docstring below).
# NOTE(review): this block is incomplete as extracted.  The embedded original
# line numbers jump from 127 to 134, the docstring is never closed, and the
# visible lines reference neither `v` nor `next_state`.  Only the guard on
# (op.ack & op.stb) and a single `op.ack.eq(1)` assignment are present; the
# state transition and operand copy promised by the docstring are not shown.
# Restore the dropped lines from the upstream file before relying on this.
121 def get_op(self
, m
, op
, v
, next_state
):
122 """ this function moves to the next state and copies the operand
123 when both stb and ack are 1.
124 acknowledgement is sent by setting ack to ZERO.
127 with m
.If((op
.ack
) & (op
.stb
)):
134 m
.d
.sync
+= op
.ack
.eq(1)
def normalise_1(self, m, z, of, next_state):
    """First stage normalisation.

    NOTE: just like "align", this one keeps going round every clock
          until the result's exponent is within acceptable "range"
    NOTE: the weirdness of reassigning guard and round is due to
          the extra mantissa bits coming from tot[0..2]

    Args:
        m: the module being built (provides If/Else, d.sync and next).
        z: the result number whose mantissa/exponent are adjusted.
        of: holder of the guard / round_bit / sticky bits.
        next_state: FSM state to enter once no further shift is needed.
    """
    with m.If((z.m[-1] == 0) & (z.e > z.N126)):
        m.d.sync += [
            z.e.eq(z.e - 1),            # DECREASE exponent
            z.m.eq(z.m << 1),           # shift mantissa UP
            z.m[0].eq(of.guard),        # steal guard bit (was tot[2])
            of.guard.eq(of.round_bit),  # steal round_bit (was tot[1])
            of.round_bit.eq(0),         # reset round bit
        ]
    with m.Else():
        # Nothing left to shift: only now leave this state.
        m.next = next_state
def normalise_2(self, m, z, of, next_state):
    """Second stage normalisation.

    NOTE: just like "align", this one keeps going round every clock
          until the result's exponent is within acceptable "range"
    NOTE: the weirdness of reassigning guard and round is due to
          the extra mantissa bits coming from tot[0..2]

    Args:
        m: the module being built (provides If/Else, d.sync and next).
        z: the result number whose mantissa/exponent are adjusted.
        of: holder of the guard / round_bit / sticky bits.
        next_state: FSM state to enter once the exponent is in range.
    """
    with m.If(z.e < z.N126):
        m.d.sync += [
            z.e.eq(z.e + 1),   # INCREASE exponent
            z.m.eq(z.m >> 1),  # shift mantissa DOWN
            # The bit shifted out of the mantissa becomes the new guard bit
            # (mirror image of normalise_1, where m[0] takes the guard bit).
            of.guard.eq(z.m[0]),
            of.round_bit.eq(of.guard),
            of.sticky.eq(of.sticky | of.round_bit),
        ]
    with m.Else():
        # Exponent is within range: only now leave this state.
        m.next = next_state
def roundz(self, m, z, of, next_state):
    """Round the result mantissa using the guard / round / sticky bits.

    The mantissa is incremented when guard is set together with any of
    round_bit, sticky or the mantissa's lowest bit.  If the mantissa was
    all ones the exponent is incremented as well.

    Args:
        m: the module being built.
        z: the result number being rounded.
        of: holder of the guard / round_bit / sticky bits.
        next_state: FSM state to enter on the next clock.
    """
    # Rounding takes a single clock, so always advance.
    m.next = next_state
    with m.If(of.guard & (of.round_bit | of.sticky | z.m[0])):
        m.d.sync += z.m.eq(z.m + 1)  # mantissa rounds up
        with m.If(z.m == z.m1s):  # all 1s
            m.d.sync += z.e.eq(z.e + 1)  # exponent rounds up
def corrections(self, m, z, next_state):
    """Apply the final fix-ups to the result before it is packed.

    Args:
        m: the module being built.
        z: the result number being corrected.
        next_state: FSM state to enter on the next clock.
    """
    # Corrections take a single clock, so always advance.
    m.next = next_state
    # denormalised, correct exponent to zero
    # (-127 plus the bias of 127 added by create() encodes as exponent 0;
    # the value belongs in the exponent, not in the mantissa).
    with m.If(z.is_denormalised()):
        m.d.sync += z.e.eq(z.N127)
    # FIX SIGN BUG: -a + a = +0.
    with m.If((z.e == z.N126) & (z.m[0:] == 0)):
        m.d.sync += z.s.eq(0)
def pack(self, m, z, next_state):
    """Encode the result's sign / exponent / mantissa into its value ``v``.

    Args:
        m: the module being built.
        z: the result number to encode.
        next_state: FSM state to enter on the next clock.
    """
    # Packing takes a single clock, so always advance.
    m.next = next_state
    # if overflow occurs, return inf
    with m.If(z.is_overflowed()):
        # Keep the result's own sign so a negative overflow yields -inf.
        m.d.sync += z.inf(z.s)
    with m.Else():
        m.d.sync += z.create(z.s, z.e, z.m)
def put_z(self, m, z, out_z, next_state):
    """put_z: stores the result in the output.  raises stb and waits
    for ack to be set to 1 before moving to the next state.
    resets stb back to zero when that occurs, as acknowledgement.

    Args:
        m: the module being built.
        z: the result number whose value ``v`` is presented.
        out_z: the output port (v / stb / ack).
        next_state: FSM state to enter once the output is acknowledged.
    """
    # Present the result and raise the strobe while in this state.
    m.d.sync += [
        out_z.stb.eq(1),
        out_z.v.eq(z.v),
    ]
    with m.If(out_z.stb & out_z.ack):
        m.next = next_state
        # Assigned after the stb.eq(1) above so that it takes priority.
        m.d.sync += out_z.stb.eq(0)
211 def get_fragment(self
, platform
=None):
215 a
= FPNum(self
.width
)
216 b
= FPNum(self
.width
)
217 z
= FPNum(self
.width
, 24)
219 tot
= Signal(28) # sticky/round/guard bits, 23 result, 1 overflow
228 with m
.State("get_a"):
229 self
.get_op(m
, self
.in_a
, a
, "get_b")
234 with m
.State("get_b"):
235 self
.get_op(m
, self
.in_b
, b
, "special_cases")
238 # special cases: NaNs, infs, zeros, denormalised
240 with m
.State("special_cases"):
242 # if a is NaN or b is NaN return NaN
243 with m
.If(a
.is_nan() | b
.is_nan()):
247 # if a is inf return inf (or NaN)
248 with m
.Elif(a
.is_inf()):
250 m
.d
.sync
+= z
.inf(a
.s
)
251 # if a is inf and signs don't match return NaN
252 with m
.If((b
.e
== b
.P128
) & (a
.s
!= b
.s
)):
253 m
.d
.sync
+= z
.nan(b
.s
)
255 # if b is inf return inf
256 with m
.Elif(b
.is_inf()):
258 m
.d
.sync
+= z
.inf(b
.s
)
260 # if a is zero and b zero return signed-a/b
261 with m
.Elif(a
.is_zero() & b
.is_zero()):
263 m
.d
.sync
+= z
.create(a
.s
& b
.s
, b
.e
[0:8], b
.m
[3:-1])
265 # if a is zero return b
266 with m
.Elif(a
.is_zero()):
268 m
.d
.sync
+= z
.create(b
.s
, b
.e
[0:8], b
.m
[3:-1])
270 # if b is zero return a
271 with m
.Elif(b
.is_zero()):
273 m
.d
.sync
+= z
.create(a
.s
, a
.e
[0:8], a
.m
[3:-1])
275 # Denormalised Number checks
278 # denormalise a check
279 with m
.If(a
.e
== a
.N127
):
280 m
.d
.sync
+= a
.e
.eq(-126) # limit a exponent
282 m
.d
.sync
+= a
.m
[-1].eq(1) # set top mantissa bit
283 # denormalise b check
284 with m
.If(b
.e
== a
.N127
):
285 m
.d
.sync
+= b
.e
.eq(-126) # limit b exponent
287 m
.d
.sync
+= b
.m
[-1].eq(1) # set top mantissa bit
290 # align. NOTE: this does *not* do single-cycle multi-shifting,
291 # it *STAYS* in the align state until the exponents match
293 with m
.State("align"):
294 # exponent of a greater than b: increment b exp, shift b mant
295 with m
.If(a
.e
> b
.e
):
296 m
.d
.sync
+= b
.shift_down()
297 # exponent of b greater than a: increment a exp, shift a mant
298 with m
.Elif(a
.e
< b
.e
):
299 m
.d
.sync
+= a
.shift_down()
300 # exponents equal: move to next stage.
305 # First stage of add. covers same-sign (add) and subtract
306 # special-casing when mantissas are greater or equal, to
307 # give greatest accuracy.
309 with m
.State("add_0"):
311 m
.d
.sync
+= z
.e
.eq(a
.e
)
312 # same-sign (both negative or both positive) add mantissas
313 with m
.If(a
.s
== b
.s
):
318 # a mantissa greater than b, use a
319 with m
.Elif(a
.m
>= b
.m
):
324 # b mantissa greater than a, use b
332 # Second stage of add: preparation for normalisation.
333 # detects when tot sum is too big (tot[27] is kinda a carry bit)
335 with m
.State("add_1"):
336 m
.next
= "normalise_1"
337 # tot[27] gets set when the sum overflows. shift result down
342 of
.round_bit
.eq(tot
[2]),
343 of
.sticky
.eq(tot
[1] | tot
[0]),
351 of
.round_bit
.eq(tot
[1]),
356 # First stage of normalisation.
358 with m
.State("normalise_1"):
359 self
.normalise_1(m
, z
, of
, "normalise_2")
362 # Second stage of normalisation.
364 with m
.State("normalise_2"):
365 self
.normalise_2(m
, z
, of
, "round")
370 with m
.State("round"):
371 self
.roundz(m
, z
, of
, "corrections")
376 with m
.State("corrections"):
377 self
.corrections(m
, z
, "pack")
382 with m
.State("pack"):
383 self
.pack(m
, z
, "put_z")
388 with m
.State("put_z"):
389 self
.put_z(m
, z
, self
.out_z
, "get_a")
if __name__ == "__main__":
    # Build a 32-bit adder and hand it to the nmigen command-line driver,
    # exposing the signals of both operand ports and of the result port.
    alu = FPADD(width=32)
    all_ports = alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()
    main(alu, ports=all_ports)
399 # works... but don't use, just do "python fname.py convert -t v"
400 #print (verilog.convert(alu, ports=[
401 # ports=alu.in_a.ports() + \
402 # alu.in_b.ports() + \