1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Const
6 from nmigen
.cli
import main
, verilog
10 """ Floating-point Number Class, variable-width TODO (currently 32-bit)
12 Contains signals for an incoming copy of the value, decoded into
13 sign / exponent / mantissa.
14 Also contains encoding functions, creation and recognition of
15 zero, NaN and inf (all signed)
17 Four extra bits are included in the mantissa: the top bit
18 (m[-1]) is effectively a carry-overflow. The other three are
19 guard (m[2]), round (m[1]), and sticky (m[0])
21 def __init__(self
, width
, m_width
=None):
24 m_width
= width
- 5 # mantissa extra bits (top,guard,round)
25 self
.m_width
= m_width
26 self
.v
= Signal(width
) # Latched copy of value
27 self
.m
= Signal(m_width
) # Mantissa
28 self
.e
= Signal((10, True)) # Exponent: 10 bits, signed
29 self
.s
= Signal() # Sign bit
31 self
.mzero
= Const(0, (m_width
, False))
32 self
.m1s
= Const(-1, (m_width
, False))
33 self
.P128
= Const(128, (10, True))
34 self
.P127
= Const(127, (10, True))
35 self
.N127
= Const(-127, (10, True))
36 self
.N126
= Const(-126, (10, True))
39 """ decodes a latched value into sign / exponent / mantissa
41 bias is subtracted here, from the exponent. exponent
42 is extended to 10 bits so that subtract 127 is done on
45 args
= [0] * (self
.m_width
-24) + [v
[0:23]] # pad with extra zeros
46 return [self
.m
.eq(Cat(*args
)), # mantissa
47 self
.e
.eq(v
[23:31] - self
.P127
), # exp (minus bias)
48 self
.s
.eq(v
[31]), # sign
51 def create(self
, s
, e
, m
):
52 """ creates a value from sign / exponent / mantissa
54 bias is added here, to the exponent
57 self
.v
[31].eq(s
), # sign
58 self
.v
[23:31].eq(e
+ self
.P127
), # exp (add on bias)
59 self
.v
[0:23].eq(m
) # mantissa
63 """ shifts a mantissa down by one. exponent is increased to compensate
65 accuracy is lost as a result in the mantissa however there are 3
66 guard bits (the latter of which is the "sticky" bit)
68 return [self
.e
.eq(self
.e
+ 1),
69 self
.m
.eq(Cat(self
.m
[0] | self
.m
[1], self
.m
[2:], 0))
73 return self
.create(s
, self
.P128
, 1<<22)
76 return self
.create(s
, self
.P128
, 0)
79 return self
.create(s
, self
.N127
, 0)
82 return (self
.e
== self
.P128
) & (self
.m
!= 0)
85 return (self
.e
== self
.P128
) & (self
.m
== 0)
88 return (self
.e
== self
.N127
) & (self
.m
== self
.mzero
)
def is_overflowed(self):
    """ reports whether the exponent is above the largest finite value

        the exponent held in self.e already has the bias removed, so
        anything greater than the P127 constant cannot be encoded as a
        finite number.  returns the result of that comparison.
    """
    too_large = self.e > self.P127
    return too_large
def is_denormalised(self):
    """ reports whether the number is denormalised

        true when the exponent equals the N126 constant *and* bit 23 of
        the mantissa is clear.  both conditions are combined with a
        bitwise AND so that the result can be used as an HDL expression.

        NOTE(review): bit 23 is the top mantissa bit only when the
        mantissa is 24 bits wide - confirm before using this on wider
        mantissas.
    """
    at_lowest_exponent = self.e == self.N126
    top_bit_clear = self.m[23] == 0
    return at_lowest_exponent & top_bit_clear
98 def __init__(self
, width
):
101 self
.v
= Signal(width
)
106 return [self
.v
, self
.stb
, self
.ack
]
111 self
.guard
= Signal() # tot[2]
112 self
.round_bit
= Signal() # tot[1]
113 self
.sticky
= Signal() # tot[0]
117 """ IEEE754 Floating Point Base Class
119 contains common functions for FP manipulation, such as
120 extracting and packing operands, normalisation, denormalisation,
124 def get_op(self
, m
, op
, v
, next_state
):
125 """ this function moves to the next state and copies the operand
126 when both stb and ack are 1.
127 acknowledgement is sent by setting ack to ZERO.
129 with m
.If((op
.ack
) & (op
.stb
)):
136 m
.d
.sync
+= op
.ack
.eq(1)
138 def denormalise(self
, m
, a
):
139 """ denormalises a number
141 with m
.If(a
.e
== a
.N127
):
142 m
.d
.sync
+= a
.e
.eq(-126) # limit a exponent
144 m
.d
.sync
+= a
.m
[-1].eq(1) # set top mantissa bit
146 def op_normalise(self
, m
, op
, of
, next_state
):
147 """ operand normalisation
148 NOTE: just like "align", this one keeps going round every clock
149 until the result's exponent is within acceptable "range"
151 with m
.If((op
.m
[-1] == 0)): # check last bit of mantissa
153 op
.e
.eq(op
.e
- 1), # DECREASE exponent
154 op
.m
.eq(op
.m
<< 1), # shift mantissa UP
159 def normalise_1(self
, m
, z
, of
, next_state
):
160 """ first stage normalisation
162 NOTE: just like "align", this one keeps going round every clock
163 until the result's exponent is within acceptable "range"
164 NOTE: the weirdness of reassigning guard and round is due to
165 the extra mantissa bits coming from tot[0..2]
167 with m
.If((z
.m
[-1] == 0) & (z
.e
> z
.N126
)):
169 z
.e
.eq(z
.e
- 1), # DECREASE exponent
170 z
.m
.eq(z
.m
<< 1), # shift mantissa UP
171 z
.m
[0].eq(of
.guard
), # steal guard bit (was tot[2])
172 of
.guard
.eq(of
.round_bit
), # steal round_bit (was tot[1])
173 of
.round_bit
.eq(0), # reset round bit
178 def normalise_2(self
, m
, z
, of
, next_state
):
179 """ second stage normalisation
181 NOTE: just like "align", this one keeps going round every clock
182 until the result's exponent is within acceptable "range"
183 NOTE: the weirdness of reassigning guard and round is due to
184 the extra mantissa bits coming from tot[0..2]
186 with m
.If(z
.e
< z
.N126
):
188 z
.e
.eq(z
.e
+ 1), # INCREASE exponent
189 z
.m
.eq(z
.m
>> 1), # shift mantissa DOWN
191 of
.round_bit
.eq(of
.guard
),
192 of
.sticky
.eq(of
.sticky | of
.round_bit
)
197 def roundz(self
, m
, z
, of
, next_state
):
198 """ performs rounding on the output. TODO: different kinds of rounding
201 with m
.If(of
.guard
& (of
.round_bit | of
.sticky | z
.m
[0])):
202 m
.d
.sync
+= z
.m
.eq(z
.m
+ 1) # mantissa rounds up
203 with m
.If(z
.m
== z
.m1s
): # all 1s
204 m
.d
.sync
+= z
.e
.eq(z
.e
+ 1) # exponent rounds up
def corrections(self, m, z, next_state):
    """ denormalisation and sign-bug corrections

        moves the state machine on to next_state and queues two
        independent clocked corrections on the result z:

        * when z.is_denormalised() is true the *exponent* is set to
          -127 (z.N127).  the bias of 127 is added back when the value
          is packed by z.create(), so this produces an all-zero exponent
          field.  (previously -127 was written to the mantissa, which
          corrupted it and left the exponent untouched.)
        * when the exponent equals N126 and every mantissa bit is zero,
          the sign is cleared so that -a + a gives +0.

        m:          the module the statements are added to
        z:          the result number being corrected
        next_state: name of the state to move to
    """
    m.next = next_state
    # denormalised, correct exponent to zero (-127 before bias is added)
    with m.If(z.is_denormalised()):
        m.d.sync += z.e.eq(z.N127)
    # FIX SIGN BUG: -a + a = +0.
    with m.If((z.e == z.N126) & (z.m[0:] == 0)):
        m.d.sync += z.s.eq(0)
217 def pack(self
, m
, z
, next_state
):
218 """ packs the result into the output (detects overflow->Inf)
221 # if overflow occurs, return inf
222 with m
.If(z
.is_overflowed()):
225 m
.d
.sync
+= z
.create(z
.s
, z
.e
, z
.m
)
227 def put_z(self
, m
, z
, out_z
, next_state
):
228 """ put_z: stores the result in the output. raises stb and waits
229 for ack to be set to 1 before moving to the next state.
230 resets stb back to zero when that occurs, as acknowledgement.
236 with m
.If(out_z
.stb
& out_z
.ack
):
237 m
.d
.sync
+= out_z
.stb
.eq(0)
243 def __init__(self
, width
):
244 FPBase
.__init
__(self
)
247 self
.in_a
= FPOp(width
)
248 self
.in_b
= FPOp(width
)
249 self
.out_z
= FPOp(width
)
251 def get_fragment(self
, platform
=None):
252 """ creates the HDL code-fragment for FPAdd
257 a
= FPNum(self
.width
)
258 b
= FPNum(self
.width
)
259 z
= FPNum(self
.width
, 24)
261 tot
= Signal(28) # sticky/round/guard bits, 23 result, 1 overflow
270 with m
.State("get_a"):
271 self
.get_op(m
, self
.in_a
, a
, "get_b")
276 with m
.State("get_b"):
277 self
.get_op(m
, self
.in_b
, b
, "special_cases")
280 # special cases: NaNs, infs, zeros, denormalised
281 # NOTE: some of these are unique to add. see "Special Operations"
282 # https://steve.hollasch.net/cgindex/coding/ieeefloat.html
284 with m
.State("special_cases"):
286 # if a is NaN or b is NaN return NaN
287 with m
.If(a
.is_nan() | b
.is_nan()):
291 # if a is inf return inf (or NaN)
292 with m
.Elif(a
.is_inf()):
294 m
.d
.sync
+= z
.inf(a
.s
)
295 # if a is inf and signs don't match return NaN
296 with m
.If((b
.e
== b
.P128
) & (a
.s
!= b
.s
)):
297 m
.d
.sync
+= z
.nan(b
.s
)
299 # if b is inf return inf
300 with m
.Elif(b
.is_inf()):
302 m
.d
.sync
+= z
.inf(b
.s
)
304 # if a is zero and b zero return signed-a/b
305 with m
.Elif(a
.is_zero() & b
.is_zero()):
307 m
.d
.sync
+= z
.create(a
.s
& b
.s
, b
.e
[0:8], b
.m
[3:-1])
309 # if a is zero return b
310 with m
.Elif(a
.is_zero()):
312 m
.d
.sync
+= z
.create(b
.s
, b
.e
[0:8], b
.m
[3:-1])
314 # if b is zero return a
315 with m
.Elif(b
.is_zero()):
317 m
.d
.sync
+= z
.create(a
.s
, a
.e
[0:8], a
.m
[3:-1])
319 # Denormalised Number checks
322 self
.denormalise(m
, a
)
323 self
.denormalise(m
, b
)
326 # align. NOTE: this does *not* do single-cycle multi-shifting,
327 # it *STAYS* in the align state until the exponents match
329 with m
.State("align"):
330 # exponent of a greater than b: increment b exp, shift b mant
331 with m
.If(a
.e
> b
.e
):
332 m
.d
.sync
+= b
.shift_down()
333 # exponent of b greater than a: increment a exp, shift a mant
334 with m
.Elif(a
.e
< b
.e
):
335 m
.d
.sync
+= a
.shift_down()
336 # exponents equal: move to next stage.
341 # First stage of add. covers same-sign (add) and subtract
342 # special-casing when mantissas are greater or equal, to
343 # give greatest accuracy.
345 with m
.State("add_0"):
347 m
.d
.sync
+= z
.e
.eq(a
.e
)
348 # same-sign (both negative or both positive) add mantissas
349 with m
.If(a
.s
== b
.s
):
354 # a mantissa greater than b, use a
355 with m
.Elif(a
.m
>= b
.m
):
360 # b mantissa greater than a, use b
368 # Second stage of add: preparation for normalisation.
369 # detects when tot sum is too big (tot[27] is kinda a carry bit)
371 with m
.State("add_1"):
372 m
.next
= "normalise_1"
373 # tot[27] gets set when the sum overflows. shift result down
378 of
.round_bit
.eq(tot
[2]),
379 of
.sticky
.eq(tot
[1] | tot
[0]),
387 of
.round_bit
.eq(tot
[1]),
392 # First stage of normalisation.
394 with m
.State("normalise_1"):
395 self
.normalise_1(m
, z
, of
, "normalise_2")
398 # Second stage of normalisation.
400 with m
.State("normalise_2"):
401 self
.normalise_2(m
, z
, of
, "round")
406 with m
.State("round"):
407 self
.roundz(m
, z
, of
, "corrections")
412 with m
.State("corrections"):
413 self
.corrections(m
, z
, "pack")
418 with m
.State("pack"):
419 self
.pack(m
, z
, "put_z")
424 with m
.State("put_z"):
425 self
.put_z(m
, z
, self
.out_z
, "get_a")
if __name__ == "__main__":
    # build a 32-bit adder and pass it, together with the ports of its
    # two input operands and its output, to main() from nmigen.cli
    alu = FPADD(width=32)
    port_list = []
    for operand in (alu.in_a, alu.in_b, alu.out_z):
        port_list = port_list + operand.ports()
    main(alu, ports=port_list)
435 # works... but don't use, just do "python fname.py convert -t v"
436 #print (verilog.convert(alu, ports=[
437 # ports=alu.in_a.ports() + \
438 # alu.in_b.ports() + \