# IEEE Floating Point Adder (Single Precision)
# Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Const
6 from nmigen
.cli
import main
, verilog
10 """ Floating-point Number Class, variable-width TODO (currently 32-bit)
12 Contains signals for an incoming copy of the value, decoded into
13 sign / exponent / mantissa.
14 Also contains encoding functions, creation and recognition of
15 zero, NaN and inf (all signed)
17 Four extra bits are included in the mantissa: the top bit
18 (m[-1]) is effectively a carry-overflow. The other three are
19 guard (m[2]), round (m[1]), and sticky (m[0])
def __init__(self, width, m_width=None):
    """ Creates the signals and constants for one decoded FP number.

        width:   bit-width of the latched raw value (self.v)
        m_width: bit-width of the mantissa signal (self.m).  When it is
                 left as None it is derived as width - 5.
    """
    # Only derive the mantissa width when the caller did not supply one,
    # so that an explicit width such as FPNum(width, 24) is honoured.
    if m_width is None:
        m_width = width - 5  # mantissa extra bits (top,guard,round)
    self.m_width = m_width

    self.v = Signal(width)       # Latched copy of value
    self.m = Signal(m_width)     # Mantissa
    self.e = Signal((10, True))  # Exponent: 10 bits, signed
    self.s = Signal()            # Sign bit

    # mantissa-wide constants: all zeros and all ones
    self.mzero = Const(0, (m_width, False))
    self.m1s = Const(-1, (m_width, False))

    # exponent constants, same 10-bit signed shape as self.e
    self.P128 = Const(128, (10, True))
    self.P127 = Const(127, (10, True))
    self.N127 = Const(-127, (10, True))
    self.N126 = Const(-126, (10, True))
39 """ decodes a latched value into sign / exponent / mantissa
41 bias is subtracted here, from the exponent. exponent
42 is extended to 10 bits so that subtract 127 is done on
45 args
= [0] * (self
.m_width
-24) + [v
[0:23]] # pad with extra zeros
46 return [self
.m
.eq(Cat(*args
)), # mantissa
47 self
.e
.eq(v
[23:31] - self
.P127
), # exp (minus bias)
48 self
.s
.eq(v
[31]), # sign
def create(self, s, e, m):
    """ creates a value from sign / exponent / mantissa

        bias is added here, to the exponent

        s: sign, assigned to bit 31 of self.v
        e: exponent (without bias), assigned to bits 23..30 after
           self.P127 has been added
        m: mantissa, assigned to bits 0..22

        returns the three assignments as a list, so that callers can
        add them to a domain (m.d.sync += z.create(...))
    """
    biased_exponent = e + self.P127  # exp (add on bias)
    return [
        self.v[31].eq(s),                   # sign
        self.v[23:31].eq(biased_exponent),  # exponent
        self.v[0:23].eq(m),                 # mantissa
    ]
63 """ shifts a mantissa down by one. exponent is increased to compensate
65 accuracy is lost as a result in the mantissa however there are 3
66 guard bits (the latter of which is the "sticky" bit)
68 return [self
.e
.eq(self
.e
+ 1),
69 self
.m
.eq(Cat(self
.m
[0] | self
.m
[1], self
.m
[2:], 0))
73 return self
.create(s
, self
.P128
, 1<<22)
76 return self
.create(s
, self
.P128
, 0)
79 return self
.create(s
, self
.N127
, 0)
82 return (self
.e
== self
.P128
) & (self
.m
!= 0)
85 return (self
.e
== self
.P128
) & (self
.m
== 0)
88 return (self
.e
== self
.N127
) & (self
.m
== self
.mzero
)
def is_overflowed(self):
    """ returns the test "exponent is greater than P127" (i.e. +127)

        used when packing the result, to decide whether inf has to be
        produced instead of the computed value.
    """
    exponent_limit = self.P127
    return self.e > exponent_limit
def is_denormalised(self):
    """ returns the test "exponent equals N126 (-126) and mantissa
        bit 23 is clear"

        NOTE: bit 23 is hard-coded; it is the top bit only of a
        24-bit mantissa.
    """
    at_minimum_exponent = self.e == self.N126
    bit_23_clear = self.m[23] == 0
    return at_minimum_exponent & bit_23_clear
def __init__(self, width):
    """ Creates the signals of one operand port.

        width: bit-width of the value signal (self.v)

        stb and ack are the two handshake signals which ports()
        exposes alongside v, and which the operand get / result put
        code reads and drives.
    """
    self.v = Signal(width)  # the operand / result value
    self.stb = Signal()     # handshake: strobe
    self.ack = Signal()     # handshake: acknowledge
106 return [self
.v
, self
.stb
, self
.ack
]
111 self
.guard
= Signal() # tot[2]
112 self
.round_bit
= Signal() # tot[1]
113 self
.sticky
= Signal() # tot[0]
117 """ IEEE754 Floating Point Base Class
119 contains common functions for FP manipulation, such as
120 extracting and packing operands, normalisation, denormalisation,
124 def get_op(self
, m
, op
, v
, next_state
):
125 """ this function moves to the next state and copies the operand
126 when both stb and ack are 1.
127 acknowledgement is sent by setting ack to ZERO.
129 with m
.If((op
.ack
) & (op
.stb
)):
136 m
.d
.sync
+= op
.ack
.eq(1)
def denormalise(self, m, a):
    """ denormalises a number

        m: the nmigen Module that the statements are added to
        a: the decoded number (exponent a.e, mantissa a.m) to adjust

        when the exponent equals N127 (-127) the exponent is limited to
        -126 and the mantissa is left alone.  in every other case the
        top mantissa bit is set instead.  the two actions are mutually
        exclusive: a number whose exponent is -127 must not also have
        its top mantissa bit forced to 1.
    """
    with m.If(a.e == a.N127):
        m.d.sync += a.e.eq(-126)  # limit a exponent
    with m.Else():
        m.d.sync += a.m[-1].eq(1)  # set top mantissa bit
def normalise_1(self, m, z, of, next_state):
    """ first stage normalisation

        NOTE: just like "align", this one keeps going round every clock
              until the result's exponent is within acceptable "range"
        NOTE: the weirdness of reassigning guard and round is due to
              the extra mantissa bits coming from tot[0..2]

        m:          the nmigen Module that the statements are added to
        z:          the result number being normalised
        of:         the guard / round_bit / sticky bits belonging to z
        next_state: FSM state to move to once no further shift is needed
    """
    needs_shift_up = (z.m[-1] == 0) & (z.e > z.N126)
    with m.If(needs_shift_up):
        m.d.sync += [
            z.e.eq(z.e - 1),             # DECREASE exponent
            z.m.eq(z.m << 1),            # shift mantissa UP
            # listed after the shift so that it takes priority for bit 0
            z.m[0].eq(of.guard),         # steal guard bit (was tot[2])
            of.guard.eq(of.round_bit),   # steal round_bit (was tot[1])
            of.round_bit.eq(0),          # reset round bit
        ]
    with m.Else():
        # nothing left to shift: leave this state
        m.next = next_state
def normalise_2(self, m, z, of, next_state):
    """ second stage normalisation

        NOTE: just like "align", this one keeps going round every clock
              until the result's exponent is within acceptable "range"
        NOTE: the weirdness of reassigning guard and round is due to
              the extra mantissa bits coming from tot[0..2]

        m:          the nmigen Module that the statements are added to
        z:          the result number being normalised
        of:         the guard / round_bit / sticky bits belonging to z
        next_state: FSM state to move to once the exponent is no longer
                    below N126 (-126)
    """
    with m.If(z.e < z.N126):
        m.d.sync += [
            z.e.eq(z.e + 1),      # INCREASE exponent
            z.m.eq(z.m >> 1),     # shift mantissa DOWN
            # the bit shifted out of the mantissa becomes the new guard
            # bit; guard and round_bit each move down one place, and
            # sticky accumulates whatever falls off the end.
            of.guard.eq(z.m[0]),
            of.round_bit.eq(of.guard),
            of.sticky.eq(of.sticky | of.round_bit),
        ]
    with m.Else():
        # exponent is in range: leave this state
        m.next = next_state
def roundz(self, m, z, of, next_state):
    """ performs rounding on the output.  TODO: different kinds of rounding

        m:          the nmigen Module that the statements are added to
        z:          the result number being rounded
        of:         the guard / round_bit / sticky bits belonging to z
        next_state: FSM state to move to (rounding takes a single pass)

        the mantissa is incremented when guard is set together with at
        least one of round_bit, sticky or the mantissa's lowest bit.
        if the mantissa was all 1s at that point, the exponent is
        incremented as well.
    """
    m.next = next_state
    round_up = of.guard & (of.round_bit | of.sticky | z.m[0])
    with m.If(round_up):
        m.d.sync += z.m.eq(z.m + 1)  # mantissa rounds up
        # only relevant when actually rounding up: the increment carries
        # out of an all-1s mantissa.
        with m.If(z.m == z.m1s):  # all 1s
            m.d.sync += z.e.eq(z.e + 1)  # exponent rounds up
def corrections(self, m, z, next_state):
    """ denormalisation and sign-bug corrections

        m:          the nmigen Module that the statements are added to
        z:          the result number being corrected
        next_state: FSM state to move to (corrections take a single pass)
    """
    m.next = next_state
    # denormalised, correct exponent to zero.  -127 is what becomes an
    # all-zero exponent field once the bias of 127 is added on.  it is
    # the exponent (z.e) that must change here: the mantissa is kept.
    with m.If(z.is_denormalised()):
        m.d.sync += z.e.eq(-127)
    # FIX SIGN BUG: -a + a = +0.
    with m.If((z.e == z.N126) & (z.m[0:] == 0)):
        m.d.sync += z.s.eq(0)
204 def pack(self
, m
, z
, next_state
):
205 """ packs the result into the output (detects overflow->Inf)
208 # if overflow occurs, return inf
209 with m
.If(z
.is_overflowed()):
212 m
.d
.sync
+= z
.create(z
.s
, z
.e
, z
.m
)
214 def put_z(self
, m
, z
, out_z
, next_state
):
215 """ put_z: stores the result in the output. raises stb and waits
216 for ack to be set to 1 before moving to the next state.
217 resets stb back to zero when that occurs, as acknowledgement.
223 with m
.If(out_z
.stb
& out_z
.ack
):
224 m
.d
.sync
+= out_z
.stb
.eq(0)
def __init__(self, width):
    """ Creates the adder's two operand ports and its result port.

        width: bit-width of the operands and of the result
    """
    FPBase.__init__(self)
    # kept because get_fragment() sizes its FPNum instances from it
    self.width = width

    self.in_a = FPOp(width)   # first operand
    self.in_b = FPOp(width)   # second operand
    self.out_z = FPOp(width)  # result
238 def get_fragment(self
, platform
=None):
239 """ creates the HDL code-fragment for FPAdd
244 a
= FPNum(self
.width
)
245 b
= FPNum(self
.width
)
246 z
= FPNum(self
.width
, 24)
248 tot
= Signal(28) # sticky/round/guard bits, 23 result, 1 overflow
257 with m
.State("get_a"):
258 self
.get_op(m
, self
.in_a
, a
, "get_b")
263 with m
.State("get_b"):
264 self
.get_op(m
, self
.in_b
, b
, "special_cases")
267 # special cases: NaNs, infs, zeros, denormalised
268 # NOTE: some of these are unique to add. see "Special Operations"
269 # https://steve.hollasch.net/cgindex/coding/ieeefloat.html
271 with m
.State("special_cases"):
273 # if a is NaN or b is NaN return NaN
274 with m
.If(a
.is_nan() | b
.is_nan()):
278 # if a is inf return inf (or NaN)
279 with m
.Elif(a
.is_inf()):
281 m
.d
.sync
+= z
.inf(a
.s
)
282 # if a is inf and signs don't match return NaN
283 with m
.If((b
.e
== b
.P128
) & (a
.s
!= b
.s
)):
284 m
.d
.sync
+= z
.nan(b
.s
)
286 # if b is inf return inf
287 with m
.Elif(b
.is_inf()):
289 m
.d
.sync
+= z
.inf(b
.s
)
291 # if a is zero and b zero return signed-a/b
292 with m
.Elif(a
.is_zero() & b
.is_zero()):
294 m
.d
.sync
+= z
.create(a
.s
& b
.s
, b
.e
[0:8], b
.m
[3:-1])
296 # if a is zero return b
297 with m
.Elif(a
.is_zero()):
299 m
.d
.sync
+= z
.create(b
.s
, b
.e
[0:8], b
.m
[3:-1])
301 # if b is zero return a
302 with m
.Elif(b
.is_zero()):
304 m
.d
.sync
+= z
.create(a
.s
, a
.e
[0:8], a
.m
[3:-1])
306 # Denormalised Number checks
309 self
.denormalise(m
, a
)
310 self
.denormalise(m
, b
)
313 # align. NOTE: this does *not* do single-cycle multi-shifting,
314 # it *STAYS* in the align state until the exponents match
316 with m
.State("align"):
317 # exponent of a greater than b: increment b exp, shift b mant
318 with m
.If(a
.e
> b
.e
):
319 m
.d
.sync
+= b
.shift_down()
320 # exponent of b greater than a: increment a exp, shift a mant
321 with m
.Elif(a
.e
< b
.e
):
322 m
.d
.sync
+= a
.shift_down()
323 # exponents equal: move to next stage.
328 # First stage of add. covers same-sign (add) and subtract
329 # special-casing when mantissas are greater or equal, to
330 # give greatest accuracy.
332 with m
.State("add_0"):
334 m
.d
.sync
+= z
.e
.eq(a
.e
)
335 # same-sign (both negative or both positive) add mantissas
336 with m
.If(a
.s
== b
.s
):
341 # a mantissa greater than b, use a
342 with m
.Elif(a
.m
>= b
.m
):
347 # b mantissa greater than a, use b
355 # Second stage of add: preparation for normalisation.
356 # detects when tot sum is too big (tot[27] is kinda a carry bit)
358 with m
.State("add_1"):
359 m
.next
= "normalise_1"
360 # tot[27] gets set when the sum overflows. shift result down
365 of
.round_bit
.eq(tot
[2]),
366 of
.sticky
.eq(tot
[1] | tot
[0]),
374 of
.round_bit
.eq(tot
[1]),
379 # First stage of normalisation.
381 with m
.State("normalise_1"):
382 self
.normalise_1(m
, z
, of
, "normalise_2")
385 # Second stage of normalisation.
387 with m
.State("normalise_2"):
388 self
.normalise_2(m
, z
, of
, "round")
393 with m
.State("round"):
394 self
.roundz(m
, z
, of
, "corrections")
399 with m
.State("corrections"):
400 self
.corrections(m
, z
, "pack")
405 with m
.State("pack"):
406 self
.pack(m
, z
, "put_z")
411 with m
.State("put_z"):
412 self
.put_z(m
, z
, self
.out_z
, "get_a")
if __name__ == "__main__":
    # Build a 32-bit adder and hand it to the nmigen command-line entry
    # point, exposing the ports of both operands and of the result.
    alu = FPADD(width=32)
    alu_ports = alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports()
    main(alu, ports=alu_ports)
422 # works... but don't use, just do "python fname.py convert -t v"
423 #print (verilog.convert(alu, ports=[
424 # ports=alu.in_a.ports() + \
425 # alu.in_b.ports() + \