ac2f726e61b125a266be016ab74dbc31e0e3f33f
1 # IEEE Floating Point Conversion (FP up/down-conversion, INT to FP)
2 # Copyright (C) Jonathan P Dawson 2013
8 from nmigen
import Module
, Signal
, Cat
, Const
, Mux
, Elaboratable
9 from nmigen
.cli
import main
, verilog
11 from nmutil
.singlepipe
import ControlBase
12 from nmutil
.concurrentunit
import ReservationStations
, num_bits
14 from ieee754
.fpcommon
.fpbase
import Overflow
15 from ieee754
.fpcommon
.getop
import FPADDBaseData
16 from ieee754
.fpcommon
.pack
import FPPackData
17 from ieee754
.fpcommon
.normtopack
import FPNormToPack
18 from ieee754
.fpcommon
.postcalc
import FPAddStage1Data
19 from ieee754
.fpcommon
.msbhigh
import FPMSBHigh
20 from ieee754
.fpcommon
.exphigh
import FPEXPHigh
23 from nmigen
import Module
, Signal
, Elaboratable
26 from ieee754
.fpcommon
.fpbase
import FPNumIn
, FPNumOut
, FPNumBaseRecord
27 from ieee754
.fpcommon
.fpbase
import FPState
, FPNumBase
28 from ieee754
.fpcommon
.getop
import FPPipeContext
30 from ieee754
.fpcommon
.fpbase
import FPNumDecode
, FPNumBaseRecord
31 from nmutil
.singlepipe
import SimpleHandshake
, StageChain
33 from ieee754
.fpcommon
.fpbase
import FPState
34 from ieee754
.pipeline
import PipelineSpec
36 from ieee754
.fcvt
.float2int
import FPCVTFloatToIntMod
41 self
.signed
= Signal(reset_less
=True)
44 return [self
.signed
.eq(i
)]
47 class FPCVTIntToFloatMod(Elaboratable
):
48 """ FP integer conversion: copes with 16/32/64 int to 16/32/64 fp.
50 self.i.ctx.op & 0x1 == 0x1 : SIGNED int
51 self.i.ctx.op & 0x1 == 0x0 : UNSIGNED int
53 def __init__(self
, in_pspec
, out_pspec
):
54 self
.in_pspec
= in_pspec
55 self
.out_pspec
= out_pspec
60 return FPADDBaseData(self
.in_pspec
)
63 return FPAddStage1Data(self
.out_pspec
, e_extra
=True)
65 def setup(self
, m
, i
):
66 """ links module to inputs and outputs
68 m
.submodules
.intconvert
= self
74 def elaborate(self
, platform
):
78 #m.submodules.sc_out_z = self.o.z
80 # decode: XXX really should move to separate stage
81 print("in_width out", self
.in_pspec
.width
,
83 print("a1", self
.in_pspec
.width
)
85 print("z1", z1
.width
, z1
.rmw
, z1
.e_width
, z1
.e_start
, z1
.e_end
)
87 me
= self
.in_pspec
.width
90 print("ms-me", ms
, me
, mz
)
92 # 3 extra bits for guard/round/sticky
93 msb
= FPMSBHigh(me
+3, z1
.e_width
)
94 m
.submodules
.norm_msb
= msb
96 # signed or unsigned, use operator context
97 signed
= Signal(reset_less
=True)
98 comb
+= signed
.eq(self
.i
.ctx
.op
[0])
100 # copy of mantissa (one less bit if signed)
101 mantissa
= Signal(me
, reset_less
=True)
103 # detect signed/unsigned. key case: -ve numbers need inversion
104 # to +ve because the FP sign says if it's -ve or not.
106 comb
+= z1
.s
.eq(self
.i
.a
[-1]) # sign in top bit of a
108 comb
+= mantissa
.eq(-self
.i
.a
) # invert input if sign -ve
110 comb
+= mantissa
.eq(self
.i
.a
) # leave as-is
112 comb
+= mantissa
.eq(self
.i
.a
) # unsigned, use full a
115 # set input from full INT
116 comb
+= msb
.m_in
.eq(Cat(0, 0, 0, mantissa
)) # g/r/s + input
117 comb
+= msb
.e_in
.eq(me
) # exp = int width
119 # to do with FP16... not yet resolved why
123 comb
+= z1
.e
.eq(msb
.e_out
-1)
124 mmsb
= msb
.m_out
[-mz
-1:]
126 # larger int to smaller FP (uint32/64 -> fp16 most likely)
127 comb
+= z1
.m
[ms
-1:].eq(mmsb
)
128 else: # 32? XXX weirdness...
129 comb
+= z1
.m
.eq(mmsb
)
131 # smaller int to larger FP
132 comb
+= z1
.e
.eq(msb
.e_out
)
133 comb
+= z1
.m
[ms
:].eq(msb
.m_out
[3:])
134 comb
+= z1
.create(z1
.s
, z1
.e
, z1
.m
) # ... here
136 # note: post-normalisation actually appears to be capable of
137 # detecting overflow to infinity (FPPackMod). so it's ok to
138 # drop the bits into the mantissa (with a fixed exponent),
139 # do some rounding (which might result in exceeding the
140 # range of the target FP by re-increasing the exponent),
141 # and basically *not* have to do any kind of range-checking
142 # here: just set up guard/round/sticky, drop the INT into the
143 # mantissa, and away we go. XXX TODO: see if FPNormaliseMod
144 # is even necessary. it probably isn't
146 # initialise rounding (but only activate if needed)
148 # larger int to smaller FP (uint32/64 -> fp16 most likely)
149 comb
+= self
.o
.of
.guard
.eq(msb
.m_out
[-mz
-2])
150 comb
+= self
.o
.of
.round_bit
.eq(msb
.m_out
[-mz
-3])
151 comb
+= self
.o
.of
.sticky
.eq(msb
.m_out
[:-mz
-3].bool())
152 comb
+= self
.o
.of
.m0
.eq(msb
.m_out
[-mz
-1])
154 # smaller int to larger FP
155 comb
+= self
.o
.of
.guard
.eq(msb
.m_out
[2])
156 comb
+= self
.o
.of
.round_bit
.eq(msb
.m_out
[1])
157 comb
+= self
.o
.of
.sticky
.eq(msb
.m_out
[:1].bool())
158 comb
+= self
.o
.of
.m0
.eq(msb
.m_out
[3])
160 # special cases active by default
161 comb
+= self
.o
.out_do_z
.eq(1)
164 with m
.If(~self
.i
.a
.bool()):
165 comb
+= self
.o
.z
.zero(0)
167 comb
+= self
.o
.out_do_z
.eq(0) # activate normalisation
169 # copy the context (muxid, operator)
170 comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
171 comb
+= self
.o
.ctx
.eq(self
.i
.ctx
)
176 class FPCVTUpConvertMod(Elaboratable
):
177 """ FP up-conversion (lower to higher bitwidth)
179 def __init__(self
, in_pspec
, out_pspec
):
180 self
.in_pspec
= in_pspec
181 self
.out_pspec
= out_pspec
182 self
.i
= self
.ispec()
183 self
.o
= self
.ospec()
186 return FPADDBaseData(self
.in_pspec
)
189 return FPAddStage1Data(self
.out_pspec
, e_extra
=False)
191 def setup(self
, m
, i
):
192 """ links module to inputs and outputs
194 m
.submodules
.upconvert
= self
197 def process(self
, i
):
200 def elaborate(self
, platform
):
204 #m.submodules.sc_out_z = self.o.z
206 # decode: XXX really should move to separate stage
207 print("in_width out", self
.in_pspec
.width
,
208 self
.out_pspec
.width
)
209 a1
= FPNumBaseRecord(self
.in_pspec
.width
, False)
210 print("a1", a1
.width
, a1
.rmw
, a1
.e_width
, a1
.e_start
, a1
.e_end
)
211 m
.submodules
.sc_decode_a
= a1
= FPNumDecode(None, a1
)
212 comb
+= a1
.v
.eq(self
.i
.a
)
214 print("z1", z1
.width
, z1
.rmw
, z1
.e_width
, z1
.e_start
, z1
.e_end
)
217 ms
= self
.o
.z
.rmw
- a1
.rmw
218 print("ms-me", ms
, me
, self
.o
.z
.rmw
, a1
.rmw
)
220 # conversion can mostly be done manually...
221 comb
+= self
.o
.z
.s
.eq(a1
.s
)
222 comb
+= self
.o
.z
.e
.eq(a1
.e
)
223 comb
+= self
.o
.z
.m
[ms
:].eq(a1
.m
)
224 comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, self
.o
.z
.m
) # ... here
226 # initialise rounding to all zeros (deactivate)
227 comb
+= self
.o
.of
.guard
.eq(0)
228 comb
+= self
.o
.of
.round_bit
.eq(0)
229 comb
+= self
.o
.of
.sticky
.eq(0)
230 comb
+= self
.o
.of
.m0
.eq(a1
.m
[0])
232 # most special cases active (except tiny-number normalisation, below)
233 comb
+= self
.o
.out_do_z
.eq(1)
235 # detect NaN/Inf first
236 with m
.If(a1
.exp_128
):
237 with m
.If(~a1
.m_zero
):
238 comb
+= self
.o
.z
.nan(0) # RISC-V wants normalised NaN
240 comb
+= self
.o
.z
.inf(a1
.s
) # RISC-V wants signed INF
242 with m
.If(a1
.exp_n127
):
243 with m
.If(~a1
.m_zero
):
244 comb
+= self
.o
.z
.m
[ms
:].eq(Cat(0, a1
.m
))
245 comb
+= self
.o
.out_do_z
.eq(0) # activate normalisation
247 # RISC-V zero needs actual zero
248 comb
+= self
.o
.z
.zero(a1
.s
)
250 # copy the context (muxid, operator)
251 comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
252 comb
+= self
.o
.ctx
.eq(self
.i
.ctx
)
257 class FPCVTDownConvertMod(Elaboratable
):
258 """ FP down-conversion (higher to lower bitwidth)
260 def __init__(self
, in_pspec
, out_pspec
):
261 self
.in_pspec
= in_pspec
262 self
.out_pspec
= out_pspec
263 self
.i
= self
.ispec()
264 self
.o
= self
.ospec()
267 return FPADDBaseData(self
.in_pspec
)
270 return FPAddStage1Data(self
.out_pspec
, e_extra
=True)
272 def setup(self
, m
, i
):
273 """ links module to inputs and outputs
275 m
.submodules
.downconvert
= self
278 def process(self
, i
):
281 def elaborate(self
, platform
):
285 #m.submodules.sc_out_z = self.o.z
287 # decode: XXX really should move to separate stage
288 print("in_width out", self
.in_pspec
.width
,
289 self
.out_pspec
.width
)
290 a1
= FPNumBaseRecord(self
.in_pspec
.width
, False)
291 print("a1", a1
.width
, a1
.rmw
, a1
.e_width
, a1
.e_start
, a1
.e_end
)
292 m
.submodules
.sc_decode_a
= a1
= FPNumDecode(None, a1
)
293 comb
+= a1
.v
.eq(self
.i
.a
)
295 print("z1", z1
.width
, z1
.rmw
, z1
.e_width
, z1
.e_start
, z1
.e_end
)
298 ms
= a1
.rmw
- self
.o
.z
.rmw
299 print("ms-me", ms
, me
)
302 exp_sub_n126
= Signal((a1
.e_width
, True), reset_less
=True)
303 exp_gt127
= Signal(reset_less
=True)
304 # constants from z1, at the bit-width of a1.
305 N126
= Const(z1
.fp
.N126
.value
, (a1
.e_width
, True))
306 P127
= Const(z1
.fp
.P127
.value
, (a1
.e_width
, True))
307 comb
+= exp_sub_n126
.eq(a1
.e
- N126
)
308 comb
+= exp_gt127
.eq(a1
.e
> P127
)
310 # if a zero, return zero (signed)
311 with m
.If(a1
.exp_n127
):
312 comb
+= self
.o
.z
.zero(a1
.s
)
313 comb
+= self
.o
.out_do_z
.eq(1)
315 # if a's exponent is below z's minimum exponent (N126)
316 with m
.Elif(exp_sub_n126
< 0):
317 comb
+= self
.o
.of
.guard
.eq(a1
.m
[ms
-1])
318 comb
+= self
.o
.of
.round_bit
.eq(a1
.m
[ms
-2])
319 comb
+= self
.o
.of
.sticky
.eq(a1
.m
[:ms
-2].bool())
320 comb
+= self
.o
.of
.m0
.eq(a1
.m
[ms
]) # bit of a1
322 comb
+= self
.o
.z
.s
.eq(a1
.s
)
323 comb
+= self
.o
.z
.e
.eq(a1
.e
)
324 comb
+= self
.o
.z
.m
.eq(a1
.m
[-self
.o
.z
.rmw
-1:])
325 comb
+= self
.o
.z
.m
[-1].eq(1)
327 # if a is inf return inf
328 with m
.Elif(a1
.is_inf
):
329 comb
+= self
.o
.z
.inf(a1
.s
)
330 comb
+= self
.o
.out_do_z
.eq(1)
332 # if a is NaN return NaN
333 with m
.Elif(a1
.is_nan
):
334 comb
+= self
.o
.z
.nan(0)
335 comb
+= self
.o
.out_do_z
.eq(1)
337 # if a's exponent is greater than z's maximum exponent (P127), return inf
338 with m
.Elif(exp_gt127
):
339 print("inf", self
.o
.z
.inf(a1
.s
))
340 comb
+= self
.o
.z
.inf(a1
.s
)
341 comb
+= self
.o
.out_do_z
.eq(1)
343 # ok after all that, anything else should fit fine (whew)
345 comb
+= self
.o
.of
.guard
.eq(a1
.m
[ms
-1])
346 comb
+= self
.o
.of
.round_bit
.eq(a1
.m
[ms
-2])
347 comb
+= self
.o
.of
.sticky
.eq(a1
.m
[:ms
-2].bool())
348 comb
+= self
.o
.of
.m0
.eq(a1
.m
[ms
]) # bit of a1
350 # XXX TODO: this is basically duplicating FPRoundMod. hmmm...
351 print("alen", a1
.e_start
, z1
.fp
.N126
, N126
)
352 print("m1", self
.o
.z
.rmw
, a1
.m
[-self
.o
.z
.rmw
-1:])
353 mo
= Signal(self
.o
.z
.m_width
-1)
354 comb
+= mo
.eq(a1
.m
[ms
:me
])
355 with m
.If(self
.o
.of
.roundz
):
356 with m
.If((~mo
== 0)): # all 1s
357 comb
+= self
.o
.z
.create(a1
.s
, a1
.e
+1, mo
+1)
359 comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, mo
+1)
361 comb
+= self
.o
.z
.create(a1
.s
, a1
.e
, a1
.m
[-self
.o
.z
.rmw
-1:])
362 comb
+= self
.o
.out_do_z
.eq(1)
364 # copy the context (muxid, operator)
365 comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
366 comb
+= self
.o
.ctx
.eq(self
.i
.ctx
)
class FPCVTConvertDeNorm(FPState, SimpleHandshake):
    """ FP conversion and de-normalisation stage.

        Instantiates the conversion module class ``modkls`` with the
        input and output pipeline specs and hands the instance to
        SimpleHandshake.  The FPState side is initialised with the
        name "cvt".
    """

    def __init__(self, modkls, in_pspec, out_pspec):
        # the two bases take different constructor arguments, so each is
        # initialised explicitly rather than through super()
        FPState.__init__(self, "cvt")
        SimpleHandshake.__init__(self, modkls(in_pspec, out_pspec))
        self.out = self.ospec(None)
class FPCVTFtoIntBasePipe(ControlBase):
    """ Conversion pipeline with a single stage (FPCVTConvertDeNorm).

        Unlike FPCVTBasePipe there is no FPNormToPack stage: only
        ``pipe1`` is created and connected.
    """

    def __init__(self, modkls, e_extra, in_pspec, out_pspec):
        """ modkls:    conversion module class, passed to FPCVTConvertDeNorm
            e_extra:   unused here (there is no FPNormToPack stage to
                       receive it); kept so that the signature matches
                       FPCVTBasePipe
            in_pspec:  input pipeline spec
            out_pspec: output pipeline spec
        """
        ControlBase.__init__(self)
        self.pipe1 = FPCVTConvertDeNorm(modkls, in_pspec, out_pspec)

        self._eqs = self.connect([self.pipe1])

    def elaborate(self, platform):
        """ Adds the stage as a submodule and applies the connection
            assignments created in __init__.  Returns the module.
        """
        m = ControlBase.elaborate(self, platform)
        m.submodules.down = self.pipe1
        m.d.comb += self._eqs
        # elaborate() must hand the populated module back to the caller
        return m
class FPCVTBasePipe(ControlBase):
    """ Two-stage conversion pipeline: FPCVTConvertDeNorm followed by
        FPNormToPack.
    """

    def __init__(self, modkls, e_extra, in_pspec, out_pspec):
        """ modkls:    conversion module class, passed to FPCVTConvertDeNorm
            e_extra:   forwarded to FPNormToPack as its e_extra argument
            in_pspec:  input pipeline spec (first stage)
            out_pspec: output pipeline spec (both stages)
        """
        ControlBase.__init__(self)
        self.pipe1 = FPCVTConvertDeNorm(modkls, in_pspec, out_pspec)
        self.pipe2 = FPNormToPack(out_pspec, e_extra=e_extra)

        self._eqs = self.connect([self.pipe1, self.pipe2])

    def elaborate(self, platform):
        """ Adds both stages as submodules and applies the connection
            assignments created in __init__.  Returns the module.
        """
        m = ControlBase.elaborate(self, platform)
        m.submodules.down = self.pipe1
        m.submodules.normpack = self.pipe2
        m.d.comb += self._eqs
        # elaborate() must hand the populated module back to the caller
        return m
class FPCVTMuxInOutBase(ReservationStations):
    """ Reservation-Station version of FPCVT pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * conversion pipeline (pkls: FPCVTBasePipe unless overridden)
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, modkls, e_extra, in_width, out_width,
                       num_rows, op_wid=0, pkls=FPCVTBasePipe):
        """ modkls:    conversion module class handed to the pipeline
            e_extra:   forwarded to the pipeline class
            in_width:  input bit-width
            out_width: output bit-width
            num_rows:  passed to ReservationStations.__init__
            op_wid:    operator width for the pipeline specs
            pkls:      pipeline class to instantiate as self.alu
        """
        # must be set before in_pspec is built: it is read just below
        self.op_wid = op_wid
        self.id_wid = num_bits(in_width)
        self.out_id_wid = num_bits(out_width)

        self.in_pspec = PipelineSpec(in_width, self.id_wid, self.op_wid)
        self.out_pspec = PipelineSpec(out_width, self.out_id_wid, op_wid)

        self.alu = pkls(modkls, e_extra, self.in_pspec, self.out_pspec)
        ReservationStations.__init__(self, num_rows)

    # NOTE(review): the `def` lines of the next two methods are not present
    # in this extract.  the names i_specfn / o_specfn are assumed to be the
    # ones ReservationStations expects - confirm against nmutil.
    def i_specfn(self):
        return FPADDBaseData(self.in_pspec)

    def o_specfn(self):
        return FPPackData(self.out_pspec)
def getkls(*args, **kwargs):
    """ Prints the arguments received, then builds and returns an
        FPCVTMuxInOutBase from exactly those arguments.
    """
    print("getkls", args, kwargs)
    muxinout = FPCVTMuxInOutBase(*args, **kwargs)
    return muxinout
# factory which creates near-identical class structures that differ by
# the module and the e_extra argument.  at some point it would be good
# to merge these into a single dynamic "thing" that takes an operator.
# however, the difference(s) in the bitwidths makes that a little less
# straightforward.

# imported here because no import of these two modules is visible in the
# file header; a repeated import is harmless.
import functools
import sys

# (exported name, conversion module class, e_extra)
muxfactoryinput = [
    ("FPCVTDownMuxInOut", FPCVTDownConvertMod, True),
    ("FPCVTUpMuxInOut", FPCVTUpConvertMod, False),
    ("FPCVTIntMuxInOut", FPCVTIntToFloatMod, True),
]

# publish each entry as a module-level callable: calling e.g.
# FPCVTUpMuxInOut(in_width, out_width, num_rows) invokes
# getkls(FPCVTUpConvertMod, False, in_width, out_width, num_rows)
for (name, kls, e_extra) in muxfactoryinput:
    fn = functools.partial(getkls, kls, e_extra)
    setattr(sys.modules[__name__], name, fn)
class FPCVTF2IntMuxInOut(FPCVTMuxInOutBase):
    """ Reservation-Station version of the FP-to-INT conversion pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * single-stage conversion pipeline (FPCVTFtoIntBasePipe running
          FPCVTFloatToIntMod)
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, in_width, out_width, num_rows, op_wid=0):
        """ in_width:  input bit-width
            out_width: output bit-width
            num_rows:  number of rows, passed on to the base class
            op_wid:    operator width, passed on to the base class
        """
        # the base constructor requires in_width, out_width and num_rows
        # positionally: forward this constructor's arguments to it.
        # e_extra is fixed at False.
        FPCVTMuxInOutBase.__init__(self, FPCVTFloatToIntMod, False,
                                   in_width, out_width,
                                   num_rows, op_wid,
                                   pkls=FPCVTFtoIntBasePipe)