1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
9 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
10 from fpbase
import MultiShiftRMerge
, Trigger
11 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """ Base class for one named state of the adder state machine.

        Holds the state name and lets callers attach dictionaries of
        named inputs/outputs, each entry also exposed as an attribute.
    """

    def __init__(self, state_from):
        # name of the state this object implements
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ records inputs (a dict) and mirrors each entry as an attribute
        """
        # NOTE(review): the body of this method was missing from the
        # listing; restored to mirror set_outputs - confirm.
        self.inputs = inputs
        for k, v in inputs.items():
            setattr(self, k, v)

    def set_outputs(self, outputs):
        """ records outputs (a dict) and mirrors each entry as an attribute
        """
        self.outputs = outputs
        # NOTE(review): loop body was missing from the listing; setattr
        # assumed - confirm.
        for k, v in outputs.items():
            setattr(self, k, v)
class FPGetOpMod:
    """ Module that passes an incoming operand through once it is valid.

        out_decode is the AND of the input operand's ack and stb; while
        it is set, out_op is driven from in_op.v.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
        m.submodules.get_op_in = self.in_op
        #m.submodules.get_op_out = self.out_op
        with m.If(self.out_decode):
            # NOTE(review): the assignment header was missing from the
            # listing; comb domain assumed - confirm.
            m.d.comb += [
                self.out_op.eq(self.in_op.v),
            ]
        return m
class FPGetOp(FPState):
    """ State that fetches a single operand via an FPGetOpMod.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        # kept so that action() can drive the operand's ack line
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs
        """
        # the submodule is registered under this state's own name
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += self.mod.in_op.copy(in_op)
        #m.d.comb += self.out_op.eq(self.mod.out_op)
        m.d.comb += self.out_decode.eq(self.mod.out_decode)

    def action(self, m):
        """ moves to out_state once the operand has been decoded
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            # NOTE(review): the ack.eq(0) entry was missing from the
            # listing; restored as the counterpart of the ack.eq(1)
            # below - confirm.
            m.d.sync += [
                self.in_op.ack.eq(0),
                self.out_op.eq(self.mod.out_op)
            ]
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
class FPGet2OpMod(Trigger):
    """ Trigger-based module that decodes two raw operands at once.

        While the inherited trigger signal is set, in_op1/in_op2 are
        decoded into out_op1/out_op2.
    """

    def __init__(self, width):
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        # start from the Module built by the Trigger base class
        m = Trigger.elaborate(self, platform)
        #m.submodules.get_op_in = self.in_op
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2
        with m.If(self.trigger):
            m.d.comb += [
                self.out_op1.decode(self.in_op1),
                self.out_op2.decode(self.in_op2),
            ]
        return m
class FPGet2Op(FPState):
    """ State that fetches both operands via an FPGet2OpMod.
    """

    def __init__(self, in_state, out_state, in_op1, in_op2, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width)
        # NOTE(review): two lines were missing from the listing here;
        # storing the operands is assumed - confirm.
        self.in_op1 = in_op1
        self.in_op2 = in_op2
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op1, in_op2, in_stb, in_ack):
        """ links module to inputs and outputs
        """
        m.submodules.get_ops = self.mod
        m.d.comb += self.mod.in_op1.eq(in_op1)
        m.d.comb += self.mod.in_op2.eq(in_op2)
        m.d.comb += self.mod.stb.eq(in_stb)
        m.d.comb += self.out_ack.eq(self.mod.ack)
        m.d.comb += self.out_decode.eq(self.mod.trigger)
        # the module's ack is also reflected back to the caller
        m.d.comb += in_ack.eq(self.mod.ack)

    def action(self, m):
        """ moves to out_state once both operands have been decoded
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            # NOTE(review): the ack.eq(0) entry was missing from the
            # listing; restored as the counterpart of the ack.eq(1)
            # below - confirm.
            m.d.sync += [
                self.mod.ack.eq(0),
                #self.out_op1.v.eq(self.mod.out_op1.v),
                #self.out_op2.v.eq(self.mod.out_op2.v),
                self.out_op1.copy(self.mod.out_op1),
                self.out_op2.copy(self.mod.out_op2)
            ]
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        out_do_z is set whenever one of the special cases matched, in
        which case out_z carries the result.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_do_z):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)
        m.d.comb += out_do_z.eq(self.out_do_z)

    def elaborate(self, platform):
        m = Module()

        m.submodules.sc_in_a = self.in_a
        m.submodules.sc_in_b = self.in_b
        m.submodules.sc_out_z = self.out_z

        # NOTE(review): the declarations of s_nomatch and m_match were
        # missing from the listing; plain 1-bit Signals assumed.
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(self.in_a.s != self.in_b.s)

        m_match = Signal()
        m.d.comb += m_match.eq(self.in_a.m == self.in_b.m)

        # if a is NaN or b is NaN return NaN
        with m.If(self.in_a.is_nan | self.in_b.is_nan):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(self.in_a.is_inf):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.inf(self.in_a.s)
            # if a is inf and signs don't match return NaN
            with m.If(self.in_b.exp_128 & s_nomatch):
                m.d.comb += self.out_z.nan(0)

        # if b is inf return inf
        with m.Elif(self.in_b.is_inf):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.inf(self.in_b.s)

        # NOTE(review): the trailing arguments of the three create()
        # calls below were cut off in the listing; the exponent and
        # mantissa slice (m[3:-1]) are reconstructed - confirm.

        # if a is zero and b zero return signed-a/b
        with m.Elif(self.in_a.is_zero & self.in_b.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_a.s & self.in_b.s,
                                          self.in_b.e,
                                          self.in_b.m[3:-1])

        # if a is zero return b
        with m.Elif(self.in_a.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_b.s, self.in_b.e,
                                          self.in_b.m[3:-1])

        # if b is zero return a
        with m.Elif(self.in_b.is_zero):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.create(self.in_a.s, self.in_a.e,
                                          self.in_a.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (self.in_a.e == self.in_b.e)):
            m.d.comb += self.out_do_z.eq(1)
            m.d.comb += self.out_z.zero(0)

        # Denormalised Number checks
        with m.Else():
            m.d.comb += self.out_do_z.eq(0)

        return m
class FPID:
    """ Mixin carrying an optional operation ID through a stage.

        When id_wid is falsy (None or 0) no ID signals are created and
        in_mid/out_mid are None; stages test "in_mid is not None"
        before wiring the ID up.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        # NOTE(review): the condition and the else-branch were missing
        # from the listing; restored so in_mid/out_mid always exist.
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ copies incoming ID to outgoing (sync domain), if IDs are in use
        """
        # guard on the signal itself rather than on id_wid, so a width
        # of 0 (no signals created) cannot dereference None
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
class FPAddSpecialCases(FPState, FPID):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b, self.out_do_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ takes the special-case result if one matched, else carries on
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v) # only take the output
            # NOTE(review): this transition was missing from the
            # listing; "put_z" is assumed - confirm the state name.
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
class FPAddSpecialCasesDeNorm(FPState, FPID):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        This variant runs the special-case and denormalise modules from
        the same inputs within a single state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.smod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

        self.dmod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.smod.setup(m, in_a, in_b, self.out_do_z)
        self.dmod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ takes the special-case result if one matched, otherwise
            registers the denormalised operands
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.d.sync += self.out_z.v.eq(self.smod.out_z.v) # only take output
            # NOTE(review): both transitions were missing from the
            # listing; "put_z" and "align" are assumed - confirm.
            m.next = "put_z"
        with m.Else():
            m.next = "align"
            m.d.sync += self.out_a.copy(self.dmod.out_a)
            m.d.sync += self.out_b.copy(self.dmod.out_b)
class FPAddDeNormMod(FPState):
    """ Denormalised-number handling for both operands.

        Each operand is copied through; when its exp_n127 flag is set
        the exponent is replaced by N126, otherwise the top mantissa
        bit is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b
        # hmmm, don't like repeating identical code
        m.d.comb += self.out_a.copy(self.in_a)
        with m.If(self.in_a.exp_n127):
            m.d.comb += self.out_a.e.eq(self.in_a.N126) # limit a exponent
        with m.Else():
            m.d.comb += self.out_a.m[-1].eq(1) # set top mantissa bit

        m.d.comb += self.out_b.copy(self.in_b)
        with m.If(self.in_b.exp_n127):
            m.d.comb += self.out_b.e.eq(self.in_b.N126) # limit b exponent
        with m.Else():
            m.d.comb += self.out_b.m[-1].eq(1) # set top mantissa bit

        return m
class FPAddDeNorm(FPState, FPID):
    """ "denormalise" state: registers the outputs of FPAddDeNormMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        FPID.__init__(self, id_wid)
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # Denormalised Number checks
        # NOTE(review): the transition was missing from the listing;
        # "align" is assumed - confirm.
        m.next = "align"
        m.d.sync += self.out_a.copy(self.mod.out_a)
        m.d.sync += self.out_b.copy(self.mod.out_b)
class FPAddAlignMultiMod(FPState):
    """ Alignment step: shifts down whichever operand has the smaller
        exponent, and raises exp_eq when the exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: exponents differ, operands pass through unchanged
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
class FPAddAlignMulti(FPState, FPID):
    """ "align" state (multi-cycle): registers the alignment module's
        outputs every cycle and leaves the state once exp_eq is set.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += self.mod.in_a.copy(in_a)
        m.d.comb += self.mod.in_b.copy(in_b)
        #m.d.comb += self.out_a.copy(self.mod.out_a)
        #m.d.comb += self.out_b.copy(self.mod.out_b)
        m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += self.out_a.copy(self.mod.out_a)
        m.d.sync += self.out_b.copy(self.mod.out_b)
        with m.If(self.exp_eq):
            # NOTE(review): the transition was missing from the
            # listing; "add_0" is assumed - confirm.
            m.next = "add_0"
class FPAddAlignSingleMod:
    """ Single-cycle alignment of the two operands' exponents.
    """

    def __init__(self, width):
        # needed by elaborate() to size the temporaries
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.in_a.e), True)
        msr = MultiShiftRMerge(self.in_a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        # exponent difference in both directions, plus which is larger
        m.d.comb += ediff.eq(self.in_a.e - self.in_b.e)
        m.d.comb += ediffr.eq(self.in_b.e - self.in_a.e)
        m.d.comb += elz.eq(self.in_a.e < self.in_b.e)
        m.d.comb += egz.eq(self.in_a.e > self.in_b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.out_a.copy(self.in_a)
        m.d.comb += self.out_b.copy(self.in_b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # NOTE(review): the branch headers and the tdiff assignments
        # were missing from the listing; restored from the comments and
        # the otherwise-unused ediff/ediffr/egz/elz signals - confirm.
        # exponent of a greater than b: shift b down
        with m.If(egz):
            m.d.comb += [t_inp.copy(self.in_b),
                         tdiff.eq(ediff),
                         self.out_b.copy(t_out),
                         self.out_b.s.eq(self.in_b.s), # whoops forgot sign
                        ]
        # exponent of b greater than a: shift a down
        with m.Elif(elz):
            m.d.comb += [t_inp.copy(self.in_a),
                         tdiff.eq(ediffr),
                         self.out_a.copy(t_out),
                         self.out_a.s.eq(self.in_a.s), # whoops forgot sign
                        ]
        return m
class FPAddAlignSingle(FPState, FPID):
    """ "align" state (single-cycle): registers the outputs of
        FPAddAlignSingleMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # NOTE: could be done as comb
        m.d.sync += self.out_a.copy(self.mod.out_a)
        m.d.sync += self.out_b.copy(self.mod.out_b)
        # NOTE(review): the transition was missing from the listing;
        # "add_0" is assumed - confirm.
        m.next = "add_0"
class FPAddAlignSingleAdd(FPState, FPID):
    """ "align" state that chains align, add-stage-0 and add-stage-1
        combinatorially and registers only the final result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

        self.a0mod = FPAddStage0Mod(width)
        # (a second, identical construction of a0_out_z was removed)
        self.a0_out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)

        self.a1mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        # align
        self.mod.setup(m, in_a, in_b)
        m.d.comb += self.out_a.copy(self.mod.out_a)
        m.d.comb += self.out_b.copy(self.mod.out_b)

        # add stage 0, fed from the aligned operands
        self.a0mod.setup(m, self.out_a, self.out_b)
        m.d.comb += self.a0_out_z.copy(self.a0mod.out_z)
        m.d.comb += self.out_tot.eq(self.a0mod.out_tot)

        # add stage 1, fed from stage 0
        self.a1mod.setup(m, self.out_tot, self.a0_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += self.out_of.copy(self.a1mod.out_of)
        m.d.sync += self.out_z.copy(self.a1mod.out_z)
        m.next = "normalise_1"
class FPAddStage0Mod:
    """ First stage of add: adds or subtracts the two mantissas into
        out_tot and selects the result sign.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.in_a.copy(in_a)
        m.d.comb += self.in_b.copy(in_b)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        # result exponent is taken from a
        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.in_a.s == self.in_b.s),
                     mge.eq(self.in_a.m >= self.in_b.m),
                     am0.eq(Cat(self.in_a.m, 0)),
                     bm0.eq(Cat(self.in_b.m, 0))
                    ]
        # NOTE(review): the three branch headers were missing from the
        # listing; restored from the comments and the seq/mge tests.
        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            m.d.comb += [
                self.out_tot.eq(am0 + bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # a mantissa greater than b, use a
        with m.Elif(mge):
            m.d.comb += [
                self.out_tot.eq(am0 - bm0),
                self.out_z.s.eq(self.in_a.s)
            ]
        # b mantissa greater than a, use b
        with m.Else():
            m.d.comb += [
                self.out_tot.eq(bm0 - am0),
                self.out_z.s.eq(self.in_b.s)
            ]
        return m
class FPAddStage0(FPState, FPID):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.d.sync += self.out_tot.eq(self.mod.out_tot)
        # NOTE(review): the transition was missing from the listing;
        # "add_1" is assumed - confirm.
        m.next = "add_1"
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_tot, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.out_of

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_tot.eq(in_tot)

    def elaborate(self, platform):
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        # tot[27] gets set when the sum overflows. shift result down
        with m.If(self.in_tot[-1]):
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[4:]),
                self.out_of.m0.eq(self.in_tot[4]),
                self.out_of.guard.eq(self.in_tot[3]),
                self.out_of.round_bit.eq(self.in_tot[2]),
                self.out_of.sticky.eq(self.in_tot[1] | self.in_tot[0]),
                self.out_z.e.eq(self.in_z.e + 1)
            ]
        # top bit clear: no overflow, take the bits one position lower
        with m.Else():
            m.d.comb += [
                self.out_z.m.eq(self.in_tot[3:]),
                self.out_of.m0.eq(self.in_tot[3]),
                self.out_of.guard.eq(self.in_tot[2]),
                self.out_of.round_bit.eq(self.in_tot[1]),
                self.out_of.sticky.eq(self.in_tot[0])
            ]
        return m
class FPAddStage1(FPState, FPID):
    """ "add_1" state: registers the outputs of FPAddStage1Mod and
        strobes norm_stb for the normalisation state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_tot, in_z)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += self.out_of.copy(self.mod.out_of)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.d.sync += self.norm_stb.eq(1)
        m.next = "normalise_1"
class FPNorm1ModSingle:
    """ Single-cycle normalisation: shifts the mantissa up (using a
        PriorityEncoder to count leading zeros) or down (using a
        MultiShiftRMerge) and adjusts the exponent to match.
    """

    def __init__(self, width):
        # needed by elaborate() to size the internal copy of in_z
        self.width = width
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, out_z):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += out_z.copy(self.out_z)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.copy(self.in_z)
        m.d.comb += in_of.copy(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            # NOTE(review): the final Mux argument was cut off in the
            # listing; exp_sub_n126 assumed (i.e. a minimum) - confirm.
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),     # guard
                self.out_of.round_bit.eq(temp_s[0]), # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            # NOTE(review): the end of the Cat() and the msr.inp
            # connection were missing from the listing - confirm.
            # NOTE(review): temp_s is only driven in the decrease
            # branch above, so the four temp_s reads below see an
            # undriven signal here; msr.m[3]..msr.m[0] look like the
            # intended sources.  Left unchanged pending confirmation.
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                self.out_of.m0.eq(temp_s[3]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[2]),     # guard
                self.out_of.round_bit.eq(temp_s[1]), # round
                self.out_of.sticky.eq(temp_s[0]),    # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
class FPNorm1ModMulti:
    """ Multi-cycle normalisation: each evaluation moves the mantissa
        by one bit (up or down) and adjusts the exponent by one.
        out_norm stays set while further normalisation is required.
    """

    def __init__(self, width, single_cycle=True):
        # needed by elaborate() to size the internal copy of in_z
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState, FPID):
    """ "normalise_1" state (single-cycle), built on FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        # the module drives self.out_z directly (combinatorially)
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
        # NOTE(review): the transition was missing from the listing;
        # "round" is assumed - confirm.
        m.next = "round"
class FPNorm1Multi(FPState, FPID):
    """ "normalise_1" state (multi-cycle), built on FPNorm1ModMulti.
        Stays in this state while out_norm is set.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti as listed in this file defines
        # no setup() method, so this call cannot succeed as written -
        # confirm whether this class is still in use.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.copy(self.mod.out_of)
        m.d.sync += self.temp_z.copy(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                # NOTE(review): this statement was missing from the
                # listing; ack.eq(1) is assumed - confirm.
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            # NOTE(review): the transition was missing from the
            # listing; "round" is assumed - confirm.
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """ "normalise_1" state that chains normalisation, rounding,
        corrections and packing combinatorially, registering only the
        packed result.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # needed by setup() to size every chained module
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width)
        n_out_z = FPNumBase(self.width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width)
        r_out_z = FPNumBase(self.width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(nmod.out_of.roundz)
        m.d.comb += r_out_z.copy(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width)
        c_out_z = FPNumBase(self.width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width)
        self.out_z = FPNumBase(self.width)
        self.pmod.setup(m, c_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m) # copies incoming ID to outgoing
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v) # outputs packed result
        m.next = "pack_put_z"
class FPRoundMod:
    """ Rounding: when in_roundz is set the mantissa is incremented,
        and the exponent too if the mantissa was all ones.
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_roundz.eq(roundz)

    def elaborate(self, platform):
        m = Module()
        # default: pass the input through unchanged
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_roundz):
            m.d.comb += self.out_z.m.eq(self.in_z.m + 1) # mantissa rounds up
            with m.If(self.in_z.m == self.in_z.m1s): # all 1s
                m.d.comb += self.out_z.e.eq(self.in_z.e + 1) # exponent up
        return m
class FPRound(FPState, FPID):
    """ "round" state: registers the output of FPRoundMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        # NOTE(review): one line was missing from the listing here;
        # the idsync call used by the other states is assumed.
        self.idsync(m)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ combinatorial exponent-correction module
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ creates the correction logic and returns its Module

            out_z is a copy of in_z, except that the exponent is replaced
            by in_z.N127 whenever in_z.is_denormalised is set.
        """
        # "m" has to be created locally and returned to the elaborator
        m = Module()
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)

        return m
class FPCorrections(FPState, FPID):
    """ FSM stage "corrections": wraps FPCorrectionsMod
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ latches the corrected number and advances to "pack"
        """
        # forward the ID like the other stage actions in this file do
        self.idsync(m)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        # without an explicit transition the FSM would never leave this
        # state; "pack" is the state name FPPack registers itself under
        m.next = "pack"
def __init__(self, width):
    """ pack module: takes a number in in_z, produces one in out_z

        * width: passed straight through to FPNumOut for both numbers
    """
    self.out_z = FPNumOut(width, False)
    self.in_z = FPNumOut(width, False)
def setup(self, m, in_z):
    """ registers this module as submodule "pack" and connects its input
    """
    setattr(m.submodules, "pack", self)
    connect_input = self.in_z.copy(in_z)
    m.d.comb += connect_input
def elaborate(self, platform):
    """ creates the packing logic and returns its Module

        When in_z.is_overflowed is set, out_z is set up through inf()
        using the sign of in_z; otherwise it is set up through create()
        from in_z's sign, exponent and mantissa.
    """
    # "m" has to be created locally and returned to the elaborator
    m = Module()
    m.submodules.pack_in_z = self.in_z
    with m.If(self.in_z.is_overflowed):
        m.d.comb += self.out_z.inf(self.in_z.s)
    # the two assignments are alternatives: without the Else the later
    # create() assignment would always override the inf() one
    with m.Else():
        m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e,
                                      self.in_z.m)

    return m
class FPPack(FPState, FPID):
    """ FSM stage "pack": wraps FPPackMod and registers the packed value
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ latches the packed value and advances to "pack_put_z"
        """
        # forward the ID like the other stage actions in this file do
        self.idsync(m)
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ output stage: presents in_z on out_z using a stb/ack handshake
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of the FSM state this stage runs in
            * in_z: source of the value (its .v is read)
            * out_z: destination with v, stb and ack
            * in_mid, out_mid: ID in and out; in_mid may be None
            * to_state: state entered after the handshake
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these, so they must be kept
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ drives value (and ID), then waits for stb and ack together
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # the assignment only has an effect once added to a domain
        m.d.sync += [
          self.out_z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.stb & self.out_z.ack):
            # accepted: drop the strobe and leave this state
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        # raising the strobe must be the alternative branch, otherwise
        # this later assignment always wins and stb can never be cleared
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
class FPPutZIdx(FPState):
    """ output stage: as FPPutZ, but the destination is picked from the
        out_zs array by in_mid
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of the FSM state this stage runs in
            * in_z: source of the value (its .v is read)
            * out_zs: indexable collection of destinations (v, stb, ack)
            * in_mid: index selecting the destination
            * to_state: state entered after the handshake
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads in_z, so it must be kept
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ drives the selected output, then waits for its stb and ack
        """
        target = self.out_zs[self.in_mid]

        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(target.stb),
                     outz_ack.eq(target.ack),
                    ]
        # the assignment only has an effect once added to a domain
        m.d.sync += [
          target.v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # accepted: drop the strobe and leave this state
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        # raising the strobe must be the alternative branch, otherwise
        # this later assignment always wins and stb can never be cleared
        with m.Else():
            m.d.sync += target.stb.eq(1)
class FPADDBaseMod(FPID):
    """ adder core: a list of FPState stages run by a single FSM
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        # read back by both fragment builders
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        # filled by add_state(), walked by get_fragment()
        self.states = []

    def add_state(self, state):
        """ records a stage and hands it back so calls can be chained
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        # exactly one pipeline is built; building both would register
        # the same state names twice
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ one FSM state per arithmetic step
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): assumes FPGet2Op exposes its decoded operands as
        # out_op1/out_op2 -- confirm against its definition
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle and multi-cycle are alternatives
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: again one of two alternatives
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of,
                        add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output state reached through the full pipeline
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        # output state fed directly from the special-cases stage
        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ reduced number of FSM states: several steps share one state
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): assumes FPGet2Op exposes its decoded operands as
        # out_op1/out_op2 -- confirm against its definition
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        # output state reached through the full pipeline
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                                    n1.in_mid, self.out_mid))

        # output state fed directly from the special-cases stage
        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    sc.in_mid, self.out_mid))
class FPADDBase(FPState, FPID):
    """ FSM stage "fpadd": wraps an FPADDBaseMod and bridges its
        stb/ack handshakes to the enclosing state machine
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the wrapped adder to operands, IDs and the result port

            * a, b: operand values
            * add_stb: strobe announcing that a and b are valid
            * in_mid, out_mid: ID in and out
            * out_z: result port (v, stb, ack) driven from the adder
        """
        # kept on self: the assignments below and action() use them
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ starts the wrapped adder, then moves to "put_z" once done
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge and hand over to the output state.
            # Value, ID and strobe are already forwarded combinatorially
            # in setup(), so they are deliberately not written here as
            # well -- the same signal cannot be driven from two domains.
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0),
                        ]
            m.next = "put_z"
def __init__(self, width, id_wid, rs_sz=2):
    """ array of result ports plus one shared input

        * width: bit-width handed to every FPOp
        * id_wid: bit-width of in_mid
        * rs_sz: number of result ports to create (was read without
          ever being defined; now a parameter with a default)
    """
    self.width = width
    self.id_wid = id_wid
    res = []
    for i in range(rs_sz):
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)
    self.in_z = FPOp(width)
    self.in_mid = Signal(self.id_wid, reset_less=True)
def setup(self, m, in_z, in_mid):
    """ combinatorially connects the incoming number and ID
    """
    m.d.comb += self.in_z.copy(in_z)
    m.d.comb += self.in_mid.eq(in_mid)
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    # "m" has to be created locally and returned to the caller
    m = Module()
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res

    return m
""" FPADD: stages as follows:

    FPGetOp (a)
       |
    FPGetOp (b)
       |
    FPAddBase---> FPAddBaseMod
       |            |
    PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

    FPAddBase is tricky: it is both a stage and *has* stages.
    Connection to FPAddBaseMod therefore requires an in stb/ack
    and an out stb/ack.  Just as with the Add1-Norm1 interaction, FPGetOp
    needs to be the thing that raises the incoming stb.
"""
def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * rs_sz: number of operand-pair / result slots to create
    """
    # read back by get_fragment()
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle

    self.ids = FPID(id_wid)

    # operand pairs: one (in_a, in_b) tuple per slot
    rs = []
    for i in range(rs_sz):
        in_a = FPOp(width)
        in_b = FPOp(width)
        in_a.name = "in_a_%d" % i
        in_b.name = "in_b_%d" % i
        rs.append((in_a, in_b))
    self.rs = Array(rs)

    # results: one port per slot
    res = []
    for i in range(rs_sz):
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)

    # filled by add_state(), walked by get_fragment()
    self.states = []
def add_state(self, state):
    """ records a stage and hands it back

        Callers write "x = self.add_state(...)" and then use x, so the
        stage itself must be returned rather than None.
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        States, in order: "get_a" -> "get_b" -> "fpadd" -> "put_z",
        after which the machine returns to "get_a".
    """
    m = Module()
    m.submodules += self.rs

    # only the first operand slot is connected here
    in_a = self.rs[0][0]
    in_b = self.rs[0][1]

    out_z = FPOp(self.width)
    out_mid = Signal(self.id_wid, reset_less=True)
    m.submodules.out_z = out_z

    geta = self.add_state(FPGetOp("get_a", "get_b",
                                  in_a, self.width))
    geta.setup(m, in_a)
    a = geta.out_op

    getb = self.add_state(FPGetOp("get_b", "fpadd",
                                  in_b, self.width))
    getb.setup(m, in_b)
    b = getb.out_op

    ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
    ab = self.add_state(ab)
    ab.setup(m, a, b, getb.out_decode, self.ids.in_mid,
             out_z, out_mid)

    # the default return state "get_ops" does not exist in this FSM,
    # so the output stage is sent back to "get_a" explicitly
    pz = self.add_state(FPPutZIdx("put_z", ab.out_z, self.res,
                                  out_mid, "get_a"))

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
if __name__ == "__main__":
    # Two designs can be handed to the nmigen command-line driver, but
    # only one per run.  The FPADDBase variant reads attributes that are
    # only created by its setup(), so it is kept as a manual alternative.
    build_full_adder = True

    if build_full_adder:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() + \
                        alu.rs[0][1].ports() + \
                        alu.res[0].ports() + \
                        [alu.ids.in_mid, alu.ids.out_mid])
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                        alu.in_t.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])
1582 # works... but don't use, just do "python fname.py convert -t v"
1583 #print (verilog.convert(alu, ports=[
1584 # ports=alu.in_a.ports() + \
1585 # alu.in_b.ports() + \
1586 # alu.out_z.ports())