1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
16 def __init__(self
, state_from
):
17 self
.state_from
= state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
139 def __init__(self
, width
):
140 self
.in_op
= FPOp(width
)
141 self
.out_op
= Signal(width
)
142 self
.out_decode
= Signal(reset_less
=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
212 self
.o
.mid
.eq(self
.i
.mid
)
217 class FPGet2Op(FPState
):
221 def __init__(self
, in_state
, out_state
, width
, id_wid
):
222 FPState
.__init
__(self
, in_state
)
223 self
.out_state
= out_state
224 self
.mod
= FPGet2OpMod(width
, id_wid
)
225 self
.o
= self
.mod
.ospec()
226 self
.in_stb
= Signal(reset_less
=True)
227 self
.out_ack
= Signal(reset_less
=True)
228 self
.out_decode
= Signal(reset_less
=True)
230 def setup(self
, m
, i
, in_stb
, in_ack
):
231 """ links module to inputs and outputs
233 m
.submodules
.get_ops
= self
.mod
234 m
.d
.comb
+= self
.mod
.i
.eq(i
)
235 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
236 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
237 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
238 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
241 with m
.If(self
.out_decode
):
242 m
.next
= self
.out_state
245 self
.o
.eq(self
.mod
.o
),
248 m
.d
.sync
+= self
.mod
.ack
.eq(1)
253 def __init__(self
, width
, id_wid
, m_extra
=True):
254 self
.a
= FPNumBase(width
, m_extra
)
255 self
.b
= FPNumBase(width
, m_extra
)
256 self
.mid
= Signal(id_wid
, reset_less
=True)
259 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
262 class FPAddSpecialCasesMod
:
263 """ special cases: NaNs, infs, zeros, denormalised
264 NOTE: some of these are unique to add. see "Special Operations"
265 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
268 def __init__(self
, width
, id_wid
):
271 self
.i
= self
.ispec()
272 self
.o
= self
.ospec()
273 self
.out_do_z
= Signal(reset_less
=True)
276 return FPNumBase2Ops(self
.width
, self
.id_wid
)
279 return FPPackData(self
.width
, self
.id_wid
)
281 def setup(self
, m
, i
, out_do_z
):
282 """ links module to inputs and outputs
284 m
.submodules
.specialcases
= self
285 m
.d
.comb
+= self
.i
.eq(i
)
286 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
288 def elaborate(self
, platform
):
291 m
.submodules
.sc_in_a
= self
.i
.a
292 m
.submodules
.sc_in_b
= self
.i
.b
293 m
.submodules
.sc_out_z
= self
.o
.z
296 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
299 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
301 # if a is NaN or b is NaN return NaN
302 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
303 m
.d
.comb
+= self
.out_do_z
.eq(1)
304 m
.d
.comb
+= self
.o
.z
.nan(0)
306 # XXX WEIRDNESS for FP16 non-canonical NaN handling
309 ## if a is zero and b is NaN return -b
310 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
311 # m.d.comb += self.out_do_z.eq(1)
312 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
314 ## if b is zero and a is NaN return -a
315 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
316 # m.d.comb += self.out_do_z.eq(1)
317 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
319 ## if a is -zero and b is NaN return -b
320 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
321 # m.d.comb += self.out_do_z.eq(1)
322 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
324 ## if b is -zero and a is NaN return -a
325 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
326 # m.d.comb += self.out_do_z.eq(1)
327 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
329 # if a is inf return inf (or NaN)
330 with m
.Elif(self
.i
.a
.is_inf
):
331 m
.d
.comb
+= self
.out_do_z
.eq(1)
332 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
333 # if a is inf and signs don't match return NaN
334 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
335 m
.d
.comb
+= self
.o
.z
.nan(0)
337 # if b is inf return inf
338 with m
.Elif(self
.i
.b
.is_inf
):
339 m
.d
.comb
+= self
.out_do_z
.eq(1)
340 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
342 # if a is zero and b zero return signed-a/b
343 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
344 m
.d
.comb
+= self
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
349 # if a is zero return b
350 with m
.Elif(self
.i
.a
.is_zero
):
351 m
.d
.comb
+= self
.out_do_z
.eq(1)
352 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
355 # if b is zero return a
356 with m
.Elif(self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
361 # if a equal to -b return zero (+ve zero)
362 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
363 m
.d
.comb
+= self
.out_do_z
.eq(1)
364 m
.d
.comb
+= self
.o
.z
.zero(0)
366 # Denormalised Number checks
368 m
.d
.comb
+= self
.out_do_z
.eq(0)
370 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
376 def __init__(self
, id_wid
):
379 self
.in_mid
= Signal(id_wid
, reset_less
=True)
380 self
.out_mid
= Signal(id_wid
, reset_less
=True)
386 if self
.id_wid
is not None:
387 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
390 class FPAddSpecialCases(FPState
):
391 """ special cases: NaNs, infs, zeros, denormalised
392 NOTE: some of these are unique to add. see "Special Operations"
393 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
396 def __init__(self
, width
, id_wid
):
397 FPState
.__init
__(self
, "special_cases")
398 self
.mod
= FPAddSpecialCasesMod(width
)
399 self
.out_z
= self
.mod
.ospec()
400 self
.out_do_z
= Signal(reset_less
=True)
402 def setup(self
, m
, i
):
403 """ links module to inputs and outputs
405 self
.mod
.setup(m
, i
, self
.out_do_z
)
406 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
407 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
411 with m
.If(self
.out_do_z
):
414 m
.next
= "denormalise"
417 class FPAddSpecialCasesDeNorm(FPState
):
418 """ special cases: NaNs, infs, zeros, denormalised
419 NOTE: some of these are unique to add. see "Special Operations"
420 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
423 def __init__(self
, width
, id_wid
):
424 FPState
.__init
__(self
, "special_cases")
425 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
426 self
.out_z
= self
.smod
.ospec()
427 self
.out_do_z
= Signal(reset_less
=True)
429 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
430 self
.o
= self
.dmod
.ospec()
432 def setup(self
, m
, i
):
433 """ links module to inputs and outputs
435 self
.smod
.setup(m
, i
, self
.out_do_z
)
436 self
.dmod
.setup(m
, i
)
439 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
440 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
442 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
445 with m
.If(self
.out_do_z
):
451 class FPAddDeNormMod(FPState
):
453 def __init__(self
, width
, id_wid
):
456 self
.i
= self
.ispec()
457 self
.o
= self
.ospec()
460 return FPNumBase2Ops(self
.width
, self
.id_wid
)
463 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 def setup(self
, m
, i
):
466 """ links module to inputs and outputs
468 m
.submodules
.denormalise
= self
469 m
.d
.comb
+= self
.i
.eq(i
)
471 def elaborate(self
, platform
):
473 m
.submodules
.denorm_in_a
= self
.i
.a
474 m
.submodules
.denorm_in_b
= self
.i
.b
475 m
.submodules
.denorm_out_a
= self
.o
.a
476 m
.submodules
.denorm_out_b
= self
.o
.b
477 # hmmm, don't like repeating identical code
478 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
479 with m
.If(self
.i
.a
.exp_n127
):
480 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
482 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
484 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
485 with m
.If(self
.i
.b
.exp_n127
):
486 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
488 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
490 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
495 class FPAddDeNorm(FPState
):
497 def __init__(self
, width
, id_wid
):
498 FPState
.__init
__(self
, "denormalise")
499 self
.mod
= FPAddDeNormMod(width
)
500 self
.out_a
= FPNumBase(width
)
501 self
.out_b
= FPNumBase(width
)
503 def setup(self
, m
, i
):
504 """ links module to inputs and outputs
508 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
509 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
512 # Denormalised Number checks
516 class FPAddAlignMultiMod(FPState
):
518 def __init__(self
, width
):
519 self
.in_a
= FPNumBase(width
)
520 self
.in_b
= FPNumBase(width
)
521 self
.out_a
= FPNumIn(None, width
)
522 self
.out_b
= FPNumIn(None, width
)
523 self
.exp_eq
= Signal(reset_less
=True)
525 def elaborate(self
, platform
):
526 # This one however (single-cycle) will do the shift
531 m
.submodules
.align_in_a
= self
.in_a
532 m
.submodules
.align_in_b
= self
.in_b
533 m
.submodules
.align_out_a
= self
.out_a
534 m
.submodules
.align_out_b
= self
.out_b
536 # NOTE: this does *not* do single-cycle multi-shifting,
537 # it *STAYS* in the align state until exponents match
539 # exponent of a greater than b: shift b down
540 m
.d
.comb
+= self
.exp_eq
.eq(0)
541 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
542 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
543 agtb
= Signal(reset_less
=True)
544 altb
= Signal(reset_less
=True)
545 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
546 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
548 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
549 # exponent of b greater than a: shift a down
551 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
552 # exponents equal: move to next stage.
554 m
.d
.comb
+= self
.exp_eq
.eq(1)
558 class FPAddAlignMulti(FPState
):
560 def __init__(self
, width
, id_wid
):
561 FPState
.__init
__(self
, "align")
562 self
.mod
= FPAddAlignMultiMod(width
)
563 self
.out_a
= FPNumIn(None, width
)
564 self
.out_b
= FPNumIn(None, width
)
565 self
.exp_eq
= Signal(reset_less
=True)
567 def setup(self
, m
, in_a
, in_b
):
568 """ links module to inputs and outputs
570 m
.submodules
.align
= self
.mod
571 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
572 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
573 #m.d.comb += self.out_a.eq(self.mod.out_a)
574 #m.d.comb += self.out_b.eq(self.mod.out_b)
575 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
576 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
577 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
580 with m
.If(self
.exp_eq
):
586 def __init__(self
, width
, id_wid
):
587 self
.a
= FPNumIn(None, width
)
588 self
.b
= FPNumIn(None, width
)
589 self
.mid
= Signal(id_wid
, reset_less
=True)
592 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
595 class FPAddAlignSingleMod
:
597 def __init__(self
, width
, id_wid
):
600 self
.i
= self
.ispec()
601 self
.o
= self
.ospec()
604 return FPNumBase2Ops(self
.width
, self
.id_wid
)
607 return FPNumIn2Ops(self
.width
, self
.id_wid
)
609 def setup(self
, m
, i
):
610 """ links module to inputs and outputs
612 m
.submodules
.align
= self
613 m
.d
.comb
+= self
.i
.eq(i
)
615 def elaborate(self
, platform
):
616 """ Aligns A against B or B against A, depending on which has the
617 greater exponent. This is done in a *single* cycle using
618 variable-width bit-shift
620 the shifter used here is quite expensive in terms of gates.
621 Mux A or B in (and out) into temporaries, as only one of them
622 needs to be aligned against the other
626 m
.submodules
.align_in_a
= self
.i
.a
627 m
.submodules
.align_in_b
= self
.i
.b
628 m
.submodules
.align_out_a
= self
.o
.a
629 m
.submodules
.align_out_b
= self
.o
.b
631 # temporary (muxed) input and output to be shifted
632 t_inp
= FPNumBase(self
.width
)
633 t_out
= FPNumIn(None, self
.width
)
634 espec
= (len(self
.i
.a
.e
), True)
635 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
636 m
.submodules
.align_t_in
= t_inp
637 m
.submodules
.align_t_out
= t_out
638 m
.submodules
.multishift_r
= msr
640 ediff
= Signal(espec
, reset_less
=True)
641 ediffr
= Signal(espec
, reset_less
=True)
642 tdiff
= Signal(espec
, reset_less
=True)
643 elz
= Signal(reset_less
=True)
644 egz
= Signal(reset_less
=True)
646 # connect multi-shifter to t_inp/out mantissa (and tdiff)
647 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
648 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
649 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
650 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
651 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
653 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
654 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
655 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
656 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
658 # default: A-exp == B-exp, A and B untouched (fall through)
659 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
660 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
661 # only one shifter (muxed)
662 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
663 # exponent of a greater than b: shift b down
665 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
668 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
670 # exponent of b greater than a: shift a down
672 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
675 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
678 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
683 class FPAddAlignSingle(FPState
):
685 def __init__(self
, width
, id_wid
):
686 FPState
.__init
__(self
, "align")
687 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
688 self
.out_a
= FPNumIn(None, width
)
689 self
.out_b
= FPNumIn(None, width
)
691 def setup(self
, m
, i
):
692 """ links module to inputs and outputs
696 # NOTE: could be done as comb
697 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
698 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
704 class FPAddAlignSingleAdd(FPState
):
706 def __init__(self
, width
, id_wid
):
707 FPState
.__init
__(self
, "align")
710 self
.a1o
= self
.ospec()
713 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
716 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
718 def setup(self
, m
, i
):
719 """ links module to inputs and outputs
721 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
724 m
.d
.comb
+= o
.eq(mod
.o
)
726 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
729 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
731 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
733 self
.a1modo
= a1mod
.o
735 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
738 m
.next
= "normalise_1"
741 class FPAddStage0Data
:
743 def __init__(self
, width
, id_wid
):
744 self
.z
= FPNumBase(width
, False)
745 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
746 self
.mid
= Signal(id_wid
, reset_less
=True)
749 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
752 class FPAddStage0Mod
:
754 def __init__(self
, width
, id_wid
):
757 self
.i
= self
.ispec()
758 self
.o
= self
.ospec()
761 return FPNumBase2Ops(self
.width
, self
.id_wid
)
764 return FPAddStage0Data(self
.width
, self
.id_wid
)
766 def setup(self
, m
, i
):
767 """ links module to inputs and outputs
769 m
.submodules
.add0
= self
770 m
.d
.comb
+= self
.i
.eq(i
)
772 def elaborate(self
, platform
):
774 m
.submodules
.add0_in_a
= self
.i
.a
775 m
.submodules
.add0_in_b
= self
.i
.b
776 m
.submodules
.add0_out_z
= self
.o
.z
778 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
779 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
781 # store intermediate tests (and zero-extended mantissas)
782 seq
= Signal(reset_less
=True)
783 mge
= Signal(reset_less
=True)
784 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
785 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
786 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
787 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
788 am0
.eq(Cat(self
.i
.a
.m
, 0)),
789 bm0
.eq(Cat(self
.i
.b
.m
, 0))
791 # same-sign (both negative or both positive) add mantissas
794 self
.o
.tot
.eq(am0
+ bm0
),
795 self
.o
.z
.s
.eq(self
.i
.a
.s
)
797 # a mantissa greater than b, use a
800 self
.o
.tot
.eq(am0
- bm0
),
801 self
.o
.z
.s
.eq(self
.i
.a
.s
)
803 # b mantissa greater than a, use b
806 self
.o
.tot
.eq(bm0
- am0
),
807 self
.o
.z
.s
.eq(self
.i
.b
.s
)
812 class FPAddStage0(FPState
):
813 """ First stage of add. covers same-sign (add) and subtract
814 special-casing when mantissas are greater or equal, to
815 give greatest accuracy.
818 def __init__(self
, width
, id_wid
):
819 FPState
.__init
__(self
, "add_0")
820 self
.mod
= FPAddStage0Mod(width
)
821 self
.o
= self
.mod
.ospec()
823 def setup(self
, m
, i
):
824 """ links module to inputs and outputs
828 # NOTE: these could be done as combinatorial (merge add0+add1)
829 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
835 class FPAddStage1Data
:
837 def __init__(self
, width
, id_wid
):
838 self
.z
= FPNumBase(width
, False)
840 self
.mid
= Signal(id_wid
, reset_less
=True)
843 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
847 class FPAddStage1Mod(FPState
):
848 """ Second stage of add: preparation for normalisation.
849 detects when tot sum is too big (tot[27] is kinda a carry bit)
852 def __init__(self
, width
, id_wid
):
855 self
.i
= self
.ispec()
856 self
.o
= self
.ospec()
859 return FPAddStage0Data(self
.width
, self
.id_wid
)
862 return FPAddStage1Data(self
.width
, self
.id_wid
)
864 def setup(self
, m
, i
):
865 """ links module to inputs and outputs
867 m
.submodules
.add1
= self
868 m
.submodules
.add1_out_overflow
= self
.o
.of
870 m
.d
.comb
+= self
.i
.eq(i
)
872 def elaborate(self
, platform
):
874 #m.submodules.norm1_in_overflow = self.in_of
875 #m.submodules.norm1_out_overflow = self.out_of
876 #m.submodules.norm1_in_z = self.in_z
877 #m.submodules.norm1_out_z = self.out_z
878 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
879 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
880 # tot[-1] (MSB) gets set when the sum overflows. shift result down
881 with m
.If(self
.i
.tot
[-1]):
883 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
884 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
885 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
886 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
887 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
888 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
890 # tot[-1] (MSB) zero case
893 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
894 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
895 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
896 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
897 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
902 class FPAddStage1(FPState
):
904 def __init__(self
, width
, id_wid
):
905 FPState
.__init
__(self
, "add_1")
906 self
.mod
= FPAddStage1Mod(width
)
907 self
.out_z
= FPNumBase(width
, False)
908 self
.out_of
= Overflow()
909 self
.norm_stb
= Signal()
911 def setup(self
, m
, i
):
912 """ links module to inputs and outputs
916 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
918 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
919 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
920 m
.d
.sync
+= self
.norm_stb
.eq(1)
923 m
.next
= "normalise_1"
926 class FPNormaliseModSingle
:
928 def __init__(self
, width
):
930 self
.in_z
= self
.ispec()
931 self
.out_z
= self
.ospec()
934 return FPNumBase(self
.width
, False)
937 return FPNumBase(self
.width
, False)
939 def setup(self
, m
, i
):
940 """ links module to inputs and outputs
942 m
.submodules
.normalise
= self
943 m
.d
.comb
+= self
.i
.eq(i
)
945 def elaborate(self
, platform
):
948 mwid
= self
.out_z
.m_width
+2
949 pe
= PriorityEncoder(mwid
)
950 m
.submodules
.norm_pe
= pe
952 m
.submodules
.norm1_out_z
= self
.out_z
953 m
.submodules
.norm1_in_z
= self
.in_z
955 in_z
= FPNumBase(self
.width
, False)
957 m
.submodules
.norm1_insel_z
= in_z
958 m
.submodules
.norm1_insel_overflow
= in_of
960 espec
= (len(in_z
.e
), True)
961 ediff_n126
= Signal(espec
, reset_less
=True)
962 msr
= MultiShiftRMerge(mwid
, espec
)
963 m
.submodules
.multishift_r
= msr
965 m
.d
.comb
+= in_z
.eq(self
.in_z
)
966 m
.d
.comb
+= in_of
.eq(self
.in_of
)
967 # initialise out from in (overridden below)
968 m
.d
.comb
+= self
.out_z
.eq(in_z
)
969 m
.d
.comb
+= self
.out_of
.eq(in_of
)
970 # normalisation decrease condition
971 decrease
= Signal(reset_less
=True)
972 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
975 # *sigh* not entirely obvious: count leading zeros (clz)
976 # with a PriorityEncoder: to find from the MSB
977 # we reverse the order of the bits.
978 temp_m
= Signal(mwid
, reset_less
=True)
979 temp_s
= Signal(mwid
+1, reset_less
=True)
980 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
982 # cat round and guard bits back into the mantissa
983 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
984 pe
.i
.eq(temp_m
[::-1]), # inverted
985 clz
.eq(pe
.o
), # count zeros from MSB down
986 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
987 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
988 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
995 def __init__(self
, width
, id_wid
):
996 self
.roundz
= Signal(reset_less
=True)
997 self
.z
= FPNumBase(width
, False)
998 self
.mid
= Signal(id_wid
, reset_less
=True)
1001 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1004 class FPNorm1ModSingle
:
1006 def __init__(self
, width
, id_wid
):
1008 self
.id_wid
= id_wid
1009 self
.i
= self
.ispec()
1010 self
.o
= self
.ospec()
1013 return FPAddStage1Data(self
.width
, self
.id_wid
)
1016 return FPNorm1Data(self
.width
, self
.id_wid
)
1018 def setup(self
, m
, i
):
1019 """ links module to inputs and outputs
1021 m
.submodules
.normalise_1
= self
1022 m
.d
.comb
+= self
.i
.eq(i
)
1024 def process(self
, i
):
1027 def elaborate(self
, platform
):
1030 mwid
= self
.o
.z
.m_width
+2
1031 pe
= PriorityEncoder(mwid
)
1032 m
.submodules
.norm_pe
= pe
1035 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1037 m
.submodules
.norm1_out_z
= self
.o
.z
1038 m
.submodules
.norm1_out_overflow
= of
1039 m
.submodules
.norm1_in_z
= self
.i
.z
1040 m
.submodules
.norm1_in_overflow
= self
.i
.of
1043 m
.submodules
.norm1_insel_z
= i
.z
1044 m
.submodules
.norm1_insel_overflow
= i
.of
1046 espec
= (len(i
.z
.e
), True)
1047 ediff_n126
= Signal(espec
, reset_less
=True)
1048 msr
= MultiShiftRMerge(mwid
, espec
)
1049 m
.submodules
.multishift_r
= msr
1051 m
.d
.comb
+= i
.eq(self
.i
)
1052 # initialise out from in (overridden below)
1053 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1054 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1055 m
.d
.comb
+= of
.eq(i
.of
)
1056 # normalisation increase/decrease conditions
1057 decrease
= Signal(reset_less
=True)
1058 increase
= Signal(reset_less
=True)
1059 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1060 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1062 with m
.If(decrease
):
1063 # *sigh* not entirely obvious: count leading zeros (clz)
1064 # with a PriorityEncoder: to find from the MSB
1065 # we reverse the order of the bits.
1066 temp_m
= Signal(mwid
, reset_less
=True)
1067 temp_s
= Signal(mwid
+1, reset_less
=True)
1068 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1069 # make sure that the amount to decrease by does NOT
1070 # go below the minimum non-INF/NaN exponent
1071 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1074 # cat round and guard bits back into the mantissa
1075 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1076 pe
.i
.eq(temp_m
[::-1]), # inverted
1077 clz
.eq(limclz
), # count zeros from MSB down
1078 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1079 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1080 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1081 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1082 # overflow in bits 0..1: got shifted too (leave sticky)
1083 of
.guard
.eq(temp_s
[1]), # guard
1084 of
.round_bit
.eq(temp_s
[0]), # round
1087 with m
.Elif(increase
):
1088 temp_m
= Signal(mwid
+1, reset_less
=True)
1090 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1092 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1093 # connect multi-shifter to inp/out mantissa (and ediff)
1095 msr
.diff
.eq(ediff_n126
),
1096 self
.o
.z
.m
.eq(msr
.m
[3:]),
1097 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1098 # overflow in bits 0..1: got shifted too (leave sticky)
1099 of
.guard
.eq(temp_s
[2]), # guard
1100 of
.round_bit
.eq(temp_s
[1]), # round
1101 of
.sticky
.eq(temp_s
[0]), # sticky
1102 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1108 class FPNorm1ModMulti
:
1110 def __init__(self
, width
, single_cycle
=True):
1112 self
.in_select
= Signal(reset_less
=True)
1113 self
.in_z
= FPNumBase(width
, False)
1114 self
.in_of
= Overflow()
1115 self
.temp_z
= FPNumBase(width
, False)
1116 self
.temp_of
= Overflow()
1117 self
.out_z
= FPNumBase(width
, False)
1118 self
.out_of
= Overflow()
1120 def elaborate(self
, platform
):
1123 m
.submodules
.norm1_out_z
= self
.out_z
1124 m
.submodules
.norm1_out_overflow
= self
.out_of
1125 m
.submodules
.norm1_temp_z
= self
.temp_z
1126 m
.submodules
.norm1_temp_of
= self
.temp_of
1127 m
.submodules
.norm1_in_z
= self
.in_z
1128 m
.submodules
.norm1_in_overflow
= self
.in_of
1130 in_z
= FPNumBase(self
.width
, False)
1132 m
.submodules
.norm1_insel_z
= in_z
1133 m
.submodules
.norm1_insel_overflow
= in_of
1135 # select which of temp or in z/of to use
1136 with m
.If(self
.in_select
):
1137 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1138 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1140 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1141 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1142 # initialise out from in (overridden below)
1143 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1144 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1145 # normalisation increase/decrease conditions
1146 decrease
= Signal(reset_less
=True)
1147 increase
= Signal(reset_less
=True)
1148 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1149 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1150 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1152 with m
.If(decrease
):
1154 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1155 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1156 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1157 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1158 self
.out_of
.round_bit
.eq(0), # reset round bit
1159 self
.out_of
.m0
.eq(in_of
.guard
),
1162 with m
.Elif(increase
):
1164 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1165 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1166 self
.out_of
.guard
.eq(in_z
.m
[0]),
1167 self
.out_of
.m0
.eq(in_z
.m
[1]),
1168 self
.out_of
.round_bit
.eq(in_of
.guard
),
1169 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1175 class FPNorm1Single(FPState
):
1177 def __init__(self
, width
, id_wid
, single_cycle
=True):
1178 FPState
.__init
__(self
, "normalise_1")
1179 self
.mod
= FPNorm1ModSingle(width
)
1180 self
.o
= self
.ospec()
1181 self
.out_z
= FPNumBase(width
, False)
1182 self
.out_roundz
= Signal(reset_less
=True)
1185 return self
.mod
.ispec()
1188 return self
.mod
.ospec()
1190 def setup(self
, m
, i
):
1191 """ links module to inputs and outputs
1193 self
.mod
.setup(m
, i
)
1195 def action(self
, m
):
1199 class FPNorm1Multi(FPState
):
1201 def __init__(self
, width
, id_wid
):
1202 FPState
.__init
__(self
, "normalise_1")
1203 self
.mod
= FPNorm1ModMulti(width
)
1204 self
.stb
= Signal(reset_less
=True)
1205 self
.ack
= Signal(reset
=0, reset_less
=True)
1206 self
.out_norm
= Signal(reset_less
=True)
1207 self
.in_accept
= Signal(reset_less
=True)
1208 self
.temp_z
= FPNumBase(width
)
1209 self
.temp_of
= Overflow()
1210 self
.out_z
= FPNumBase(width
)
1211 self
.out_roundz
= Signal(reset_less
=True)
1213 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1214 """ links module to inputs and outputs
1216 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1217 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1218 self
.out_z
, self
.out_norm
)
1220 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1221 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1223 def action(self
, m
):
1224 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1225 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1226 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1227 with m
.If(self
.out_norm
):
1228 with m
.If(self
.in_accept
):
1233 m
.d
.sync
+= self
.ack
.eq(0)
1235 # normalisation not required (or done).
1237 m
.d
.sync
+= self
.ack
.eq(1)
1238 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact pipeline): chains normalisation,
        rounding, corrections and packing in one state.

        The intermediate results are passed from stage to stage with
        m.d.comb copies; only the final packed value (mid and z.v) is
        registered, into self.out_z, with m.d.sync.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        # width is read by ispec(), ospec() and setup() below
        self.width = width

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs

            creates self.pmod and self.out_z as a side-effect.
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        # wire the normaliser to the incoming data, in the same way that
        # every later stage is wired to its predecessor
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        self.out_z = self.pmod.ospec()

        # register the id and the packed result
        m.d.sync += self.out_z.mid.eq(self.pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v) # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
1290 def __init__(self
, width
, id_wid
):
1291 self
.z
= FPNumBase(width
, False)
1292 self
.mid
= Signal(id_wid
, reset_less
=True)
1295 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1300 def __init__(self
, width
, id_wid
):
1302 self
.id_wid
= id_wid
1303 self
.i
= self
.ispec()
1304 self
.out_z
= self
.ospec()
1307 return FPNorm1Data(self
.width
, self
.id_wid
)
1310 return FPRoundData(self
.width
, self
.id_wid
)
1312 def process(self
, i
):
1315 def setup(self
, m
, i
):
1316 m
.submodules
.roundz
= self
1317 m
.d
.comb
+= self
.i
.eq(i
)
1319 def elaborate(self
, platform
):
1321 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1322 with m
.If(self
.i
.roundz
):
1323 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1324 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1325 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
class FPRound(FPState):
    """ FSM state "round": registers the output of an FPRoundMod and
        then moves on to the "corrections" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod takes (width, id_wid): both must be supplied
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: as required by the rounding module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: as produced by the rounding module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the rounding module's output record is "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """ "corrections" module: copies the input record to the output and,
        when the input number's is_denormalised flag is set, replaces the
        output exponent with the input number's N127 value.
    """

    def __init__(self, width, id_wid):
        # width and id_wid are read by ispec() / ospec()
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): the body of process() was not present in the
        # reviewed text; returning the output record is presumed - confirm.
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        # default: pass-through (exponent overridden below)
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of an
        FPCorrectionsMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod takes (width, id_wid): both must be supplied
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: as required by the corrections module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: as produced by the corrections module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the corrections module's output record is "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # NOTE(review): the body of action() was not present in the
        # reviewed text; "pack" is presumed because FPPack (state "pack")
        # is the state added after corrections - confirm.
        m.next = "pack"
1415 def __init__(self
, width
, id_wid
):
1416 self
.z
= FPNumOut(width
, False)
1417 self
.mid
= Signal(id_wid
, reset_less
=True)
1420 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1425 def __init__(self
, width
, id_wid
):
1427 self
.id_wid
= id_wid
1428 self
.i
= self
.ispec()
1429 self
.o
= self
.ospec()
1432 return FPRoundData(self
.width
, self
.id_wid
)
1435 return FPPackData(self
.width
, self
.id_wid
)
1437 def process(self
, i
):
1440 def setup(self
, m
, in_z
):
1441 """ links module to inputs and outputs
1443 m
.submodules
.pack
= self
1444 m
.d
.comb
+= self
.i
.eq(in_z
)
1446 def elaborate(self
, platform
):
1448 m
.submodules
.pack_in_z
= self
.i
.z
1449 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1450 with m
.If(self
.i
.z
.is_overflowed
):
1451 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1453 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1458 def __init__(self
, width
, id_wid
):
1459 self
.z
= FPNumOut(width
, False)
1460 self
.mid
= Signal(id_wid
, reset_less
=True)
1463 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """ FSM state "pack": registers the packed value and id produced by
        an FPPackMod, then moves on to the "pack_put_z" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod takes (width, id_wid): both must be supplied
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: as required by the pack module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: as produced by the pack module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the pack module's output record is "o", holding the number in
        # "z" and the id in "mid" (same access as used by FPNormToPack)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
1491 class FPPutZ(FPState
):
1493 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1494 FPState
.__init
__(self
, state
)
1495 if to_state
is None:
1496 to_state
= "get_ops"
1497 self
.to_state
= to_state
1500 self
.in_mid
= in_mid
1501 self
.out_mid
= out_mid
1503 def action(self
, m
):
1504 if self
.in_mid
is not None:
1505 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1507 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1509 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1510 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1511 m
.next
= self
.to_state
1513 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1516 class FPPutZIdx(FPState
):
1518 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1519 FPState
.__init
__(self
, state
)
1520 if to_state
is None:
1521 to_state
= "get_ops"
1522 self
.to_state
= to_state
1524 self
.out_zs
= out_zs
1525 self
.in_mid
= in_mid
1527 def action(self
, m
):
1528 outz_stb
= Signal(reset_less
=True)
1529 outz_ack
= Signal(reset_less
=True)
1530 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1531 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1534 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1536 with m
.If(outz_stb
& outz_ack
):
1537 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1538 m
.next
= self
.to_state
1540 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1542 class FPADDBaseData
:
1544 def __init__(self
, width
, id_wid
):
1546 self
.id_wid
= id_wid
1547 self
.a
= Signal(width
)
1548 self
.b
= Signal(width
)
1549 self
.mid
= Signal(id_wid
, reset_less
=True)
1552 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
1556 def __init__(self
, width
, id_wid
):
1557 self
.z
= FPOp(width
)
1558 self
.mid
= Signal(id_wid
, reset_less
=True)
1561 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1566 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1569 * width: bit-width of IEEE754. supported: 16, 32, 64
1570 * id_wid: an identifier that is sync-connected to the input
1571 * single_cycle: True indicates each stage to complete in 1 clock
1572 * compact: True indicates a reduced number of stages
1575 self
.id_wid
= id_wid
1576 self
.single_cycle
= single_cycle
1577 self
.compact
= compact
1579 self
.in_t
= Trigger()
1580 self
.i
= self
.ispec()
1581 self
.o
= self
.ospec()
1586 return FPADDBaseData(self
.width
, self
.id_wid
)
1589 return FPOpData(self
.width
, self
.id_wid
)
1591 def add_state(self
, state
):
1592 self
.states
.append(state
)
1595 def get_fragment(self
, platform
=None):
1596 """ creates the HDL code-fragment for FPAdd
1599 m
.submodules
.out_z
= self
.o
.z
1600 m
.submodules
.in_t
= self
.in_t
1602 self
.get_compact_fragment(m
, platform
)
1604 self
.get_longer_fragment(m
, platform
)
1606 with m
.FSM() as fsm
:
1608 for state
in self
.states
:
1609 with m
.State(state
.state_from
):
1614 def get_longer_fragment(self
, m
, platform
=None):
1616 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1618 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1622 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1623 sc
.setup(m
, a
, b
, self
.in_mid
)
1625 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1626 dn
.setup(m
, a
, b
, sc
.in_mid
)
1628 if self
.single_cycle
:
1629 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1630 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1632 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1633 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1635 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1636 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1638 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1639 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1641 if self
.single_cycle
:
1642 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1643 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1645 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1646 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1648 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1649 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1651 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1652 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1654 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1655 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1657 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1658 pa
.in_mid
, self
.out_mid
))
1660 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1661 pa
.in_mid
, self
.out_mid
))
1663 def get_compact_fragment(self
, m
, platform
=None):
1665 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1666 self
.width
, self
.id_wid
))
1667 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1669 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1672 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1675 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1676 n1
.setup(m
, alm
.a1o
)
1678 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1679 n1
.out_z
.mid
, self
.o
.mid
))
1681 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1682 sc
.o
.mid
, self
.o
.mid
))
1685 class FPADDBase(FPState
):
1687 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1690 * width: bit-width of IEEE754. supported: 16, 32, 64
1691 * id_wid: an identifier that is sync-connected to the input
1692 * single_cycle: True indicates each stage to complete in 1 clock
1694 FPState
.__init
__(self
, "fpadd")
1696 self
.single_cycle
= single_cycle
1697 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1698 self
.o
= self
.ospec()
1700 self
.in_t
= Trigger()
1701 self
.i
= self
.ispec()
1703 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1704 self
.in_accept
= Signal(reset_less
=True)
1705 self
.add_stb
= Signal(reset_less
=True)
1706 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1709 return self
.mod
.ispec()
1712 return self
.mod
.ospec()
1714 def setup(self
, m
, i
, add_stb
, in_mid
):
1715 m
.d
.comb
+= [self
.i
.eq(i
),
1716 self
.mod
.i
.eq(self
.i
),
1717 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1718 #self.add_stb.eq(add_stb),
1719 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1720 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1721 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1722 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1723 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1724 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1727 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1728 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1729 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1730 #m.d.sync += self.in_t.stb.eq(0)
1732 m
.submodules
.fpadd
= self
.mod
1734 def action(self
, m
):
1736 # in_accept is set on incoming strobe HIGH and ack LOW.
1737 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1739 #with m.If(self.in_t.ack):
1740 # m.d.sync += self.in_t.stb.eq(0)
1741 with m
.If(~self
.z_done
):
1742 # not done: test for accepting an incoming operand pair
1743 with m
.If(self
.in_accept
):
1745 self
.add_ack
.eq(1), # acknowledge receipt...
1746 self
.in_t
.stb
.eq(1), # initiate add
1749 m
.d
.sync
+= [self
.add_ack
.eq(0),
1750 self
.in_t
.stb
.eq(0),
1754 # done: acknowledge, and write out id and value
1755 m
.d
.sync
+= [self
.add_ack
.eq(1),
1762 if self
.in_mid
is not None:
1763 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1766 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1768 # move to output state on detecting z ack
1769 with m
.If(self
.out_z
.trigger
):
1770 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1773 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1777 def __init__(self
, width
, id_wid
):
1779 self
.id_wid
= id_wid
1781 for i
in range(rs_sz
):
1783 out_z
.name
= "out_z_%d" % i
1785 self
.res
= Array(res
)
1786 self
.in_z
= FPOp(width
)
1787 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1789 def setup(self
, m
, in_z
, in_mid
):
1790 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1791 self
.in_mid
.eq(in_mid
)]
1793 def get_fragment(self
, platform
=None):
1794 """ creates the HDL code-fragment for FPAdd
1797 m
.submodules
.res_in_z
= self
.in_z
1798 m
.submodules
+= self
.res
1810 """ FPADD: stages as follows:
1816 FPAddBase---> FPAddBaseMod
1818 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1820 FPAddBase is tricky: it is both a stage and *has* stages.
1821 Connection to FPAddBaseMod therefore requires an in stb/ack
1822 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1823 needs to be the thing that raises the incoming stb.
1826 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1829 * width: bit-width of IEEE754. supported: 16, 32, 64
1830 * id_wid: an identifier that is sync-connected to the input
1831 * single_cycle: True indicates each stage to complete in 1 clock
1834 self
.id_wid
= id_wid
1835 self
.single_cycle
= single_cycle
1837 #self.out_z = FPOp(width)
1838 self
.ids
= FPID(id_wid
)
1841 for i
in range(rs_sz
):
1844 in_a
.name
= "in_a_%d" % i
1845 in_b
.name
= "in_b_%d" % i
1846 rs
.append((in_a
, in_b
))
1850 for i
in range(rs_sz
):
1852 out_z
.name
= "out_z_%d" % i
1854 self
.res
= Array(res
)
1858 def add_state(self
, state
):
1859 self
.states
.append(state
)
1862 def get_fragment(self
, platform
=None):
1863 """ creates the HDL code-fragment for FPAdd
1866 m
.submodules
+= self
.rs
1868 in_a
= self
.rs
[0][0]
1869 in_b
= self
.rs
[0][1]
1871 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1876 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1881 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1882 ab
= self
.add_state(ab
)
1883 abd
= ab
.ispec() # create an input spec object for FPADDBase
1884 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1885 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1888 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1891 with m
.FSM() as fsm
:
1893 for state
in self
.states
:
1894 with m
.State(state
.state_from
):
1900 if __name__
== "__main__":
1902 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1903 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1904 alu
.rs
[0][1].ports() + \
1905 alu
.res
[0].ports() + \
1906 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1908 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1909 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1910 alu
.in_t
.ports() + \
1911 alu
.out_z
.ports() + \
1912 [alu
.in_mid
, alu
.out_mid
])
1915 # works... but don't use, just do "python fname.py convert -t v"
1916 #print (verilog.convert(alu, ports=[
1917 # ports=alu.in_a.ports() + \
1918 # alu.in_b.ports() + \
1919 # alu.out_z.ports())