1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """ store the identifier this state object is associated with

        state_from: kept verbatim on the instance as ``state_from``;
                    subclasses in this file pass strings such as
                    "align" or "special_cases".
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ create the operand port, the value output and the decode flag

        width: forwarded to FPOp and used as the shape of ``out_op``
    """
    # incoming operand (an FPOp; its ack/stb/v members are used by
    # elaborate)
    self.in_op = FPOp(width)
    # receives in_op.v when out_decode is asserted
    self.out_op = Signal(width)
    # reset-less flag, driven from in_op.ack & in_op.stb in elaborate
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
212 self
.o
.mid
.eq(self
.i
.mid
)
217 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ set up the two-operand fetch state

        in_state:  identifier handed to FPState.__init__
        out_state: stored as ``out_state``; assigned to ``m.next``
                   once ``out_decode`` is asserted
        width:     forwarded to FPGet2OpMod
        id_wid:    forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # the module that does the work, plus an output record built from
    # that module's own output spec
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # reset-less handshake / status flags
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
230 def setup(self
, m
, i
, in_stb
, in_ack
):
231 """ links module to inputs and outputs
233 m
.submodules
.get_ops
= self
.mod
234 m
.d
.comb
+= self
.mod
.i
.eq(i
)
235 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
236 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
237 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
238 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
241 with m
.If(self
.out_decode
):
242 m
.next
= self
.out_state
245 self
.o
.eq(self
.mod
.o
),
248 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ bundle two FPNumBase operands with an id signal

        width:   forwarded to both FPNumBase instances
        id_wid:  shape of the ``mid`` signal
        m_extra: passed as the second positional argument of FPNumBase
                 (meaning defined by FPNumBase, not visible here)
    """
    # operands a and b
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # presumably the multiplex id carried alongside the operands --
    # confirm against the producer of this record
    self.mid = Signal(id_wid, reset_less=True)
259 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
262 class FPAddSpecialCasesMod
:
263 """ special cases: NaNs, infs, zeros, denormalised
264 NOTE: some of these are unique to add. see "Special Operations"
265 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
268 def __init__(self
, width
, id_wid
):
271 self
.i
= self
.ispec()
272 self
.o
= self
.ospec()
273 self
.out_do_z
= Signal(reset_less
=True)
276 return FPNumBase2Ops(self
.width
, self
.id_wid
)
279 return FPPackData(self
.width
, self
.id_wid
)
281 def setup(self
, m
, i
, out_do_z
):
282 """ links module to inputs and outputs
284 m
.submodules
.specialcases
= self
285 m
.d
.comb
+= self
.i
.eq(i
)
286 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
288 def elaborate(self
, platform
):
291 m
.submodules
.sc_in_a
= self
.i
.a
292 m
.submodules
.sc_in_b
= self
.i
.b
293 m
.submodules
.sc_out_z
= self
.o
.z
296 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
299 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
301 # if a is NaN or b is NaN return NaN
302 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
303 m
.d
.comb
+= self
.out_do_z
.eq(1)
304 m
.d
.comb
+= self
.o
.z
.nan(0)
306 # XXX WEIRDNESS for FP16 non-canonical NaN handling
309 ## if a is zero and b is NaN return -b
310 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
311 # m.d.comb += self.out_do_z.eq(1)
312 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
314 ## if b is zero and a is NaN return -a
315 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
316 # m.d.comb += self.out_do_z.eq(1)
317 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
319 ## if a is -zero and b is NaN return -b
320 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
321 # m.d.comb += self.out_do_z.eq(1)
322 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
324 ## if b is -zero and a is NaN return -a
325 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
326 # m.d.comb += self.out_do_z.eq(1)
327 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
329 # if a is inf return inf (or NaN)
330 with m
.Elif(self
.i
.a
.is_inf
):
331 m
.d
.comb
+= self
.out_do_z
.eq(1)
332 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
333 # if a is inf and signs don't match return NaN
334 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
335 m
.d
.comb
+= self
.o
.z
.nan(0)
337 # if b is inf return inf
338 with m
.Elif(self
.i
.b
.is_inf
):
339 m
.d
.comb
+= self
.out_do_z
.eq(1)
340 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
342 # if a is zero and b zero return signed-a/b
343 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
344 m
.d
.comb
+= self
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
349 # if a is zero return b
350 with m
.Elif(self
.i
.a
.is_zero
):
351 m
.d
.comb
+= self
.out_do_z
.eq(1)
352 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
355 # if b is zero return a
356 with m
.Elif(self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
361 # if a equal to -b return zero (+ve zero)
362 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
363 m
.d
.comb
+= self
.out_do_z
.eq(1)
364 m
.d
.comb
+= self
.o
.z
.zero(0)
366 # Denormalised Number checks
368 m
.d
.comb
+= self
.out_do_z
.eq(0)
370 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
376 def __init__(self
, id_wid
):
379 self
.in_mid
= Signal(id_wid
, reset_less
=True)
380 self
.out_mid
= Signal(id_wid
, reset_less
=True)
386 if self
.id_wid
is not None:
387 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
390 class FPAddSpecialCases(FPState
):
391 """ special cases: NaNs, infs, zeros, denormalised
392 NOTE: some of these are unique to add. see "Special Operations"
393 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ create the special-cases state and its combinatorial module

        width:  forwarded to FPAddSpecialCasesMod
        id_wid: forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod's constructor takes both width and id_wid,
    # so both must be supplied here.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output record built from the module's own output spec
    self.out_z = self.mod.ospec()
    # reset-less flag; setup() hands it to the module as out_do_z
    self.out_do_z = Signal(reset_less=True)
402 def setup(self
, m
, i
):
403 """ links module to inputs and outputs
405 self
.mod
.setup(m
, i
, self
.out_do_z
)
406 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
407 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
411 with m
.If(self
.out_do_z
):
414 m
.next
= "denormalise"
417 class FPAddSpecialCasesDeNorm(FPState
):
418 """ special cases: NaNs, infs, zeros, denormalised
419 NOTE: some of these are unique to add. see "Special Operations"
420 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ create a state holding both the special-cases module and the
        denormalise module

        width:  forwarded to both modules
        id_wid: forwarded to both modules
    """
    FPState.__init__(self, "special_cases")

    # special-cases part: module, output record (module's ospec) and
    # the reset-less "result already decided" flag
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise part: module and output record (module's ospec)
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
432 def setup(self
, m
, i
):
433 """ links module to inputs and outputs
435 self
.smod
.setup(m
, i
, self
.out_do_z
)
436 self
.dmod
.setup(m
, i
)
439 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
440 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
442 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
445 with m
.If(self
.out_do_z
):
451 class FPAddDeNormMod(FPState
):
453 def __init__(self
, width
, id_wid
):
456 self
.i
= self
.ispec()
457 self
.o
= self
.ospec()
460 return FPNumBase2Ops(self
.width
, self
.id_wid
)
463 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 def setup(self
, m
, i
):
466 """ links module to inputs and outputs
468 m
.submodules
.denormalise
= self
469 m
.d
.comb
+= self
.i
.eq(i
)
471 def elaborate(self
, platform
):
473 m
.submodules
.denorm_in_a
= self
.i
.a
474 m
.submodules
.denorm_in_b
= self
.i
.b
475 m
.submodules
.denorm_out_a
= self
.o
.a
476 m
.submodules
.denorm_out_b
= self
.o
.b
477 # hmmm, don't like repeating identical code
478 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
479 with m
.If(self
.i
.a
.exp_n127
):
480 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
482 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
484 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
485 with m
.If(self
.i
.b
.exp_n127
):
486 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
488 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
490 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
495 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ create the denormalise state and its module

        width:  forwarded to FPAddDeNormMod and to both FPNumBase outputs
        id_wid: forwarded to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod's constructor takes both width and id_wid, so both
    # must be supplied here.
    self.mod = FPAddDeNormMod(width, id_wid)
    # outputs for operands a and b
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
503 def setup(self
, m
, i
):
504 """ links module to inputs and outputs
508 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
509 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
512 # Denormalised Number checks
516 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ create the ports of the multi-cycle alignment module

        width: forwarded to every FPNumBase / FPNumIn created here
    """
    # operands coming in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # operands going out (FPNumIn, constructed with None as first arg)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # reset-less flag; elaborate sets it to 1 when the exponents are equal
    self.exp_eq = Signal(reset_less=True)
525 def elaborate(self
, platform
):
526 # This one however (single-cycle) will do the shift
531 m
.submodules
.align_in_a
= self
.in_a
532 m
.submodules
.align_in_b
= self
.in_b
533 m
.submodules
.align_out_a
= self
.out_a
534 m
.submodules
.align_out_b
= self
.out_b
536 # NOTE: this does *not* do single-cycle multi-shifting,
537 # it *STAYS* in the align state until exponents match
539 # exponent of a greater than b: shift b down
540 m
.d
.comb
+= self
.exp_eq
.eq(0)
541 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
542 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
543 agtb
= Signal(reset_less
=True)
544 altb
= Signal(reset_less
=True)
545 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
546 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
548 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
549 # exponent of b greater than a: shift a down
551 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
552 # exponents equal: move to next stage.
554 m
.d
.comb
+= self
.exp_eq
.eq(1)
558 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ create the multi-cycle "align" state

        width:  forwarded to FPAddAlignMultiMod and to the FPNumIn outputs
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # this state's own copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # reset-less mirror of the module's exp_eq flag (wired in setup)
    self.exp_eq = Signal(reset_less=True)
567 def setup(self
, m
, in_a
, in_b
):
568 """ links module to inputs and outputs
570 m
.submodules
.align
= self
.mod
571 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
572 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
573 #m.d.comb += self.out_a.eq(self.mod.out_a)
574 #m.d.comb += self.out_b.eq(self.mod.out_b)
575 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
576 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
577 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
580 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ bundle two FPNumIn operands with an id signal

        width:  forwarded to both FPNumIn instances
        id_wid: shape of the ``mid`` signal
    """
    # operands a and b (FPNumIn, constructed with None as first arg)
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # reset-less id carried alongside the operands
    self.mid = Signal(id_wid, reset_less=True)
592 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
595 class FPAddAlignSingleMod
:
597 def __init__(self
, width
, id_wid
):
600 self
.i
= self
.ispec()
601 self
.o
= self
.ospec()
604 return FPNumBase2Ops(self
.width
, self
.id_wid
)
607 return FPNumIn2Ops(self
.width
, self
.id_wid
)
609 def setup(self
, m
, i
):
610 """ links module to inputs and outputs
612 m
.submodules
.align
= self
613 m
.d
.comb
+= self
.i
.eq(i
)
615 def elaborate(self
, platform
):
616 """ Aligns A against B or B against A, depending on which has the
617 greater exponent. This is done in a *single* cycle using
618 variable-width bit-shift
620 the shifter used here is quite expensive in terms of gates.
621 Mux A or B in (and out) into temporaries, as only one of them
622 needs to be aligned against the other
626 m
.submodules
.align_in_a
= self
.i
.a
627 m
.submodules
.align_in_b
= self
.i
.b
628 m
.submodules
.align_out_a
= self
.o
.a
629 m
.submodules
.align_out_b
= self
.o
.b
631 # temporary (muxed) input and output to be shifted
632 t_inp
= FPNumBase(self
.width
)
633 t_out
= FPNumIn(None, self
.width
)
634 espec
= (len(self
.i
.a
.e
), True)
635 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
636 m
.submodules
.align_t_in
= t_inp
637 m
.submodules
.align_t_out
= t_out
638 m
.submodules
.multishift_r
= msr
640 ediff
= Signal(espec
, reset_less
=True)
641 ediffr
= Signal(espec
, reset_less
=True)
642 tdiff
= Signal(espec
, reset_less
=True)
643 elz
= Signal(reset_less
=True)
644 egz
= Signal(reset_less
=True)
646 # connect multi-shifter to t_inp/out mantissa (and tdiff)
647 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
648 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
649 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
650 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
651 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
653 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
654 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
655 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
656 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
658 # default: A-exp == B-exp, A and B untouched (fall through)
659 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
660 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
661 # only one shifter (muxed)
662 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
663 # exponent of a greater than b: shift b down
665 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
668 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
670 # exponent of b greater than a: shift a down
672 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
675 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
678 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
683 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ create the single-cycle "align" state

        width:  forwarded to FPAddAlignSingleMod and to the FPNumIn outputs
        id_wid: forwarded to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # this state's own copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
691 def setup(self
, m
, i
):
692 """ links module to inputs and outputs
696 # NOTE: could be done as comb
697 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
698 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
704 class FPAddAlignSingleAdd(FPState
):
706 def __init__(self
, width
, id_wid
):
707 FPState
.__init
__(self
, "align")
710 self
.a1o
= self
.ospec()
713 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
716 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
718 def setup(self
, m
, i
):
719 """ links module to inputs and outputs
721 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
724 m
.d
.comb
+= o
.eq(mod
.o
)
726 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
729 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
731 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
733 self
.a1modo
= a1mod
.o
735 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
738 m
.next
= "normalise_1"
741 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ create the record passed out of add stage 0

        width:  forwarded to FPNumBase
        id_wid: shape of the ``mid`` signal
    """
    # result number (FPNumBase built with False as second argument)
    self.z = FPNumBase(width, False)
    # mantissa total: four bits wider than z's mantissa
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    # reset-less id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
749 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
752 class FPAddStage0Mod
:
754 def __init__(self
, width
, id_wid
):
757 self
.i
= self
.ispec()
758 self
.o
= self
.ospec()
761 return FPNumBase2Ops(self
.width
, self
.id_wid
)
764 return FPAddStage0Data(self
.width
, self
.id_wid
)
766 def setup(self
, m
, i
):
767 """ links module to inputs and outputs
769 m
.submodules
.add0
= self
770 m
.d
.comb
+= self
.i
.eq(i
)
772 def elaborate(self
, platform
):
774 m
.submodules
.add0_in_a
= self
.i
.a
775 m
.submodules
.add0_in_b
= self
.i
.b
776 m
.submodules
.add0_out_z
= self
.o
.z
778 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
779 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
781 # store intermediate tests (and zero-extended mantissas)
782 seq
= Signal(reset_less
=True)
783 mge
= Signal(reset_less
=True)
784 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
785 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
786 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
787 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
788 am0
.eq(Cat(self
.i
.a
.m
, 0)),
789 bm0
.eq(Cat(self
.i
.b
.m
, 0))
791 # same-sign (both negative or both positive) add mantissas
794 self
.o
.tot
.eq(am0
+ bm0
),
795 self
.o
.z
.s
.eq(self
.i
.a
.s
)
797 # a mantissa greater than b, use a
800 self
.o
.tot
.eq(am0
- bm0
),
801 self
.o
.z
.s
.eq(self
.i
.a
.s
)
803 # b mantissa greater than a, use b
806 self
.o
.tot
.eq(bm0
- am0
),
807 self
.o
.z
.s
.eq(self
.i
.b
.s
)
812 class FPAddStage0(FPState
):
813 """ First stage of add. covers same-sign (add) and subtract
814 special-casing when mantissas are greater or equal, to
815 give greatest accuracy.
def __init__(self, width, id_wid):
    """ create the "add_0" state and its module

        width:  forwarded to FPAddStage0Mod
        id_wid: forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod's constructor takes both width and id_wid, so both
    # must be supplied here.
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record built from the module's own output spec
    self.o = self.mod.ospec()
823 def setup(self
, m
, i
):
824 """ links module to inputs and outputs
828 # NOTE: these could be done as combinatorial (merge add0+add1)
829 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
835 class FPAddStage1Data
:
837 def __init__(self
, width
, id_wid
):
838 self
.z
= FPNumBase(width
, False)
840 self
.mid
= Signal(id_wid
, reset_less
=True)
843 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
847 class FPAddStage1Mod(FPState
):
848 """ Second stage of add: preparation for normalisation.
849 detects when tot sum is too big (tot[27] is kinda a carry bit)
852 def __init__(self
, width
, id_wid
):
855 self
.i
= self
.ispec()
856 self
.o
= self
.ospec()
859 return FPAddStage0Data(self
.width
, self
.id_wid
)
862 return FPAddStage1Data(self
.width
, self
.id_wid
)
864 def setup(self
, m
, i
):
865 """ links module to inputs and outputs
867 m
.submodules
.add1
= self
868 m
.submodules
.add1_out_overflow
= self
.o
.of
870 m
.d
.comb
+= self
.i
.eq(i
)
872 def elaborate(self
, platform
):
874 #m.submodules.norm1_in_overflow = self.in_of
875 #m.submodules.norm1_out_overflow = self.out_of
876 #m.submodules.norm1_in_z = self.in_z
877 #m.submodules.norm1_out_z = self.out_z
878 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
879 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
880 # tot[-1] (MSB) gets set when the sum overflows. shift result down
881 with m
.If(self
.i
.tot
[-1]):
883 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
884 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
885 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
886 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
887 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
888 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
890 # tot[-1] (MSB) zero case
893 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
894 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
895 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
896 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
897 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
902 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ create the "add_1" state and its module

        width:  forwarded to FPAddStage1Mod and to the FPNumBase output
        id_wid: forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod's constructor takes both width and id_wid, so both
    # must be supplied here.
    self.mod = FPAddStage1Mod(width, id_wid)
    # result number and its overflow companion
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe for the normalise stage (plain Signal, default reset behaviour)
    self.norm_stb = Signal()
911 def setup(self
, m
, i
):
912 """ links module to inputs and outputs
916 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
918 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
919 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
920 m
.d
.sync
+= self
.norm_stb
.eq(1)
923 m
.next
= "normalise_1"
926 class FPNormaliseModSingle
:
928 def __init__(self
, width
):
930 self
.in_z
= self
.ispec()
931 self
.out_z
= self
.ospec()
934 return FPNumBase(self
.width
, False)
937 return FPNumBase(self
.width
, False)
939 def setup(self
, m
, i
):
940 """ links module to inputs and outputs
942 m
.submodules
.normalise
= self
943 m
.d
.comb
+= self
.i
.eq(i
)
945 def elaborate(self
, platform
):
948 mwid
= self
.out_z
.m_width
+2
949 pe
= PriorityEncoder(mwid
)
950 m
.submodules
.norm_pe
= pe
952 m
.submodules
.norm1_out_z
= self
.out_z
953 m
.submodules
.norm1_in_z
= self
.in_z
955 in_z
= FPNumBase(self
.width
, False)
957 m
.submodules
.norm1_insel_z
= in_z
958 m
.submodules
.norm1_insel_overflow
= in_of
960 espec
= (len(in_z
.e
), True)
961 ediff_n126
= Signal(espec
, reset_less
=True)
962 msr
= MultiShiftRMerge(mwid
, espec
)
963 m
.submodules
.multishift_r
= msr
965 m
.d
.comb
+= in_z
.eq(self
.in_z
)
966 m
.d
.comb
+= in_of
.eq(self
.in_of
)
967 # initialise out from in (overridden below)
968 m
.d
.comb
+= self
.out_z
.eq(in_z
)
969 m
.d
.comb
+= self
.out_of
.eq(in_of
)
970 # normalisation decrease condition
971 decrease
= Signal(reset_less
=True)
972 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
975 # *sigh* not entirely obvious: count leading zeros (clz)
976 # with a PriorityEncoder: to find from the MSB
977 # we reverse the order of the bits.
978 temp_m
= Signal(mwid
, reset_less
=True)
979 temp_s
= Signal(mwid
+1, reset_less
=True)
980 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
982 # cat round and guard bits back into the mantissa
983 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
984 pe
.i
.eq(temp_m
[::-1]), # inverted
985 clz
.eq(pe
.o
), # count zeros from MSB down
986 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
987 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
988 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ create a record of a rounding flag, a result number and an id

        width:  forwarded to FPNumBase
        id_wid: shape of the ``mid`` signal
    """
    # reset-less single-bit flag
    self.roundz = Signal(reset_less=True)
    # result number (FPNumBase built with False as second argument)
    self.z = FPNumBase(width, False)
    # reset-less id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
1001 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1004 class FPNorm1ModSingle
:
1006 def __init__(self
, width
, id_wid
):
1008 self
.id_wid
= id_wid
1009 self
.i
= self
.ispec()
1010 self
.o
= self
.ospec()
1013 return FPAddStage1Data(self
.width
, self
.id_wid
)
1016 return FPNorm1Data(self
.width
, self
.id_wid
)
1018 def setup(self
, m
, i
):
1019 """ links module to inputs and outputs
1021 m
.submodules
.normalise_1
= self
1022 m
.d
.comb
+= self
.i
.eq(i
)
1024 def elaborate(self
, platform
):
1027 mwid
= self
.o
.z
.m_width
+2
1028 pe
= PriorityEncoder(mwid
)
1029 m
.submodules
.norm_pe
= pe
1032 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1034 m
.submodules
.norm1_out_z
= self
.o
.z
1035 m
.submodules
.norm1_out_overflow
= of
1036 m
.submodules
.norm1_in_z
= self
.i
.z
1037 m
.submodules
.norm1_in_overflow
= self
.i
.of
1040 m
.submodules
.norm1_insel_z
= i
.z
1041 m
.submodules
.norm1_insel_overflow
= i
.of
1043 espec
= (len(i
.z
.e
), True)
1044 ediff_n126
= Signal(espec
, reset_less
=True)
1045 msr
= MultiShiftRMerge(mwid
, espec
)
1046 m
.submodules
.multishift_r
= msr
1048 m
.d
.comb
+= i
.eq(self
.i
)
1049 # initialise out from in (overridden below)
1050 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1051 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1052 m
.d
.comb
+= of
.eq(i
.of
)
1053 # normalisation increase/decrease conditions
1054 decrease
= Signal(reset_less
=True)
1055 increase
= Signal(reset_less
=True)
1056 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1057 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1059 with m
.If(decrease
):
1060 # *sigh* not entirely obvious: count leading zeros (clz)
1061 # with a PriorityEncoder: to find from the MSB
1062 # we reverse the order of the bits.
1063 temp_m
= Signal(mwid
, reset_less
=True)
1064 temp_s
= Signal(mwid
+1, reset_less
=True)
1065 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1066 # make sure that the amount to decrease by does NOT
1067 # go below the minimum non-INF/NaN exponent
1068 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1071 # cat round and guard bits back into the mantissa
1072 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1073 pe
.i
.eq(temp_m
[::-1]), # inverted
1074 clz
.eq(limclz
), # count zeros from MSB down
1075 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1076 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1077 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1078 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1079 # overflow in bits 0..1: got shifted too (leave sticky)
1080 of
.guard
.eq(temp_s
[1]), # guard
1081 of
.round_bit
.eq(temp_s
[0]), # round
1084 with m
.Elif(increase
):
1085 temp_m
= Signal(mwid
+1, reset_less
=True)
1087 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1089 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1090 # connect multi-shifter to inp/out mantissa (and ediff)
1092 msr
.diff
.eq(ediff_n126
),
1093 self
.o
.z
.m
.eq(msr
.m
[3:]),
1094 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1095 # overflow in bits 0..1: got shifted too (leave sticky)
1096 of
.guard
.eq(temp_s
[2]), # guard
1097 of
.round_bit
.eq(temp_s
[1]), # round
1098 of
.sticky
.eq(temp_s
[0]), # sticky
1099 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1105 class FPNorm1ModMulti
:
1107 def __init__(self
, width
, single_cycle
=True):
1109 self
.in_select
= Signal(reset_less
=True)
1110 self
.in_z
= FPNumBase(width
, False)
1111 self
.in_of
= Overflow()
1112 self
.temp_z
= FPNumBase(width
, False)
1113 self
.temp_of
= Overflow()
1114 self
.out_z
= FPNumBase(width
, False)
1115 self
.out_of
= Overflow()
1117 def elaborate(self
, platform
):
1120 m
.submodules
.norm1_out_z
= self
.out_z
1121 m
.submodules
.norm1_out_overflow
= self
.out_of
1122 m
.submodules
.norm1_temp_z
= self
.temp_z
1123 m
.submodules
.norm1_temp_of
= self
.temp_of
1124 m
.submodules
.norm1_in_z
= self
.in_z
1125 m
.submodules
.norm1_in_overflow
= self
.in_of
1127 in_z
= FPNumBase(self
.width
, False)
1129 m
.submodules
.norm1_insel_z
= in_z
1130 m
.submodules
.norm1_insel_overflow
= in_of
1132 # select which of temp or in z/of to use
1133 with m
.If(self
.in_select
):
1134 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1135 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1137 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1138 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1139 # initialise out from in (overridden below)
1140 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1141 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1142 # normalisation increase/decrease conditions
1143 decrease
= Signal(reset_less
=True)
1144 increase
= Signal(reset_less
=True)
1145 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1146 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1147 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1149 with m
.If(decrease
):
1151 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1152 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1153 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1154 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1155 self
.out_of
.round_bit
.eq(0), # reset round bit
1156 self
.out_of
.m0
.eq(in_of
.guard
),
1159 with m
.Elif(increase
):
1161 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1162 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1163 self
.out_of
.guard
.eq(in_z
.m
[0]),
1164 self
.out_of
.m0
.eq(in_z
.m
[1]),
1165 self
.out_of
.round_bit
.eq(in_of
.guard
),
1166 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1172 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ create the single-cycle "normalise_1" state and its module

        width:        forwarded to FPNorm1ModSingle and to the FPNumBase
                      output
        id_wid:       forwarded to FPNorm1ModSingle
        single_cycle: accepted for interface compatibility; not used by
                      this constructor
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle's constructor takes both width and id_wid, so
    # both must be supplied here.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # normalised result and its reset-less rounding flag
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1180 def setup(self
, m
, i
):
1181 """ links module to inputs and outputs
1183 self
.mod
.setup(m
, i
, self
.out_z
)
1185 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1187 def action(self
, m
):
1191 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ create the multi-cycle "normalise_1" state and its module

        width:  forwarded to FPNorm1ModMulti and to the FPNumBase members
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # handshake: stb mirrors the incoming strobe (wired in setup),
    # ack is created with an explicit reset value of 0
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)

    # loop control flags used by action()
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # intermediate number / overflow pair
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # final result and its rounding flag
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1205 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1206 """ links module to inputs and outputs
1208 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1209 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1210 self
.out_z
, self
.out_norm
)
1212 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1213 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1215 def action(self
, m
):
1216 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1217 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1218 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1219 with m
.If(self
.out_norm
):
1220 with m
.If(self
.in_accept
):
1225 m
.d
.sync
+= self
.ack
.eq(0)
1227 # normalisation not required (or done).
1229 m
.d
.sync
+= self
.ack
.eq(1)
1230 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
# FPNormToPack: FSM state named "normalise_1" that builds four modules in
# setup() -- FPNorm1ModSingle, FPRoundMod, FPCorrectionsMod, FPPackMod --
# and chains them through combinatorial copies (n_out, r_out_z, c_out_z);
# only the pack module's mid and z.v are registered into out_z.
# action() moves the FSM to "pack_put_z".
# NOTE(review): the listing is lossy -- embedded lines 1238-1240, 1243,
# 1248-1249 and 1252 are missing.  The two `return` statements below are
# presumably the bodies of ispec()/ospec(); self.width is read but its
# assignment is not visible; no call connecting nmod to the input `i`
# is visible.
1233 class FPNormToPack(FPState
):
1235 def __init__(self
, width
, id_wid
):
1236 FPState
.__init
__(self
, "normalise_1")
1237 self
.id_wid
= id_wid
1241 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1244 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1246 def setup(self
, m
, i
):
1247 """ links module to inputs and outputs
1250 # Normalisation (chained to input in_z+in_of)
1251 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1253 n_out
= nmod
.ospec()
1254 m
.d
.comb
+= n_out
.eq(nmod
.o
)
1256 # Rounding (chained to normalisation)
1257 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1258 rmod
.setup(m
, n_out
)
1259 r_out_z
= rmod
.ospec()
1260 m
.d
.comb
+= r_out_z
.eq(rmod
.out_z
)
1262 # Corrections (chained to rounding)
1263 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1264 cmod
.setup(m
, r_out_z
)
1265 c_out_z
= cmod
.ospec()
1266 m
.d
.comb
+= c_out_z
.eq(cmod
.out_z
)
1268 # Pack (chained to corrections)
1269 self
.pmod
= FPPackMod(self
.width
, self
.id_wid
)
1270 self
.pmod
.setup(m
, c_out_z
)
1271 self
.out_z
= self
.pmod
.ospec()
1273 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.pmod
.o
.mid
)
1274 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.pmod
.o
.z
.v
) # outputs packed result
1276 def action(self
, m
):
1277 m
.next
= "pack_put_z"
# Data record pairing a floating-point number `z` (FPNumBase) with an
# identifier signal `mid` that is id_wid bits wide.
# NOTE(review): the class header and the `def` line of the method that
# owns the `return` below are not visible in this listing; the return
# builds the list of assignments copying z and mid from `i`.
1282 def __init__(self
, width
, id_wid
):
1283 self
.z
= FPNumBase(width
, False)
1284 self
.mid
= Signal(id_wid
, reset_less
=True)
1287 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
# Rounding module (registers itself as submodule "roundz").
# NOTE(review): the class header, the self.width assignment, the
# ispec()/ospec() `def` lines and the `m = Module()` / `return m` lines
# of elaborate() are not visible in this listing.
# Constructor: stores id_wid and creates the input record `i` and the
# output record `out_z` from ispec()/ospec().
1292 def __init__(self
, width
, id_wid
):
1294 self
.id_wid
= id_wid
1295 self
.i
= self
.ispec()
1296 self
.out_z
= self
.ospec()
# the next two returns build an FPNorm1Data (input side) and an
# FPRoundData (output side) from self.width / self.id_wid
1299 return FPNorm1Data(self
.width
, self
.id_wid
)
1302 return FPRoundData(self
.width
, self
.id_wid
)
# setup: attach this module to m and drive its input combinatorially
1304 def setup(self
, m
, i
):
1305 m
.submodules
.roundz
= self
1306 m
.d
.comb
+= self
.i
.eq(i
)
# elaborate: out_z defaults to a copy of the input; when i.roundz is set
# the mantissa is incremented, and when the input mantissa equals m1s
# (all ones) the exponent is incremented as well.
# NOTE(review): presumably the second If is nested inside the first --
# indentation is lost in this listing.
1308 def elaborate(self
, platform
):
1310 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1311 with m
.If(self
.i
.roundz
):
1312 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1313 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1314 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
class FPRound(FPState):
    """FSM state named "round": owns an FPRoundMod and its output record."""

    def __init__(self, width, id_wid):
        """Create the wrapped rounding module and the output record.

        * width: forwarded to FPRoundMod
        * id_wid: forwarded to FPRoundMod (width of the ``mid`` identifier)
        """
        FPState.__init__(self, "round")
        # FPRoundMod needs id_wid as well as width: its ispec()/ospec()
        # build their records from both, and FPNormToPack constructs it
        # with the same two arguments.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Return the input specification of the wrapped module."""
        return self.mod.ispec()

    def ospec(self):
        """Return the output specification of the wrapped module."""
        return self.mod.ospec()
# setup: connects the wrapped rounding module to the input `i`, then
# registers (sync domain) the module's out_z record and its mid into
# this state's out_z.
# NOTE(review): the module built by this class exposes `out_z`; no `o`
# attribute is visible on it, so `self.mod.o.mid` below looks stale --
# confirm.  Embedded lines 1333 and 1335-1336 are missing from this
# listing.
1331 def setup(self
, m
, i
):
1332 """ links module to inputs and outputs
1334 self
.mod
.setup(m
, i
)
1337 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1338 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
# action: unconditionally selects "corrections" as the next FSM state
1340 def action(self
, m
):
1341 m
.next
= "corrections"
# FPCorrectionsMod: combinatorial module (registers itself as submodule
# "corrections") whose input and output records are both FPRoundData.
# NOTE(review): the listing is lossy -- the self.width assignment, the
# ispec()/ospec() `def` lines, the docstring terminator and the
# `m = Module()` / `return m` lines of elaborate() are not visible.
1344 class FPCorrectionsMod
:
# Constructor: stores id_wid and creates the input record `i` and the
# output record `out_z` from ispec()/ospec().
1346 def __init__(self
, width
, id_wid
):
1348 self
.id_wid
= id_wid
1349 self
.i
= self
.ispec()
1350 self
.out_z
= self
.ospec()
1353 return FPRoundData(self
.width
, self
.id_wid
)
1356 return FPRoundData(self
.width
, self
.id_wid
)
1358 def setup(self
, m
, i
):
1359 """ links module to inputs and outputs
1361 m
.submodules
.corrections
= self
1362 m
.d
.comb
+= self
.i
.eq(i
)
# elaborate: adds the input and output numbers as submodules, copies the
# input to the output, and when the input number reports is_denormalised
# overrides the output exponent with the input's N127 value.
1364 def elaborate(self
, platform
):
1366 m
.submodules
.corr_in_z
= self
.i
.z
1367 m
.submodules
.corr_out_z
= self
.out_z
.z
1368 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1369 with m
.If(self
.i
.z
.is_denormalised
):
1370 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
class FPCorrections(FPState):
    """FSM state named "corrections": owns an FPCorrectionsMod and its
    output record."""

    def __init__(self, width, id_wid):
        """Create the wrapped corrections module and the output record.

        * width: forwarded to FPCorrectionsMod
        * id_wid: forwarded to FPCorrectionsMod (width of the ``mid``
          identifier)
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid;
        # passing width alone raises TypeError at construction time.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Return the input specification of the wrapped module."""
        return self.mod.ispec()

    def ospec(self):
        """Return the output specification of the wrapped module."""
        return self.mod.ospec()
def setup(self, m, in_z):
    """Link the corrections module to its input and register its output.

    * m: the nmigen Module being built
    * in_z: input record, forwarded to the wrapped module's setup()
    """
    self.mod.setup(m, in_z)

    # FPCorrectionsMod publishes its result as ``out_z`` (it has no ``o``
    # attribute), so both the record and its id are taken from there.
    m.d.sync += self.out_z.eq(self.mod.out_z)
    m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)
# action: FSM transition hook for the "corrections" state.
# NOTE(review): the body is not visible in this listing (the embedded
# numbering jumps from 1395 to 1401) -- confirm the target state against
# the full file.
1395 def action(self
, m
):
# Data record pairing an output number `z` (FPNumOut) with an identifier
# signal `mid` that is id_wid bits wide.
# NOTE(review): the class header and the `def` line owning the `return`
# below are not visible in this listing; the return builds the list of
# assignments copying z and mid from `i`.
1401 def __init__(self
, width
, id_wid
):
1402 self
.z
= FPNumOut(width
, False)
1403 self
.mid
= Signal(id_wid
, reset_less
=True)
1406 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
# Pack module (registers itself as submodule "pack"): input record is an
# FPRoundData, output record `o` is an FPPackData.
# NOTE(review): the class header, the self.width assignment, the
# ispec()/ospec() `def` lines, `m = Module()` / `return m` and the
# branch header at embedded line 1435 are not visible in this listing.
1411 def __init__(self
, width
, id_wid
):
1413 self
.id_wid
= id_wid
1414 self
.i
= self
.ispec()
1415 self
.o
= self
.ospec()
1418 return FPRoundData(self
.width
, self
.id_wid
)
1421 return FPPackData(self
.width
, self
.id_wid
)
1423 def setup(self
, m
, in_z
):
1424 """ links module to inputs and outputs
1426 m
.submodules
.pack
= self
1427 m
.d
.comb
+= self
.i
.eq(in_z
)
# elaborate: passes mid straight through; when the input number reports
# is_overflowed the output is set via inf(sign), and the final statement
# builds the output from sign, exponent and mantissa via create().
# NOTE(review): presumably the create() call is the else-branch of the
# overflow test -- the branch header is not visible.
1429 def elaborate(self
, platform
):
1431 m
.submodules
.pack_in_z
= self
.i
.z
1432 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1433 with m
.If(self
.i
.z
.is_overflowed
):
1434 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1436 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
# Data record pairing an output number `z` (FPNumOut) with an identifier
# signal `mid` that is id_wid bits wide.
# NOTE(review): the class header and the `def` line owning the `return`
# below are not visible in this listing; the visible body matches the
# record defined at embedded lines 1401-1406 -- check for a duplicate
# definition in the full file.
1441 def __init__(self
, width
, id_wid
):
1442 self
.z
= FPNumOut(width
, False)
1443 self
.mid
= Signal(id_wid
, reset_less
=True)
1446 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """FSM state named "pack": owns an FPPackMod and registers its packed
    result before handing over to the "pack_put_z" state."""

    def __init__(self, width, id_wid):
        """Create the wrapped pack module and the output record.

        * width: forwarded to FPPackMod
        * id_wid: forwarded to FPPackMod (width of the ``mid`` identifier)
        """
        FPState.__init__(self, "pack")
        # FPPackMod is constructed with (width, id_wid) everywhere else in
        # this file; its ispec()/ospec() read both values.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Return the input specification of the wrapped module."""
        return self.mod.ispec()

    def ospec(self):
        """Return the output specification of the wrapped module."""
        return self.mod.ospec()

    def setup(self, m, in_z):
        """Link the pack module to its input and register its output.

        * m: the nmigen Module being built
        * in_z: input record, forwarded to the wrapped module's setup()
        """
        self.mod.setup(m, in_z)

        # The pack module publishes its result as ``o`` (a record with
        # ``z`` and ``mid``); the packed bits live in ``z.v`` on both the
        # module output and this state's output record, as in
        # FPNormToPack.setup.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """Select "pack_put_z" as the next FSM state."""
        m.next = "pack_put_z"
# FPPutZ: FSM state that presents a result value on out_z and waits for
# the consumer's strobe/ack handshake before moving to `to_state`
# (defaults to "get_ops" when None is passed).
# NOTE(review): the listing is lossy -- embedded lines 1481-1482, 1489,
# 1491 and 1495 are missing, so the assignments of self.in_z/self.out_z,
# the list brackets around the value copy and the branch header before
# the final statement are not visible.
1474 class FPPutZ(FPState
):
1476 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1477 FPState
.__init
__(self
, state
)
1478 if to_state
is None:
1479 to_state
= "get_ops"
1480 self
.to_state
= to_state
1483 self
.in_mid
= in_mid
1484 self
.out_mid
= out_mid
# action: copies in_mid to out_mid when an id is in use, copies the value
# bits from in_z.v to out_z.z.v, and once stb and ack are both high
# drops stb and advances to to_state; the last statement raises stb.
# NOTE(review): presumably stb is raised in the else-branch of the
# handshake test -- confirm against the full file.
1486 def action(self
, m
):
1487 if self
.in_mid
is not None:
1488 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1490 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1492 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1493 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1494 m
.next
= self
.to_state
1496 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
# FPPutZIdx: variant of the put-z state that selects its output from the
# collection out_zs using in_mid as the index, then performs the same
# strobe/ack handshake before moving to `to_state` (defaults to
# "get_ops" when None is passed).
# NOTE(review): the listing is lossy -- embedded lines 1506, 1515-1516,
# 1518 and 1522 are missing (self.in_z assignment, list brackets and the
# branch header before the final statement are not visible).
1499 class FPPutZIdx(FPState
):
1501 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1502 FPState
.__init
__(self
, state
)
1503 if to_state
is None:
1504 to_state
= "get_ops"
1505 self
.to_state
= to_state
1507 self
.out_zs
= out_zs
1508 self
.in_mid
= in_mid
# action: outz_stb/outz_ack are combinatorial copies of the selected
# output's stb and ack; the value bits are copied from in_z.v; when both
# handshake signals are high stb is dropped and the FSM advances, and
# the last statement raises the selected stb.
1510 def action(self
, m
):
1511 outz_stb
= Signal(reset_less
=True)
1512 outz_ack
= Signal(reset_less
=True)
1513 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1514 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1517 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1519 with m
.If(outz_stb
& outz_ack
):
1520 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1521 m
.next
= self
.to_state
1523 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
# FPADDBaseData: input record for the adder -- two operand signals `a`
# and `b`, each `width` bits, plus an identifier `mid` of id_wid bits.
# NOTE(review): embedded lines 1528 and 1534 are missing from this
# listing; the `return` below builds the list of assignments copying
# a, b and mid from `i`, and its `def` line is not visible.
1525 class FPADDBaseData
:
1527 def __init__(self
, width
, id_wid
):
1529 self
.id_wid
= id_wid
1530 self
.a
= Signal(width
)
1531 self
.b
= Signal(width
)
1532 self
.mid
= Signal(id_wid
, reset_less
=True)
1535 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
# Output record pairing an FPOp `z` with an identifier signal `mid` that
# is id_wid bits wide.
# NOTE(review): the class header and the `def` line owning the `return`
# below are not visible in this listing; the return builds the list of
# assignments copying z and mid from `i`.
1539 def __init__(self
, width
, id_wid
):
1540 self
.z
= FPOp(width
)
1541 self
.mid
= Signal(id_wid
, reset_less
=True)
1544 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
# Constructor of the adder FSM module: stores id_wid, single_cycle and
# compact, and creates the input trigger in_t plus the input/output
# records i and o from ispec()/ospec().
# NOTE(review): the class header, the docstring quote lines, the
# self.width assignment and the ispec()/ospec() `def` lines are not
# visible in this listing; the four "*" lines are docstring text, and
# the two `return` statements build an FPADDBaseData and an FPOpData.
1549 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1552 * width: bit-width of IEEE754. supported: 16, 32, 64
1553 * id_wid: an identifier that is sync-connected to the input
1554 * single_cycle: True indicates each stage to complete in 1 clock
1555 * compact: True indicates a reduced number of stages
1558 self
.id_wid
= id_wid
1559 self
.single_cycle
= single_cycle
1560 self
.compact
= compact
1562 self
.in_t
= Trigger()
1563 self
.i
= self
.ispec()
1564 self
.o
= self
.ospec()
1569 return FPADDBaseData(self
.width
, self
.id_wid
)
1572 return FPOpData(self
.width
, self
.id_wid
)
# add_state: appends `state` to self.states.
# NOTE(review): callers in this file use the return value
# (`get = self.add_state(...)`), so a return of `state` presumably
# follows at embedded line 1576, which is not visible in this listing.
1574 def add_state(self
, state
):
1575 self
.states
.append(state
)
# get_fragment: registers the output number and input trigger as
# submodules, builds the state list via get_compact_fragment() or
# get_longer_fragment(), then opens an FSM with one State per collected
# state, keyed by its state_from name.
# NOTE(review): the `m = Module()` line, the condition choosing between
# the two fragment builders, the per-state body and the return are not
# visible in this listing.
1578 def get_fragment(self
, platform
=None):
1579 """ creates the HDL code-fragment for FPAdd
1582 m
.submodules
.out_z
= self
.o
.z
1583 m
.submodules
.in_t
= self
.in_t
1585 self
.get_compact_fragment(m
, platform
)
1587 self
.get_longer_fragment(m
, platform
)
1589 with m
.FSM() as fsm
:
1591 for state
in self
.states
:
1592 with m
.State(state
.state_from
):
# get_longer_fragment: builds the long (one state per stage) pipeline --
# get_ops, special cases, denormalise, align, add stage 0/1, normalise,
# round, corrections, pack -- followed by the two FPPutZ output states
# "pack_put_z" and "put_z".  Each stage is created through add_state()
# and wired to the previous stage's outputs with setup(); single_cycle
# selects between the Single and Multi variants of align and normalise.
# NOTE(review): the listing is lossy (e.g. embedded lines 1600,
# 1602-1604, 1614, 1627 are missing), so the definitions of `a`/`b` and
# the else-branch headers are not visible.  Several setup() calls here
# pass more arguments than the setup() signatures visible elsewhere in
# this file accept (e.g. FPRound, FPCorrections, FPPack) -- this path
# looks stale relative to get_compact_fragment; confirm before use.
1597 def get_longer_fragment(self
, m
, platform
=None):
1599 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1601 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1605 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1606 sc
.setup(m
, a
, b
, self
.in_mid
)
1608 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1609 dn
.setup(m
, a
, b
, sc
.in_mid
)
1611 if self
.single_cycle
:
1612 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1613 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1615 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1616 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1618 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1619 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1621 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1622 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1624 if self
.single_cycle
:
1625 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1626 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1628 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1629 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1631 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1632 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1634 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1635 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1637 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1638 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1640 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1641 pa
.in_mid
, self
.out_mid
))
1643 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1644 pa
.in_mid
, self
.out_mid
))
# get_compact_fragment: builds the reduced pipeline -- get_ops, a
# combined special-cases/denormalise state, a combined align/add state,
# and FPNormToPack -- followed by the two FPPutZ output states
# "pack_put_z" (fed from the normalise-to-pack result) and "put_z" (fed
# from the special-cases result).  Both output states write to self.o
# and self.o.mid.
# NOTE(review): embedded lines 1653 and 1656 are missing from this
# listing, so the setup() calls for `sc` and `alm` are not visible.
1646 def get_compact_fragment(self
, m
, platform
=None):
1648 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1649 self
.width
, self
.id_wid
))
1650 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1652 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1655 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1658 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1659 n1
.setup(m
, alm
.a1o
)
1661 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1662 n1
.out_z
.mid
, self
.o
.mid
))
1664 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1665 sc
.o
.mid
, self
.o
.mid
))
# FPADDBase: FSM state named "fpadd" that embeds an FPADDBaseMod as the
# submodule "fpadd" and bridges the outer strobe/ack handshake to it.
# NOTE(review): this listing is lossy -- many embedded lines are missing
# (docstring quote lines, ispec()/ospec() `def` lines, list brackets and
# branch headers inside setup()/action()), so the nesting of the
# statements in action() cannot be read off reliably here.
1668 class FPADDBase(FPState
):
# Constructor: creates the wrapped module, the output and input records,
# the input trigger and the handshake signals.  The three "*" lines are
# docstring text.
1670 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1673 * width: bit-width of IEEE754. supported: 16, 32, 64
1674 * id_wid: an identifier that is sync-connected to the input
1675 * single_cycle: True indicates each stage to complete in 1 clock
1677 FPState
.__init
__(self
, "fpadd")
1679 self
.single_cycle
= single_cycle
1680 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1681 self
.o
= self
.ospec()
1683 self
.in_t
= Trigger()
1684 self
.i
= self
.ispec()
1686 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1687 self
.in_accept
= Signal(reset_less
=True)
1688 self
.add_stb
= Signal(reset_less
=True)
1689 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1692 return self
.mod
.ispec()
1695 return self
.mod
.ospec()
# setup: combinatorially wires the input record and trigger through to
# the wrapped module and mirrors the module's output (mid, value, stb)
# onto self.o, passing ack back the other way; z_done follows the
# module's o.z.trigger.  In the sync domain add_stb is registered and
# add_ack / o.z.ack default to 0.  The in_mid parameter is not used in
# the visible body.
1697 def setup(self
, m
, i
, add_stb
, in_mid
):
1698 m
.d
.comb
+= [self
.i
.eq(i
),
1699 self
.mod
.i
.eq(self
.i
),
1700 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1701 #self.add_stb.eq(add_stb),
1702 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1703 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1704 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1705 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1706 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1707 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1710 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1711 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1712 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1713 #m.d.sync += self.in_t.stb.eq(0)
1715 m
.submodules
.fpadd
= self
.mod
# action: in_accept is add_stb AND NOT add_ack.  While z_done is low the
# visible code acknowledges an accepted operand pair and raises
# in_t.stb, otherwise clears both; the "done" part acknowledges and
# copies id and value out, then drives out_z.stb around out_z.trigger.
# NOTE(review): the "done" part reads self.in_mid, self.out_mid,
# self.out_z and self.mod.out_mid / self.mod.out_z, none of which are
# created in the visible constructor (which uses self.i / self.o) --
# looks stale; confirm against the full file.
1717 def action(self
, m
):
1719 # in_accept is set on incoming strobe HIGH and ack LOW.
1720 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1722 #with m.If(self.in_t.ack):
1723 # m.d.sync += self.in_t.stb.eq(0)
1724 with m
.If(~self
.z_done
):
1725 # not done: test for accepting an incoming operand pair
1726 with m
.If(self
.in_accept
):
1728 self
.add_ack
.eq(1), # acknowledge receipt...
1729 self
.in_t
.stb
.eq(1), # initiate add
1732 m
.d
.sync
+= [self
.add_ack
.eq(0),
1733 self
.in_t
.stb
.eq(0),
1737 # done: acknowledge, and write out id and value
1738 m
.d
.sync
+= [self
.add_ack
.eq(1),
1745 if self
.in_mid
is not None:
1746 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1749 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1751 # move to output state on detecting z ack
1752 with m
.If(self
.out_z
.trigger
):
1753 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1756 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
# Result-array holder: keeps an Array of per-slot outputs (self.res,
# each named "out_z_<i>") plus an input FPOp `in_z` and an identifier
# `in_mid`.
# NOTE(review): the class header and several body lines are not visible
# in this listing (embedded lines 1761, 1763, 1765, 1767, 1778-1779).
# The loop reads `rs_sz`, which is not a parameter of the visible
# signature -- confirm where it comes from.
1760 def __init__(self
, width
, id_wid
):
1762 self
.id_wid
= id_wid
1764 for i
in range(rs_sz
):
1766 out_z
.name
= "out_z_%d" % i
1768 self
.res
= Array(res
)
1769 self
.in_z
= FPOp(width
)
1770 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
# setup: combinatorially drives in_z and in_mid from the given sources
1772 def setup(self
, m
, in_z
, in_mid
):
1773 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1774 self
.in_mid
.eq(in_mid
)]
# get_fragment: registers in_z and the result array as submodules
1776 def get_fragment(self
, platform
=None):
1777 """ creates the HDL code-fragment for FPAdd
1780 m
.submodules
.res_in_z
= self
.in_z
1781 m
.submodules
+= self
.res
1793 """ FPADD: stages as follows:
1799 FPAddBase---> FPAddBaseMod
1801 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1803 FPAddBase is tricky: it is both a stage and *has* stages.
1804 Connection to FPAddBaseMod therefore requires an in stb/ack
1805 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1806 needs to be the thing that raises the incoming stb.
# Constructor of the top-level adder: stores id_wid and single_cycle,
# creates the FPID helper, and builds rs_sz operand pairs (named
# "in_a_<i>" / "in_b_<i>", collected in `rs`) and rs_sz outputs (named
# "out_z_<i>", collected in the Array self.res).
# NOTE(review): the class header, the docstring quote lines and the
# lines that create in_a / in_b / out_z and the rs / res lists are not
# visible in this listing; the three "*" lines are docstring text.
1809 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1812 * width: bit-width of IEEE754. supported: 16, 32, 64
1813 * id_wid: an identifier that is sync-connected to the input
1814 * single_cycle: True indicates each stage to complete in 1 clock
1817 self
.id_wid
= id_wid
1818 self
.single_cycle
= single_cycle
1820 #self.out_z = FPOp(width)
1821 self
.ids
= FPID(id_wid
)
1824 for i
in range(rs_sz
):
1827 in_a
.name
= "in_a_%d" % i
1828 in_b
.name
= "in_b_%d" % i
1829 rs
.append((in_a
, in_b
))
1833 for i
in range(rs_sz
):
1835 out_z
.name
= "out_z_%d" % i
1837 self
.res
= Array(res
)
# add_state: appends `state` to self.states.
# NOTE(review): callers in this file use the return value
# (`geta = self.add_state(...)`), so a return of `state` presumably
# follows at embedded line 1843, which is not visible in this listing.
1841 def add_state(self
, state
):
1842 self
.states
.append(state
)
# get_fragment: top-level FSM assembly.  Registers the operand pairs as
# submodules, takes the first pair as in_a / in_b, adds the "get_a" and
# "get_b" operand-fetch states, an FPADDBase state fed (sync domain)
# from a freshly created input spec, and a "put_z" FPPutZIdx state that
# writes into self.res; finally opens an FSM with one State per
# collected state, keyed by its state_from name.
# NOTE(review): the listing is lossy -- the `m = Module()` line, the
# argument lists and setup() calls of geta / getb, the definitions of
# `a`, `b` and `o`, the remaining FPPutZIdx arguments, the per-state
# body and the return are not visible.
1845 def get_fragment(self
, platform
=None):
1846 """ creates the HDL code-fragment for FPAdd
1849 m
.submodules
+= self
.rs
1851 in_a
= self
.rs
[0][0]
1852 in_b
= self
.rs
[0][1]
1854 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1859 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1864 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1865 ab
= self
.add_state(ab
)
1866 abd
= ab
.ispec() # create an input spec object for FPADDBase
1867 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1868 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1871 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1874 with m
.FSM() as fsm
:
1876 for state
in self
.states
:
1877 with m
.State(state
.state_from
):
# Script entry point: builds a 32-bit adder with a 5-bit id in
# single-cycle mode and hands it to nmigen's command-line `main`
# together with its port list.  Two alternatives are visible: the
# top-level FPADD (ports from the first operand pair, the first result
# and the id signals) and FPADDBase.
# NOTE(review): embedded lines 1884 and 1890 are missing from this
# listing, so the condition choosing between the two alternatives is not
# visible.  The FPADDBase alternative reads alu.in_a / in_b / out_z /
# in_mid / out_mid, which the visible FPADDBase constructor does not
# create -- looks stale; confirm.
1883 if __name__
== "__main__":
1885 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1886 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1887 alu
.rs
[0][1].ports() + \
1888 alu
.res
[0].ports() + \
1889 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1891 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1892 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1893 alu
.in_t
.ports() + \
1894 alu
.out_z
.ports() + \
1895 [alu
.in_mid
, alu
.out_mid
])
1898 # works... but don't use, just do "python fname.py convert -t v"
1899 #print (verilog.convert(alu, ports=[
1900 # ports=alu.in_a.ports() + \
1901 # alu.in_b.ports() + \
1902 # alu.out_z.ports())