f340897fdcd9ed8de6c981db22446c01d942c6ff
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """ records the name of the state this object represents

        state_from: stored unchanged on the instance.  Subclasses in this
        file pass string names such as "align" or "add_0" here.
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand input, the captured output and a decode flag

        width: passed to both FPOp and the output Signal
    """
    self.in_op = FPOp(width)  # incoming operand (its ack/stb/v are read in elaborate)
    self.out_op = Signal(width)  # receives in_op.v
    self.out_decode = Signal(reset_less=True)  # driven as in_op.ack & in_op.stb
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
212 self
.o
.mid
.eq(self
.i
.mid
)
217 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ builds the two-operand fetch state around an FPGet2OpMod

        in_state:  handed to FPState.__init__ (kept there as state_from)
        out_state: state assigned to m.next once out_decode is asserted
        width, id_wid: forwarded unchanged to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()  # separate output record with the module's output shape
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)  # mirrors mod.ack (see setup)
    self.out_decode = Signal(reset_less=True)  # mirrors mod.trigger (see setup)
230 def setup(self
, m
, i
, in_stb
, in_ack
):
231 """ links module to inputs and outputs
233 m
.submodules
.get_ops
= self
.mod
234 m
.d
.comb
+= self
.mod
.i
.eq(i
)
235 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
236 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
237 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
238 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
241 with m
.If(self
.out_decode
):
242 m
.next
= self
.out_state
245 self
.o
.eq(self
.mod
.o
),
248 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ bundles two FPNumBase operands (a, b) with an id signal (mid)

        width:   passed to both FPNumBase instances
        id_wid:  bit-width of the mid signal
        m_extra: second positional argument to FPNumBase
                 (presumably selects extra mantissa bits -- TODO confirm
                 against fpbase)
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    self.mid = Signal(id_wid, reset_less=True)
259 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
262 class FPAddSpecialCasesMod
:
263 """ special cases: NaNs, infs, zeros, denormalised
264 NOTE: some of these are unique to add. see "Special Operations"
265 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
268 def __init__(self
, width
, id_wid
):
271 self
.i
= self
.ispec()
272 self
.o
= self
.ospec()
273 self
.out_do_z
= Signal(reset_less
=True)
276 return FPNumBase2Ops(self
.width
, self
.id_wid
)
279 return FPPackData(self
.width
, self
.id_wid
)
281 def setup(self
, m
, i
, out_do_z
):
282 """ links module to inputs and outputs
284 m
.submodules
.specialcases
= self
285 m
.d
.comb
+= self
.i
.eq(i
)
286 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
288 def elaborate(self
, platform
):
291 m
.submodules
.sc_in_a
= self
.i
.a
292 m
.submodules
.sc_in_b
= self
.i
.b
293 m
.submodules
.sc_out_z
= self
.o
.z
296 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
299 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
301 # if a is NaN or b is NaN return NaN
302 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
303 m
.d
.comb
+= self
.out_do_z
.eq(1)
304 m
.d
.comb
+= self
.o
.z
.nan(0)
306 # XXX WEIRDNESS for FP16 non-canonical NaN handling
309 ## if a is zero and b is NaN return -b
310 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
311 # m.d.comb += self.out_do_z.eq(1)
312 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
314 ## if b is zero and a is NaN return -a
315 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
316 # m.d.comb += self.out_do_z.eq(1)
317 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
319 ## if a is -zero and b is NaN return -b
320 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
321 # m.d.comb += self.out_do_z.eq(1)
322 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
324 ## if b is -zero and a is NaN return -a
325 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
326 # m.d.comb += self.out_do_z.eq(1)
327 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
329 # if a is inf return inf (or NaN)
330 with m
.Elif(self
.i
.a
.is_inf
):
331 m
.d
.comb
+= self
.out_do_z
.eq(1)
332 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
333 # if a is inf and signs don't match return NaN
334 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
335 m
.d
.comb
+= self
.o
.z
.nan(0)
337 # if b is inf return inf
338 with m
.Elif(self
.i
.b
.is_inf
):
339 m
.d
.comb
+= self
.out_do_z
.eq(1)
340 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
342 # if a is zero and b zero return signed-a/b
343 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
344 m
.d
.comb
+= self
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
349 # if a is zero return b
350 with m
.Elif(self
.i
.a
.is_zero
):
351 m
.d
.comb
+= self
.out_do_z
.eq(1)
352 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
355 # if b is zero return a
356 with m
.Elif(self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
361 # if a equal to -b return zero (+ve zero)
362 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
363 m
.d
.comb
+= self
.out_do_z
.eq(1)
364 m
.d
.comb
+= self
.o
.z
.zero(0)
366 # Denormalised Number checks
368 m
.d
.comb
+= self
.out_do_z
.eq(0)
370 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
376 def __init__(self
, id_wid
):
379 self
.in_mid
= Signal(id_wid
, reset_less
=True)
380 self
.out_mid
= Signal(id_wid
, reset_less
=True)
386 if self
.id_wid
is not None:
387 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
390 class FPAddSpecialCases(FPState
):
391 """ special cases: NaNs, infs, zeros, denormalised
392 NOTE: some of these are unique to add. see "Special Operations"
393 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state and its combinatorial module

        width:  forwarded to FPAddSpecialCasesMod
        id_wid: forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid) with no default
    # for id_wid: passing width alone raises TypeError on construction.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()  # registered copy of the module output
    self.out_do_z = Signal(reset_less=True)  # handed to mod.setup in setup()
402 def setup(self
, m
, i
):
403 """ links module to inputs and outputs
405 self
.mod
.setup(m
, i
, self
.out_do_z
)
406 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
407 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
411 with m
.If(self
.out_do_z
):
414 m
.next
= "denormalise"
417 class FPAddSpecialCasesDeNorm(FPState
):
418 """ special cases: NaNs, infs, zeros, denormalised
419 NOTE: some of these are unique to add. see "Special Operations"
420 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
423 def __init__(self
, width
, id_wid
):
424 FPState
.__init
__(self
, "special_cases")
425 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
426 self
.out_z
= self
.smod
.ospec()
427 self
.out_do_z
= Signal(reset_less
=True)
429 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
430 self
.o
= self
.dmod
.ospec()
432 def setup(self
, m
, i
):
433 """ links module to inputs and outputs
435 self
.smod
.setup(m
, i
, self
.out_do_z
)
436 self
.dmod
.setup(m
, i
)
439 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
440 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
442 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
445 with m
.If(self
.out_do_z
):
451 class FPAddDeNormMod(FPState
):
453 def __init__(self
, width
, id_wid
):
456 self
.i
= self
.ispec()
457 self
.o
= self
.ospec()
460 return FPNumBase2Ops(self
.width
, self
.id_wid
)
463 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 def setup(self
, m
, i
):
466 """ links module to inputs and outputs
468 m
.submodules
.denormalise
= self
469 m
.d
.comb
+= self
.i
.eq(i
)
471 def elaborate(self
, platform
):
473 m
.submodules
.denorm_in_a
= self
.i
.a
474 m
.submodules
.denorm_in_b
= self
.i
.b
475 m
.submodules
.denorm_out_a
= self
.o
.a
476 m
.submodules
.denorm_out_b
= self
.o
.b
477 # hmmm, don't like repeating identical code
478 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
479 with m
.If(self
.i
.a
.exp_n127
):
480 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
482 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
484 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
485 with m
.If(self
.i
.b
.exp_n127
):
486 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
488 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
490 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
495 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ creates the "denormalise" state and its combinatorial module

        width:  forwarded to FPAddDeNormMod and to the two output numbers
        id_wid: forwarded to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid) with no default for
    # id_wid: passing width alone raises TypeError on construction.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)  # registered output for operand a
    self.out_b = FPNumBase(width)  # registered output for operand b
503 def setup(self
, m
, i
):
504 """ links module to inputs and outputs
508 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
509 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
512 # Denormalised Number checks
516 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the operand inputs/outputs and the exponents-equal flag

        width: passed to every FPNumBase / FPNumIn created here
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)  # driven to 0 or 1 in elaborate
525 def elaborate(self
, platform
):
526 # This one however (single-cycle) will do the shift
531 m
.submodules
.align_in_a
= self
.in_a
532 m
.submodules
.align_in_b
= self
.in_b
533 m
.submodules
.align_out_a
= self
.out_a
534 m
.submodules
.align_out_b
= self
.out_b
536 # NOTE: this does *not* do single-cycle multi-shifting,
537 # it *STAYS* in the align state until exponents match
539 # exponent of a greater than b: shift b down
540 m
.d
.comb
+= self
.exp_eq
.eq(0)
541 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
542 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
543 agtb
= Signal(reset_less
=True)
544 altb
= Signal(reset_less
=True)
545 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
546 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
548 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
549 # exponent of b greater than a: shift a down
551 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
552 # exponents equal: move to next stage.
554 m
.d
.comb
+= self
.exp_eq
.eq(1)
558 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state wrapping an FPAddAlignMultiMod

        width:  forwarded to the module and to the two output numbers
        id_wid: accepted but not used in this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    self.out_a = FPNumIn(None, width)  # registered copy of mod.out_a
    self.out_b = FPNumIn(None, width)  # registered copy of mod.out_b
    self.exp_eq = Signal(reset_less=True)  # mirrors mod.exp_eq (see setup)
567 def setup(self
, m
, in_a
, in_b
):
568 """ links module to inputs and outputs
570 m
.submodules
.align
= self
.mod
571 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
572 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
573 #m.d.comb += self.out_a.eq(self.mod.out_a)
574 #m.d.comb += self.out_b.eq(self.mod.out_b)
575 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
576 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
577 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
580 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ bundles two FPNumIn operands (a, b) with an id signal (mid)

        width:  passed to both FPNumIn instances
        id_wid: bit-width of the mid signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.mid = Signal(id_wid, reset_less=True)
592 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
595 class FPAddAlignSingleMod
:
597 def __init__(self
, width
, id_wid
):
600 self
.i
= self
.ispec()
601 self
.o
= self
.ospec()
604 return FPNumBase2Ops(self
.width
, self
.id_wid
)
607 return FPNumIn2Ops(self
.width
, self
.id_wid
)
609 def setup(self
, m
, i
):
610 """ links module to inputs and outputs
612 m
.submodules
.align
= self
613 m
.d
.comb
+= self
.i
.eq(i
)
615 def elaborate(self
, platform
):
616 """ Aligns A against B or B against A, depending on which has the
617 greater exponent. This is done in a *single* cycle using
618 variable-width bit-shift
620 the shifter used here is quite expensive in terms of gates.
621 Mux A or B in (and out) into temporaries, as only one of them
622 needs to be aligned against the other
626 m
.submodules
.align_in_a
= self
.i
.a
627 m
.submodules
.align_in_b
= self
.i
.b
628 m
.submodules
.align_out_a
= self
.o
.a
629 m
.submodules
.align_out_b
= self
.o
.b
631 # temporary (muxed) input and output to be shifted
632 t_inp
= FPNumBase(self
.width
)
633 t_out
= FPNumIn(None, self
.width
)
634 espec
= (len(self
.i
.a
.e
), True)
635 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
636 m
.submodules
.align_t_in
= t_inp
637 m
.submodules
.align_t_out
= t_out
638 m
.submodules
.multishift_r
= msr
640 ediff
= Signal(espec
, reset_less
=True)
641 ediffr
= Signal(espec
, reset_less
=True)
642 tdiff
= Signal(espec
, reset_less
=True)
643 elz
= Signal(reset_less
=True)
644 egz
= Signal(reset_less
=True)
646 # connect multi-shifter to t_inp/out mantissa (and tdiff)
647 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
648 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
649 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
650 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
651 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
653 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
654 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
655 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
656 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
658 # default: A-exp == B-exp, A and B untouched (fall through)
659 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
660 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
661 # only one shifter (muxed)
662 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
663 # exponent of a greater than b: shift b down
665 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
668 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
670 # exponent of b greater than a: shift a down
672 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
675 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
678 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
683 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state wrapping an FPAddAlignSingleMod

        width:  forwarded to the module and to the two output numbers
        id_wid: forwarded to the module
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    self.out_a = FPNumIn(None, width)  # registered output for operand a
    self.out_b = FPNumIn(None, width)  # registered output for operand b
691 def setup(self
, m
, i
):
692 """ links module to inputs and outputs
696 # NOTE: could be done as comb
697 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
698 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
704 class FPAddAlignSingleAdd(FPState
):
706 def __init__(self
, width
, id_wid
):
707 FPState
.__init
__(self
, "align")
710 self
.a1o
= self
.ospec()
713 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
716 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
718 def setup(self
, m
, i
):
719 """ links module to inputs and outputs
721 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
724 m
.d
.comb
+= o
.eq(mod
.o
)
726 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
729 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
731 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
733 self
.a1modo
= a1mod
.o
735 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
738 m
.next
= "normalise_1"
741 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ output record of the first add stage: result number, raw total, id

        width:  passed to FPNumBase for the result number z
        id_wid: bit-width of the mid signal
    """
    self.z = FPNumBase(width, False)
    # total is four bits wider than z's mantissa width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
749 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
752 class FPAddStage0Mod
:
754 def __init__(self
, width
, id_wid
):
757 self
.i
= self
.ispec()
758 self
.o
= self
.ospec()
761 return FPNumBase2Ops(self
.width
, self
.id_wid
)
764 return FPAddStage0Data(self
.width
, self
.id_wid
)
766 def setup(self
, m
, i
):
767 """ links module to inputs and outputs
769 m
.submodules
.add0
= self
770 m
.d
.comb
+= self
.i
.eq(i
)
772 def elaborate(self
, platform
):
774 m
.submodules
.add0_in_a
= self
.i
.a
775 m
.submodules
.add0_in_b
= self
.i
.b
776 m
.submodules
.add0_out_z
= self
.o
.z
778 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
779 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
781 # store intermediate tests (and zero-extended mantissas)
782 seq
= Signal(reset_less
=True)
783 mge
= Signal(reset_less
=True)
784 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
785 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
786 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
787 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
788 am0
.eq(Cat(self
.i
.a
.m
, 0)),
789 bm0
.eq(Cat(self
.i
.b
.m
, 0))
791 # same-sign (both negative or both positive) add mantissas
794 self
.o
.tot
.eq(am0
+ bm0
),
795 self
.o
.z
.s
.eq(self
.i
.a
.s
)
797 # a mantissa greater than b, use a
800 self
.o
.tot
.eq(am0
- bm0
),
801 self
.o
.z
.s
.eq(self
.i
.a
.s
)
803 # b mantissa greater than a, use b
806 self
.o
.tot
.eq(bm0
- am0
),
807 self
.o
.z
.s
.eq(self
.i
.b
.s
)
812 class FPAddStage0(FPState
):
813 """ First stage of add. covers same-sign (add) and subtract
814 special-casing when mantissas are greater or equal, to
815 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state and its combinatorial module

        width:  forwarded to FPAddStage0Mod
        id_wid: forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid) with no default for
    # id_wid: passing width alone raises TypeError on construction.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()  # registered copy of the module output
823 def setup(self
, m
, i
):
824 """ links module to inputs and outputs
828 # NOTE: these could be done as combinatorial (merge add0+add1)
829 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
835 class FPAddStage1Data
:
837 def __init__(self
, width
, id_wid
):
838 self
.z
= FPNumBase(width
, False)
840 self
.mid
= Signal(id_wid
, reset_less
=True)
843 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
847 class FPAddStage1Mod(FPState
):
848 """ Second stage of add: preparation for normalisation.
849 detects when tot sum is too big (tot[27] is kinda a carry bit)
852 def __init__(self
, width
, id_wid
):
855 self
.i
= self
.ispec()
856 self
.o
= self
.ospec()
859 return FPAddStage0Data(self
.width
, self
.id_wid
)
862 return FPAddStage1Data(self
.width
, self
.id_wid
)
864 def setup(self
, m
, i
):
865 """ links module to inputs and outputs
867 m
.submodules
.add1
= self
868 m
.submodules
.add1_out_overflow
= self
.o
.of
870 m
.d
.comb
+= self
.i
.eq(i
)
872 def elaborate(self
, platform
):
874 #m.submodules.norm1_in_overflow = self.in_of
875 #m.submodules.norm1_out_overflow = self.out_of
876 #m.submodules.norm1_in_z = self.in_z
877 #m.submodules.norm1_out_z = self.out_z
878 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
879 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
880 # tot[-1] (MSB) gets set when the sum overflows. shift result down
881 with m
.If(self
.i
.tot
[-1]):
883 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
884 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
885 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
886 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
887 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
888 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
890 # tot[-1] (MSB) zero case
893 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
894 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
895 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
896 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
897 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
902 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state and its combinatorial module

        width:  forwarded to FPAddStage1Mod and to the output number
        id_wid: forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid) with no default for
    # id_wid: passing width alone raises TypeError on construction.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()  # cleared in setup, set to 1 later in this class
911 def setup(self
, m
, i
):
912 """ links module to inputs and outputs
916 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
918 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
919 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
920 m
.d
.sync
+= self
.norm_stb
.eq(1)
923 m
.next
= "normalise_1"
926 class FPNormaliseModSingle
:
928 def __init__(self
, width
):
930 self
.in_z
= self
.ispec()
931 self
.out_z
= self
.ospec()
934 return FPNumBase(self
.width
, False)
937 return FPNumBase(self
.width
, False)
939 def setup(self
, m
, i
):
940 """ links module to inputs and outputs
942 m
.submodules
.normalise
= self
943 m
.d
.comb
+= self
.i
.eq(i
)
945 def elaborate(self
, platform
):
948 mwid
= self
.out_z
.m_width
+2
949 pe
= PriorityEncoder(mwid
)
950 m
.submodules
.norm_pe
= pe
952 m
.submodules
.norm1_out_z
= self
.out_z
953 m
.submodules
.norm1_in_z
= self
.in_z
955 in_z
= FPNumBase(self
.width
, False)
957 m
.submodules
.norm1_insel_z
= in_z
958 m
.submodules
.norm1_insel_overflow
= in_of
960 espec
= (len(in_z
.e
), True)
961 ediff_n126
= Signal(espec
, reset_less
=True)
962 msr
= MultiShiftRMerge(mwid
, espec
)
963 m
.submodules
.multishift_r
= msr
965 m
.d
.comb
+= in_z
.eq(self
.in_z
)
966 m
.d
.comb
+= in_of
.eq(self
.in_of
)
967 # initialise out from in (overridden below)
968 m
.d
.comb
+= self
.out_z
.eq(in_z
)
969 m
.d
.comb
+= self
.out_of
.eq(in_of
)
970 # normalisation decrease condition
971 decrease
= Signal(reset_less
=True)
972 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
975 # *sigh* not entirely obvious: count leading zeros (clz)
976 # with a PriorityEncoder: to find from the MSB
977 # we reverse the order of the bits.
978 temp_m
= Signal(mwid
, reset_less
=True)
979 temp_s
= Signal(mwid
+1, reset_less
=True)
980 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
982 # cat round and guard bits back into the mantissa
983 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
984 pe
.i
.eq(temp_m
[::-1]), # inverted
985 clz
.eq(pe
.o
), # count zeros from MSB down
986 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
987 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
988 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ record holding a round flag, a result number and an id signal

        width:  passed to FPNumBase for the result number z
        id_wid: bit-width of the mid signal
    """
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1001 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1004 class FPNorm1ModSingle
:
1006 def __init__(self
, width
, id_wid
):
1008 self
.id_wid
= id_wid
1009 self
.i
= self
.ispec()
1010 self
.o
= self
.ospec()
1013 return FPAddStage1Data(self
.width
, self
.id_wid
)
1016 return FPNorm1Data(self
.width
, self
.id_wid
)
1018 def setup(self
, m
, i
):
1019 """ links module to inputs and outputs
1021 m
.submodules
.normalise_1
= self
1022 m
.d
.comb
+= self
.i
.eq(i
)
1024 def elaborate(self
, platform
):
1027 mwid
= self
.o
.z
.m_width
+2
1028 pe
= PriorityEncoder(mwid
)
1029 m
.submodules
.norm_pe
= pe
1032 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1034 m
.submodules
.norm1_out_z
= self
.o
.z
1035 m
.submodules
.norm1_out_overflow
= of
1036 m
.submodules
.norm1_in_z
= self
.i
.z
1037 m
.submodules
.norm1_in_overflow
= self
.i
.of
1040 m
.submodules
.norm1_insel_z
= i
.z
1041 m
.submodules
.norm1_insel_overflow
= i
.of
1043 espec
= (len(i
.z
.e
), True)
1044 ediff_n126
= Signal(espec
, reset_less
=True)
1045 msr
= MultiShiftRMerge(mwid
, espec
)
1046 m
.submodules
.multishift_r
= msr
1048 m
.d
.comb
+= i
.eq(self
.i
)
1049 # initialise out from in (overridden below)
1050 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1051 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1052 m
.d
.comb
+= of
.eq(i
.of
)
1053 # normalisation increase/decrease conditions
1054 decrease
= Signal(reset_less
=True)
1055 increase
= Signal(reset_less
=True)
1056 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1057 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1059 with m
.If(decrease
):
1060 # *sigh* not entirely obvious: count leading zeros (clz)
1061 # with a PriorityEncoder: to find from the MSB
1062 # we reverse the order of the bits.
1063 temp_m
= Signal(mwid
, reset_less
=True)
1064 temp_s
= Signal(mwid
+1, reset_less
=True)
1065 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1066 # make sure that the amount to decrease by does NOT
1067 # go below the minimum non-INF/NaN exponent
1068 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1071 # cat round and guard bits back into the mantissa
1072 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1073 pe
.i
.eq(temp_m
[::-1]), # inverted
1074 clz
.eq(limclz
), # count zeros from MSB down
1075 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1076 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1077 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1078 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1079 # overflow in bits 0..1: got shifted too (leave sticky)
1080 of
.guard
.eq(temp_s
[1]), # guard
1081 of
.round_bit
.eq(temp_s
[0]), # round
1084 with m
.Elif(increase
):
1085 temp_m
= Signal(mwid
+1, reset_less
=True)
1087 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1089 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1090 # connect multi-shifter to inp/out mantissa (and ediff)
1092 msr
.diff
.eq(ediff_n126
),
1093 self
.o
.z
.m
.eq(msr
.m
[3:]),
1094 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1095 # overflow in bits 0..1: got shifted too (leave sticky)
1096 of
.guard
.eq(temp_s
[2]), # guard
1097 of
.round_bit
.eq(temp_s
[1]), # round
1098 of
.sticky
.eq(temp_s
[0]), # sticky
1099 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1105 class FPNorm1ModMulti
:
1107 def __init__(self
, width
, single_cycle
=True):
1109 self
.in_select
= Signal(reset_less
=True)
1110 self
.in_z
= FPNumBase(width
, False)
1111 self
.in_of
= Overflow()
1112 self
.temp_z
= FPNumBase(width
, False)
1113 self
.temp_of
= Overflow()
1114 self
.out_z
= FPNumBase(width
, False)
1115 self
.out_of
= Overflow()
1117 def elaborate(self
, platform
):
1120 m
.submodules
.norm1_out_z
= self
.out_z
1121 m
.submodules
.norm1_out_overflow
= self
.out_of
1122 m
.submodules
.norm1_temp_z
= self
.temp_z
1123 m
.submodules
.norm1_temp_of
= self
.temp_of
1124 m
.submodules
.norm1_in_z
= self
.in_z
1125 m
.submodules
.norm1_in_overflow
= self
.in_of
1127 in_z
= FPNumBase(self
.width
, False)
1129 m
.submodules
.norm1_insel_z
= in_z
1130 m
.submodules
.norm1_insel_overflow
= in_of
1132 # select which of temp or in z/of to use
1133 with m
.If(self
.in_select
):
1134 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1135 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1137 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1138 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1139 # initialise out from in (overridden below)
1140 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1141 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1142 # normalisation increase/decrease conditions
1143 decrease
= Signal(reset_less
=True)
1144 increase
= Signal(reset_less
=True)
1145 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1146 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1147 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1149 with m
.If(decrease
):
1151 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1152 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1153 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1154 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1155 self
.out_of
.round_bit
.eq(0), # reset round bit
1156 self
.out_of
.m0
.eq(in_of
.guard
),
1159 with m
.Elif(increase
):
1161 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1162 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1163 self
.out_of
.guard
.eq(in_z
.m
[0]),
1164 self
.out_of
.m0
.eq(in_z
.m
[1]),
1165 self
.out_of
.round_bit
.eq(in_of
.guard
),
1166 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1172 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ creates the "normalise_1" state wrapping an FPNorm1ModSingle

        width:        forwarded to FPNorm1ModSingle and the output number
        id_wid:       forwarded to FPNorm1ModSingle
        single_cycle: accepted for interface compatibility; not used here
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ takes (width, id_wid) with no default for
    # id_wid: passing width alone raises TypeError on construction.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # must come after self.mod is set: this class's ospec appears to
    # delegate to self.mod.ospec()
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1182 return self
.mod
.ispec()
1185 return self
.mod
.ospec()
1187 def setup(self
, m
, i
):
1188 """ links module to inputs and outputs
1190 self
.mod
.setup(m
, i
)
1192 def action(self
, m
):
1196 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ creates the "normalise_1" state wrapping an FPNorm1ModMulti

        width:  forwarded to the module and to the temp/output numbers
        id_wid: accepted but not used in this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)
    self.stb = Signal(reset_less=True)  # follows norm_stb (see setup)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)  # tested in action()
    self.in_accept = Signal(reset_less=True)  # driven as ~ack & stb in action()
    self.temp_z = FPNumBase(width)  # loaded from out_z in action()
    self.temp_of = Overflow()  # loaded from mod.out_of in action()
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)  # loaded from mod.out_of.roundz
1210 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1211 """ links module to inputs and outputs
1213 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1214 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1215 self
.out_z
, self
.out_norm
)
1217 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1218 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1220 def action(self
, m
):
1221 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1222 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1223 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1224 with m
.If(self
.out_norm
):
1225 with m
.If(self
.in_accept
):
1230 m
.d
.sync
+= self
.ack
.eq(0)
1232 # normalisation not required (or done).
1234 m
.d
.sync
+= self
.ack
.eq(1)
1235 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" for the compact pipeline.

        Chains the normalisation, rounding, corrections and pack
        modules combinatorially, writes the packed value and id into
        sync-domain outputs, then moves to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the floating-point number
            * id_wid: width of the multiplexer id carried with the data
        """
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        # width is read by ispec(), ospec() and setup()
        self.width = width

    def ispec(self):
        """ input format of the chain """
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        """ output format of the chain """
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        self.out_z = self.pmod.ospec()

        m.d.sync += self.out_z.mid.eq(self.pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v) # outputs packed result

    def action(self, m):
        """ FSM transition: hand the packed result to the output state """
        m.next = "pack_put_z"
# Data record: an FPNumBase number `z` plus a multiplexer id `mid`.
# The trailing `return [...]` builds the list of assignments copying
# both fields from another record `i`.
# NOTE(review): the enclosing class header and the `def` line that owns
# the `return` are not visible in this listing (original lines
# 1283-1286, 1290-1291) - confirm against the full file.
1287 def __init__(self
, width
, id_wid
):
1288 self
.z
= FPNumBase(width
, False)
1289 self
.mid
= Signal(id_wid
, reset_less
=True)
1292 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
# Rounding module (registers itself as submodule "roundz").
# Input `i` is built from FPNorm1Data, output `out_z` from FPRoundData.
# elaborate(): copies the input to the output; the statements guarded by
# `i.roundz` add 1 to the mantissa, and the ones guarded by
# `i.z.m == i.z.m1s` add 1 to the exponent.
# NOTE(review): the class header, the `def` lines owning the two
# `return` statements, and the lines at 1298 / 1314 / 1320 are not
# visible in this listing; indentation (and so If nesting) is lost.
1297 def __init__(self
, width
, id_wid
):
1299 self
.id_wid
= id_wid
1300 self
.i
= self
.ispec()
1301 self
.out_z
= self
.ospec()
1304 return FPNorm1Data(self
.width
, self
.id_wid
)
1307 return FPRoundData(self
.width
, self
.id_wid
)
# setup(): attach this module to `m` and drive its input from `i`.
1309 def setup(self
, m
, i
):
1310 m
.submodules
.roundz
= self
1311 m
.d
.comb
+= self
.i
.eq(i
)
1313 def elaborate(self
, platform
):
1315 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1316 with m
.If(self
.i
.roundz
):
1317 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1318 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1319 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and copies its result into
        a sync-domain output before moving on to "corrections".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the floating-point number
            * id_wid: width of the multiplexer id carried with the data
        """
        FPState.__init__(self, "round")
        # FPRoundMod is constructed with (width, id_wid), the same way
        # FPNormToPack constructs it.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: whatever the wrapped module expects """
        return self.mod.ispec()

    def ospec(self):
        """ output format: whatever the wrapped module produces """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the module exposes its result as `out_z` (it has no `o`
        # attribute), so the id must be read from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM transition """
        m.next = "corrections"
class FPCorrectionsMod:
    """ Corrections module: passes the rounded number through, replacing
        the exponent with the input's N127 value when the input reports
        itself as denormalised.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the floating-point number
            * id_wid: width of the multiplexer id carried with the data
        """
        # width and id_wid are read by ispec() / ospec(), so they must be
        # stored before the specs are instantiated.
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input format """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output format (same record type as the input) """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the combinatorial logic and returns the Module """
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        # default: output is a straight copy of the input ...
        m.d.comb += self.out_z.eq(self.i)
        # ... with the exponent overridden for denormalised inputs
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and copies its
        result into a sync-domain output before moving on to "pack".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the floating-point number
            * id_wid: width of the multiplexer id carried with the data
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: whatever the wrapped module expects """
        return self.mod.ispec()

    def ospec(self):
        """ output format: whatever the wrapped module produces """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as `out_z` (it has no `o`
        # attribute), so the id must be read from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM transition: packing follows corrections """
        m.next = "pack"
# Data record: an FPNumOut number `z` plus a multiplexer id `mid`.
# The trailing `return [...]` builds the list of assignments copying
# both fields from another record `i`.
# NOTE(review): the enclosing class header and the `def` line that owns
# the `return` are not visible in this listing - confirm against the
# full file.
1406 def __init__(self
, width
, id_wid
):
1407 self
.z
= FPNumOut(width
, False)
1408 self
.mid
= Signal(id_wid
, reset_less
=True)
1411 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
# Pack module (registers itself as submodule "pack").
# Input `i` is built from FPRoundData, output `o` from FPPackData.
# elaborate(): forwards the id, and drives `o.z` either with
# inf(sign) - the statement listed after the `i.z.is_overflowed` test -
# or with create(sign, exponent, mantissa).
# NOTE(review): the class header, the `def` lines owning the two
# `return` statements, and the lines at 1417 / 1435 / 1440 / 1442 are
# not visible in this listing, so the branch that selects create() must
# be confirmed against the full file.
1416 def __init__(self
, width
, id_wid
):
1418 self
.id_wid
= id_wid
1419 self
.i
= self
.ispec()
1420 self
.o
= self
.ospec()
1423 return FPRoundData(self
.width
, self
.id_wid
)
1426 return FPPackData(self
.width
, self
.id_wid
)
1428 def setup(self
, m
, in_z
):
1429 """ links module to inputs and outputs
1431 m
.submodules
.pack
= self
1432 m
.d
.comb
+= self
.i
.eq(in_z
)
1434 def elaborate(self
, platform
):
1436 m
.submodules
.pack_in_z
= self
.i
.z
1437 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1438 with m
.If(self
.i
.z
.is_overflowed
):
1439 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1441 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
# Data record: an FPNumOut number `z` plus a multiplexer id `mid`.
# The trailing `return [...]` builds the list of assignments copying
# both fields from another record `i`.
# NOTE(review): this is token-for-token the same as the record defined
# at original lines 1406-1411; the class header is not visible here, so
# whether this is a distinct class or a duplicate definition needs to be
# checked against the full file.
1446 def __init__(self
, width
, id_wid
):
1447 self
.z
= FPNumOut(width
, False)
1448 self
.mid
= Signal(id_wid
, reset_less
=True)
1451 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and copies the packed value
        and id into a sync-domain output before moving on to
        "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the floating-point number
            * id_wid: width of the multiplexer id carried with the data
        """
        FPState.__init__(self, "pack")
        # FPPackMod is constructed with (width, id_wid), the same way
        # FPNormToPack constructs it.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: whatever the wrapped module expects """
        return self.mod.ispec()

    def ospec(self):
        """ output format: whatever the wrapped module produces """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the pack module's result lives in `o`, and the packed bits in
        # `.z.v` of the record (same access path FPNormToPack uses).
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM transition """
        m.next = "pack_put_z"
# FPPutZ: FSM output state. `state` is this state's name; `to_state` is
# the state to move to once the result is taken (defaults to "get_ops").
# action(): when an in_mid was supplied, copies it to out_mid (sync);
# once out_z.z.stb and out_z.z.ack are both high it clears stb and moves
# to to_state; the final statement sets stb to 1.
# NOTE(review): original lines 1486-1487, 1494, 1496 and 1500 are not
# visible in this listing. action() reads self.in_z and self.out_z, so
# they are presumably assigned on the missing constructor lines, and the
# final stb.eq(1) presumably sits under an Else - confirm.
1479 class FPPutZ(FPState
):
1481 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1482 FPState
.__init
__(self
, state
)
1483 if to_state
is None:
1484 to_state
= "get_ops"
1485 self
.to_state
= to_state
1488 self
.in_mid
= in_mid
1489 self
.out_mid
= out_mid
1491 def action(self
, m
):
1492 if self
.in_mid
is not None:
1493 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1495 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1497 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1498 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1499 m
.next
= self
.to_state
1501 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
# FPPutZIdx: FSM output state that delivers the result to one entry of
# `out_zs`, selected by indexing with `in_mid`.
# action(): mirrors the selected entry's stb/ack into the local signals
# outz_stb / outz_ack; once both are high it clears the selected stb
# and moves to to_state (default "get_ops"); the final statement sets
# the selected stb to 1.
# NOTE(review): original lines 1511, 1520-1521, 1523 and 1527 are not
# visible in this listing. action() reads self.in_z, so it is presumably
# assigned on the missing constructor line, and the final stb.eq(1)
# presumably sits under an Else - confirm.
1504 class FPPutZIdx(FPState
):
1506 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1507 FPState
.__init
__(self
, state
)
1508 if to_state
is None:
1509 to_state
= "get_ops"
1510 self
.to_state
= to_state
1512 self
.out_zs
= out_zs
1513 self
.in_mid
= in_mid
1515 def action(self
, m
):
1516 outz_stb
= Signal(reset_less
=True)
1517 outz_ack
= Signal(reset_less
=True)
1518 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1519 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1522 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1524 with m
.If(outz_stb
& outz_ack
):
1525 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1526 m
.next
= self
.to_state
1528 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
class FPADDBaseData:
    """ Input record for the adder: the two raw operands `a` and `b`
        plus the multiplexer id `mid` that travels with them.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of each operand signal
            * id_wid: bit-width of the multiplexer id signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy every field of
            record `i` into this record
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
# Data record: an FPOp `z` plus a multiplexer id `mid`.
# The trailing `return [...]` builds the list of assignments copying
# both fields from another record `i`.
# NOTE(review): the enclosing class header and the `def` line that owns
# the `return` are not visible in this listing - confirm against the
# full file.
1544 def __init__(self
, width
, id_wid
):
1545 self
.z
= FPOp(width
)
1546 self
.mid
= Signal(id_wid
, reset_less
=True)
1549 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
# Constructor of the adder's inner FSM module, followed by the bodies of
# its two spec methods. Stores the configuration flags, creates the
# input Trigger `in_t`, and instantiates the input record `i`
# (FPADDBaseData) and output record `o` (FPOpData).
# NOTE(review): the class header, the docstring delimiters, the `def`
# lines owning the two `return` statements and the lines at 1561-1562,
# 1566 and 1570-1572 are not visible in this listing. Other methods
# read self.width and self.states, so these are presumably set on the
# missing lines - confirm.
1554 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1557 * width: bit-width of IEEE754. supported: 16, 32, 64
1558 * id_wid: an identifier that is sync-connected to the input
1559 * single_cycle: True indicates each stage to complete in 1 clock
1560 * compact: True indicates a reduced number of stages
1563 self
.id_wid
= id_wid
1564 self
.single_cycle
= single_cycle
1565 self
.compact
= compact
1567 self
.in_t
= Trigger()
1568 self
.i
= self
.ispec()
1569 self
.o
= self
.ospec()
1574 return FPADDBaseData(self
.width
, self
.id_wid
)
1577 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ Registers `state` in self.states and hands it back.

        Returning the state lets callers write
        `x = self.add_state(SomeState(...))` and then call `x.setup(...)`.
    """
    self.states.append(state)
    return state
# get_fragment(): registers the output number and the input trigger as
# submodules, builds the state list through either
# get_compact_fragment() or get_longer_fragment(), then opens an FSM
# with one m.State per entry of self.states (keyed by state_from).
# NOTE(review): original lines 1585-1586, 1589, 1591, 1593, 1595 and
# 1598-1601 are not visible in this listing: the Module creation, the
# condition choosing between the two builders, the body of each
# m.State and the return value must be confirmed against the full file.
1583 def get_fragment(self
, platform
=None):
1584 """ creates the HDL code-fragment for FPAdd
1587 m
.submodules
.out_z
= self
.o
.z
1588 m
.submodules
.in_t
= self
.in_t
1590 self
.get_compact_fragment(m
, platform
)
1592 self
.get_longer_fragment(m
, platform
)
1594 with m
.FSM() as fsm
:
1596 for state
in self
.states
:
1597 with m
.State(state
.state_from
):
# get_longer_fragment(): builds the long-form pipeline, one FSM state
# per stage, each stage's setup() being fed from the previous stage's
# outputs: get_ops -> special cases -> denormalise -> align -> add0 ->
# add1 -> normalise -> round -> corrections -> pack -> pack_put_z,
# plus a separate "put_z" state fed from the special-cases output.
# self.single_cycle selects between the Single and Multi variants of
# the align and normalise stages.
# NOTE(review): several calls here do not match definitions visible
# elsewhere in this file - e.g. rn.setup / cor.setup / pa.setup are
# given extra arguments although FPRound / FPCorrections / FPPack
# define setup(self, m, <one input>), n1.setup is given three or four
# arguments, and `a`, `b`, self.in_mid, self.out_z and self.out_mid
# are not defined in any visible line. This path looks stale relative
# to get_compact_fragment() - confirm before relying on it.
# NOTE(review): original lines 1603, 1605, 1607-1609, 1612, 1619 (and
# other odd gaps) are not visible in this listing, including the
# `else:` headers that presumably separate the Single/Multi variants.
1602 def get_longer_fragment(self
, m
, platform
=None):
1604 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1606 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1610 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1611 sc
.setup(m
, a
, b
, self
.in_mid
)
1613 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1614 dn
.setup(m
, a
, b
, sc
.in_mid
)
1616 if self
.single_cycle
:
1617 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1618 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1620 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1621 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1623 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1624 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1626 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1627 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1629 if self
.single_cycle
:
1630 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1631 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1633 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1634 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1636 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1637 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1639 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1640 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1642 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1643 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1645 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1646 pa
.in_mid
, self
.out_mid
))
1648 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1649 pa
.in_mid
, self
.out_mid
))
# get_compact_fragment(): builds the reduced-stage pipeline:
#   get_ops -> FPAddSpecialCasesDeNorm -> FPAddAlignSingleAdd ->
#   FPNormToPack -> pack_put_z, plus a separate "put_z" state fed from
#   the special-cases output.
# Both FPPutZ states write into self.o and self.o.mid.
# NOTE(review): original lines 1652, 1656, 1658-1659, 1661-1662, 1665
# and 1668 are not visible in this listing; the setup() calls for `sc`
# and `alm` are presumably on the missing lines - confirm.
1651 def get_compact_fragment(self
, m
, platform
=None):
1653 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1654 self
.width
, self
.id_wid
))
1655 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1657 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1660 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1663 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1664 n1
.setup(m
, alm
.a1o
)
1666 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1667 n1
.out_z
.mid
, self
.o
.mid
))
1669 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1670 sc
.o
.mid
, self
.o
.mid
))
# FPADDBase: FSM state "fpadd" that wraps a complete FPADDBaseMod.
# __init__ creates the inner module, the output record `o`, an input
# Trigger `in_t`, the input record `i`, and the handshake signals
# z_done / in_accept / add_stb / add_ack.
# setup() wires (comb) the input record into the inner module, forwards
# the in_t stb/ack pair and the output id / value / stb / ack, registers
# add_stb (sync), defaults add_ack and o.z.ack to 0 (sync) and installs
# the inner module as submodule "fpadd".
# action() computes in_accept = ~add_ack & add_stb and then branches on
# z_done.
# NOTE(review): many original lines of action() are not visible in this
# listing (1732, 1735-1736, 1739-1741, 1744-1749, 1752-1753, 1755,
# 1759-1760), so its branch structure cannot be read from here.
# NOTE(review): action() refers to self.in_mid, self.out_mid, self.out_z
# and self.mod.out_mid / self.mod.out_z, none of which is assigned in
# any visible line of this class (the visible attributes are self.o and
# self.mod.o) - confirm whether these names are stale.
1673 class FPADDBase(FPState
):
1675 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1678 * width: bit-width of IEEE754. supported: 16, 32, 64
1679 * id_wid: an identifier that is sync-connected to the input
1680 * single_cycle: True indicates each stage to complete in 1 clock
1682 FPState
.__init
__(self
, "fpadd")
1684 self
.single_cycle
= single_cycle
1685 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1686 self
.o
= self
.ospec()
1688 self
.in_t
= Trigger()
1689 self
.i
= self
.ispec()
1691 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1692 self
.in_accept
= Signal(reset_less
=True)
1693 self
.add_stb
= Signal(reset_less
=True)
1694 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1697 return self
.mod
.ispec()
1700 return self
.mod
.ospec()
1702 def setup(self
, m
, i
, add_stb
, in_mid
):
1703 m
.d
.comb
+= [self
.i
.eq(i
),
1704 self
.mod
.i
.eq(self
.i
),
1705 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1706 #self.add_stb.eq(add_stb),
1707 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1708 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1709 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1710 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1711 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1712 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1715 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1716 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1717 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1718 #m.d.sync += self.in_t.stb.eq(0)
1720 m
.submodules
.fpadd
= self
.mod
1722 def action(self
, m
):
1724 # in_accept is set on incoming strobe HIGH and ack LOW.
1725 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1727 #with m.If(self.in_t.ack):
1728 # m.d.sync += self.in_t.stb.eq(0)
1729 with m
.If(~self
.z_done
):
1730 # not done: test for accepting an incoming operand pair
1731 with m
.If(self
.in_accept
):
1733 self
.add_ack
.eq(1), # acknowledge receipt...
1734 self
.in_t
.stb
.eq(1), # initiate add
1737 m
.d
.sync
+= [self
.add_ack
.eq(0),
1738 self
.in_t
.stb
.eq(0),
1742 # done: acknowledge, and write out id and value
1743 m
.d
.sync
+= [self
.add_ack
.eq(1),
1750 if self
.in_mid
is not None:
1751 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1754 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1756 # move to output state on detecting z ack
1757 with m
.If(self
.out_z
.trigger
):
1758 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1761 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
# Constructor of a result-array holder: keeps id_wid, wraps the local
# list `res` in an nMigen Array (self.res), and creates an FPOp input
# `in_z` plus an id signal `in_mid`. Each loop iteration names an
# `out_z` object "out_z_<i>".
# NOTE(review): the class header and original lines 1766, 1768, 1770
# and 1772 are not visible in this listing; `rs_sz`, `res` and `out_z`
# are used here but not defined in any visible line (rs_sz is not a
# parameter of this signature) - confirm against the full file.
1765 def __init__(self
, width
, id_wid
):
1767 self
.id_wid
= id_wid
1769 for i
in range(rs_sz
):
1771 out_z
.name
= "out_z_%d" % i
1773 self
.res
= Array(res
)
1774 self
.in_z
= FPOp(width
)
1775 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
# setup(): combinatorially drives this object's in_z and in_mid from the
# caller-supplied `in_z` and `in_mid`.
1777 def setup(self
, m
, in_z
, in_mid
):
1778 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1779 self
.in_mid
.eq(in_mid
)]
# get_fragment(): registers in_z as the named submodule "res_in_z" and
# adds self.res to the module's anonymous submodules.
# NOTE(review): original lines 1783-1784 and 1787 onward are not visible
# in this listing (Module creation, remaining logic, return value).
1781 def get_fragment(self
, platform
=None):
1782 """ creates the HDL code-fragment for FPAdd
1785 m
.submodules
.res_in_z
= self
.in_z
1786 m
.submodules
+= self
.res
1798 """ FPADD: stages as follows:
1804 FPAddBase---> FPAddBaseMod
1806 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1808 FPAddBase is tricky: it is both a stage and *has* stages.
1809 Connection to FPAddBaseMod therefore requires an in stb/ack
1810 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1811 needs to be the thing that raises the incoming stb.
# Constructor of the top-level adder: stores id_wid and single_cycle,
# creates an FPID helper (self.ids), collects `rs_sz` operand pairs
# (in_a, in_b) named "in_a_<i>" / "in_b_<i>" into the local list `rs`,
# and wraps `rs_sz` result objects named "out_z_<i>" in an Array
# (self.res).
# NOTE(review): the class header, the docstring delimiters and original
# lines 1815-1816, 1820-1821, 1824, 1827-1828, 1830-1831, 1835-1837,
# 1839, 1841 and 1843-1845 are not visible in this listing; the creation
# of in_a / in_b / out_z / res and the assignment of self.rs and
# self.width are presumably on those lines - confirm.
1814 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1817 * width: bit-width of IEEE754. supported: 16, 32, 64
1818 * id_wid: an identifier that is sync-connected to the input
1819 * single_cycle: True indicates each stage to complete in 1 clock
1822 self
.id_wid
= id_wid
1823 self
.single_cycle
= single_cycle
1825 #self.out_z = FPOp(width)
1826 self
.ids
= FPID(id_wid
)
1829 for i
in range(rs_sz
):
1832 in_a
.name
= "in_a_%d" % i
1833 in_b
.name
= "in_b_%d" % i
1834 rs
.append((in_a
, in_b
))
1838 for i
in range(rs_sz
):
1840 out_z
.name
= "out_z_%d" % i
1842 self
.res
= Array(res
)
def add_state(self, state):
    """ Registers `state` in self.states and hands it back.

        Returning the state lets callers write
        `x = self.add_state(SomeState(...))` and keep using `x`.
    """
    self.states.append(state)
    return state
# get_fragment(): top-level elaboration. Adds self.rs to the submodules,
# takes the first operand pair (rs[0]) as in_a / in_b, registers the
# states get_a -> get_b -> fpadd (an FPADDBase whose input record `abd`
# is loaded in the sync domain from a, b and ids.in_mid) and a "put_z"
# FPPutZIdx state writing into self.res, then opens an FSM with one
# m.State per registered state.
# NOTE(review): original lines 1852-1853, 1855, 1858, 1860-1863,
# 1865-1868, 1874-1875, 1877-1878, 1880 and 1883 onward are not visible
# in this listing: the remaining FPGetOp / FPPutZIdx arguments, the
# definitions of `a`, `b` and `o`, the per-state body and the return
# value must be confirmed against the full file.
1850 def get_fragment(self
, platform
=None):
1851 """ creates the HDL code-fragment for FPAdd
1854 m
.submodules
+= self
.rs
1856 in_a
= self
.rs
[0][0]
1857 in_b
= self
.rs
[0][1]
1859 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1864 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1869 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1870 ab
= self
.add_state(ab
)
1871 abd
= ab
.ispec() # create an input spec object for FPADDBase
1872 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1873 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1876 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1879 with m
.FSM() as fsm
:
1881 for state
in self
.states
:
1882 with m
.State(state
.state_from
):
# Script entry point: builds an adder and passes it, with an explicit
# port list, to nmigen.cli.main.
# Two alternatives are listed: a 32-bit FPADD exposing the first operand
# pair, the first result and the FPID in/out ids; and a 32-bit FPADDBase
# exposing in_a / in_b, the in_t trigger ports, out_z ports and ids.
# NOTE(review): original lines 1889 and 1895 are not visible in this
# listing, so what selects between the two alternatives cannot be read
# from here. The FPADDBase variant uses alu.in_a, alu.in_b, alu.out_z,
# alu.in_mid and alu.out_mid, none of which is assigned in the visible
# FPADDBase constructor - confirm whether that branch is still usable.
1888 if __name__
== "__main__":
1890 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1891 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1892 alu
.rs
[0][1].ports() + \
1893 alu
.res
[0].ports() + \
1894 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1896 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1897 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1898 alu
.in_t
.ports() + \
1899 alu
.out_z
.ports() + \
1900 [alu
.in_mid
, alu
.out_mid
])
1903 # works... but don't use, just do "python fname.py convert -t v"
1904 #print (verilog.convert(alu, ports=[
1905 # ports=alu.in_a.ports() + \
1906 # alu.in_b.ports() + \
1907 # alu.out_z.ports())