1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
16 def __init__(self
, state_from
):
17 self
.state_from
= state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
139 def __init__(self
, width
):
140 self
.in_op
= FPOp(width
)
141 self
.out_op
= Signal(width
)
142 self
.out_decode
= Signal(reset_less
=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
216 class FPGet2Op(FPState
):
220 def __init__(self
, in_state
, out_state
, width
, id_wid
):
221 FPState
.__init
__(self
, in_state
)
222 self
.out_state
= out_state
223 self
.mod
= FPGet2OpMod(width
, id_wid
)
224 self
.o
= self
.mod
.ospec()
225 self
.in_stb
= Signal(reset_less
=True)
226 self
.out_ack
= Signal(reset_less
=True)
227 self
.out_decode
= Signal(reset_less
=True)
229 def setup(self
, m
, i
, in_stb
, in_ack
):
230 """ links module to inputs and outputs
232 m
.submodules
.get_ops
= self
.mod
233 m
.d
.comb
+= self
.mod
.i
.eq(i
)
234 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
235 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
236 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
237 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
240 with m
.If(self
.out_decode
):
241 m
.next
= self
.out_state
244 self
.o
.eq(self
.mod
.o
),
247 m
.d
.sync
+= self
.mod
.ack
.eq(1)
252 def __init__(self
, width
, id_wid
, m_extra
=True):
253 self
.a
= FPNumBase(width
, m_extra
)
254 self
.b
= FPNumBase(width
, m_extra
)
255 self
.mid
= Signal(id_wid
, reset_less
=True)
258 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
261 class FPAddSpecialCasesMod
:
262 """ special cases: NaNs, infs, zeros, denormalised
263 NOTE: some of these are unique to add. see "Special Operations"
264 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
267 def __init__(self
, width
, id_wid
):
270 self
.i
= self
.ispec()
271 self
.o
= self
.ospec()
272 self
.out_do_z
= Signal(reset_less
=True)
275 return FPNumBase2Ops(self
.width
, self
.id_wid
)
278 return FPPackData(self
.width
, self
.id_wid
)
280 def setup(self
, m
, i
, out_do_z
):
281 """ links module to inputs and outputs
283 m
.submodules
.specialcases
= self
284 m
.d
.comb
+= self
.i
.eq(i
)
285 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
287 def elaborate(self
, platform
):
290 m
.submodules
.sc_in_a
= self
.i
.a
291 m
.submodules
.sc_in_b
= self
.i
.b
292 m
.submodules
.sc_out_z
= self
.o
.z
295 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
298 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
300 # if a is NaN or b is NaN return NaN
301 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
302 m
.d
.comb
+= self
.out_do_z
.eq(1)
303 m
.d
.comb
+= self
.o
.z
.nan(0)
305 # XXX WEIRDNESS for FP16 non-canonical NaN handling
308 ## if a is zero and b is NaN return -b
309 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
310 # m.d.comb += self.out_do_z.eq(1)
311 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
313 ## if b is zero and a is NaN return -a
314 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
315 # m.d.comb += self.out_do_z.eq(1)
316 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
318 ## if a is -zero and b is NaN return -b
319 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
320 # m.d.comb += self.out_do_z.eq(1)
321 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
323 ## if b is -zero and a is NaN return -a
324 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
325 # m.d.comb += self.out_do_z.eq(1)
326 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
328 # if a is inf return inf (or NaN)
329 with m
.Elif(self
.i
.a
.is_inf
):
330 m
.d
.comb
+= self
.out_do_z
.eq(1)
331 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
332 # if a is inf and signs don't match return NaN
333 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
334 m
.d
.comb
+= self
.o
.z
.nan(0)
336 # if b is inf return inf
337 with m
.Elif(self
.i
.b
.is_inf
):
338 m
.d
.comb
+= self
.out_do_z
.eq(1)
339 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
341 # if a is zero and b zero return signed-a/b
342 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
343 m
.d
.comb
+= self
.out_do_z
.eq(1)
344 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
348 # if a is zero return b
349 with m
.Elif(self
.i
.a
.is_zero
):
350 m
.d
.comb
+= self
.out_do_z
.eq(1)
351 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
354 # if b is zero return a
355 with m
.Elif(self
.i
.b
.is_zero
):
356 m
.d
.comb
+= self
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
360 # if a equal to -b return zero (+ve zero)
361 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
362 m
.d
.comb
+= self
.out_do_z
.eq(1)
363 m
.d
.comb
+= self
.o
.z
.zero(0)
365 # Denormalised Number checks
367 m
.d
.comb
+= self
.out_do_z
.eq(0)
373 def __init__(self
, id_wid
):
376 self
.in_mid
= Signal(id_wid
, reset_less
=True)
377 self
.out_mid
= Signal(id_wid
, reset_less
=True)
383 if self
.id_wid
is not None:
384 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
387 class FPAddSpecialCases(FPState
):
388 """ special cases: NaNs, infs, zeros, denormalised
389 NOTE: some of these are unique to add. see "Special Operations"
390 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
393 def __init__(self
, width
, id_wid
):
394 FPState
.__init
__(self
, "special_cases")
395 self
.mod
= FPAddSpecialCasesMod(width
)
396 self
.out_z
= self
.mod
.ospec()
397 self
.out_do_z
= Signal(reset_less
=True)
399 def setup(self
, m
, i
):
400 """ links module to inputs and outputs
402 self
.mod
.setup(m
, i
, self
.out_do_z
)
403 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
404 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.pmod
.o
.mid
) # (and mid)
408 with m
.If(self
.out_do_z
):
411 m
.next
= "denormalise"
414 class FPAddSpecialCasesDeNorm(FPState
):
415 """ special cases: NaNs, infs, zeros, denormalised
416 NOTE: some of these are unique to add. see "Special Operations"
417 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
420 def __init__(self
, width
, id_wid
):
421 FPState
.__init
__(self
, "special_cases")
422 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
423 self
.out_z
= self
.smod
.ospec()
424 self
.out_do_z
= Signal(reset_less
=True)
426 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
427 self
.o
= self
.dmod
.ospec()
429 def setup(self
, m
, i
):
430 """ links module to inputs and outputs
432 self
.smod
.setup(m
, i
, self
.out_do_z
)
433 self
.dmod
.setup(m
, i
)
436 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
437 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
439 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
442 with m
.If(self
.out_do_z
):
448 class FPAddDeNormMod(FPState
):
450 def __init__(self
, width
, id_wid
):
453 self
.i
= self
.ispec()
454 self
.o
= self
.ospec()
457 return FPNumBase2Ops(self
.width
, self
.id_wid
)
460 return FPNumBase2Ops(self
.width
, self
.id_wid
)
462 def setup(self
, m
, i
):
463 """ links module to inputs and outputs
465 m
.submodules
.denormalise
= self
466 m
.d
.comb
+= self
.i
.eq(i
)
468 def elaborate(self
, platform
):
470 m
.submodules
.denorm_in_a
= self
.i
.a
471 m
.submodules
.denorm_in_b
= self
.i
.b
472 m
.submodules
.denorm_out_a
= self
.o
.a
473 m
.submodules
.denorm_out_b
= self
.o
.b
474 # hmmm, don't like repeating identical code
475 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
476 with m
.If(self
.i
.a
.exp_n127
):
477 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
479 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
481 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
482 with m
.If(self
.i
.b
.exp_n127
):
483 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
485 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
490 class FPAddDeNorm(FPState
):
492 def __init__(self
, width
, id_wid
):
493 FPState
.__init
__(self
, "denormalise")
494 self
.mod
= FPAddDeNormMod(width
)
495 self
.out_a
= FPNumBase(width
)
496 self
.out_b
= FPNumBase(width
)
498 def setup(self
, m
, i
):
499 """ links module to inputs and outputs
503 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
504 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
507 # Denormalised Number checks
511 class FPAddAlignMultiMod(FPState
):
513 def __init__(self
, width
):
514 self
.in_a
= FPNumBase(width
)
515 self
.in_b
= FPNumBase(width
)
516 self
.out_a
= FPNumIn(None, width
)
517 self
.out_b
= FPNumIn(None, width
)
518 self
.exp_eq
= Signal(reset_less
=True)
520 def elaborate(self
, platform
):
521 # This one however (single-cycle) will do the shift
526 m
.submodules
.align_in_a
= self
.in_a
527 m
.submodules
.align_in_b
= self
.in_b
528 m
.submodules
.align_out_a
= self
.out_a
529 m
.submodules
.align_out_b
= self
.out_b
531 # NOTE: this does *not* do single-cycle multi-shifting,
532 # it *STAYS* in the align state until exponents match
534 # exponent of a greater than b: shift b down
535 m
.d
.comb
+= self
.exp_eq
.eq(0)
536 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
537 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
538 agtb
= Signal(reset_less
=True)
539 altb
= Signal(reset_less
=True)
540 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
541 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
543 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
544 # exponent of b greater than a: shift a down
546 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
547 # exponents equal: move to next stage.
549 m
.d
.comb
+= self
.exp_eq
.eq(1)
553 class FPAddAlignMulti(FPState
):
555 def __init__(self
, width
, id_wid
):
556 FPState
.__init
__(self
, "align")
557 self
.mod
= FPAddAlignMultiMod(width
)
558 self
.out_a
= FPNumIn(None, width
)
559 self
.out_b
= FPNumIn(None, width
)
560 self
.exp_eq
= Signal(reset_less
=True)
562 def setup(self
, m
, in_a
, in_b
):
563 """ links module to inputs and outputs
565 m
.submodules
.align
= self
.mod
566 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
567 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
568 #m.d.comb += self.out_a.eq(self.mod.out_a)
569 #m.d.comb += self.out_b.eq(self.mod.out_b)
570 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
571 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
572 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
575 with m
.If(self
.exp_eq
):
581 def __init__(self
, width
, id_wid
):
582 self
.a
= FPNumIn(None, width
)
583 self
.b
= FPNumIn(None, width
)
584 self
.mid
= Signal(id_wid
, reset_less
=True)
587 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
590 class FPAddAlignSingleMod
:
592 def __init__(self
, width
, id_wid
):
595 self
.i
= self
.ispec()
596 self
.o
= self
.ospec()
599 return FPNumBase2Ops(self
.width
, self
.id_wid
)
602 return FPNumIn2Ops(self
.width
, self
.id_wid
)
604 def setup(self
, m
, i
):
605 """ links module to inputs and outputs
607 m
.submodules
.align
= self
608 m
.d
.comb
+= self
.i
.eq(i
)
610 def elaborate(self
, platform
):
611 """ Aligns A against B or B against A, depending on which has the
612 greater exponent. This is done in a *single* cycle using
613 variable-width bit-shift
615 the shifter used here is quite expensive in terms of gates.
616 Mux A or B in (and out) into temporaries, as only one of them
617 needs to be aligned against the other
621 m
.submodules
.align_in_a
= self
.i
.a
622 m
.submodules
.align_in_b
= self
.i
.b
623 m
.submodules
.align_out_a
= self
.o
.a
624 m
.submodules
.align_out_b
= self
.o
.b
626 # temporary (muxed) input and output to be shifted
627 t_inp
= FPNumBase(self
.width
)
628 t_out
= FPNumIn(None, self
.width
)
629 espec
= (len(self
.i
.a
.e
), True)
630 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
631 m
.submodules
.align_t_in
= t_inp
632 m
.submodules
.align_t_out
= t_out
633 m
.submodules
.multishift_r
= msr
635 ediff
= Signal(espec
, reset_less
=True)
636 ediffr
= Signal(espec
, reset_less
=True)
637 tdiff
= Signal(espec
, reset_less
=True)
638 elz
= Signal(reset_less
=True)
639 egz
= Signal(reset_less
=True)
641 # connect multi-shifter to t_inp/out mantissa (and tdiff)
642 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
643 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
644 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
645 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
646 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
648 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
649 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
650 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
651 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
653 # default: A-exp == B-exp, A and B untouched (fall through)
654 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
655 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
656 # only one shifter (muxed)
657 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
658 # exponent of a greater than b: shift b down
660 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
663 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
665 # exponent of b greater than a: shift a down
667 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
670 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
675 class FPAddAlignSingle(FPState
):
677 def __init__(self
, width
, id_wid
):
678 FPState
.__init
__(self
, "align")
679 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
680 self
.out_a
= FPNumIn(None, width
)
681 self
.out_b
= FPNumIn(None, width
)
683 def setup(self
, m
, i
):
684 """ links module to inputs and outputs
688 # NOTE: could be done as comb
689 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
690 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
696 class FPAddAlignSingleAdd(FPState
):
698 def __init__(self
, width
, id_wid
):
699 FPState
.__init
__(self
, "align")
702 self
.a1o
= self
.ospec()
705 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
708 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
710 def setup(self
, m
, i
):
711 """ links module to inputs and outputs
713 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
716 m
.d
.comb
+= o
.eq(mod
.o
)
718 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
721 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
723 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
725 self
.a1modo
= a1mod
.o
727 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
730 m
.next
= "normalise_1"
733 class FPAddStage0Data
:
735 def __init__(self
, width
, id_wid
):
736 self
.z
= FPNumBase(width
, False)
737 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
738 self
.mid
= Signal(id_wid
, reset_less
=True)
741 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
744 class FPAddStage0Mod
:
746 def __init__(self
, width
, id_wid
):
749 self
.i
= self
.ispec()
750 self
.o
= self
.ospec()
753 return FPNumBase2Ops(self
.width
, self
.id_wid
)
756 return FPAddStage0Data(self
.width
, self
.id_wid
)
758 def setup(self
, m
, i
):
759 """ links module to inputs and outputs
761 m
.submodules
.add0
= self
762 m
.d
.comb
+= self
.i
.eq(i
)
764 def elaborate(self
, platform
):
766 m
.submodules
.add0_in_a
= self
.i
.a
767 m
.submodules
.add0_in_b
= self
.i
.b
768 m
.submodules
.add0_out_z
= self
.o
.z
770 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
772 # store intermediate tests (and zero-extended mantissas)
773 seq
= Signal(reset_less
=True)
774 mge
= Signal(reset_less
=True)
775 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
776 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
777 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
778 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
779 am0
.eq(Cat(self
.i
.a
.m
, 0)),
780 bm0
.eq(Cat(self
.i
.b
.m
, 0))
782 # same-sign (both negative or both positive) add mantissas
785 self
.o
.tot
.eq(am0
+ bm0
),
786 self
.o
.z
.s
.eq(self
.i
.a
.s
)
788 # a mantissa greater than b, use a
791 self
.o
.tot
.eq(am0
- bm0
),
792 self
.o
.z
.s
.eq(self
.i
.a
.s
)
794 # b mantissa greater than a, use b
797 self
.o
.tot
.eq(bm0
- am0
),
798 self
.o
.z
.s
.eq(self
.i
.b
.s
)
803 class FPAddStage0(FPState
):
804 """ First stage of add. covers same-sign (add) and subtract
805 special-casing when mantissas are greater or equal, to
806 give greatest accuracy.
809 def __init__(self
, width
, id_wid
):
810 FPState
.__init
__(self
, "add_0")
811 self
.mod
= FPAddStage0Mod(width
)
812 self
.o
= self
.mod
.ospec()
814 def setup(self
, m
, i
):
815 """ links module to inputs and outputs
819 # NOTE: these could be done as combinatorial (merge add0+add1)
820 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
826 class FPAddStage1Data
:
828 def __init__(self
, width
, id_wid
):
829 self
.z
= FPNumBase(width
, False)
831 self
.mid
= Signal(id_wid
, reset_less
=True)
834 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
838 class FPAddStage1Mod(FPState
):
839 """ Second stage of add: preparation for normalisation.
840 detects when tot sum is too big (tot[27] is kinda a carry bit)
843 def __init__(self
, width
, id_wid
):
846 self
.i
= self
.ispec()
847 self
.o
= self
.ospec()
850 return FPAddStage0Data(self
.width
, self
.id_wid
)
853 return FPAddStage1Data(self
.width
, self
.id_wid
)
855 def setup(self
, m
, i
):
856 """ links module to inputs and outputs
858 m
.submodules
.add1
= self
859 m
.submodules
.add1_out_overflow
= self
.o
.of
861 m
.d
.comb
+= self
.i
.eq(i
)
863 def elaborate(self
, platform
):
865 #m.submodules.norm1_in_overflow = self.in_of
866 #m.submodules.norm1_out_overflow = self.out_of
867 #m.submodules.norm1_in_z = self.in_z
868 #m.submodules.norm1_out_z = self.out_z
869 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
870 # tot[-1] (MSB) gets set when the sum overflows. shift result down
871 with m
.If(self
.i
.tot
[-1]):
873 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
874 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
875 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
876 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
877 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
878 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
880 # tot[-1] (MSB) zero case
883 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
884 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
885 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
886 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
887 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
892 class FPAddStage1(FPState
):
894 def __init__(self
, width
, id_wid
):
895 FPState
.__init
__(self
, "add_1")
896 self
.mod
= FPAddStage1Mod(width
)
897 self
.out_z
= FPNumBase(width
, False)
898 self
.out_of
= Overflow()
899 self
.norm_stb
= Signal()
901 def setup(self
, m
, i
):
902 """ links module to inputs and outputs
906 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
908 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
909 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
910 m
.d
.sync
+= self
.norm_stb
.eq(1)
913 m
.next
= "normalise_1"
916 class FPNormaliseModSingle
:
918 def __init__(self
, width
):
920 self
.in_z
= self
.ispec()
921 self
.out_z
= self
.ospec()
924 return FPNumBase(self
.width
, False)
927 return FPNumBase(self
.width
, False)
929 def setup(self
, m
, i
):
930 """ links module to inputs and outputs
932 m
.submodules
.normalise
= self
933 m
.d
.comb
+= self
.i
.eq(i
)
935 def elaborate(self
, platform
):
938 mwid
= self
.out_z
.m_width
+2
939 pe
= PriorityEncoder(mwid
)
940 m
.submodules
.norm_pe
= pe
942 m
.submodules
.norm1_out_z
= self
.out_z
943 m
.submodules
.norm1_in_z
= self
.in_z
945 in_z
= FPNumBase(self
.width
, False)
947 m
.submodules
.norm1_insel_z
= in_z
948 m
.submodules
.norm1_insel_overflow
= in_of
950 espec
= (len(in_z
.e
), True)
951 ediff_n126
= Signal(espec
, reset_less
=True)
952 msr
= MultiShiftRMerge(mwid
, espec
)
953 m
.submodules
.multishift_r
= msr
955 m
.d
.comb
+= in_z
.eq(self
.in_z
)
956 m
.d
.comb
+= in_of
.eq(self
.in_of
)
957 # initialise out from in (overridden below)
958 m
.d
.comb
+= self
.out_z
.eq(in_z
)
959 m
.d
.comb
+= self
.out_of
.eq(in_of
)
960 # normalisation decrease condition
961 decrease
= Signal(reset_less
=True)
962 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
965 # *sigh* not entirely obvious: count leading zeros (clz)
966 # with a PriorityEncoder: to find from the MSB
967 # we reverse the order of the bits.
968 temp_m
= Signal(mwid
, reset_less
=True)
969 temp_s
= Signal(mwid
+1, reset_less
=True)
970 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
972 # cat round and guard bits back into the mantissa
973 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
974 pe
.i
.eq(temp_m
[::-1]), # inverted
975 clz
.eq(pe
.o
), # count zeros from MSB down
976 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
977 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
978 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
985 def __init__(self
, width
, id_wid
):
986 self
.roundz
= Signal(reset_less
=True)
987 self
.z
= FPNumBase(width
, False)
988 self
.mid
= Signal(id_wid
, reset_less
=True)
991 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
994 class FPNorm1ModSingle
:
996 def __init__(self
, width
, id_wid
):
999 self
.i
= self
.ispec()
1000 self
.o
= self
.ospec()
1003 return FPAddStage1Data(self
.width
, self
.id_wid
)
1006 return FPNorm1Data(self
.width
, self
.id_wid
)
1008 def setup(self
, m
, i
):
1009 """ links module to inputs and outputs
1011 m
.submodules
.normalise_1
= self
1012 m
.d
.comb
+= self
.i
.eq(i
)
1014 def elaborate(self
, platform
):
1017 mwid
= self
.o
.z
.m_width
+2
1018 pe
= PriorityEncoder(mwid
)
1019 m
.submodules
.norm_pe
= pe
1022 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1024 m
.submodules
.norm1_out_z
= self
.o
.z
1025 m
.submodules
.norm1_out_overflow
= of
1026 m
.submodules
.norm1_in_z
= self
.i
.z
1027 m
.submodules
.norm1_in_overflow
= self
.i
.of
1030 m
.submodules
.norm1_insel_z
= i
.z
1031 m
.submodules
.norm1_insel_overflow
= i
.of
1033 espec
= (len(i
.z
.e
), True)
1034 ediff_n126
= Signal(espec
, reset_less
=True)
1035 msr
= MultiShiftRMerge(mwid
, espec
)
1036 m
.submodules
.multishift_r
= msr
1038 m
.d
.comb
+= i
.eq(self
.i
)
1039 # initialise out from in (overridden below)
1040 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1041 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1042 m
.d
.comb
+= of
.eq(i
.of
)
1043 # normalisation increase/decrease conditions
1044 decrease
= Signal(reset_less
=True)
1045 increase
= Signal(reset_less
=True)
1046 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1047 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1049 with m
.If(decrease
):
1050 # *sigh* not entirely obvious: count leading zeros (clz)
1051 # with a PriorityEncoder: to find from the MSB
1052 # we reverse the order of the bits.
1053 temp_m
= Signal(mwid
, reset_less
=True)
1054 temp_s
= Signal(mwid
+1, reset_less
=True)
1055 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1056 # make sure that the amount to decrease by does NOT
1057 # go below the minimum non-INF/NaN exponent
1058 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1061 # cat round and guard bits back into the mantissa
1062 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1063 pe
.i
.eq(temp_m
[::-1]), # inverted
1064 clz
.eq(limclz
), # count zeros from MSB down
1065 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1066 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1067 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1068 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1069 # overflow in bits 0..1: got shifted too (leave sticky)
1070 of
.guard
.eq(temp_s
[1]), # guard
1071 of
.round_bit
.eq(temp_s
[0]), # round
1074 with m
.Elif(increase
):
1075 temp_m
= Signal(mwid
+1, reset_less
=True)
1077 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1079 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1080 # connect multi-shifter to inp/out mantissa (and ediff)
1082 msr
.diff
.eq(ediff_n126
),
1083 self
.o
.z
.m
.eq(msr
.m
[3:]),
1084 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1085 # overflow in bits 0..1: got shifted too (leave sticky)
1086 of
.guard
.eq(temp_s
[2]), # guard
1087 of
.round_bit
.eq(temp_s
[1]), # round
1088 of
.sticky
.eq(temp_s
[0]), # sticky
1089 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1095 class FPNorm1ModMulti
:
1097 def __init__(self
, width
, single_cycle
=True):
1099 self
.in_select
= Signal(reset_less
=True)
1100 self
.in_z
= FPNumBase(width
, False)
1101 self
.in_of
= Overflow()
1102 self
.temp_z
= FPNumBase(width
, False)
1103 self
.temp_of
= Overflow()
1104 self
.out_z
= FPNumBase(width
, False)
1105 self
.out_of
= Overflow()
1107 def elaborate(self
, platform
):
1110 m
.submodules
.norm1_out_z
= self
.out_z
1111 m
.submodules
.norm1_out_overflow
= self
.out_of
1112 m
.submodules
.norm1_temp_z
= self
.temp_z
1113 m
.submodules
.norm1_temp_of
= self
.temp_of
1114 m
.submodules
.norm1_in_z
= self
.in_z
1115 m
.submodules
.norm1_in_overflow
= self
.in_of
1117 in_z
= FPNumBase(self
.width
, False)
1119 m
.submodules
.norm1_insel_z
= in_z
1120 m
.submodules
.norm1_insel_overflow
= in_of
1122 # select which of temp or in z/of to use
1123 with m
.If(self
.in_select
):
1124 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1125 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1127 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1128 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1129 # initialise out from in (overridden below)
1130 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1131 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1132 # normalisation increase/decrease conditions
1133 decrease
= Signal(reset_less
=True)
1134 increase
= Signal(reset_less
=True)
1135 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1136 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1137 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1139 with m
.If(decrease
):
1141 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1142 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1143 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1144 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1145 self
.out_of
.round_bit
.eq(0), # reset round bit
1146 self
.out_of
.m0
.eq(in_of
.guard
),
1149 with m
.Elif(increase
):
1151 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1152 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1153 self
.out_of
.guard
.eq(in_z
.m
[0]),
1154 self
.out_of
.m0
.eq(in_z
.m
[1]),
1155 self
.out_of
.round_bit
.eq(in_of
.guard
),
1156 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1162 class FPNorm1Single(FPState
):
1164 def __init__(self
, width
, id_wid
, single_cycle
=True):
1165 FPState
.__init
__(self
, "normalise_1")
1166 self
.mod
= FPNorm1ModSingle(width
)
1167 self
.out_z
= FPNumBase(width
, False)
1168 self
.out_roundz
= Signal(reset_less
=True)
1170 def setup(self
, m
, i
):
1171 """ links module to inputs and outputs
1173 self
.mod
.setup(m
, i
, self
.out_z
)
1175 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1177 def action(self
, m
):
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" built around FPNorm1ModMulti.

        The state is re-entered for as long as the module reports (through
        out_norm) that another normalisation step is required; once it does
        not, roundz is captured and the FSM moves to "round".
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPNorm1ModMulti and the FPNumBase registers
            * id_wid: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ adds the per-cycle behaviour of the normalise_1 state to m
        """
        # in_accept: strobe is up and it has not been acknowledged yet
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed this cycle's result back as next cycle's starting point
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                # NOTE(review): the statement of this branch was absent;
                # acknowledging the accepted input is the assumed intent.
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" for the compact pipeline.

        Chains normalisation, rounding, corrections and packing
        combinatorially and registers only the packed result.
        Note that out_z does not exist until setup() has been called.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        # ispec(), ospec() and setup() all read self.width
        self.width = width

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        # without this call the input i is never connected to anything
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        self.out_z = self.pmod.ospec()

        m.d.sync += self.out_z.mid.eq(self.pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v) # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ creates the two members of this record: z (an FPNumBase built from
        width) and mid (an id_wid-bit Signal without reset logic)
    """
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
def eq(self, i):
    """ returns the assignments that copy i.z and i.mid into this record,
        as a list suitable for "m.d.<domain> += ..."
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ stores the geometry, then creates the input record (i) and the
        output record (out_z) from ispec() / ospec()
    """
    # must be set before ispec()/ospec() run: both read self.width
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
def ispec(self):
    """ returns a fresh input record (FPNorm1Data) for this module
    """
    return FPNorm1Data(self.width, self.id_wid)
def ospec(self):
    """ returns a fresh output record (FPRoundData) for this module
    """
    return FPRoundData(self.width, self.id_wid)
1294 def setup(self
, m
, i
):
1295 m
.submodules
.roundz
= self
1296 m
.d
.comb
+= self
.i
.eq(i
)
def elaborate(self, platform):
    """ builds the rounding logic: out_z follows the input, except that
        when i.roundz is set the mantissa is incremented, and the exponent
        as well if the mantissa was all ones
    """
    m = Module()
    m.d.comb += self.out_z.eq(self.i)
    with m.If(self.i.roundz):
        m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa rounds up
        # NOTE(review): nested on purpose - the exponent may only move
        # when a round-up actually takes place
        with m.If(self.i.z.m == self.i.z.m1s): # all 1s
            m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
    return m
class FPRound(FPState):
    """ FSM state "round": runs FPRoundMod and registers its result in
        out_z, then moves on to "corrections"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod is built with (width, id_wid) elsewhere in this file
        # (see FPNormToPack.setup); id_wid was not being passed on
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the module's output record is used as "out_z" (see
        # FPNormToPack.setup, rmod.out_z), not "o" as previously written
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """ post-rounding fix-up: passes the input through unchanged, except
        that the exponent is replaced by z.N127 when the input reports
        itself as denormalised
    """

    def __init__(self, width, id_wid):
        # must be set before ispec()/ospec() run: both read self.width
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": runs FPCorrectionsMod and registers its
        result in out_z, then moves on to "pack"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its output as out_z; it has no "o"
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # the body was absent; "pack" is the state name FPPack registers
        m.next = "pack"
def __init__(self, width, id_wid):
    """ creates the two members of this record: z (an FPNumOut built from
        width) and mid (an id_wid-bit Signal without reset logic)
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
def eq(self, i):
    """ returns the assignments that copy i.z and i.mid into this record,
        as a list suitable for "m.d.<domain> += ..."
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ stores the geometry, then creates the input record (i) and the
        output record (o) from ispec() / ospec()
    """
    # must be set before ispec()/ospec() run: both read self.width
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
def ispec(self):
    """ returns a fresh input record (FPRoundData) for this module
    """
    return FPRoundData(self.width, self.id_wid)
def ospec(self):
    """ returns a fresh output record (FPPackData) for this module
    """
    return FPPackData(self.width, self.id_wid)
def setup(self, m, in_z):
    """ links module to inputs and outputs: registers this module in m as
        submodule "pack" and drives its input record from in_z
    """
    m.submodules.pack = self
    m.d.comb += self.i.eq(in_z)
def elaborate(self, platform):
    """ builds the packing logic: mid is passed straight through, and the
        output is either infinity (input overflowed) or the value created
        from the input's sign, exponent and mantissa
    """
    m = Module()
    m.submodules.pack_in_z = self.i.z
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(self.i.z.is_overflowed):
        m.d.comb += self.o.z.inf(self.i.z.s)
    # without the Else, create() is applied last and unconditionally, so
    # an overflowed input would never come out as infinity
    with m.Else():
        m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    return m
def __init__(self, width, id_wid):
    """ creates the two members of this record: z (an FPNumOut built from
        width) and mid (an id_wid-bit Signal without reset logic)
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
def eq(self, i):
    """ returns the assignments that copy i.z and i.mid into this record,
        as a list suitable for "m.d.<domain> += ..."
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPack(FPState):
    """ FSM state "pack": runs FPPackMod and registers the packed value and
        its id in out_z, then moves on to "pack_put_z"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod is built with (width, id_wid) elsewhere in this file
        # (see FPNormToPack.setup); id_wid was not being passed on
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPNormToPack reads the pack module's output as "o" and reaches
        # the packed bits through its "z" member (pmod.o.z.v); this state
        # now does the same instead of using out_z.v / mod.out_z
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ output state: copies in_z.v (and, if given, in_mid) to the output,
        holds the output strobe high until it is acknowledged, then moves
        to to_state
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its "v" member is read
            * out_z: destination; its "z" member (v, stb, ack) is used
            * in_mid, out_mid: id source / destination; in_mid may be None
            * to_state: state to go to when done (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # the assignment has to be added to a domain to have any effect
        m.d.sync += [
          self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ output state with selectable destination: the result is written to
        out_zs[in_mid], whose strobe is held high until it is acknowledged;
        the FSM then moves to to_state
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its "v" member is read
            * out_zs: indexable collection of destinations (v, stb, ack)
            * in_mid: index into out_zs
            * to_state: state to go to when done (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads in_z.v
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # the assignment has to be added to a domain to have any effect
        m.d.sync += [
          self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ input record of the adder: the two operands a and b (width bits
        each) and the identifier mid (id_wid bits)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments that copy i.a, i.b and i.mid into this
            record, as a list suitable for "m.d.<domain> += ..."
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ creates the two members of this record: z (an FPOp built from
        width) and mid (an id_wid-bit Signal without reset logic)
    """
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)
def eq(self, i):
    """ returns the assignments that copy i.z and i.mid into this record,
        as a list suitable for "m.d.<domain> += ..."
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    # width must be stored before ispec()/ospec() are called below
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    # filled by add_state(), walked by get_fragment()
    self.states = []
def ispec(self):
    """ returns a fresh input record (FPADDBaseData) for this module
    """
    return FPADDBaseData(self.width, self.id_wid)
def ospec(self):
    """ returns a fresh output record (FPOpData) for this module
    """
    return FPOpData(self.width, self.id_wid)
def add_state(self, state):
    """ records state in self.states and returns it unchanged, so that
        "st = self.add_state(SomeState(...))" leaves st usable
    """
    self.states.append(state)
    # callers keep and use the result (e.g. get = self.add_state(...);
    # get.setup(...)), so returning None would break every one of them
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        Builds either the compact or the longer list of states (chosen by
        self.compact), then emits one FSM state per entry of self.states.
    """
    m = Module()
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t
    # only one of the two pipelines may be built: each one registers its
    # own "get_ops" state
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
1587 def get_longer_fragment(self
, m
, platform
=None):
1589 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1591 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1595 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1596 sc
.setup(m
, a
, b
, self
.in_mid
)
1598 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1599 dn
.setup(m
, a
, b
, sc
.in_mid
)
1601 if self
.single_cycle
:
1602 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1603 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1605 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1606 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1608 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1609 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1611 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1612 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1614 if self
.single_cycle
:
1615 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1616 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1618 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1619 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1621 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1622 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1624 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1625 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1627 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1628 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1630 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1631 pa
.in_mid
, self
.out_mid
))
1633 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1634 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """ registers the states of the reduced pipeline:
        get_ops -> special cases + denormalisation -> align + add ->
        normalise-to-pack, followed by the two output states
        "pack_put_z" and "put_z"
    """
    get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                  self.width, self.id_wid))
    get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    # NOTE(review): no setup() call for sc is visible here, although every
    # other state gets one - confirm it is wired up
    sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))

    # NOTE(review): likewise, no setup() call for alm is visible
    alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))

    n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
    n1.setup(m, alm.a1o)

    # normal result path: packed value and id come from n1
    ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                n1.out_z.mid, self.o.mid))

    # second output state: value and id come from sc instead
    pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
                                sc.o.mid, self.o.mid))
1658 class FPADDBase(FPState
):
1660 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1663 * width: bit-width of IEEE754. supported: 16, 32, 64
1664 * id_wid: an identifier that is sync-connected to the input
1665 * single_cycle: True indicates each stage to complete in 1 clock
1667 FPState
.__init
__(self
, "fpadd")
1669 self
.single_cycle
= single_cycle
1670 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1671 self
.o
= self
.ospec()
1673 self
.in_t
= Trigger()
1674 self
.i
= self
.ispec()
1676 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1677 self
.in_accept
= Signal(reset_less
=True)
1678 self
.add_stb
= Signal(reset_less
=True)
1679 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1682 return self
.mod
.ispec()
1685 return self
.mod
.ospec()
1687 def setup(self
, m
, i
, add_stb
, in_mid
):
1688 m
.d
.comb
+= [self
.i
.eq(i
),
1689 self
.mod
.i
.eq(self
.i
),
1690 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1691 #self.add_stb.eq(add_stb),
1692 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1693 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1694 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1695 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1696 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1697 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1700 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1701 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1702 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1703 #m.d.sync += self.in_t.stb.eq(0)
1705 m
.submodules
.fpadd
= self
.mod
1707 def action(self
, m
):
1709 # in_accept is set on incoming strobe HIGH and ack LOW.
1710 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1712 #with m.If(self.in_t.ack):
1713 # m.d.sync += self.in_t.stb.eq(0)
1714 with m
.If(~self
.z_done
):
1715 # not done: test for accepting an incoming operand pair
1716 with m
.If(self
.in_accept
):
1718 self
.add_ack
.eq(1), # acknowledge receipt...
1719 self
.in_t
.stb
.eq(1), # initiate add
1722 m
.d
.sync
+= [self
.add_ack
.eq(0),
1723 self
.in_t
.stb
.eq(0),
1727 # done: acknowledge, and write out id and value
1728 m
.d
.sync
+= [self
.add_ack
.eq(1),
1735 if self
.in_mid
is not None:
1736 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1739 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1741 # move to output state on detecting z ack
1742 with m
.If(self
.out_z
.trigger
):
1743 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1746 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
def __init__(self, width, id_wid, rs_sz=2):
    """ creates rs_sz result operands (self.res), one input operand
        (self.in_z) and the id that selects between results (self.in_mid)

        * width: bit-width handed to each FPOp
        * id_wid: bit-width of in_mid
        * rs_sz: number of result slots.  NOTE(review): the loop referred
          to rs_sz without it being a parameter; the default of 2 mirrors
          FPADD's rs_sz default - confirm
    """
    self.width = width
    self.id_wid = id_wid
    res = []
    for i in range(rs_sz):
        # NOTE(review): FPOp is assumed, matching in_z below and the
        # stb/ack/v members that FPPutZIdx uses on each entry
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)
    self.in_z = FPOp(width)
    self.in_mid = Signal(self.id_wid, reset_less=True)
1762 def setup(self
, m
, in_z
, in_mid
):
1763 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1764 self
.in_mid
.eq(in_mid
)]
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        Registers in_z and the result operands as submodules of a new
        Module and returns it.
    """
    m = Module()
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res

    return m
1783 """ FPADD: stages as follows:
1789 FPAddBase---> FPAddBaseMod
1791 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1793 FPAddBase is tricky: it is both a stage and *has* stages.
1794 Connection to FPAddBaseMod therefore requires an in stb/ack
1795 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1796 needs to be the thing that raises the incoming stb.
def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * rs_sz: number of (in_a, in_b) operand pairs and of result slots
    """
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle

    #self.out_z = FPOp(width)
    self.ids = FPID(id_wid)

    # operand pairs; get_fragment() and the __main__ block index them as
    # self.rs[n][0] / self.rs[n][1]
    rs = []
    for i in range(rs_sz):
        # NOTE(review): FPOp is assumed for the operands (it is the type
        # named in the commented-out out_z line above) - confirm
        in_a = FPOp(width)
        in_b = FPOp(width)
        in_a.name = "in_a_%d" % i
        in_b.name = "in_b_%d" % i
        rs.append((in_a, in_b))
    # NOTE(review): wrapped in Array to mirror self.res - confirm
    self.rs = Array(rs)

    res = []
    for i in range(rs_sz):
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)

    # filled by add_state()
    self.states = []
def add_state(self, state):
    """ records state in self.states and returns it unchanged, so that
        "st = self.add_state(SomeState(...))" leaves st usable
    """
    self.states.append(state)
    # callers keep and use the result (e.g. ab = self.add_state(ab);
    # ab.ispec()), so returning None would break them
    return state
1835 def get_fragment(self
, platform
=None):
1836 """ creates the HDL code-fragment for FPAdd
1839 m
.submodules
+= self
.rs
1841 in_a
= self
.rs
[0][0]
1842 in_b
= self
.rs
[0][1]
1844 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1849 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1854 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1855 ab
= self
.add_state(ab
)
1856 abd
= ab
.ispec() # create an input spec object for FPADDBase
1857 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1858 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1861 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1864 with m
.FSM() as fsm
:
1866 for state
in self
.states
:
1867 with m
.State(state
.state_from
):
1873 if __name__
== "__main__":
1875 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1876 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1877 alu
.rs
[0][1].ports() + \
1878 alu
.res
[0].ports() + \
1879 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1881 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1882 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1883 alu
.in_t
.ports() + \
1884 alu
.out_z
.ports() + \
1885 [alu
.in_mid
, alu
.out_mid
])
1888 # works... but don't use, just do "python fname.py convert -t v"
1889 #print (verilog.convert(alu, ports=[
1890 # ports=alu.in_a.ports() + \
1891 # alu.in_b.ports() + \
1892 # alu.out_z.ports())