1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """ remembers which state this object stands for

        state_from: state identifier, stored unchanged on self.state_from
                    (subclasses in this file pass strings such as "align")
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand port, the output value and the decode flag

        width: passed to FPOp for in_op and used as the shape of out_op
    """
    # NOTE: each Signal is assigned straight to its attribute so that the
    # attribute name is what the HDL library sees at creation time.
    self.in_op = FPOp(width)
    self.out_op = Signal(width)
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
216 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ builds the two-operand fetch state

        in_state:      identifier of this state (kept by FPState as state_from)
        out_state:     stored on self.out_state for use as the follow-on state
        width, id_wid: forwarded unchanged to FPGet2OpMod
    """
    super().__init__(in_state)
    self.out_state = out_state
    # the module does the work; self.o is a separate instance of its ospec
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()
    # handshake / decode flags
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
229 def setup(self
, m
, i
, in_stb
, in_ack
):
230 """ links module to inputs and outputs
232 m
.submodules
.get_ops
= self
.mod
233 m
.d
.comb
+= self
.mod
.i
.eq(i
)
234 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
235 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
236 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
237 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
240 with m
.If(self
.out_decode
):
241 m
.next
= self
.out_state
244 self
.o
.eq(self
.mod
.o
),
247 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ bundle of two FPNumBase operands (a, b) plus an id Signal (mid)

        width:   passed to FPNumBase for both operands
        id_wid:  shape of the mid Signal
        m_extra: second argument handed to FPNumBase (defaults to True)
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    self.mid = Signal(id_wid, reset_less=True)
258 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
261 class FPAddSpecialCasesMod
:
262 """ special cases: NaNs, infs, zeros, denormalised
263 NOTE: some of these are unique to add. see "Special Operations"
264 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
267 def __init__(self
, width
, id_wid
):
270 self
.i
= self
.ispec()
271 self
.o
= self
.ospec()
272 self
.out_do_z
= Signal(reset_less
=True)
275 return FPNumBase2Ops(self
.width
, self
.id_wid
)
278 return FPPackData(self
.width
, self
.id_wid
)
280 def setup(self
, m
, i
, out_do_z
):
281 """ links module to inputs and outputs
283 m
.submodules
.specialcases
= self
284 m
.d
.comb
+= self
.i
.eq(i
)
285 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
287 def elaborate(self
, platform
):
290 m
.submodules
.sc_in_a
= self
.i
.a
291 m
.submodules
.sc_in_b
= self
.i
.b
292 m
.submodules
.sc_out_z
= self
.o
.z
295 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
298 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
300 # if a is NaN or b is NaN return NaN
301 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
302 m
.d
.comb
+= self
.out_do_z
.eq(1)
303 m
.d
.comb
+= self
.o
.z
.nan(0)
305 # XXX WEIRDNESS for FP16 non-canonical NaN handling
308 ## if a is zero and b is NaN return -b
309 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
310 # m.d.comb += self.out_do_z.eq(1)
311 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
313 ## if b is zero and a is NaN return -a
314 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
315 # m.d.comb += self.out_do_z.eq(1)
316 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
318 ## if a is -zero and b is NaN return -b
319 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
320 # m.d.comb += self.out_do_z.eq(1)
321 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
323 ## if b is -zero and a is NaN return -a
324 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
325 # m.d.comb += self.out_do_z.eq(1)
326 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
328 # if a is inf return inf (or NaN)
329 with m
.Elif(self
.i
.a
.is_inf
):
330 m
.d
.comb
+= self
.out_do_z
.eq(1)
331 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
332 # if b is also inf (exp_128) and signs don't match return NaN
333 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
334 m
.d
.comb
+= self
.o
.z
.nan(0)
336 # if b is inf return inf
337 with m
.Elif(self
.i
.b
.is_inf
):
338 m
.d
.comb
+= self
.out_do_z
.eq(1)
339 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
341 # if a is zero and b zero return signed-a/b
342 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
343 m
.d
.comb
+= self
.out_do_z
.eq(1)
344 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
348 # if a is zero return b
349 with m
.Elif(self
.i
.a
.is_zero
):
350 m
.d
.comb
+= self
.out_do_z
.eq(1)
351 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
354 # if b is zero return a
355 with m
.Elif(self
.i
.b
.is_zero
):
356 m
.d
.comb
+= self
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
360 # if a equal to -b return zero (+ve zero)
361 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
362 m
.d
.comb
+= self
.out_do_z
.eq(1)
363 m
.d
.comb
+= self
.o
.z
.zero(0)
365 # Denormalised Number checks
367 m
.d
.comb
+= self
.out_do_z
.eq(0)
373 def __init__(self
, id_wid
):
376 self
.in_mid
= Signal(id_wid
, reset_less
=True)
377 self
.out_mid
= Signal(id_wid
, reset_less
=True)
383 if self
.id_wid
is not None:
384 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
387 class FPAddSpecialCases(FPState
):
388 """ special cases: NaNs, infs, zeros, denormalised
389 NOTE: some of these are unique to add. see "Special Operations"
390 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the special-cases state around FPAddSpecialCasesMod

        width, id_wid: both forwarded to FPAddSpecialCasesMod
    """
    super().__init__("special_cases")
    # FPAddSpecialCasesMod's constructor requires (width, id_wid); it has
    # no default for id_wid, so both must be supplied here.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
399 def setup(self
, m
, i
):
400 """ links module to inputs and outputs
402 self
.mod
.setup(m
, i
, self
.out_do_z
)
403 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
404 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.pmod
.o
.mid
) # (and mid)
408 with m
.If(self
.out_do_z
):
411 m
.next
= "denormalise"
414 class FPAddSpecialCasesDeNorm(FPState
):
415 """ special cases: NaNs, infs, zeros, denormalised
416 NOTE: some of these are unique to add. see "Special Operations"
417 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ combined special-cases + denormalise state

        width, id_wid: forwarded to both FPAddSpecialCasesMod and
                       FPAddDeNormMod
    """
    super().__init__("special_cases")

    # special-cases half: module, a fresh ospec instance and the do_z flag
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise half: module and a fresh ospec instance
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
429 def setup(self
, m
, i
):
430 """ links module to inputs and outputs
432 self
.smod
.setup(m
, i
, self
.out_do_z
)
433 self
.dmod
.setup(m
, i
)
436 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
437 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
439 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
442 with m
.If(self
.out_do_z
):
448 class FPAddDeNormMod(FPState
):
450 def __init__(self
, width
, id_wid
):
453 self
.i
= self
.ispec()
454 self
.o
= self
.ospec()
457 return FPNumBase2Ops(self
.width
, self
.id_wid
)
460 return FPNumBase2Ops(self
.width
, self
.id_wid
)
462 def setup(self
, m
, i
):
463 """ links module to inputs and outputs
465 m
.submodules
.denormalise
= self
466 m
.d
.comb
+= self
.i
.eq(i
)
468 def elaborate(self
, platform
):
470 m
.submodules
.denorm_in_a
= self
.i
.a
471 m
.submodules
.denorm_in_b
= self
.i
.b
472 m
.submodules
.denorm_out_a
= self
.o
.a
473 m
.submodules
.denorm_out_b
= self
.o
.b
474 # hmmm, don't like repeating identical code
475 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
476 with m
.If(self
.i
.a
.exp_n127
):
477 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
479 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
481 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
482 with m
.If(self
.i
.b
.exp_n127
):
483 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
485 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
490 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ creates the denormalise state around FPAddDeNormMod

        width:  forwarded to FPAddDeNormMod and to both FPNumBase outputs
        id_wid: forwarded to FPAddDeNormMod
    """
    super().__init__("denormalise")
    # FPAddDeNormMod's constructor requires (width, id_wid); it has no
    # default for id_wid, so both must be supplied here.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
498 def setup(self
, m
, i
):
499 """ links module to inputs and outputs
503 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
504 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
507 # Denormalised Number checks
511 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the align module's inputs, outputs and exp_eq flag

        width: passed to FPNumBase (inputs) and FPNumIn (outputs)
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # set by elaborate() in its "exponents equal" case
    self.exp_eq = Signal(reset_less=True)
520 def elaborate(self
, platform
):
521 # This one however (single-cycle) will do the shift
526 m
.submodules
.align_in_a
= self
.in_a
527 m
.submodules
.align_in_b
= self
.in_b
528 m
.submodules
.align_out_a
= self
.out_a
529 m
.submodules
.align_out_b
= self
.out_b
531 # NOTE: this does *not* do single-cycle multi-shifting,
532 # it *STAYS* in the align state until exponents match
534 # exponent of a greater than b: shift b down
535 m
.d
.comb
+= self
.exp_eq
.eq(0)
536 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
537 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
538 agtb
= Signal(reset_less
=True)
539 altb
= Signal(reset_less
=True)
540 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
541 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
543 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
544 # exponent of b greater than a: shift a down
546 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
547 # exponents equal: move to next stage.
549 m
.d
.comb
+= self
.exp_eq
.eq(1)
553 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state around FPAddAlignMultiMod

        width:  forwarded to FPAddAlignMultiMod and both FPNumIn outputs
        id_wid: accepted for signature parity with the other states;
                not used in this constructor
    """
    super().__init__("align")
    self.mod = FPAddAlignMultiMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
562 def setup(self
, m
, in_a
, in_b
):
563 """ links module to inputs and outputs
565 m
.submodules
.align
= self
.mod
566 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
567 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
568 #m.d.comb += self.out_a.eq(self.mod.out_a)
569 #m.d.comb += self.out_b.eq(self.mod.out_b)
570 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
571 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
572 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
575 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ bundle of two FPNumIn operands (a, b) plus an id Signal (mid)

        width:  passed (after a None first argument) to FPNumIn
        id_wid: shape of the mid Signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.mid = Signal(id_wid, reset_less=True)
587 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
590 class FPAddAlignSingleMod
:
592 def __init__(self
, width
, id_wid
):
595 self
.i
= self
.ispec()
596 self
.o
= self
.ospec()
599 return FPNumBase2Ops(self
.width
, self
.id_wid
)
602 return FPNumIn2Ops(self
.width
, self
.id_wid
)
604 def setup(self
, m
, i
):
605 """ links module to inputs and outputs
607 m
.submodules
.align
= self
608 m
.d
.comb
+= self
.i
.eq(i
)
610 def elaborate(self
, platform
):
611 """ Aligns A against B or B against A, depending on which has the
612 greater exponent. This is done in a *single* cycle using
613 variable-width bit-shift
615 the shifter used here is quite expensive in terms of gates.
616 Mux A or B in (and out) into temporaries, as only one of them
617 needs to be aligned against the other
621 m
.submodules
.align_in_a
= self
.i
.a
622 m
.submodules
.align_in_b
= self
.i
.b
623 m
.submodules
.align_out_a
= self
.o
.a
624 m
.submodules
.align_out_b
= self
.o
.b
626 # temporary (muxed) input and output to be shifted
627 t_inp
= FPNumBase(self
.width
)
628 t_out
= FPNumIn(None, self
.width
)
629 espec
= (len(self
.i
.a
.e
), True)
630 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
631 m
.submodules
.align_t_in
= t_inp
632 m
.submodules
.align_t_out
= t_out
633 m
.submodules
.multishift_r
= msr
635 ediff
= Signal(espec
, reset_less
=True)
636 ediffr
= Signal(espec
, reset_less
=True)
637 tdiff
= Signal(espec
, reset_less
=True)
638 elz
= Signal(reset_less
=True)
639 egz
= Signal(reset_less
=True)
641 # connect multi-shifter to t_inp/out mantissa (and tdiff)
642 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
643 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
644 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
645 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
646 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
648 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
649 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
650 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
651 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
653 # default: A-exp == B-exp, A and B untouched (fall through)
654 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
655 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
656 # only one shifter (muxed)
657 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
658 # exponent of a greater than b: shift b down
660 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
663 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
665 # exponent of b greater than a: shift a down
667 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
670 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
675 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ creates the "align" state around FPAddAlignSingleMod

        width:  forwarded to FPAddAlignSingleMod and both FPNumIn outputs
        id_wid: forwarded to FPAddAlignSingleMod
    """
    super().__init__("align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
683 def setup(self
, m
, i
):
684 """ links module to inputs and outputs
688 # NOTE: could be done as comb
689 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
690 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
696 class FPAddAlignSingleAdd(FPState
):
698 def __init__(self
, width
, id_wid
):
699 FPState
.__init
__(self
, "align")
702 self
.a1o
= self
.ospec()
705 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
708 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
710 def setup(self
, m
, i
):
711 """ links module to inputs and outputs
713 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
716 m
.d
.comb
+= o
.eq(mod
.o
)
718 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
721 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
723 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
725 self
.a1modo
= a1mod
.o
727 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
730 m
.next
= "normalise_1"
733 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ data produced by add stage 0: result number, raw total and id

        width:  passed to FPNumBase for z
        id_wid: shape of the mid Signal
    """
    self.z = FPNumBase(width, False)
    # z must exist first: tot is sized from it (z.m_width plus 4 bits)
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
741 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
744 class FPAddStage0Mod
:
746 def __init__(self
, width
, id_wid
):
749 self
.i
= self
.ispec()
750 self
.o
= self
.ospec()
753 return FPNumBase2Ops(self
.width
, self
.id_wid
)
756 return FPAddStage0Data(self
.width
, self
.id_wid
)
758 def setup(self
, m
, i
):
759 """ links module to inputs and outputs
761 m
.submodules
.add0
= self
762 m
.d
.comb
+= self
.i
.eq(i
)
764 def elaborate(self
, platform
):
766 m
.submodules
.add0_in_a
= self
.i
.a
767 m
.submodules
.add0_in_b
= self
.i
.b
768 m
.submodules
.add0_out_z
= self
.o
.z
770 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
772 # store intermediate tests (and zero-extended mantissas)
773 seq
= Signal(reset_less
=True)
774 mge
= Signal(reset_less
=True)
775 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
776 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
777 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
778 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
779 am0
.eq(Cat(self
.i
.a
.m
, 0)),
780 bm0
.eq(Cat(self
.i
.b
.m
, 0))
782 # same-sign (both negative or both positive) add mantissas
785 self
.o
.tot
.eq(am0
+ bm0
),
786 self
.o
.z
.s
.eq(self
.i
.a
.s
)
788 # a mantissa greater than b, use a
791 self
.o
.tot
.eq(am0
- bm0
),
792 self
.o
.z
.s
.eq(self
.i
.a
.s
)
794 # b mantissa greater than a, use b
797 self
.o
.tot
.eq(bm0
- am0
),
798 self
.o
.z
.s
.eq(self
.i
.b
.s
)
803 class FPAddStage0(FPState
):
804 """ First stage of add. covers same-sign (add) and subtract
805 special-casing when mantissas are greater or equal, to
806 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state around FPAddStage0Mod

        width, id_wid: both forwarded to FPAddStage0Mod
    """
    super().__init__("add_0")
    # FPAddStage0Mod's constructor requires (width, id_wid); it has no
    # default for id_wid, so both must be supplied here.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
814 def setup(self
, m
, i
):
815 """ links module to inputs and outputs
819 # NOTE: these could be done as combinatorial (merge add0+add1)
820 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
826 class FPAddStage1Data
:
828 def __init__(self
, width
, id_wid
):
829 self
.z
= FPNumBase(width
, False)
831 self
.mid
= Signal(id_wid
, reset_less
=True)
834 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
838 class FPAddStage1Mod(FPState
):
839 """ Second stage of add: preparation for normalisation.
840 detects when tot sum is too big (tot[27] is kinda a carry bit)
843 def __init__(self
, width
, id_wid
):
846 self
.i
= self
.ispec()
847 self
.o
= self
.ospec()
850 return FPAddStage0Data(self
.width
, self
.id_wid
)
853 return FPAddStage1Data(self
.width
, self
.id_wid
)
855 def setup(self
, m
, i
):
856 """ links module to inputs and outputs
858 m
.submodules
.add1
= self
859 m
.submodules
.add1_out_overflow
= self
.o
.of
861 m
.d
.comb
+= self
.i
.eq(i
)
863 def elaborate(self
, platform
):
865 #m.submodules.norm1_in_overflow = self.in_of
866 #m.submodules.norm1_out_overflow = self.out_of
867 #m.submodules.norm1_in_z = self.in_z
868 #m.submodules.norm1_out_z = self.out_z
869 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
870 # tot[-1] (MSB) gets set when the sum overflows. shift result down
871 with m
.If(self
.i
.tot
[-1]):
873 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
874 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
875 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
876 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
877 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
878 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
880 # tot[-1] (MSB) zero case
883 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
884 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
885 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
886 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
887 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
892 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state around FPAddStage1Mod

        width:  forwarded to FPAddStage1Mod and to the out_z FPNumBase
        id_wid: forwarded to FPAddStage1Mod
    """
    super().__init__("add_1")
    # FPAddStage1Mod's constructor requires (width, id_wid); it has no
    # default for id_wid, so both must be supplied here.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
901 def setup(self
, m
, i
):
902 """ links module to inputs and outputs
906 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
908 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
909 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
910 m
.d
.sync
+= self
.norm_stb
.eq(1)
913 m
.next
= "normalise_1"
916 class FPNormaliseModSingle
:
918 def __init__(self
, width
):
920 self
.in_z
= self
.ispec()
921 self
.out_z
= self
.ospec()
924 return FPNumBase(self
.width
, False)
927 return FPNumBase(self
.width
, False)
929 def setup(self
, m
, i
):
930 """ links module to inputs and outputs
932 m
.submodules
.normalise
= self
933 m
.d
.comb
+= self
.i
.eq(i
)
935 def elaborate(self
, platform
):
938 mwid
= self
.out_z
.m_width
+2
939 pe
= PriorityEncoder(mwid
)
940 m
.submodules
.norm_pe
= pe
942 m
.submodules
.norm1_out_z
= self
.out_z
943 m
.submodules
.norm1_in_z
= self
.in_z
945 in_z
= FPNumBase(self
.width
, False)
947 m
.submodules
.norm1_insel_z
= in_z
948 m
.submodules
.norm1_insel_overflow
= in_of
950 espec
= (len(in_z
.e
), True)
951 ediff_n126
= Signal(espec
, reset_less
=True)
952 msr
= MultiShiftRMerge(mwid
, espec
)
953 m
.submodules
.multishift_r
= msr
955 m
.d
.comb
+= in_z
.eq(self
.in_z
)
956 m
.d
.comb
+= in_of
.eq(self
.in_of
)
957 # initialise out from in (overridden below)
958 m
.d
.comb
+= self
.out_z
.eq(in_z
)
959 m
.d
.comb
+= self
.out_of
.eq(in_of
)
960 # normalisation decrease condition
961 decrease
= Signal(reset_less
=True)
962 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
965 # *sigh* not entirely obvious: count leading zeros (clz)
966 # with a PriorityEncoder: to find from the MSB
967 # we reverse the order of the bits.
968 temp_m
= Signal(mwid
, reset_less
=True)
969 temp_s
= Signal(mwid
+1, reset_less
=True)
970 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
972 # cat round and guard bits back into the mantissa
973 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
974 pe
.i
.eq(temp_m
[::-1]), # inverted
975 clz
.eq(pe
.o
), # count zeros from MSB down
976 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
977 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
978 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ bundle of a roundz flag, a result number (z) and an id (mid)

        width:  passed to FPNumBase for z
        id_wid: shape of the mid Signal
    """
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
991 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
994 class FPNorm1ModSingle
:
996 def __init__(self
, width
, id_wid
):
999 self
.i
= self
.ispec()
1000 self
.o
= self
.ospec()
1003 return FPAddStage1Data(self
.width
, self
.id_wid
)
1006 return FPNorm1Data(self
.width
, self
.id_wid
)
1008 def setup(self
, m
, i
):
1009 """ links module to inputs and outputs
1011 m
.submodules
.normalise_1
= self
1012 m
.d
.comb
+= self
.i
.eq(i
)
1014 def elaborate(self
, platform
):
1017 mwid
= self
.o
.z
.m_width
+2
1018 pe
= PriorityEncoder(mwid
)
1019 m
.submodules
.norm_pe
= pe
1022 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1024 m
.submodules
.norm1_out_z
= self
.o
.z
1025 m
.submodules
.norm1_out_overflow
= of
1026 m
.submodules
.norm1_in_z
= self
.i
.z
1027 m
.submodules
.norm1_in_overflow
= self
.i
.of
1030 m
.submodules
.norm1_insel_z
= i
.z
1031 m
.submodules
.norm1_insel_overflow
= i
.of
1033 espec
= (len(i
.z
.e
), True)
1034 ediff_n126
= Signal(espec
, reset_less
=True)
1035 msr
= MultiShiftRMerge(mwid
, espec
)
1036 m
.submodules
.multishift_r
= msr
1038 m
.d
.comb
+= i
.eq(self
.i
)
1039 # initialise out from in (overridden below)
1040 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1041 m
.d
.comb
+= of
.eq(i
.of
)
1042 # normalisation increase/decrease conditions
1043 decrease
= Signal(reset_less
=True)
1044 increase
= Signal(reset_less
=True)
1045 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1046 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1048 with m
.If(decrease
):
1049 # *sigh* not entirely obvious: count leading zeros (clz)
1050 # with a PriorityEncoder: to find from the MSB
1051 # we reverse the order of the bits.
1052 temp_m
= Signal(mwid
, reset_less
=True)
1053 temp_s
= Signal(mwid
+1, reset_less
=True)
1054 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1055 # make sure that the amount to decrease by does NOT
1056 # go below the minimum non-INF/NaN exponent
1057 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1060 # cat round and guard bits back into the mantissa
1061 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1062 pe
.i
.eq(temp_m
[::-1]), # inverted
1063 clz
.eq(limclz
), # count zeros from MSB down
1064 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1065 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1066 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1067 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1068 # overflow in bits 0..1: got shifted too (leave sticky)
1069 of
.guard
.eq(temp_s
[1]), # guard
1070 of
.round_bit
.eq(temp_s
[0]), # round
1073 with m
.Elif(increase
):
1074 temp_m
= Signal(mwid
+1, reset_less
=True)
1076 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1078 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1079 # connect multi-shifter to inp/out mantissa (and ediff)
1081 msr
.diff
.eq(ediff_n126
),
1082 self
.o
.z
.m
.eq(msr
.m
[3:]),
1083 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1084 # overflow in bits 0..1: got shifted too (leave sticky)
1085 of
.guard
.eq(temp_s
[2]), # guard
1086 of
.round_bit
.eq(temp_s
[1]), # round
1087 of
.sticky
.eq(temp_s
[0]), # sticky
1088 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1094 class FPNorm1ModMulti
:
1096 def __init__(self
, width
, single_cycle
=True):
1098 self
.in_select
= Signal(reset_less
=True)
1099 self
.in_z
= FPNumBase(width
, False)
1100 self
.in_of
= Overflow()
1101 self
.temp_z
= FPNumBase(width
, False)
1102 self
.temp_of
= Overflow()
1103 self
.out_z
= FPNumBase(width
, False)
1104 self
.out_of
= Overflow()
1106 def elaborate(self
, platform
):
1109 m
.submodules
.norm1_out_z
= self
.out_z
1110 m
.submodules
.norm1_out_overflow
= self
.out_of
1111 m
.submodules
.norm1_temp_z
= self
.temp_z
1112 m
.submodules
.norm1_temp_of
= self
.temp_of
1113 m
.submodules
.norm1_in_z
= self
.in_z
1114 m
.submodules
.norm1_in_overflow
= self
.in_of
1116 in_z
= FPNumBase(self
.width
, False)
1118 m
.submodules
.norm1_insel_z
= in_z
1119 m
.submodules
.norm1_insel_overflow
= in_of
1121 # select which of temp or in z/of to use
1122 with m
.If(self
.in_select
):
1123 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1124 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1126 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1127 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1128 # initialise out from in (overridden below)
1129 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1130 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1131 # normalisation increase/decrease conditions
1132 decrease
= Signal(reset_less
=True)
1133 increase
= Signal(reset_less
=True)
1134 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1135 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1136 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1138 with m
.If(decrease
):
1140 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1141 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1142 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1143 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1144 self
.out_of
.round_bit
.eq(0), # reset round bit
1145 self
.out_of
.m0
.eq(in_of
.guard
),
1148 with m
.Elif(increase
):
1150 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1151 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1152 self
.out_of
.guard
.eq(in_z
.m
[0]),
1153 self
.out_of
.m0
.eq(in_z
.m
[1]),
1154 self
.out_of
.round_bit
.eq(in_of
.guard
),
1155 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1161 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ creates the "normalise_1" state around FPNorm1ModSingle

        width:        forwarded to FPNorm1ModSingle and the out_z FPNumBase
        id_wid:       forwarded to FPNorm1ModSingle
        single_cycle: accepted but not used in this constructor
    """
    super().__init__("normalise_1")
    # FPNorm1ModSingle's constructor requires (width, id_wid); it has no
    # default for id_wid, so both must be supplied here.
    self.mod = FPNorm1ModSingle(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1169 def setup(self
, m
, i
):
1170 """ links module to inputs and outputs
1172 self
.mod
.setup(m
, i
, self
.out_z
)
1174 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1176 def action(self
, m
):
1180 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ creates the multi-cycle "normalise_1" state

        width:  forwarded to FPNorm1ModMulti and the FPNumBase temporaries
        id_wid: not used in this constructor
    """
    super().__init__("normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # handshake and loop-control flags
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # temporaries holding the intermediate number and overflow bits
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # results
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1194 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1195 """ links module to inputs and outputs
1197 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1198 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1199 self
.out_z
, self
.out_norm
)
1201 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1202 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1204 def action(self
, m
):
1205 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1206 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1207 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1208 with m
.If(self
.out_norm
):
1209 with m
.If(self
.in_accept
):
1214 m
.d
.sync
+= self
.ack
.eq(0)
1216 # normalisation not required (or done).
1218 m
.d
.sync
+= self
.ack
.eq(1)
1219 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" that chains four modules combinatorially:
        normalise -> round -> corrections -> pack.

        Only the final packed value and its id are registered (sync).
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width  # read by ispec()/ospec()/setup()

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module the chain is added to
            * i: input data, handed to the normalisation module
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        # out_z only exists after setup() has been called
        self.out_z = self.pmod.ospec()

        m.d.sync += self.out_z.mid.eq(self.pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v) # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ data record carrying a number and its identifier

        * width: forwarded to FPNumBase
        * id_wid: bit-width of the identifier signal
    """
    # FPNumBase is built with a second argument of False
    # (meaning defined in fpbase -- not visible here)
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1276 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ rounding module: creates its own input and output records

        * width: stored, and passed to the ispec/ospec records
        * id_wid: stored, and passed to the ispec/ospec records
    """
    # both attributes must exist before ispec()/ospec() run,
    # because those read self.width and self.id_wid
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1288 return FPNorm1Data(self
.width
, self
.id_wid
)
1291 return FPRoundData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this module in m under the submodule name "roundz"
        and drives its input record combinatorially from i
    """
    m.submodules.roundz = self
    m.d.comb += self.i.eq(i)
def elaborate(self, platform):
    """ builds the rounding logic and returns it as a Module

        The output defaults to a copy of the input.  When the input's
        roundz flag is set the mantissa is incremented, and if the
        mantissa was already all 1s the exponent is incremented too.
    """
    m = Module()
    m.d.comb += self.out_z.eq(self.i)
    with m.If(self.i.roundz):
        m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa rounds up
        # NOTE(review): nesting under the roundz test is inferred (the
        # exponent bump accompanies the mantissa increment) -- confirm
        with m.If(self.i.z.m == self.i.z.m1s): # all 1s
            m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
    return m
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod takes (width, id_wid); both are required
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """ combinatorial corrections stage: copies input to output, and
        replaces the exponent with the input's N127 value when the
        input is flagged as denormalised.
    """

    def __init__(self, width, id_wid):
        # must be set before ispec()/ospec(), which read them
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        # the number objects are themselves added as submodules
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            # N127 is provided by the number class (presumably the
            # minimum exponent -- confirm against fpbase)
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod takes (width, id_wid); both are required
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPCorrectionsMod exposes its result as out_z (no "o" attribute)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # the packing state (FPPack) registers itself as "pack"
        m.next = "pack"
def __init__(self, width, id_wid):
    """ data record carrying an output number and its identifier

        * width: forwarded to FPNumOut
        * id_wid: bit-width of the identifier signal
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1395 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ pack module: creates its own input (i) and output (o) records

        * width: stored, and passed to the ispec/ospec records
        * id_wid: stored, and passed to the ispec/ospec records
    """
    # both attributes must exist before ispec()/ospec() run,
    # because those read self.width and self.id_wid
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1407 return FPRoundData(self
.width
, self
.id_wid
)
1410 return FPPackData(self
.width
, self
.id_wid
)
def setup(self, m, in_z):
    """ links module to inputs and outputs

        registers this module in m under the submodule name "pack"
        and drives its input record combinatorially from in_z
    """
    m.submodules.pack = self
    m.d.comb += self.i.eq(in_z)
def elaborate(self, platform):
    """ builds the packing logic and returns it as a Module

        An overflowed input is packed via o.z.inf() using the input
        sign; any other input is packed via o.z.create() from sign,
        exponent and mantissa.  The identifier is passed straight through.
    """
    m = Module()
    m.submodules.pack_in_z = self.i.z
    # users of this module read o.mid, so it has to be driven here
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(self.i.z.is_overflowed):
        m.d.comb += self.o.z.inf(self.i.z.s)
    with m.Else():
        # must be exclusive with the branch above: an unconditional
        # create() would always override the inf() assignment
        m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    return m
def __init__(self, width, id_wid):
    """ data record carrying an output number and its identifier

        * width: forwarded to FPNumOut
        * id_wid: bit-width of the identifier signal
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1434 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod takes (width, id_wid); both are required
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o"; the packed value lives on
        # the record's z member (same access path FPNormToPack uses)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that writes a result to an output and waits for it to
        be acknowledged before moving on.

        * state: name of this FSM state
        * in_z: source; its .v is copied to the output
        * out_z: destination; accessed as out_z.z.v / .z.stb / .z.ack
        * in_mid: source identifier, or None to skip the id copy
        * out_mid: destination identifier
        * to_state: state entered after the handshake ("get_ops" if None)
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
          self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # strobe and ack both high: drop the strobe and leave
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that writes a result to one of several outputs,
        selected by in_mid, and waits for that output to be acknowledged.

        * state: name of this FSM state
        * in_z: source; its .v is copied to the selected output
        * out_zs: indexable collection of outputs (.v / .stb / .ack)
        * in_mid: index into out_zs
        * to_state: state entered after the handshake ("get_ops" if None)
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # local copies of the selected output's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        m.d.sync += [
          self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # strobe and ack both high: drop the strobe and leave
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ input record for the adder: two operands plus an identifier.

        * width: bit-width of each operand signal
        * id_wid: bit-width of the identifier signal
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ output record: an FPOp result plus its identifier

        * width: forwarded to FPOp
        * id_wid: bit-width of the identifier signal
    """
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)
1532 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    # width/id_wid must be set before ispec()/ospec(), which read them
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    # FSM states, appended by add_state() while the fragment is built
    self.states = []
1557 return FPADDBaseData(self
.width
, self
.id_wid
)
1560 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ appends state to self.states and returns it

        returning the state lets callers write
        "x = self.add_state(...)" and then call x.setup(...)
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        builds the list of states (compact or longer pipeline, chosen
        by self.compact), then emits one FSM state per entry, named by
        the entry's state_from and populated by its action()
    """
    m = Module()
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
1585 def get_longer_fragment(self
, m
, platform
=None):
1587 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1589 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1593 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1594 sc
.setup(m
, a
, b
, self
.in_mid
)
1596 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1597 dn
.setup(m
, a
, b
, sc
.in_mid
)
1599 if self
.single_cycle
:
1600 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1601 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1603 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1604 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1606 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1607 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1609 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1610 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1612 if self
.single_cycle
:
1613 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1614 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1616 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1617 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1619 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1620 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1622 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1623 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1625 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1626 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1628 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1629 pa
.in_mid
, self
.out_mid
))
1631 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1632 pa
.in_mid
, self
.out_mid
))
1634 def get_compact_fragment(self
, m
, platform
=None):
1636 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1637 self
.width
, self
.id_wid
))
1638 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1640 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1643 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1646 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1647 n1
.setup(m
, alm
.a1o
)
1649 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1650 n1
.out_z
.mid
, self
.o
.mid
))
1652 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1653 sc
.o
.mid
, self
.o
.mid
))
1656 class FPADDBase(FPState
):
1658 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1661 * width: bit-width of IEEE754. supported: 16, 32, 64
1662 * id_wid: an identifier that is sync-connected to the input
1663 * single_cycle: True indicates each stage to complete in 1 clock
1665 FPState
.__init
__(self
, "fpadd")
1667 self
.single_cycle
= single_cycle
1668 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1669 self
.o
= self
.ospec()
1671 self
.in_t
= Trigger()
1672 self
.i
= self
.ispec()
1674 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1675 self
.in_accept
= Signal(reset_less
=True)
1676 self
.add_stb
= Signal(reset_less
=True)
1677 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1680 return self
.mod
.ispec()
1683 return self
.mod
.ospec()
1685 def setup(self
, m
, i
, add_stb
, in_mid
):
1686 m
.d
.comb
+= [self
.i
.eq(i
),
1687 self
.mod
.i
.eq(self
.i
),
1688 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1689 #self.add_stb.eq(add_stb),
1690 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1691 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1692 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1693 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1694 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1695 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1698 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1699 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1700 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1701 #m.d.sync += self.in_t.stb.eq(0)
1703 m
.submodules
.fpadd
= self
.mod
1705 def action(self
, m
):
1707 # in_accept is set on incoming strobe HIGH and ack LOW.
1708 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1710 #with m.If(self.in_t.ack):
1711 # m.d.sync += self.in_t.stb.eq(0)
1712 with m
.If(~self
.z_done
):
1713 # not done: test for accepting an incoming operand pair
1714 with m
.If(self
.in_accept
):
1716 self
.add_ack
.eq(1), # acknowledge receipt...
1717 self
.in_t
.stb
.eq(1), # initiate add
1720 m
.d
.sync
+= [self
.add_ack
.eq(0),
1721 self
.in_t
.stb
.eq(0),
1725 # done: acknowledge, and write out id and value
1726 m
.d
.sync
+= [self
.add_ack
.eq(1),
1733 if self
.in_mid
is not None:
1734 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1737 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1739 # move to output state on detecting z ack
1740 with m
.If(self
.out_z
.trigger
):
1741 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1744 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1748 def __init__(self
, width
, id_wid
):
1750 self
.id_wid
= id_wid
1752 for i
in range(rs_sz
):
1754 out_z
.name
= "out_z_%d" % i
1756 self
.res
= Array(res
)
1757 self
.in_z
= FPOp(width
)
1758 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1760 def setup(self
, m
, in_z
, in_mid
):
1761 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1762 self
.in_mid
.eq(in_mid
)]
1764 def get_fragment(self
, platform
=None):
1765 """ creates the HDL code-fragment for FPAdd
1768 m
.submodules
.res_in_z
= self
.in_z
1769 m
.submodules
+= self
.res
1781 """ FPADD: stages as follows:
1787 FPAddBase---> FPAddBaseMod
1789 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1791 FPAddBase is tricky: it is both a stage and *has* stages.
1792 Connection to FPAddBaseMod therefore requires an in stb/ack
1793 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1794 needs to be the thing that raises the incoming stb.
1797 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1800 * width: bit-width of IEEE754. supported: 16, 32, 64
1801 * id_wid: an identifier that is sync-connected to the input
1802 * single_cycle: True indicates each stage to complete in 1 clock
1805 self
.id_wid
= id_wid
1806 self
.single_cycle
= single_cycle
1808 #self.out_z = FPOp(width)
1809 self
.ids
= FPID(id_wid
)
1812 for i
in range(rs_sz
):
1815 in_a
.name
= "in_a_%d" % i
1816 in_b
.name
= "in_b_%d" % i
1817 rs
.append((in_a
, in_b
))
1821 for i
in range(rs_sz
):
1823 out_z
.name
= "out_z_%d" % i
1825 self
.res
= Array(res
)
def add_state(self, state):
    """ appends state to self.states and returns it

        returning the state lets callers write
        "x = self.add_state(...)" and keep using x afterwards
    """
    self.states.append(state)
    return state
1833 def get_fragment(self
, platform
=None):
1834 """ creates the HDL code-fragment for FPAdd
1837 m
.submodules
+= self
.rs
1839 in_a
= self
.rs
[0][0]
1840 in_b
= self
.rs
[0][1]
1842 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1847 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1852 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1853 ab
= self
.add_state(ab
)
1854 abd
= ab
.ispec() # create an input spec object for FPADDBase
1855 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1856 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1859 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1862 with m
.FSM() as fsm
:
1864 for state
in self
.states
:
1865 with m
.State(state
.state_from
):
1871 if __name__
== "__main__":
1873 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1874 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1875 alu
.rs
[0][1].ports() + \
1876 alu
.res
[0].ports() + \
1877 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1879 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1880 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1881 alu
.in_t
.ports() + \
1882 alu
.out_z
.ports() + \
1883 [alu
.in_mid
, alu
.out_mid
])
1886 # works... but don't use, just do "python fname.py convert -t v"
1887 #print (verilog.convert(alu, ports=[
1888 # ports=alu.in_a.ports() + \
1889 # alu.in_b.ports() + \
1890 # alu.out_z.ports())