80a8de8336713dafc4ee9007dcd3a9aa6ff02ae3
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
16 def __init__(self
, state_from
):
17 self
.state_from
= state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
139 def __init__(self
, width
):
140 self
.in_op
= FPOp(width
)
141 self
.out_op
= Signal(width
)
142 self
.out_decode
= Signal(reset_less
=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
216 class FPGet2Op(FPState
):
220 def __init__(self
, in_state
, out_state
, width
, id_wid
):
221 FPState
.__init
__(self
, in_state
)
222 self
.out_state
= out_state
223 self
.mod
= FPGet2OpMod(width
, id_wid
)
224 self
.o
= self
.mod
.ospec()
225 self
.in_stb
= Signal(reset_less
=True)
226 self
.out_ack
= Signal(reset_less
=True)
227 self
.out_decode
= Signal(reset_less
=True)
229 def setup(self
, m
, i
, in_stb
, in_ack
):
230 """ links module to inputs and outputs
232 m
.submodules
.get_ops
= self
.mod
233 m
.d
.comb
+= self
.mod
.i
.eq(i
)
234 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
235 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
236 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
237 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
240 with m
.If(self
.out_decode
):
241 m
.next
= self
.out_state
244 self
.o
.eq(self
.mod
.o
),
247 m
.d
.sync
+= self
.mod
.ack
.eq(1)
252 def __init__(self
, width
, id_wid
, m_extra
=True):
253 self
.a
= FPNumBase(width
, m_extra
)
254 self
.b
= FPNumBase(width
, m_extra
)
255 self
.mid
= Signal(id_wid
, reset_less
=True)
258 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
261 class FPAddSpecialCasesMod
:
262 """ special cases: NaNs, infs, zeros, denormalised
263 NOTE: some of these are unique to add. see "Special Operations"
264 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
267 def __init__(self
, width
, id_wid
):
270 self
.i
= self
.ispec()
271 self
.o
= self
.ospec()
272 self
.out_do_z
= Signal(reset_less
=True)
275 return FPNumBase2Ops(self
.width
, self
.id_wid
)
278 return FPPackData(self
.width
, self
.id_wid
)
280 def setup(self
, m
, i
, out_do_z
):
281 """ links module to inputs and outputs
283 m
.submodules
.specialcases
= self
284 m
.d
.comb
+= self
.i
.eq(i
)
285 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
287 def elaborate(self
, platform
):
290 m
.submodules
.sc_in_a
= self
.i
.a
291 m
.submodules
.sc_in_b
= self
.i
.b
292 m
.submodules
.sc_out_z
= self
.o
.z
295 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
298 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
300 # if a is NaN or b is NaN return NaN
301 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
302 m
.d
.comb
+= self
.out_do_z
.eq(1)
303 m
.d
.comb
+= self
.o
.z
.nan(0)
305 # XXX WEIRDNESS for FP16 non-canonical NaN handling
308 ## if a is zero and b is NaN return -b
309 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
310 # m.d.comb += self.out_do_z.eq(1)
311 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
313 ## if b is zero and a is NaN return -a
314 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
315 # m.d.comb += self.out_do_z.eq(1)
316 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
318 ## if a is -zero and b is NaN return -b
319 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
320 # m.d.comb += self.out_do_z.eq(1)
321 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
323 ## if b is -zero and a is NaN return -a
324 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
325 # m.d.comb += self.out_do_z.eq(1)
326 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
328 # if a is inf return inf (or NaN)
329 with m
.Elif(self
.i
.a
.is_inf
):
330 m
.d
.comb
+= self
.out_do_z
.eq(1)
331 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
332 # if a is inf, b.exp_128 is set and signs don't match return NaN
333 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
334 m
.d
.comb
+= self
.o
.z
.nan(0)
336 # if b is inf return inf
337 with m
.Elif(self
.i
.b
.is_inf
):
338 m
.d
.comb
+= self
.out_do_z
.eq(1)
339 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
341 # if a is zero and b zero return signed-a/b
342 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
343 m
.d
.comb
+= self
.out_do_z
.eq(1)
344 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
348 # if a is zero return b
349 with m
.Elif(self
.i
.a
.is_zero
):
350 m
.d
.comb
+= self
.out_do_z
.eq(1)
351 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
354 # if b is zero return a
355 with m
.Elif(self
.i
.b
.is_zero
):
356 m
.d
.comb
+= self
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
360 # if a equal to -b return zero (+ve zero)
361 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
362 m
.d
.comb
+= self
.out_do_z
.eq(1)
363 m
.d
.comb
+= self
.o
.z
.zero(0)
365 # Denormalised Number checks
367 m
.d
.comb
+= self
.out_do_z
.eq(0)
373 def __init__(self
, id_wid
):
376 self
.in_mid
= Signal(id_wid
, reset_less
=True)
377 self
.out_mid
= Signal(id_wid
, reset_less
=True)
383 if self
.id_wid
is not None:
384 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
387 class FPAddSpecialCases(FPState
, FPID
):
388 """ special cases: NaNs, infs, zeros, denormalised
389 NOTE: some of these are unique to add. see "Special Operations"
390 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
393 def __init__(self
, width
, id_wid
):
394 FPState
.__init
__(self
, "special_cases")
395 FPID
.__init
__(self
, id_wid
)
396 self
.mod
= FPAddSpecialCasesMod(width
)
397 self
.out_z
= self
.mod
.ospec()
398 self
.out_do_z
= Signal(reset_less
=True)
400 def setup(self
, m
, in_a
, in_b
, in_mid
):
401 """ links module to inputs and outputs
403 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
404 if self
.in_mid
is not None:
405 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
407 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
411 with m
.If(self
.out_do_z
):
414 m
.next
= "denormalise"
417 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
418 """ special cases: NaNs, infs, zeros, denormalised
419 NOTE: some of these are unique to add. see "Special Operations"
420 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
423 def __init__(self
, width
, id_wid
):
424 FPState
.__init
__(self
, "special_cases")
425 FPID
.__init
__(self
, id_wid
)
426 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
427 self
.out_z
= self
.smod
.ospec()
428 self
.out_do_z
= Signal(reset_less
=True)
430 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
431 self
.o
= self
.dmod
.ospec()
433 def setup(self
, m
, i
, in_mid
):
434 """ links module to inputs and outputs
436 self
.smod
.setup(m
, i
, self
.out_do_z
)
437 self
.dmod
.setup(m
, i
)
438 if self
.in_mid
is not None:
439 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
442 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
444 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
448 with m
.If(self
.out_do_z
):
454 class FPAddDeNormMod(FPState
):
456 def __init__(self
, width
, id_wid
):
459 self
.i
= self
.ispec()
460 self
.o
= self
.ospec()
463 return FPNumBase2Ops(self
.width
, self
.id_wid
)
466 return FPNumBase2Ops(self
.width
, self
.id_wid
)
468 def setup(self
, m
, i
):
469 """ links module to inputs and outputs
471 m
.submodules
.denormalise
= self
472 m
.d
.comb
+= self
.i
.eq(i
)
474 def elaborate(self
, platform
):
476 m
.submodules
.denorm_in_a
= self
.i
.a
477 m
.submodules
.denorm_in_b
= self
.i
.b
478 m
.submodules
.denorm_out_a
= self
.o
.a
479 m
.submodules
.denorm_out_b
= self
.o
.b
480 # hmmm, don't like repeating identical code
481 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
482 with m
.If(self
.i
.a
.exp_n127
):
483 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
485 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
487 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
488 with m
.If(self
.i
.b
.exp_n127
):
489 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
491 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
496 class FPAddDeNorm(FPState
, FPID
):
498 def __init__(self
, width
, id_wid
):
499 FPState
.__init
__(self
, "denormalise")
500 FPID
.__init
__(self
, id_wid
)
501 self
.mod
= FPAddDeNormMod(width
)
502 self
.out_a
= FPNumBase(width
)
503 self
.out_b
= FPNumBase(width
)
505 def setup(self
, m
, in_a
, in_b
, in_mid
):
506 """ links module to inputs and outputs
508 self
.mod
.setup(m
, in_a
, in_b
)
509 if self
.in_mid
is not None:
510 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
513 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
514 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
517 # Denormalised Number checks
521 class FPAddAlignMultiMod(FPState
):
523 def __init__(self
, width
):
524 self
.in_a
= FPNumBase(width
)
525 self
.in_b
= FPNumBase(width
)
526 self
.out_a
= FPNumIn(None, width
)
527 self
.out_b
= FPNumIn(None, width
)
528 self
.exp_eq
= Signal(reset_less
=True)
530 def elaborate(self
, platform
):
531 # This one however (single-cycle) will do the shift
536 m
.submodules
.align_in_a
= self
.in_a
537 m
.submodules
.align_in_b
= self
.in_b
538 m
.submodules
.align_out_a
= self
.out_a
539 m
.submodules
.align_out_b
= self
.out_b
541 # NOTE: this does *not* do single-cycle multi-shifting,
542 # it *STAYS* in the align state until exponents match
544 # exponent of a greater than b: shift b down
545 m
.d
.comb
+= self
.exp_eq
.eq(0)
546 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
547 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
548 agtb
= Signal(reset_less
=True)
549 altb
= Signal(reset_less
=True)
550 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
551 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
553 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
554 # exponent of b greater than a: shift a down
556 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
557 # exponents equal: move to next stage.
559 m
.d
.comb
+= self
.exp_eq
.eq(1)
563 class FPAddAlignMulti(FPState
, FPID
):
565 def __init__(self
, width
, id_wid
):
566 FPID
.__init
__(self
, id_wid
)
567 FPState
.__init
__(self
, "align")
568 self
.mod
= FPAddAlignMultiMod(width
)
569 self
.out_a
= FPNumIn(None, width
)
570 self
.out_b
= FPNumIn(None, width
)
571 self
.exp_eq
= Signal(reset_less
=True)
573 def setup(self
, m
, in_a
, in_b
, in_mid
):
574 """ links module to inputs and outputs
576 m
.submodules
.align
= self
.mod
577 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
578 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
579 #m.d.comb += self.out_a.eq(self.mod.out_a)
580 #m.d.comb += self.out_b.eq(self.mod.out_b)
581 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
582 if self
.in_mid
is not None:
583 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
586 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
587 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
590 with m
.If(self
.exp_eq
):
596 def __init__(self
, width
, id_wid
):
597 self
.a
= FPNumIn(None, width
)
598 self
.b
= FPNumIn(None, width
)
599 self
.mid
= Signal(id_wid
, reset_less
=True)
602 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
605 class FPAddAlignSingleMod
:
607 def __init__(self
, width
, id_wid
):
610 self
.i
= self
.ispec()
611 self
.o
= self
.ospec()
614 return FPNumBase2Ops(self
.width
, self
.id_wid
)
617 return FPNumIn2Ops(self
.width
, self
.id_wid
)
619 def setup(self
, m
, i
):
620 """ links module to inputs and outputs
622 m
.submodules
.align
= self
623 m
.d
.comb
+= self
.i
.eq(i
)
625 def elaborate(self
, platform
):
626 """ Aligns A against B or B against A, depending on which has the
627 greater exponent. This is done in a *single* cycle using
628 variable-width bit-shift
630 the shifter used here is quite expensive in terms of gates.
631 Mux A or B in (and out) into temporaries, as only one of them
632 needs to be aligned against the other
636 m
.submodules
.align_in_a
= self
.i
.a
637 m
.submodules
.align_in_b
= self
.i
.b
638 m
.submodules
.align_out_a
= self
.o
.a
639 m
.submodules
.align_out_b
= self
.o
.b
641 # temporary (muxed) input and output to be shifted
642 t_inp
= FPNumBase(self
.width
)
643 t_out
= FPNumIn(None, self
.width
)
644 espec
= (len(self
.i
.a
.e
), True)
645 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
646 m
.submodules
.align_t_in
= t_inp
647 m
.submodules
.align_t_out
= t_out
648 m
.submodules
.multishift_r
= msr
650 ediff
= Signal(espec
, reset_less
=True)
651 ediffr
= Signal(espec
, reset_less
=True)
652 tdiff
= Signal(espec
, reset_less
=True)
653 elz
= Signal(reset_less
=True)
654 egz
= Signal(reset_less
=True)
656 # connect multi-shifter to t_inp/out mantissa (and tdiff)
657 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
658 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
659 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
660 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
661 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
663 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
664 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
665 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
666 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
668 # default: A-exp == B-exp, A and B untouched (fall through)
669 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
670 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
671 # only one shifter (muxed)
672 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
673 # exponent of a greater than b: shift b down
675 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
678 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
680 # exponent of b greater than a: shift a down
682 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
685 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
690 class FPAddAlignSingle(FPState
, FPID
):
692 def __init__(self
, width
, id_wid
):
693 FPState
.__init
__(self
, "align")
694 FPID
.__init
__(self
, id_wid
)
695 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
696 self
.out_a
= FPNumIn(None, width
)
697 self
.out_b
= FPNumIn(None, width
)
699 def setup(self
, m
, in_a
, in_b
, in_mid
):
700 """ links module to inputs and outputs
702 self
.mod
.setup(m
, in_a
, in_b
)
703 if self
.in_mid
is not None:
704 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
707 # NOTE: could be done as comb
708 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
709 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
715 class FPAddAlignSingleAdd(FPState
, FPID
):
717 def __init__(self
, width
, id_wid
):
718 FPState
.__init
__(self
, "align")
719 FPID
.__init
__(self
, id_wid
)
722 self
.a1o
= self
.ospec()
725 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
728 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
730 def setup(self
, m
, i
, in_mid
):
731 """ links module to inputs and outputs
733 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
736 m
.d
.comb
+= o
.eq(mod
.o
)
738 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
741 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
743 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
745 self
.a1modo
= a1mod
.o
747 if self
.in_mid
is not None:
748 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
750 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
754 m
.next
= "normalise_1"
757 class FPAddStage0Data
:
759 def __init__(self
, width
, id_wid
):
760 self
.z
= FPNumBase(width
, False)
761 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
762 self
.mid
= Signal(id_wid
, reset_less
=True)
765 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
768 class FPAddStage0Mod
:
770 def __init__(self
, width
, id_wid
):
773 self
.i
= self
.ispec()
774 self
.o
= self
.ospec()
777 return FPNumBase2Ops(self
.width
, self
.id_wid
)
780 return FPAddStage0Data(self
.width
, self
.id_wid
)
782 def setup(self
, m
, i
):
783 """ links module to inputs and outputs
785 m
.submodules
.add0
= self
786 m
.d
.comb
+= self
.i
.eq(i
)
788 def elaborate(self
, platform
):
790 m
.submodules
.add0_in_a
= self
.i
.a
791 m
.submodules
.add0_in_b
= self
.i
.b
792 m
.submodules
.add0_out_z
= self
.o
.z
794 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
796 # store intermediate tests (and zero-extended mantissas)
797 seq
= Signal(reset_less
=True)
798 mge
= Signal(reset_less
=True)
799 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
800 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
801 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
802 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
803 am0
.eq(Cat(self
.i
.a
.m
, 0)),
804 bm0
.eq(Cat(self
.i
.b
.m
, 0))
806 # same-sign (both negative or both positive) add mantissas
809 self
.o
.tot
.eq(am0
+ bm0
),
810 self
.o
.z
.s
.eq(self
.i
.a
.s
)
812 # a mantissa greater than b, use a
815 self
.o
.tot
.eq(am0
- bm0
),
816 self
.o
.z
.s
.eq(self
.i
.a
.s
)
818 # b mantissa greater than a, use b
821 self
.o
.tot
.eq(bm0
- am0
),
822 self
.o
.z
.s
.eq(self
.i
.b
.s
)
827 class FPAddStage0(FPState
, FPID
):
828 """ First stage of add. covers same-sign (add) and subtract
829 special-casing when mantissas are greater or equal, to
830 give greatest accuracy.
833 def __init__(self
, width
, id_wid
):
834 FPState
.__init
__(self
, "add_0")
835 FPID
.__init
__(self
, id_wid
)
836 self
.mod
= FPAddStage0Mod(width
)
837 self
.o
= self
.mod
.ospec()
839 def setup(self
, m
, i
, in_mid
):
840 """ links module to inputs and outputs
843 if self
.in_mid
is not None:
844 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
847 # NOTE: these could be done as combinatorial (merge add0+add1)
848 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
854 class FPAddStage1Data
:
856 def __init__(self
, width
, id_wid
):
857 self
.z
= FPNumBase(width
, False)
859 self
.mid
= Signal(id_wid
, reset_less
=True)
862 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
866 class FPAddStage1Mod(FPState
):
867 """ Second stage of add: preparation for normalisation.
868 detects when tot sum is too big (tot[27] is kinda a carry bit)
871 def __init__(self
, width
, id_wid
):
874 self
.i
= self
.ispec()
875 self
.o
= self
.ospec()
878 return FPAddStage0Data(self
.width
, self
.id_wid
)
881 return FPAddStage1Data(self
.width
, self
.id_wid
)
883 def setup(self
, m
, i
):
884 """ links module to inputs and outputs
886 m
.submodules
.add1
= self
887 m
.submodules
.add1_out_overflow
= self
.o
.of
889 m
.d
.comb
+= self
.i
.eq(i
)
891 def elaborate(self
, platform
):
893 #m.submodules.norm1_in_overflow = self.in_of
894 #m.submodules.norm1_out_overflow = self.out_of
895 #m.submodules.norm1_in_z = self.in_z
896 #m.submodules.norm1_out_z = self.out_z
897 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
898 # tot[-1] (MSB) gets set when the sum overflows. shift result down
899 with m
.If(self
.i
.tot
[-1]):
901 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
902 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
903 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
904 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
905 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
906 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
908 # tot[-1] (MSB) zero case
911 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
912 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
913 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
914 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
915 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
920 class FPAddStage1(FPState
, FPID
):
922 def __init__(self
, width
, id_wid
):
923 FPState
.__init
__(self
, "add_1")
924 FPID
.__init
__(self
, id_wid
)
925 self
.mod
= FPAddStage1Mod(width
)
926 self
.out_z
= FPNumBase(width
, False)
927 self
.out_of
= Overflow()
928 self
.norm_stb
= Signal()
930 def setup(self
, m
, i
, in_mid
):
931 """ links module to inputs and outputs
935 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
937 if self
.in_mid
is not None:
938 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
941 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
942 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
943 m
.d
.sync
+= self
.norm_stb
.eq(1)
946 m
.next
= "normalise_1"
949 class FPNormaliseModSingle
:
951 def __init__(self
, width
):
953 self
.in_z
= self
.ispec()
954 self
.out_z
= self
.ospec()
957 return FPNumBase(self
.width
, False)
960 return FPNumBase(self
.width
, False)
962 def setup(self
, m
, i
):
963 """ links module to inputs and outputs
965 m
.submodules
.normalise
= self
966 m
.d
.comb
+= self
.i
.eq(i
)
968 def elaborate(self
, platform
):
971 mwid
= self
.out_z
.m_width
+2
972 pe
= PriorityEncoder(mwid
)
973 m
.submodules
.norm_pe
= pe
975 m
.submodules
.norm1_out_z
= self
.out_z
976 m
.submodules
.norm1_in_z
= self
.in_z
978 in_z
= FPNumBase(self
.width
, False)
980 m
.submodules
.norm1_insel_z
= in_z
981 m
.submodules
.norm1_insel_overflow
= in_of
983 espec
= (len(in_z
.e
), True)
984 ediff_n126
= Signal(espec
, reset_less
=True)
985 msr
= MultiShiftRMerge(mwid
, espec
)
986 m
.submodules
.multishift_r
= msr
988 m
.d
.comb
+= in_z
.eq(self
.in_z
)
989 m
.d
.comb
+= in_of
.eq(self
.in_of
)
990 # initialise out from in (overridden below)
991 m
.d
.comb
+= self
.out_z
.eq(in_z
)
992 m
.d
.comb
+= self
.out_of
.eq(in_of
)
993 # normalisation decrease condition
994 decrease
= Signal(reset_less
=True)
995 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
998 # *sigh* not entirely obvious: count leading zeros (clz)
999 # with a PriorityEncoder: to find from the MSB
1000 # we reverse the order of the bits.
1001 temp_m
= Signal(mwid
, reset_less
=True)
1002 temp_s
= Signal(mwid
+1, reset_less
=True)
1003 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1005 # cat round and guard bits back into the mantissa
1006 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1007 pe
.i
.eq(temp_m
[::-1]), # bit-reversed (MSB first), not complemented
1008 clz
.eq(pe
.o
), # count zeros from MSB down
1009 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1010 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1011 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1018 def __init__(self
, width
, id_wid
):
1019 self
.roundz
= Signal(reset_less
=True)
1020 self
.z
= FPNumBase(width
, False)
1021 self
.mid
= Signal(id_wid
, reset_less
=True)
1024 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1027 class FPNorm1ModSingle
:
1029 def __init__(self
, width
, id_wid
):
1031 self
.id_wid
= id_wid
1032 self
.i
= self
.ispec()
1033 self
.o
= self
.ospec()
1036 return FPAddStage1Data(self
.width
, self
.id_wid
)
1039 return FPNorm1Data(self
.width
, self
.id_wid
)
1041 def setup(self
, m
, i
):
1042 """ links module to inputs and outputs
1044 m
.submodules
.normalise_1
= self
1045 m
.d
.comb
+= self
.i
.eq(i
)
1047 def elaborate(self
, platform
):
1050 mwid
= self
.o
.z
.m_width
+2
1051 pe
= PriorityEncoder(mwid
)
1052 m
.submodules
.norm_pe
= pe
1055 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1057 m
.submodules
.norm1_out_z
= self
.o
.z
1058 m
.submodules
.norm1_out_overflow
= of
1059 m
.submodules
.norm1_in_z
= self
.i
.z
1060 m
.submodules
.norm1_in_overflow
= self
.i
.of
1063 m
.submodules
.norm1_insel_z
= i
.z
1064 m
.submodules
.norm1_insel_overflow
= i
.of
1066 espec
= (len(i
.z
.e
), True)
1067 ediff_n126
= Signal(espec
, reset_less
=True)
1068 msr
= MultiShiftRMerge(mwid
, espec
)
1069 m
.submodules
.multishift_r
= msr
1071 m
.d
.comb
+= i
.eq(self
.i
)
1072 # initialise out from in (overridden below)
1073 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1074 m
.d
.comb
+= of
.eq(i
.of
)
1075 # normalisation increase/decrease conditions
1076 decrease
= Signal(reset_less
=True)
1077 increase
= Signal(reset_less
=True)
1078 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1079 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1081 with m
.If(decrease
):
1082 # *sigh* not entirely obvious: count leading zeros (clz)
1083 # with a PriorityEncoder: to find from the MSB
1084 # we reverse the order of the bits.
1085 temp_m
= Signal(mwid
, reset_less
=True)
1086 temp_s
= Signal(mwid
+1, reset_less
=True)
1087 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1088 # make sure that the amount to decrease by does NOT
1089 # go below the minimum non-INF/NaN exponent
1090 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1093 # cat round and guard bits back into the mantissa
1094 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1095 pe
.i
.eq(temp_m
[::-1]), # bit-reversed (MSB first), not complemented
1096 clz
.eq(limclz
), # count zeros from MSB down
1097 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1098 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1099 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1100 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1101 # overflow in bits 0..1: got shifted too (leave sticky)
1102 of
.guard
.eq(temp_s
[1]), # guard
1103 of
.round_bit
.eq(temp_s
[0]), # round
1106 with m
.Elif(increase
):
1107 temp_m
= Signal(mwid
+1, reset_less
=True)
1109 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1111 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1112 # connect multi-shifter to inp/out mantissa (and ediff)
1114 msr
.diff
.eq(ediff_n126
),
1115 self
.o
.z
.m
.eq(msr
.m
[3:]),
1116 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1117 # overflow in bits 0..2: got shifted too (sticky is updated as well)
1118 of
.guard
.eq(temp_s
[2]), # guard
1119 of
.round_bit
.eq(temp_s
[1]), # round
1120 of
.sticky
.eq(temp_s
[0]), # sticky
1121 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1127 class FPNorm1ModMulti
:
1129 def __init__(self
, width
, single_cycle
=True):
1131 self
.in_select
= Signal(reset_less
=True)
1132 self
.in_z
= FPNumBase(width
, False)
1133 self
.in_of
= Overflow()
1134 self
.temp_z
= FPNumBase(width
, False)
1135 self
.temp_of
= Overflow()
1136 self
.out_z
= FPNumBase(width
, False)
1137 self
.out_of
= Overflow()
1139 def elaborate(self
, platform
):
1142 m
.submodules
.norm1_out_z
= self
.out_z
1143 m
.submodules
.norm1_out_overflow
= self
.out_of
1144 m
.submodules
.norm1_temp_z
= self
.temp_z
1145 m
.submodules
.norm1_temp_of
= self
.temp_of
1146 m
.submodules
.norm1_in_z
= self
.in_z
1147 m
.submodules
.norm1_in_overflow
= self
.in_of
1149 in_z
= FPNumBase(self
.width
, False)
1151 m
.submodules
.norm1_insel_z
= in_z
1152 m
.submodules
.norm1_insel_overflow
= in_of
1154 # select which of temp or in z/of to use
1155 with m
.If(self
.in_select
):
1156 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1157 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1159 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1160 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1161 # initialise out from in (overridden below)
1162 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1163 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1164 # normalisation increase/decrease conditions
1165 decrease
= Signal(reset_less
=True)
1166 increase
= Signal(reset_less
=True)
1167 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1168 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1169 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1171 with m
.If(decrease
):
1173 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1174 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1175 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1176 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1177 self
.out_of
.round_bit
.eq(0), # reset round bit
1178 self
.out_of
.m0
.eq(in_of
.guard
),
1181 with m
.Elif(increase
):
1183 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1184 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1185 self
.out_of
.guard
.eq(in_z
.m
[0]),
1186 self
.out_of
.m0
.eq(in_z
.m
[1]),
1187 self
.out_of
.round_bit
.eq(in_of
.guard
),
1188 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1": single-cycle normalisation wrapper
        around FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width of the number being normalised
            * id_wid: width of the id passed to FPID
            * single_cycle: accepted for interface compatibility; not
              read anywhere in this class
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # NOTE(review): FPNormToPack constructs FPNorm1ModSingle with
        # (width, id_wid); confirm which signature is current.
        self.mod = FPNorm1ModSingle(width)
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

        # latch the rounding decision for the following state
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)

    def action(self, m):
        # NOTE(review): the transition is not present in the extracted
        # source; "round" is the state name FPRound registers - confirm.
        m.next = "round"
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1": multi-cycle normalisation.  Each clock
        the result of FPNorm1ModMulti is stored in temp_z/temp_of and fed
        back into the module until out_norm drops.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0)  # sets to zero when not in normalise_1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        # in_accept: strobe is high and we have not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed this cycle's result back for the next iteration
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        # NOTE(review): the statements under in_accept, both Else lines
        # and the state transition are absent from the extracted source;
        # the structure below is reconstructed from the surviving
        # statements and comments - confirm against the original file.
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" for the compact pipeline: chains
        normalisation, rounding, corrections and packing combinatorially
        and registers only the packed value.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width  # read by ispec/ospec/setup

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)  # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid)  # FPPackMod ospec

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        # NOTE(review): this call is absent from the extracted source;
        # it mirrors the rmod/cmod/pmod wiring below - confirm signature.
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        self.out_z = self.pmod.ospec()

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v)  # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ data holder: an unpacked number plus its multiplex id
    """
    number = FPNumBase(width, False)
    ident = Signal(id_wid, reset_less=True)
    self.z = number
    self.mid = ident
def eq(self, i):
    """ returns the list of assignments copying z and mid from i
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ * width: bit-width handed to the ispec/ospec data objects
        * id_wid: id width handed to the ispec/ospec data objects
    """
    self.width = width  # ispec()/ospec() read self.width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
def ispec(self):
    """ creates a fresh input data object for this module
    """
    return FPNorm1Data(self.width, self.id_wid)
def ospec(self):
    """ creates a fresh output data object for this module
    """
    return FPRoundData(self.width, self.id_wid)
1341 def setup(self
, m
, i
):
1342 m
.submodules
.roundz
= self
1343 m
.d
.comb
+= self
.i
.eq(i
)
def elaborate(self, platform):
    """ copies the input to the output; when the input's roundz flag is
        set the mantissa is incremented, and if the mantissa was all 1s
        the exponent is incremented as well
    """
    m = Module()
    m.d.comb += self.out_z.eq(self.i)
    with m.If(self.i.roundz):
        m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1)  # mantissa rounds up
        with m.If(self.i.z.m == self.i.z.m1s):  # all 1s
            m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1)  # exponent up
    return m
class FPRound(FPState, FPID):
    """ FSM state "round": registers the output of FPRoundMod and moves
        on to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod takes (width, id_wid); passing width alone raises
        # TypeError at construction.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """ combinatorial post-rounding fix-up: forces the exponent of a
        denormalised input to the input's N127 value, passes everything
        else through unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width  # read by ispec/ospec
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        # default: pass-through, overridden below for denormals
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": registers the output of
        FPCorrectionsMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod.__init__ requires (width, id_wid); width alone
        # raises TypeError.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        # NOTE(review): the transition is not present in the extracted
        # source; "pack" is the state name FPPack registers - confirm.
        m.next = "pack"
def __init__(self, width, id_wid):
    """ data holder: an output-format number plus its multiplex id
    """
    packed = FPNumOut(width, False)
    ident = Signal(id_wid, reset_less=True)
    self.z = packed
    self.mid = ident
def eq(self, i):
    """ returns the list of assignments copying z and mid from i
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ * width: bit-width handed to the ispec/ospec data objects
        * id_wid: id width handed to the ispec/ospec data objects
    """
    self.width = width  # ispec()/ospec() read self.width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
def ispec(self):
    """ creates a fresh input data object for this module
    """
    return FPRoundData(self.width, self.id_wid)
def ospec(self):
    """ creates a fresh output data object for this module
    """
    return FPPackData(self.width, self.id_wid)
1466 def setup(self
, m
, in_z
):
1467 """ links module to inputs and outputs
1469 m
.submodules
.pack
= self
1470 m
.d
.comb
+= self
.i
.eq(in_z
)
def elaborate(self, platform):
    """ drives the output from the input: infinity (with the input's
        sign) when the input reports overflow, otherwise built from the
        input's sign, exponent and mantissa
    """
    m = Module()
    m.submodules.pack_in_z = self.i.z
    with m.If(self.i.z.is_overflowed):
        m.d.comb += self.o.z.inf(self.i.z.s)
    with m.Else():
        m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    return m
def __init__(self, width, id_wid):
    """ data holder: an output-format number plus its multiplex id
    """
    packed = FPNumOut(width, False)
    ident = Signal(id_wid, reset_less=True)
    self.z = packed
    self.mid = ident
def eq(self, i):
    """ returns the list of assignments copying z and mid from i
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPack(FPState, FPID):
    """ FSM state "pack": registers the packed value produced by
        FPPackMod and moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod takes (width, id_wid); width alone raises TypeError.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

        # FPPackMod exposes its result as .o (not .out_z), and the data
        # object returned by ospec() holds the number in .z - same
        # access path FPNormToPack uses.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM output state: presents in_z on out_z and waits for the
        stb/ack handshake before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its .v is copied out
            * out_z: destination; .z.v, .z.stb and .z.ack are used
            * in_mid, out_mid: id source and destination (in_mid may be
              None, in which case no id is copied)
            * to_state: state entered after the handshake
              (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z  # read by action()
        self.out_z = out_z  # read by action()
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # handshake complete: drop strobe and leave this state
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM output state: like FPPutZ, but the destination is chosen
        from the out_zs array by in_mid.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its .v is copied out
            * out_zs: indexable collection of destinations (.v/.stb/.ack)
            * in_mid: index into out_zs
            * to_state: state entered after the handshake
              (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z  # read by action()
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # local copies of the selected destination's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ input record for the adder: two operand signals and an id
    """

    def __init__(self, width, id_wid):
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ data holder: an FPOp result plus its multiplex id
    """
    result = FPOp(width)
    ident = Signal(id_wid, reset_less=True)
    self.z = result
    self.mid = ident
def eq(self, i):
    """ returns the list of assignments copying z and mid from i
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
1592 class FPADDBaseMod(FPID
):
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    FPID.__init__(self, id_wid)
    self.width = width  # read by ispec/ospec and the fragment builders
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    self.states = []  # filled by add_state()
def ispec(self):
    """ creates a fresh input data object for this module
    """
    return FPADDBaseData(self.width, self.id_wid)
def ospec(self):
    """ creates a fresh output data object for this module
    """
    return FPOpData(self.width, self.id_wid)
def add_state(self, state):
    """ appends state to self.states and returns it, so callers can
        write ``x = self.add_state(Cls(...))`` and keep using x
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    m = Module()
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t
    # NOTE(review): the selecting condition is absent from the
    # extracted source; self.compact is the only flag that
    # distinguishes the two builders - confirm.
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:

        # one FSM state per registered stage
        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
# get_longer_fragment: registers one state per pipeline step via
# self.add_state(...) and wires each state's setup() to the outputs of
# the previous one.
# NOTE(review): several original lines are absent here (the tail of the
# FPGet2Op(...) call, the bindings of `a` and `b`, and what looks like
# an `else:` between the two alm/n1 alternatives) - restore them from
# the original file before relying on this method.
# NOTE(review): the setup() calls on n1 and rn pass more arguments than
# the setup() signatures of FPNorm1Single / FPRound accept - confirm.
1643 def get_longer_fragment(self
, m
, platform
=None):
1645 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1647 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1651 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1652 sc
.setup(m
, a
, b
, self
.in_mid
)
1654 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1655 dn
.setup(m
, a
, b
, sc
.in_mid
)
1657 if self
.single_cycle
:
1658 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1659 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1661 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1662 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1664 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1665 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1667 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1668 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1670 if self
.single_cycle
:
1671 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1672 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1674 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1675 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1677 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1678 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1680 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1681 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1683 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1684 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1686 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1687 pa
.in_mid
, self
.out_mid
))
1689 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1690 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """ registers the reduced set of stages: operand fetch, combined
        special-cases/denormalise, combined align/add, combined
        normalise-to-pack, and the two output states
    """
    width, id_wid = self.width, self.id_wid

    get = self.add_state(FPGet2Op("get_ops", "special_cases", width, id_wid))
    get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
    sc.setup(m, get.o, self.in_mid)

    alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
    alm.setup(m, sc.o, sc.in_mid)

    n1 = self.add_state(FPNormToPack(width, id_wid))
    n1.setup(m, alm.a1o, alm.in_mid)

    # normal path result
    ppz = self.add_state(
        FPPutZ("pack_put_z", n1.out_z.z, self.o, n1.in_mid, self.out_mid))

    # special-case path result
    pz = self.add_state(
        FPPutZ("put_z", sc.out_z.z, self.o, sc.in_mid, self.out_mid))
1714 class FPADDBase(FPState
, FPID
):
def __init__(self, width, id_wid=None, single_cycle=False):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "fpadd")
    self.single_cycle = single_cycle
    self.mod = FPADDBaseMod(width, id_wid, single_cycle)
    self.o = self.ospec()

    self.in_t = Trigger()
    self.i = self.ispec()

    self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
    self.in_accept = Signal(reset_less=True)
    self.add_stb = Signal(reset_less=True)
    self.add_ack = Signal(reset=0, reset_less=True)
def ispec(self):
    """ input data object, as defined by the wrapped module
    """
    return self.mod.ispec()
def ospec(self):
    """ output data object, as defined by the wrapped module
    """
    return self.mod.ospec()
def setup(self, m, i, add_stb, in_mid):
    """ wires this state to the wrapped FPADDBaseMod: data and id go
        in, value/strobe/id come out, ack is passed back; registers
        the module as submodule "fpadd"
    """
    m.d.comb += [self.i.eq(i),
                 self.mod.i.eq(self.i),
                 self.in_mid.eq(in_mid),
                 self.mod.in_mid.eq(self.in_mid),
                 self.z_done.eq(self.mod.o.z.trigger),
                 #self.add_stb.eq(add_stb),
                 self.mod.in_t.stb.eq(self.in_t.stb),
                 self.in_t.ack.eq(self.mod.in_t.ack),
                 self.o.mid.eq(self.mod.o.mid),
                 self.o.z.v.eq(self.mod.o.z.v),
                 self.o.z.stb.eq(self.mod.o.z.stb),
                 self.mod.o.z.ack.eq(self.o.z.ack),
                ]

    m.d.sync += self.add_stb.eq(add_stb)
    m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
    m.d.sync += self.o.z.ack.eq(0)  # likewise
    #m.d.sync += self.in_t.stb.eq(0)

    m.submodules.fpadd = self.mod
# action: FSM body for the "fpadd" state.  While z_done is low it
# handles acceptance of an incoming operand pair (add_ack / in_t.stb);
# once z_done is high it acknowledges and copies id and value out.
# NOTE(review): many original lines are absent from this block (list
# openers/closers, the Else branches, the statements following
# add_ack.eq(1), and the state transition) - restore them from the
# original file; the block is not valid Python as it stands.
# NOTE(review): the tail reads self.out_z, self.mod.out_z and
# self.mod.out_mid, whereas __init__/setup use self.o and self.mod.o -
# confirm which naming is current.
1766 def action(self
, m
):
1768 # in_accept is set on incoming strobe HIGH and ack LOW.
1769 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1771 #with m.If(self.in_t.ack):
1772 # m.d.sync += self.in_t.stb.eq(0)
1773 with m
.If(~self
.z_done
):
1774 # not done: test for accepting an incoming operand pair
1775 with m
.If(self
.in_accept
):
1777 self
.add_ack
.eq(1), # acknowledge receipt...
1778 self
.in_t
.stb
.eq(1), # initiate add
1781 m
.d
.sync
+= [self
.add_ack
.eq(0),
1782 self
.in_t
.stb
.eq(0),
1786 # done: acknowledge, and write out id and value
1787 m
.d
.sync
+= [self
.add_ack
.eq(1),
1794 if self
.in_mid
is not None:
1795 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1798 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1800 # move to output state on detecting z ack
1801 with m
.If(self
.out_z
.trigger
):
1802 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1805 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
# __init__: stores id_wid, builds self.res as an Array, and creates the
# in_z (FPOp) and in_mid inputs.
# NOTE(review): the loop iterates over `rs_sz`, which is not a parameter
# of this signature, and the lines creating `res` / `out_z` are absent
# from this extract - restore from the original file.
1809 def __init__(self
, width
, id_wid
):
1811 self
.id_wid
= id_wid
1813 for i
in range(rs_sz
):
1815 out_z
.name
= "out_z_%d" % i
1817 self
.res
= Array(res
)
1818 self
.in_z
= FPOp(width
)
1819 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1821 def setup(self
, m
, in_z
, in_mid
):
1822 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1823 self
.in_mid
.eq(in_mid
)]
# get_fragment: adds in_z and the res array as submodules of `m`.
# NOTE(review): the creation of `m`, the docstring terminator and
# everything after the submodule registration are absent from this
# extract - restore from the original file.
1825 def get_fragment(self
, platform
=None):
1826 """ creates the HDL code-fragment for FPAdd
1829 m
.submodules
.res_in_z
= self
.in_z
1830 m
.submodules
+= self
.res
1842 """ FPADD: stages as follows:
1848 FPAddBase---> FPAddBaseMod
1850 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1852 FPAddBase is tricky: it is both a stage and *has* stages.
1853 Connection to FPAddBaseMod therefore requires an in stb/ack
1854 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1855 needs to be the thing that raises the incoming stb.
# __init__: top-level adder constructor.  Stores id_wid and
# single_cycle, creates an FPID as self.ids, builds rs_sz (in_a, in_b)
# operand pairs named in_a_<i>/in_b_<i> and rs_sz outputs named
# out_z_<i>, the latter wrapped in an Array as self.res.
# NOTE(review): the lines that create `rs`, `res`, `in_a`, `in_b`,
# `out_z` (and presumably self.width / self.rs / self.states, which
# other methods read) are absent from this extract, as is the
# docstring's opening and closing - restore from the original file.
1858 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1861 * width: bit-width of IEEE754. supported: 16, 32, 64
1862 * id_wid: an identifier that is sync-connected to the input
1863 * single_cycle: True indicates each stage to complete in 1 clock
1866 self
.id_wid
= id_wid
1867 self
.single_cycle
= single_cycle
1869 #self.out_z = FPOp(width)
1870 self
.ids
= FPID(id_wid
)
1873 for i
in range(rs_sz
):
1876 in_a
.name
= "in_a_%d" % i
1877 in_b
.name
= "in_b_%d" % i
1878 rs
.append((in_a
, in_b
))
1882 for i
in range(rs_sz
):
1884 out_z
.name
= "out_z_%d" % i
1886 self
.res
= Array(res
)
def add_state(self, state):
    """ appends state to self.states and returns it, so callers can
        write ``x = self.add_state(Cls(...))`` and keep using x
    """
    self.states.append(state)
    return state
# get_fragment: builds the top-level FSM.  Registers the get_a / get_b
# operand-fetch states, an FPADDBase state fed through a sync-assigned
# input record (abd), and a "put_z" FPPutZIdx state, then opens one FSM
# state per entry of self.states.
# NOTE(review): the creation of `m`, the argument tails of the
# FPGetOp(...) / FPPutZIdx(...) calls, the bindings of `a`, `b` and
# `o`, the per-state body and the return are absent from this extract -
# restore from the original file.
1894 def get_fragment(self
, platform
=None):
1895 """ creates the HDL code-fragment for FPAdd
1898 m
.submodules
+= self
.rs
1900 in_a
= self
.rs
[0][0]
1901 in_b
= self
.rs
[0][1]
1903 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1908 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1913 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1914 ab
= self
.add_state(ab
)
1915 abd
= ab
.ispec() # create an input spec object for FPADDBase
1916 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1917 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1920 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1923 with m
.FSM() as fsm
:
1925 for state
in self
.states
:
1926 with m
.State(state
.state_from
):
# Script entry point: builds an adder and hands it to nmigen's cli
# main() together with the list of ports to expose.
# NOTE(review): two alternative configurations are visible (FPADD and
# FPADDBase); the statement selecting between them is absent from this
# extract.  The FPADDBase variant references alu.in_a / alu.in_b /
# alu.out_z / alu.out_mid, which FPADDBase.__init__ as shown in this
# file does not create - confirm before use.
1932 if __name__
== "__main__":
1934 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1935 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1936 alu
.rs
[0][1].ports() + \
1937 alu
.res
[0].ports() + \
1938 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1940 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1941 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1942 alu
.in_t
.ports() + \
1943 alu
.out_z
.ports() + \
1944 [alu
.in_mid
, alu
.out_mid
])
1947 # works... but don't use, just do "python fname.py convert -t v"
1948 #print (verilog.convert(alu, ports=[
1949 # ports=alu.in_a.ports() + \
1950 # alu.in_b.ports() + \
1951 # alu.out_z.ports())