1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
16 def __init__(self
, state_from
):
17 self
.state_from
= state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
139 def __init__(self
, width
):
140 self
.in_op
= FPOp(width
)
141 self
.out_op
= Signal(width
)
142 self
.out_decode
= Signal(reset_less
=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
216 class FPGet2Op(FPState
):
220 def __init__(self
, in_state
, out_state
, width
, id_wid
):
221 FPState
.__init
__(self
, in_state
)
222 self
.out_state
= out_state
223 self
.mod
= FPGet2OpMod(width
, id_wid
)
224 self
.o
= self
.mod
.ospec()
225 self
.in_stb
= Signal(reset_less
=True)
226 self
.out_ack
= Signal(reset_less
=True)
227 self
.out_decode
= Signal(reset_less
=True)
229 def setup(self
, m
, i
, in_stb
, in_ack
):
230 """ links module to inputs and outputs
232 m
.submodules
.get_ops
= self
.mod
233 m
.d
.comb
+= self
.mod
.i
.eq(i
)
234 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
235 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
236 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
237 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
240 with m
.If(self
.out_decode
):
241 m
.next
= self
.out_state
244 self
.o
.eq(self
.mod
.o
),
247 m
.d
.sync
+= self
.mod
.ack
.eq(1)
252 def __init__(self
, width
, id_wid
, m_extra
=True):
253 self
.a
= FPNumBase(width
, m_extra
)
254 self
.b
= FPNumBase(width
, m_extra
)
255 self
.mid
= Signal(id_wid
, reset_less
=True)
258 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
261 class FPAddSpecialCasesMod
:
262 """ special cases: NaNs, infs, zeros, denormalised
263 NOTE: some of these are unique to add. see "Special Operations"
264 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
267 def __init__(self
, width
, id_wid
):
270 self
.i
= self
.ispec()
271 self
.o
= self
.ospec()
272 self
.out_do_z
= Signal(reset_less
=True)
275 return FPNumBase2Ops(self
.width
, self
.id_wid
)
278 return FPPackData(self
.width
, self
.id_wid
)
280 def setup(self
, m
, i
, out_do_z
):
281 """ links module to inputs and outputs
283 m
.submodules
.specialcases
= self
284 m
.d
.comb
+= self
.i
.eq(i
)
285 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
287 def elaborate(self
, platform
):
290 m
.submodules
.sc_in_a
= self
.i
.a
291 m
.submodules
.sc_in_b
= self
.i
.b
292 m
.submodules
.sc_out_z
= self
.o
.z
295 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
298 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
300 # if a is NaN or b is NaN return NaN
301 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
302 m
.d
.comb
+= self
.out_do_z
.eq(1)
303 m
.d
.comb
+= self
.o
.z
.nan(0)
305 # XXX WEIRDNESS for FP16 non-canonical NaN handling
308 ## if a is zero and b is NaN return -b
309 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
310 # m.d.comb += self.out_do_z.eq(1)
311 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
313 ## if b is zero and a is NaN return -a
314 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
315 # m.d.comb += self.out_do_z.eq(1)
316 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
318 ## if a is -zero and b is NaN return -b
319 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
320 # m.d.comb += self.out_do_z.eq(1)
321 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
323 ## if b is -zero and a is NaN return -a
324 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
325 # m.d.comb += self.out_do_z.eq(1)
326 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
328 # if a is inf return inf (or NaN)
329 with m
.Elif(self
.i
.a
.is_inf
):
330 m
.d
.comb
+= self
.out_do_z
.eq(1)
331 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
332 # a is inf: if b.exp_128 is also set and the signs don't match, return NaN
333 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
334 m
.d
.comb
+= self
.o
.z
.nan(0)
336 # if b is inf return inf
337 with m
.Elif(self
.i
.b
.is_inf
):
338 m
.d
.comb
+= self
.out_do_z
.eq(1)
339 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
341 # if a is zero and b zero return signed-a/b
342 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
343 m
.d
.comb
+= self
.out_do_z
.eq(1)
344 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
348 # if a is zero return b
349 with m
.Elif(self
.i
.a
.is_zero
):
350 m
.d
.comb
+= self
.out_do_z
.eq(1)
351 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
354 # if b is zero return a
355 with m
.Elif(self
.i
.b
.is_zero
):
356 m
.d
.comb
+= self
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
360 # if a equal to -b return zero (+ve zero)
361 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
362 m
.d
.comb
+= self
.out_do_z
.eq(1)
363 m
.d
.comb
+= self
.o
.z
.zero(0)
365 # Denormalised Number checks
367 m
.d
.comb
+= self
.out_do_z
.eq(0)
373 def __init__(self
, id_wid
):
376 self
.in_mid
= Signal(id_wid
, reset_less
=True)
377 self
.out_mid
= Signal(id_wid
, reset_less
=True)
383 if self
.id_wid
is not None:
384 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
387 class FPAddSpecialCases(FPState
, FPID
):
388 """ special cases: NaNs, infs, zeros, denormalised
389 NOTE: some of these are unique to add. see "Special Operations"
390 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
393 def __init__(self
, width
, id_wid
):
394 FPState
.__init
__(self
, "special_cases")
395 FPID
.__init
__(self
, id_wid
)
396 self
.mod
= FPAddSpecialCasesMod(width
)
397 self
.out_z
= self
.mod
.ospec()
398 self
.out_do_z
= Signal(reset_less
=True)
400 def setup(self
, m
, in_a
, in_b
, in_mid
):
401 """ links module to inputs and outputs
403 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
404 if self
.in_mid
is not None:
405 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
409 with m
.If(self
.out_do_z
):
410 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
413 m
.next
= "denormalise"
416 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
417 """ special cases: NaNs, infs, zeros, denormalised
418 NOTE: some of these are unique to add. see "Special Operations"
419 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
422 def __init__(self
, width
, id_wid
):
423 FPState
.__init
__(self
, "special_cases")
424 FPID
.__init
__(self
, id_wid
)
425 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
426 self
.out_z
= self
.smod
.ospec()
427 self
.out_do_z
= Signal(reset_less
=True)
429 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
430 self
.o
= self
.dmod
.ospec()
432 def setup(self
, m
, i
, in_mid
):
433 """ links module to inputs and outputs
435 self
.smod
.setup(m
, i
, self
.out_do_z
)
436 self
.dmod
.setup(m
, i
)
437 if self
.in_mid
is not None:
438 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
442 with m
.If(self
.out_do_z
):
443 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
447 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
448 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
451 class FPAddDeNormMod(FPState
):
453 def __init__(self
, width
, id_wid
):
456 self
.i
= self
.ispec()
457 self
.o
= self
.ospec()
460 return FPNumBase2Ops(self
.width
, self
.id_wid
)
463 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 def setup(self
, m
, i
):
466 """ links module to inputs and outputs
468 m
.submodules
.denormalise
= self
469 m
.d
.comb
+= self
.i
.eq(i
)
471 def elaborate(self
, platform
):
473 m
.submodules
.denorm_in_a
= self
.i
.a
474 m
.submodules
.denorm_in_b
= self
.i
.b
475 m
.submodules
.denorm_out_a
= self
.o
.a
476 m
.submodules
.denorm_out_b
= self
.o
.b
477 # hmmm, don't like repeating identical code
478 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
479 with m
.If(self
.i
.a
.exp_n127
):
480 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
482 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
484 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
485 with m
.If(self
.i
.b
.exp_n127
):
486 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
488 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
493 class FPAddDeNorm(FPState
, FPID
):
495 def __init__(self
, width
, id_wid
):
496 FPState
.__init
__(self
, "denormalise")
497 FPID
.__init
__(self
, id_wid
)
498 self
.mod
= FPAddDeNormMod(width
)
499 self
.out_a
= FPNumBase(width
)
500 self
.out_b
= FPNumBase(width
)
502 def setup(self
, m
, in_a
, in_b
, in_mid
):
503 """ links module to inputs and outputs
505 self
.mod
.setup(m
, in_a
, in_b
)
506 if self
.in_mid
is not None:
507 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
511 # Denormalised Number checks
513 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
514 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
517 class FPAddAlignMultiMod(FPState
):
519 def __init__(self
, width
):
520 self
.in_a
= FPNumBase(width
)
521 self
.in_b
= FPNumBase(width
)
522 self
.out_a
= FPNumIn(None, width
)
523 self
.out_b
= FPNumIn(None, width
)
524 self
.exp_eq
= Signal(reset_less
=True)
526 def elaborate(self
, platform
):
527 # This one however (single-cycle) will do the shift
532 m
.submodules
.align_in_a
= self
.in_a
533 m
.submodules
.align_in_b
= self
.in_b
534 m
.submodules
.align_out_a
= self
.out_a
535 m
.submodules
.align_out_b
= self
.out_b
537 # NOTE: this does *not* do single-cycle multi-shifting,
538 # it *STAYS* in the align state until exponents match
540 # exponent of a greater than b: shift b down
541 m
.d
.comb
+= self
.exp_eq
.eq(0)
542 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
543 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
544 agtb
= Signal(reset_less
=True)
545 altb
= Signal(reset_less
=True)
546 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
547 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
549 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
550 # exponent of b greater than a: shift a down
552 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
553 # exponents equal: move to next stage.
555 m
.d
.comb
+= self
.exp_eq
.eq(1)
559 class FPAddAlignMulti(FPState
, FPID
):
561 def __init__(self
, width
, id_wid
):
562 FPID
.__init
__(self
, id_wid
)
563 FPState
.__init
__(self
, "align")
564 self
.mod
= FPAddAlignMultiMod(width
)
565 self
.out_a
= FPNumIn(None, width
)
566 self
.out_b
= FPNumIn(None, width
)
567 self
.exp_eq
= Signal(reset_less
=True)
569 def setup(self
, m
, in_a
, in_b
, in_mid
):
570 """ links module to inputs and outputs
572 m
.submodules
.align
= self
.mod
573 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
574 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
575 #m.d.comb += self.out_a.eq(self.mod.out_a)
576 #m.d.comb += self.out_b.eq(self.mod.out_b)
577 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
578 if self
.in_mid
is not None:
579 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
583 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
584 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
585 with m
.If(self
.exp_eq
):
591 def __init__(self
, width
, id_wid
):
592 self
.a
= FPNumIn(None, width
)
593 self
.b
= FPNumIn(None, width
)
594 self
.mid
= Signal(id_wid
, reset_less
=True)
597 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
600 class FPAddAlignSingleMod
:
602 def __init__(self
, width
, id_wid
):
605 self
.i
= self
.ispec()
606 self
.o
= self
.ospec()
609 return FPNumBase2Ops(self
.width
, self
.id_wid
)
612 return FPNumIn2Ops(self
.width
, self
.id_wid
)
614 def setup(self
, m
, i
):
615 """ links module to inputs and outputs
617 m
.submodules
.align
= self
618 m
.d
.comb
+= self
.i
.eq(i
)
620 def elaborate(self
, platform
):
621 """ Aligns A against B or B against A, depending on which has the
622 greater exponent. This is done in a *single* cycle using
623 variable-width bit-shift
625 the shifter used here is quite expensive in terms of gates.
626 Mux A or B in (and out) into temporaries, as only one of them
627 needs to be aligned against the other
631 m
.submodules
.align_in_a
= self
.i
.a
632 m
.submodules
.align_in_b
= self
.i
.b
633 m
.submodules
.align_out_a
= self
.o
.a
634 m
.submodules
.align_out_b
= self
.o
.b
636 # temporary (muxed) input and output to be shifted
637 t_inp
= FPNumBase(self
.width
)
638 t_out
= FPNumIn(None, self
.width
)
639 espec
= (len(self
.i
.a
.e
), True)
640 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
641 m
.submodules
.align_t_in
= t_inp
642 m
.submodules
.align_t_out
= t_out
643 m
.submodules
.multishift_r
= msr
645 ediff
= Signal(espec
, reset_less
=True)
646 ediffr
= Signal(espec
, reset_less
=True)
647 tdiff
= Signal(espec
, reset_less
=True)
648 elz
= Signal(reset_less
=True)
649 egz
= Signal(reset_less
=True)
651 # connect multi-shifter to t_inp/out mantissa (and tdiff)
652 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
653 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
654 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
655 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
656 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
658 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
659 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
660 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
661 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
663 # default: A-exp == B-exp, A and B untouched (fall through)
664 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
665 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
666 # only one shifter (muxed)
667 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
668 # exponent of a greater than b: shift b down
670 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
673 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
675 # exponent of b greater than a: shift a down
677 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
680 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
685 class FPAddAlignSingle(FPState
, FPID
):
687 def __init__(self
, width
, id_wid
):
688 FPState
.__init
__(self
, "align")
689 FPID
.__init
__(self
, id_wid
)
690 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
691 self
.out_a
= FPNumIn(None, width
)
692 self
.out_b
= FPNumIn(None, width
)
694 def setup(self
, m
, in_a
, in_b
, in_mid
):
695 """ links module to inputs and outputs
697 self
.mod
.setup(m
, in_a
, in_b
)
698 if self
.in_mid
is not None:
699 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
703 # NOTE: could be done as comb
704 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
705 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
709 class FPAddAlignSingleAdd(FPState
, FPID
):
711 def __init__(self
, width
, id_wid
):
712 FPState
.__init
__(self
, "align")
713 FPID
.__init
__(self
, id_wid
)
716 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
717 self
.o
= self
.mod
.ospec()
719 self
.a1mod
= FPAddStage1Mod(width
, id_wid
)
720 self
.a1o
= self
.a1mod
.ospec()
722 def setup(self
, m
, i
, in_mid
):
723 """ links module to inputs and outputs
726 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
728 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
729 a0mod
.setup(m
, self
.o
)
731 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
733 self
.a1mod
.setup(m
, a0o
)
735 if self
.in_mid
is not None:
736 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
740 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
741 m
.next
= "normalise_1"
744 class FPAddStage0Data
:
746 def __init__(self
, width
, id_wid
):
747 self
.z
= FPNumBase(width
, False)
748 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
749 self
.mid
= Signal(id_wid
, reset_less
=True)
752 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
755 class FPAddStage0Mod
:
757 def __init__(self
, width
, id_wid
):
760 self
.i
= self
.ispec()
761 self
.o
= self
.ospec()
764 return FPNumBase2Ops(self
.width
, self
.id_wid
)
767 return FPAddStage0Data(self
.width
, self
.id_wid
)
769 def setup(self
, m
, i
):
770 """ links module to inputs and outputs
772 m
.submodules
.add0
= self
773 m
.d
.comb
+= self
.i
.eq(i
)
775 def elaborate(self
, platform
):
777 m
.submodules
.add0_in_a
= self
.i
.a
778 m
.submodules
.add0_in_b
= self
.i
.b
779 m
.submodules
.add0_out_z
= self
.o
.z
781 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
783 # store intermediate tests (and zero-extended mantissas)
784 seq
= Signal(reset_less
=True)
785 mge
= Signal(reset_less
=True)
786 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
787 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
788 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
789 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
790 am0
.eq(Cat(self
.i
.a
.m
, 0)),
791 bm0
.eq(Cat(self
.i
.b
.m
, 0))
793 # same-sign (both negative or both positive) add mantissas
796 self
.o
.tot
.eq(am0
+ bm0
),
797 self
.o
.z
.s
.eq(self
.i
.a
.s
)
799 # a mantissa greater than b, use a
802 self
.o
.tot
.eq(am0
- bm0
),
803 self
.o
.z
.s
.eq(self
.i
.a
.s
)
805 # b mantissa greater than a, use b
808 self
.o
.tot
.eq(bm0
- am0
),
809 self
.o
.z
.s
.eq(self
.i
.b
.s
)
814 class FPAddStage0(FPState
, FPID
):
815 """ First stage of add. covers same-sign (add) and subtract
816 special-casing when mantissas are greater or equal, to
817 give greatest accuracy.
820 def __init__(self
, width
, id_wid
):
821 FPState
.__init
__(self
, "add_0")
822 FPID
.__init
__(self
, id_wid
)
823 self
.mod
= FPAddStage0Mod(width
)
824 self
.o
= self
.mod
.ospec()
826 def setup(self
, m
, i
, in_mid
):
827 """ links module to inputs and outputs
830 if self
.in_mid
is not None:
831 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
835 # NOTE: these could be done as combinatorial (merge add0+add1)
836 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
840 class FPAddStage1Data
:
842 def __init__(self
, width
, id_wid
):
843 self
.z
= FPNumBase(width
, False)
845 self
.mid
= Signal(id_wid
, reset_less
=True)
848 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
852 class FPAddStage1Mod(FPState
):
853 """ Second stage of add: preparation for normalisation.
854 detects when tot sum is too big (tot[-1], the MSB, is kinda a carry bit)
857 def __init__(self
, width
, id_wid
):
860 self
.i
= self
.ispec()
861 self
.o
= self
.ospec()
864 return FPAddStage0Data(self
.width
, self
.id_wid
)
867 return FPAddStage1Data(self
.width
, self
.id_wid
)
869 def setup(self
, m
, i
):
870 """ links module to inputs and outputs
872 m
.submodules
.add1
= self
873 m
.submodules
.add1_out_overflow
= self
.o
.of
875 m
.d
.comb
+= self
.i
.eq(i
)
877 def elaborate(self
, platform
):
879 #m.submodules.norm1_in_overflow = self.in_of
880 #m.submodules.norm1_out_overflow = self.out_of
881 #m.submodules.norm1_in_z = self.in_z
882 #m.submodules.norm1_out_z = self.out_z
883 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
884 # tot[-1] (MSB) gets set when the sum overflows. shift result down
885 with m
.If(self
.i
.tot
[-1]):
887 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
888 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
889 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
890 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
891 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
892 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
894 # tot[-1] (MSB) zero case
897 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
898 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
899 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
900 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
901 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
906 class FPAddStage1(FPState
, FPID
):
908 def __init__(self
, width
, id_wid
):
909 FPState
.__init
__(self
, "add_1")
910 FPID
.__init
__(self
, id_wid
)
911 self
.mod
= FPAddStage1Mod(width
)
912 self
.out_z
= FPNumBase(width
, False)
913 self
.out_of
= Overflow()
914 self
.norm_stb
= Signal()
916 def setup(self
, m
, i
, in_mid
):
917 """ links module to inputs and outputs
921 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
923 if self
.in_mid
is not None:
924 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
928 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
929 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
930 m
.d
.sync
+= self
.norm_stb
.eq(1)
931 m
.next
= "normalise_1"
934 class FPNormaliseModSingle
:
936 def __init__(self
, width
):
938 self
.in_z
= self
.ispec()
939 self
.out_z
= self
.ospec()
942 return FPNumBase(self
.width
, False)
945 return FPNumBase(self
.width
, False)
947 def setup(self
, m
, i
):
948 """ links module to inputs and outputs
950 m
.submodules
.normalise
= self
951 m
.d
.comb
+= self
.i
.eq(i
)
953 def elaborate(self
, platform
):
956 mwid
= self
.out_z
.m_width
+2
957 pe
= PriorityEncoder(mwid
)
958 m
.submodules
.norm_pe
= pe
960 m
.submodules
.norm1_out_z
= self
.out_z
961 m
.submodules
.norm1_in_z
= self
.in_z
963 in_z
= FPNumBase(self
.width
, False)
965 m
.submodules
.norm1_insel_z
= in_z
966 m
.submodules
.norm1_insel_overflow
= in_of
968 espec
= (len(in_z
.e
), True)
969 ediff_n126
= Signal(espec
, reset_less
=True)
970 msr
= MultiShiftRMerge(mwid
, espec
)
971 m
.submodules
.multishift_r
= msr
973 m
.d
.comb
+= in_z
.eq(self
.in_z
)
974 m
.d
.comb
+= in_of
.eq(self
.in_of
)
975 # initialise out from in (overridden below)
976 m
.d
.comb
+= self
.out_z
.eq(in_z
)
977 m
.d
.comb
+= self
.out_of
.eq(in_of
)
978 # normalisation decrease condition
979 decrease
= Signal(reset_less
=True)
980 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
983 # *sigh* not entirely obvious: count leading zeros (clz)
984 # with a PriorityEncoder: to find from the MSB
985 # we reverse the order of the bits.
986 temp_m
= Signal(mwid
, reset_less
=True)
987 temp_s
= Signal(mwid
+1, reset_less
=True)
988 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
990 # cat round and guard bits back into the mantissa
991 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
992 pe
.i
.eq(temp_m
[::-1]), # bit-reversed (MSB first), not logically inverted
993 clz
.eq(pe
.o
), # count zeros from MSB down
994 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
995 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
996 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1003 def __init__(self
, width
, id_wid
):
1004 self
.roundz
= Signal(reset_less
=True)
1005 self
.z
= FPNumBase(width
, False)
1006 self
.mid
= Signal(id_wid
, reset_less
=True)
1009 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1012 class FPNorm1ModSingle
:
1014 def __init__(self
, width
, id_wid
):
1016 self
.id_wid
= id_wid
1017 self
.i
= self
.ispec()
1018 self
.o
= self
.ospec()
1021 return FPAddStage1Data(self
.width
, self
.id_wid
)
1024 return FPNorm1Data(self
.width
, self
.id_wid
)
1026 def setup(self
, m
, i
):
1027 """ links module to inputs and outputs
1029 m
.submodules
.normalise_1
= self
1030 m
.d
.comb
+= self
.i
.eq(i
)
1032 def elaborate(self
, platform
):
1035 mwid
= self
.o
.z
.m_width
+2
1036 pe
= PriorityEncoder(mwid
)
1037 m
.submodules
.norm_pe
= pe
1040 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1042 m
.submodules
.norm1_out_z
= self
.o
.z
1043 m
.submodules
.norm1_out_overflow
= of
1044 m
.submodules
.norm1_in_z
= self
.i
.z
1045 m
.submodules
.norm1_in_overflow
= self
.i
.of
1048 m
.submodules
.norm1_insel_z
= i
.z
1049 m
.submodules
.norm1_insel_overflow
= i
.of
1051 espec
= (len(i
.z
.e
), True)
1052 ediff_n126
= Signal(espec
, reset_less
=True)
1053 msr
= MultiShiftRMerge(mwid
, espec
)
1054 m
.submodules
.multishift_r
= msr
1056 m
.d
.comb
+= i
.eq(self
.i
)
1057 # initialise out from in (overridden below)
1058 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1059 m
.d
.comb
+= of
.eq(i
.of
)
1060 # normalisation increase/decrease conditions
1061 decrease
= Signal(reset_less
=True)
1062 increase
= Signal(reset_less
=True)
1063 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1064 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1066 with m
.If(decrease
):
1067 # *sigh* not entirely obvious: count leading zeros (clz)
1068 # with a PriorityEncoder: to find from the MSB
1069 # we reverse the order of the bits.
1070 temp_m
= Signal(mwid
, reset_less
=True)
1071 temp_s
= Signal(mwid
+1, reset_less
=True)
1072 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1073 # make sure that the amount to decrease by does NOT
1074 # go below the minimum non-INF/NaN exponent
1075 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1078 # cat round and guard bits back into the mantissa
1079 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1080 pe
.i
.eq(temp_m
[::-1]), # bit-reversed (MSB first), not logically inverted
1081 clz
.eq(limclz
), # count zeros from MSB down
1082 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1083 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1084 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1085 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1086 # overflow in bits 0..1: got shifted too (leave sticky)
1087 of
.guard
.eq(temp_s
[1]), # guard
1088 of
.round_bit
.eq(temp_s
[0]), # round
1091 with m
.Elif(increase
):
1092 temp_m
= Signal(mwid
+1, reset_less
=True)
1094 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1096 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1097 # connect multi-shifter to inp/out mantissa (and ediff)
1099 msr
.diff
.eq(ediff_n126
),
1100 self
.o
.z
.m
.eq(msr
.m
[3:]),
1101 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1102 # overflow in bits 0..1: got shifted too (leave sticky)
1103 of
.guard
.eq(temp_s
[2]), # guard
1104 of
.round_bit
.eq(temp_s
[1]), # round
1105 of
.sticky
.eq(temp_s
[0]), # sticky
1106 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1112 class FPNorm1ModMulti
:
1114 def __init__(self
, width
, single_cycle
=True):
1116 self
.in_select
= Signal(reset_less
=True)
1117 self
.in_z
= FPNumBase(width
, False)
1118 self
.in_of
= Overflow()
1119 self
.temp_z
= FPNumBase(width
, False)
1120 self
.temp_of
= Overflow()
1121 self
.out_z
= FPNumBase(width
, False)
1122 self
.out_of
= Overflow()
1124 def elaborate(self
, platform
):
1127 m
.submodules
.norm1_out_z
= self
.out_z
1128 m
.submodules
.norm1_out_overflow
= self
.out_of
1129 m
.submodules
.norm1_temp_z
= self
.temp_z
1130 m
.submodules
.norm1_temp_of
= self
.temp_of
1131 m
.submodules
.norm1_in_z
= self
.in_z
1132 m
.submodules
.norm1_in_overflow
= self
.in_of
1134 in_z
= FPNumBase(self
.width
, False)
1136 m
.submodules
.norm1_insel_z
= in_z
1137 m
.submodules
.norm1_insel_overflow
= in_of
1139 # select which of temp or in z/of to use
1140 with m
.If(self
.in_select
):
1141 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1142 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1144 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1145 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1146 # initialise out from in (overridden below)
1147 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1148 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1149 # normalisation increase/decrease conditions
1150 decrease
= Signal(reset_less
=True)
1151 increase
= Signal(reset_less
=True)
1152 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1153 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1154 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1156 with m
.If(decrease
):
1158 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1159 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1160 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1161 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1162 self
.out_of
.round_bit
.eq(0), # reset round bit
1163 self
.out_of
.m0
.eq(in_of
.guard
),
1166 with m
.Elif(increase
):
1168 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1169 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1170 self
.out_of
.guard
.eq(in_z
.m
[0]),
1171 self
.out_of
.m0
.eq(in_z
.m
[1]),
1172 self
.out_of
.round_bit
.eq(in_of
.guard
),
1173 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
class FPNorm1Single(FPState, FPID):
    """FSM state "normalise_1": normalisation performed by one module.

    Wraps an FPNorm1ModSingle and registers its round-up flag so that the
    following state can read it.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """
        * width: passed to FPNorm1ModSingle and to the out_z FPNumBase
        * id_wid: passed to FPID
        * single_cycle: accepted but not read by this class
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # NOTE(review): FPNormToPack constructs FPNorm1ModSingle with
        # (width, id_wid); this call passes width only - confirm which
        # signature the module currently has.
        self.mod = FPNorm1ModSingle(width)
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the module's roundz flag, then moves to "round"
        """
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
        # without a next-state assignment the FSM would never leave this
        # state; "round" is the state_from of FPRound, which follows it.
        m.next = "round"
1202 class FPNorm1Multi(FPState
, FPID
):
1204 def __init__(self
, width
, id_wid
):
1205 FPID
.__init
__(self
, id_wid
)
1206 FPState
.__init
__(self
, "normalise_1")
1207 self
.mod
= FPNorm1ModMulti(width
)
1208 self
.stb
= Signal(reset_less
=True)
1209 self
.ack
= Signal(reset
=0, reset_less
=True)
1210 self
.out_norm
= Signal(reset_less
=True)
1211 self
.in_accept
= Signal(reset_less
=True)
1212 self
.temp_z
= FPNumBase(width
)
1213 self
.temp_of
= Overflow()
1214 self
.out_z
= FPNumBase(width
)
1215 self
.out_roundz
= Signal(reset_less
=True)
1217 def setup(self
, m
, in_z
, in_of
, norm_stb
, in_mid
):
1218 """ links module to inputs and outputs
1220 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1221 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1222 self
.out_z
, self
.out_norm
)
1224 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1225 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1227 if self
.in_mid
is not None:
1228 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1230 def action(self
, m
):
1232 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1233 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1234 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1235 with m
.If(self
.out_norm
):
1236 with m
.If(self
.in_accept
):
1241 m
.d
.sync
+= self
.ack
.eq(0)
1243 # normalisation not required (or done).
1245 m
.d
.sync
+= self
.ack
.eq(1)
1246 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState, FPID):
    """FSM state "normalise_1" for the compact pipeline.

    Chains normalisation, rounding, corrections and packing
    combinatorially, and registers the packed value in action().
    """

    def __init__(self, width, id_wid):
        """
        * width: stored and passed to every chained module
        * id_wid: passed to FPID and to every chained module
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width  # read by ispec/ospec/setup

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        # NOTE(review): mirrors the (m, input) setup calls of the modules
        # below so that `i` actually feeds the chain - confirm against
        # FPNorm1ModSingle.setup.
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        self.out_z = self.pmod.ospec()

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m) # copies incoming ID to outgoing
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v) # outputs packed result
        m.next = "pack_put_z"
class FPRoundData:
    """Data bundle: a number z (FPNumBase) and a multiplex id (mid)."""

    def __init__(self, width, id_wid):
        """
        * width: passed to the FPNumBase holding z
        * id_wid: bit-width of the mid signal
        """
        self.z = FPNumBase(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPRoundMod:
    """Combinatorial rounding module.

    Copies the input to out_z; when the input's roundz flag is set the
    mantissa is incremented, and the exponent too if the mantissa was
    all ones.
    """

    def __init__(self, width, id_wid):
        """
        * width: passed to the input/output data specs
        * id_wid: passed to the input/output data specs
        """
        self.width = width  # read by ispec/ospec
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as m.submodules.roundz and feeds it i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        # default: pass the input straight through (overridden below)
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.roundz):
            m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa rounds up
            with m.If(self.i.z.m == self.i.z.m1s): # all 1s
                m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
        return m
class FPRound(FPState, FPID):
    """FSM state "round": registers the output of an FPRoundMod."""

    def __init__(self, width, id_wid):
        """
        * width: passed to FPRoundMod
        * id_wid: passed to FPID and FPRoundMod
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod.__init__ takes (width, id_wid); passing width alone
        # raises TypeError.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the rounded result, then moves to "corrections"
        """
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """Combinatorial corrections module.

    Copies the input to out_z; when the input number reports
    is_denormalised, the output exponent is replaced by the input's
    N127 value.
    """

    def __init__(self, width, id_wid):
        """
        * width: passed to the input/output data specs
        * id_wid: passed to the input/output data specs
        """
        self.width = width  # read by ispec/ospec
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        # default: pass the input straight through (overridden below)
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState, FPID):
    """FSM state "corrections": registers the output of FPCorrectionsMod."""

    def __init__(self, width, id_wid):
        """
        * width: passed to FPCorrectionsMod
        * id_wid: passed to FPID and FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod.__init__ takes (width, id_wid); passing width
        # alone raises TypeError.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the corrected result, then moves to "pack"
        """
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # without a next-state assignment the FSM would never leave this
        # state; "pack" is the state_from of FPPack, which follows it.
        m.next = "pack"
class FPPackData:
    """Data bundle: a packed number z (FPNumOut) and a multiplex id (mid)."""

    def __init__(self, width, id_wid):
        """
        * width: passed to the FPNumOut holding z
        * id_wid: bit-width of the mid signal
        """
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPackMod:
    """Combinatorial packing module.

    Drives the output number from the input's sign, exponent and
    mantissa, or from its sign alone (via inf) when the input reports
    is_overflowed.
    """

    def __init__(self, width, id_wid):
        """
        * width: passed to the input/output data specs
        * id_wid: passed to the input/output data specs
        """
        self.width = width  # read by ispec/ospec
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        m.submodules.pack_in_z = self.i.z
        with m.If(self.i.z.is_overflowed):
            m.d.comb += self.o.z.inf(self.i.z.s)
        with m.Else():
            # the two drivers of o.z are mutually exclusive
            m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        return m
1464 def __init__(self
, width
, id_wid
):
1465 self
.z
= FPNumOut(width
, False)
1466 self
.mid
= Signal(id_wid
, reset_less
=True)
1469 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState, FPID):
    """FSM state "pack": registers the packed value from an FPPackMod."""

    def __init__(self, width, id_wid):
        """
        * width: passed to FPPackMod
        * id_wid: passed to FPID and FPPackMod
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod.__init__ takes (width, id_wid); passing width alone
        # raises TypeError.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed value, then moves to "pack_put_z"
        """
        # out_z is an FPPackData (value at .z.v) and FPPackMod exposes its
        # output as .o, matching what FPNormToPack.action reads.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """FSM state that presents in_z on out_z and waits for it to be taken.

    The strobe on out_z.z is raised until strobe and ack are both seen,
    at which point it is dropped and the FSM moves to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """
        * state: name of this FSM state
        * in_z: source of the value (its .v is read)
        * out_z: destination; its .z provides v, stb and ack
        * in_mid / out_mid: id copied across when in_mid is not None
        * to_state: state entered once accepted (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these, so they must be stored
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): sync domain chosen to match the stb updates below;
        # a bare .eq() expression would create no logic at all.
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """FSM state that presents in_z on the output selected by in_mid.

    out_zs is indexed by in_mid; the selected entry's strobe is raised
    until its strobe and ack are both seen, at which point it is dropped
    and the FSM moves to to_state.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """
        * state: name of this FSM state
        * in_z: source of the value (its .v is read)
        * out_zs: indexable collection of outputs with v, stb and ack
        * in_mid: index into out_zs
        * to_state: state entered once accepted (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z  # action() reads this, so it must be stored
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # NOTE(review): sync domain chosen to match the stb updates below;
        # a bare .eq() expression would create no logic at all.
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """Data bundle: two operands (a, b) and a multiplex id (mid)."""

    def __init__(self, width, id_wid):
        """
        * width: bit-width of the a and b signals
        * id_wid: bit-width of the mid signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
class FPOpData:
    """Data bundle: an operand z (FPOp) and a multiplex id (mid)."""

    def __init__(self, width, id_wid):
        """
        * width: passed to the FPOp holding z
        * id_wid: bit-width of the mid signal
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
1572 class FPADDBaseMod(FPID
):
1574 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1577 * width: bit-width of IEEE754. supported: 16, 32, 64
1578 * id_wid: an identifier that is sync-connected to the input
1579 * single_cycle: True indicates each stage to complete in 1 clock
1580 * compact: True indicates a reduced number of stages
1582 FPID
.__init
__(self
, id_wid
)
1584 self
.id_wid
= id_wid
1585 self
.single_cycle
= single_cycle
1586 self
.compact
= compact
1588 self
.in_t
= Trigger()
1589 self
.i
= self
.ispec()
1590 self
.o
= self
.ospec()
1595 return FPADDBaseData(self
.width
, self
.id_wid
)
1598 return FPOpData(self
.width
, self
.id_wid
)
1600 def add_state(self
, state
):
1601 self
.states
.append(state
)
1604 def get_fragment(self
, platform
=None):
1605 """ creates the HDL code-fragment for FPAdd
1608 m
.submodules
.out_z
= self
.o
.z
1609 m
.submodules
.in_t
= self
.in_t
1611 self
.get_compact_fragment(m
, platform
)
1613 self
.get_longer_fragment(m
, platform
)
1615 with m
.FSM() as fsm
:
1617 for state
in self
.states
:
1618 with m
.State(state
.state_from
):
1623 def get_longer_fragment(self
, m
, platform
=None):
1625 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1627 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1631 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1632 sc
.setup(m
, a
, b
, self
.in_mid
)
1634 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1635 dn
.setup(m
, a
, b
, sc
.in_mid
)
1637 if self
.single_cycle
:
1638 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1639 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1641 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1642 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1644 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1645 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1647 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1648 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1650 if self
.single_cycle
:
1651 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1652 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1654 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1655 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1657 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1658 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1660 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1661 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1663 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1664 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1666 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1667 pa
.in_mid
, self
.out_mid
))
1669 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1670 pa
.in_mid
, self
.out_mid
))
1672 def get_compact_fragment(self
, m
, platform
=None):
1674 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1675 self
.width
, self
.id_wid
))
1676 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1678 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1679 sc
.setup(m
, get
.o
, self
.in_mid
)
1681 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1682 alm
.setup(m
, sc
.o
, sc
.in_mid
)
1684 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1685 n1
.setup(m
, alm
.a1o
, alm
.in_mid
)
1687 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1688 n1
.in_mid
, self
.out_mid
))
1690 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1691 sc
.in_mid
, self
.out_mid
))
1694 class FPADDBase(FPState
, FPID
):
1696 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1699 * width: bit-width of IEEE754. supported: 16, 32, 64
1700 * id_wid: an identifier that is sync-connected to the input
1701 * single_cycle: True indicates each stage to complete in 1 clock
1703 FPID
.__init
__(self
, id_wid
)
1704 FPState
.__init
__(self
, "fpadd")
1706 self
.single_cycle
= single_cycle
1707 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1708 self
.o
= self
.ospec()
1710 self
.in_t
= Trigger()
1711 self
.i
= self
.ispec()
1713 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1714 self
.in_accept
= Signal(reset_less
=True)
1715 self
.add_stb
= Signal(reset_less
=True)
1716 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1719 return self
.mod
.ispec()
1722 return self
.mod
.ospec()
1724 def setup(self
, m
, i
, add_stb
, in_mid
):
1725 m
.d
.comb
+= [self
.i
.eq(i
),
1726 self
.mod
.i
.eq(self
.i
),
1727 self
.in_mid
.eq(in_mid
),
1728 self
.mod
.in_mid
.eq(self
.in_mid
),
1729 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1730 #self.add_stb.eq(add_stb),
1731 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1732 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1733 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1734 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1735 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1736 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1739 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1740 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1741 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1742 #m.d.sync += self.in_t.stb.eq(0)
1744 m
.submodules
.fpadd
= self
.mod
1746 def action(self
, m
):
1748 # in_accept is set on incoming strobe HIGH and ack LOW.
1749 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1751 #with m.If(self.in_t.ack):
1752 # m.d.sync += self.in_t.stb.eq(0)
1753 with m
.If(~self
.z_done
):
1754 # not done: test for accepting an incoming operand pair
1755 with m
.If(self
.in_accept
):
1757 self
.add_ack
.eq(1), # acknowledge receipt...
1758 self
.in_t
.stb
.eq(1), # initiate add
1761 m
.d
.sync
+= [self
.add_ack
.eq(0),
1762 self
.in_t
.stb
.eq(0),
1766 # done: acknowledge, and write out id and value
1767 m
.d
.sync
+= [self
.add_ack
.eq(1),
1774 if self
.in_mid
is not None:
1775 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1778 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1780 # move to output state on detecting z ack
1781 with m
.If(self
.out_z
.trigger
):
1782 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1785 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1789 def __init__(self
, width
, id_wid
):
1791 self
.id_wid
= id_wid
1793 for i
in range(rs_sz
):
1795 out_z
.name
= "out_z_%d" % i
1797 self
.res
= Array(res
)
1798 self
.in_z
= FPOp(width
)
1799 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1801 def setup(self
, m
, in_z
, in_mid
):
1802 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1803 self
.in_mid
.eq(in_mid
)]
1805 def get_fragment(self
, platform
=None):
1806 """ creates the HDL code-fragment for FPAdd
1809 m
.submodules
.res_in_z
= self
.in_z
1810 m
.submodules
+= self
.res
1822 """ FPADD: stages as follows:
1828 FPAddBase---> FPAddBaseMod
1830 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1832 FPAddBase is tricky: it is both a stage and *has* stages.
1833 Connection to FPAddBaseMod therefore requires an in stb/ack
1834 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1835 needs to be the thing that raises the incoming stb.
1838 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1841 * width: bit-width of IEEE754. supported: 16, 32, 64
1842 * id_wid: an identifier that is sync-connected to the input
1843 * single_cycle: True indicates each stage to complete in 1 clock
1846 self
.id_wid
= id_wid
1847 self
.single_cycle
= single_cycle
1849 #self.out_z = FPOp(width)
1850 self
.ids
= FPID(id_wid
)
1853 for i
in range(rs_sz
):
1856 in_a
.name
= "in_a_%d" % i
1857 in_b
.name
= "in_b_%d" % i
1858 rs
.append((in_a
, in_b
))
1862 for i
in range(rs_sz
):
1864 out_z
.name
= "out_z_%d" % i
1866 self
.res
= Array(res
)
1870 def add_state(self
, state
):
1871 self
.states
.append(state
)
1874 def get_fragment(self
, platform
=None):
1875 """ creates the HDL code-fragment for FPAdd
1878 m
.submodules
+= self
.rs
1880 in_a
= self
.rs
[0][0]
1881 in_b
= self
.rs
[0][1]
1883 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1888 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1893 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1894 ab
= self
.add_state(ab
)
1895 abd
= ab
.ispec() # create an input spec object for FPADDBase
1896 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1897 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1900 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1903 with m
.FSM() as fsm
:
1905 for state
in self
.states
:
1906 with m
.State(state
.state_from
):
1912 if __name__
== "__main__":
1914 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1915 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1916 alu
.rs
[0][1].ports() + \
1917 alu
.res
[0].ports() + \
1918 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1920 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1921 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1922 alu
.in_t
.ports() + \
1923 alu
.out_z
.ports() + \
1924 [alu
.in_mid
, alu
.out_mid
])
1927 # works... but don't use, just do "python fname.py convert -t v"
1928 #print (verilog.convert(alu, ports=[
1929 # ports=alu.in_a.ports() + \
1930 # alu.in_b.ports() + \
1931 # alu.out_z.ports())