1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """ records the identifier of this state

        state_from: stored unmodified as self.state_from.  subclasses in
                    this file pass strings such as "align" or "add_0".
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand input, the captured-operand output and the
        decode flag

        width: passed to FPOp and used as the bit-width of out_op
    """
    self.in_op = FPOp(width)
    self.out_op = Signal(width)
    # NOTE(review): reset_less presumably excludes the signal from
    # reset logic - confirm against the nmigen Signal documentation
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
176 with m
.If(self
.out_decode
):
177 m
.next
= self
.out_state
179 self
.in_op
.ack
.eq(0),
180 self
.out_op
.eq(self
.mod
.out_op
)
183 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
186 class FPGet2OpMod(Trigger
):
187 def __init__(self
, width
, id_wid
):
188 Trigger
.__init
__(self
)
191 self
.i
= self
.ispec()
192 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPNumBase2Ops(self
.width
, self
.id_wid
)
200 def elaborate(self
, platform
):
201 m
= Trigger
.elaborate(self
, platform
)
202 m
.submodules
.get_op1_out
= self
.o
.a
203 m
.submodules
.get_op2_out
= self
.o
.b
204 out_op1
= FPNumIn(None, self
.width
)
205 out_op2
= FPNumIn(None, self
.width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.i
.a
),
209 out_op2
.decode(self
.i
.b
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
216 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ creates the two-operand fetch state

        in_state:  passed to FPState.__init__ (kept as state_from)
        out_state: stored as self.out_state
        width:     passed to FPGet2OpMod
        id_wid:    passed to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # this state's own output record, built from the module's ospec
    self.o = self.mod.ospec()
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
229 def setup(self
, m
, i
, in_stb
, in_ack
):
230 """ links module to inputs and outputs
232 m
.submodules
.get_ops
= self
.mod
233 m
.d
.comb
+= self
.mod
.i
.eq(i
)
234 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
235 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
236 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
237 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
240 with m
.If(self
.out_decode
):
241 m
.next
= self
.out_state
244 self
.o
.eq(self
.mod
.o
),
247 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ creates a record of two FPNumBase operands (a, b) plus an id (mid)

        width:   passed to both FPNumBase constructors
        id_wid:  bit-width of the mid signal
        m_extra: passed as second argument to both FPNumBase constructors
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    self.mid = Signal(id_wid, reset_less=True)
258 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
261 class FPAddSpecialCasesMod
:
262 """ special cases: NaNs, infs, zeros, denormalised
263 NOTE: some of these are unique to add. see "Special Operations"
264 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
267 def __init__(self
, width
, id_wid
):
270 self
.i
= self
.ispec()
271 self
.o
= self
.ospec()
272 self
.out_do_z
= Signal(reset_less
=True)
275 return FPNumBase2Ops(self
.width
, self
.id_wid
)
278 return FPPackData(self
.width
, self
.id_wid
)
280 def setup(self
, m
, i
, out_do_z
):
281 """ links module to inputs and outputs
283 m
.submodules
.specialcases
= self
284 m
.d
.comb
+= self
.i
.eq(i
)
285 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
287 def elaborate(self
, platform
):
290 m
.submodules
.sc_in_a
= self
.i
.a
291 m
.submodules
.sc_in_b
= self
.i
.b
292 m
.submodules
.sc_out_z
= self
.o
.z
295 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
298 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
300 # if a is NaN or b is NaN return NaN
301 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
302 m
.d
.comb
+= self
.out_do_z
.eq(1)
303 m
.d
.comb
+= self
.o
.z
.nan(0)
305 # XXX WEIRDNESS for FP16 non-canonical NaN handling
308 ## if a is zero and b is NaN return -b
309 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
310 # m.d.comb += self.out_do_z.eq(1)
311 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
313 ## if b is zero and a is NaN return -a
314 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
315 # m.d.comb += self.out_do_z.eq(1)
316 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
318 ## if a is -zero and b is NaN return -b
319 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
320 # m.d.comb += self.out_do_z.eq(1)
321 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
323 ## if b is -zero and a is NaN return -a
324 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
325 # m.d.comb += self.out_do_z.eq(1)
326 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
328 # if a is inf return inf (or NaN)
329 with m
.Elif(self
.i
.a
.is_inf
):
330 m
.d
.comb
+= self
.out_do_z
.eq(1)
331 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
332 # if a is inf and signs don't match return NaN
333 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
334 m
.d
.comb
+= self
.o
.z
.nan(0)
336 # if b is inf return inf
337 with m
.Elif(self
.i
.b
.is_inf
):
338 m
.d
.comb
+= self
.out_do_z
.eq(1)
339 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
341 # if a is zero and b zero return signed-a/b
342 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
343 m
.d
.comb
+= self
.out_do_z
.eq(1)
344 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
348 # if a is zero return b
349 with m
.Elif(self
.i
.a
.is_zero
):
350 m
.d
.comb
+= self
.out_do_z
.eq(1)
351 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
354 # if b is zero return a
355 with m
.Elif(self
.i
.b
.is_zero
):
356 m
.d
.comb
+= self
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
360 # if a equal to -b return zero (+ve zero)
361 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
362 m
.d
.comb
+= self
.out_do_z
.eq(1)
363 m
.d
.comb
+= self
.o
.z
.zero(0)
365 # Denormalised Number checks
367 m
.d
.comb
+= self
.out_do_z
.eq(0)
373 def __init__(self
, id_wid
):
376 self
.in_mid
= Signal(id_wid
, reset_less
=True)
377 self
.out_mid
= Signal(id_wid
, reset_less
=True)
383 if self
.id_wid
is not None:
384 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
387 class FPAddSpecialCases(FPState
, FPID
):
388 """ special cases: NaNs, infs, zeros, denormalised
389 NOTE: some of these are unique to add. see "Special Operations"
390 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state

        width:  passed to FPAddSpecialCasesMod
        id_wid: passed to FPID.__init__ and to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid): passing width
    # alone raises TypeError as soon as this state is constructed.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
400 def setup(self
, m
, in_a
, in_b
, in_mid
):
401 """ links module to inputs and outputs
403 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
404 if self
.in_mid
is not None:
405 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
409 with m
.If(self
.out_do_z
):
410 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
413 m
.next
= "denormalise"
416 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
417 """ special cases: NaNs, infs, zeros, denormalised
418 NOTE: some of these are unique to add. see "Special Operations"
419 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
422 def __init__(self
, width
, id_wid
):
423 FPState
.__init
__(self
, "special_cases")
424 FPID
.__init
__(self
, id_wid
)
425 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
426 self
.out_z
= self
.smod
.ospec()
427 self
.out_do_z
= Signal(reset_less
=True)
429 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
430 self
.o
= self
.dmod
.ospec()
432 def setup(self
, m
, i
, in_mid
):
433 """ links module to inputs and outputs
435 self
.smod
.setup(m
, i
, self
.out_do_z
)
436 self
.dmod
.setup(m
, i
)
437 if self
.in_mid
is not None:
438 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
442 with m
.If(self
.out_do_z
):
443 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
447 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
448 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
451 class FPAddDeNormMod(FPState
):
453 def __init__(self
, width
, id_wid
):
456 self
.i
= self
.ispec()
457 self
.o
= self
.ospec()
460 return FPNumBase2Ops(self
.width
, self
.id_wid
)
463 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 def setup(self
, m
, i
):
466 """ links module to inputs and outputs
468 m
.submodules
.denormalise
= self
469 m
.d
.comb
+= self
.i
.eq(i
)
471 def elaborate(self
, platform
):
473 m
.submodules
.denorm_in_a
= self
.i
.a
474 m
.submodules
.denorm_in_b
= self
.i
.b
475 m
.submodules
.denorm_out_a
= self
.o
.a
476 m
.submodules
.denorm_out_b
= self
.o
.b
477 # hmmm, don't like repeating identical code
478 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
479 with m
.If(self
.i
.a
.exp_n127
):
480 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
482 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
484 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
485 with m
.If(self
.i
.b
.exp_n127
):
486 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
488 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
493 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "denormalise" state

        width:  passed to FPAddDeNormMod and to both FPNumBase outputs
        id_wid: passed to FPID.__init__ and to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    # FPAddDeNormMod.__init__ takes (width, id_wid): passing width alone
    # raises TypeError as soon as this state is constructed.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
502 def setup(self
, m
, in_a
, in_b
, in_mid
):
503 """ links module to inputs and outputs
505 self
.mod
.setup(m
, in_a
, in_b
)
506 if self
.in_mid
is not None:
507 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
511 # Denormalised Number checks
513 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
514 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
517 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the a/b inputs (FPNumBase), the a/b outputs (FPNumIn)
        and the exp_eq flag

        width: passed to every FPNumBase / FPNumIn created here
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
526 def elaborate(self
, platform
):
527 # This one however (single-cycle) will do the shift
532 m
.submodules
.align_in_a
= self
.in_a
533 m
.submodules
.align_in_b
= self
.in_b
534 m
.submodules
.align_out_a
= self
.out_a
535 m
.submodules
.align_out_b
= self
.out_b
537 # NOTE: this does *not* do single-cycle multi-shifting,
538 # it *STAYS* in the align state until exponents match
540 # exponent of a greater than b: shift b down
541 m
.d
.comb
+= self
.exp_eq
.eq(0)
542 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
543 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
544 agtb
= Signal(reset_less
=True)
545 altb
= Signal(reset_less
=True)
546 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
547 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
549 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
550 # exponent of b greater than a: shift a down
552 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
553 # exponents equal: move to next stage.
555 m
.d
.comb
+= self
.exp_eq
.eq(1)
559 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "align" state around an FPAddAlignMultiMod

        width:  passed to FPAddAlignMultiMod and to both FPNumIn outputs
        id_wid: passed to FPID.__init__
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
569 def setup(self
, m
, in_a
, in_b
, in_mid
):
570 """ links module to inputs and outputs
572 m
.submodules
.align
= self
.mod
573 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
574 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
575 #m.d.comb += self.out_a.eq(self.mod.out_a)
576 #m.d.comb += self.out_b.eq(self.mod.out_b)
577 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
578 if self
.in_mid
is not None:
579 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
583 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
584 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
585 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ creates a record of two FPNumIn operands (a, b) plus an id (mid)

        width:  passed to both FPNumIn constructors
        id_wid: bit-width of the mid signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.mid = Signal(id_wid, reset_less=True)
597 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
600 class FPAddAlignSingleMod
:
602 def __init__(self
, width
, id_wid
):
605 self
.i
= self
.ispec()
606 self
.o
= self
.ospec()
609 return FPNumBase2Ops(self
.width
, self
.id_wid
)
612 return FPNumIn2Ops(self
.width
, self
.id_wid
)
614 def setup(self
, m
, i
):
615 """ links module to inputs and outputs
617 m
.submodules
.align
= self
618 m
.d
.comb
+= self
.i
.eq(i
)
620 def elaborate(self
, platform
):
621 """ Aligns A against B or B against A, depending on which has the
622 greater exponent. This is done in a *single* cycle using
623 variable-width bit-shift
625 the shifter used here is quite expensive in terms of gates.
626 Mux A or B in (and out) into temporaries, as only one of them
627 needs to be aligned against the other
631 m
.submodules
.align_in_a
= self
.i
.a
632 m
.submodules
.align_in_b
= self
.i
.b
633 m
.submodules
.align_out_a
= self
.o
.a
634 m
.submodules
.align_out_b
= self
.o
.b
636 # temporary (muxed) input and output to be shifted
637 t_inp
= FPNumBase(self
.width
)
638 t_out
= FPNumIn(None, self
.width
)
639 espec
= (len(self
.i
.a
.e
), True)
640 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
641 m
.submodules
.align_t_in
= t_inp
642 m
.submodules
.align_t_out
= t_out
643 m
.submodules
.multishift_r
= msr
645 ediff
= Signal(espec
, reset_less
=True)
646 ediffr
= Signal(espec
, reset_less
=True)
647 tdiff
= Signal(espec
, reset_less
=True)
648 elz
= Signal(reset_less
=True)
649 egz
= Signal(reset_less
=True)
651 # connect multi-shifter to t_inp/out mantissa (and tdiff)
652 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
653 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
654 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
655 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
656 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
658 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
659 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
660 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
661 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
663 # default: A-exp == B-exp, A and B untouched (fall through)
664 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
665 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
666 # only one shifter (muxed)
667 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
668 # exponent of a greater than b: shift b down
670 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
673 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
675 # exponent of b greater than a: shift a down
677 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
680 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
685 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "align" state around an FPAddAlignSingleMod

        width:  passed to FPAddAlignSingleMod and to both FPNumIn outputs
        id_wid: passed to FPID.__init__ and to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)
    self.mod = FPAddAlignSingleMod(width, id_wid)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
694 def setup(self
, m
, in_a
, in_b
, in_mid
):
695 """ links module to inputs and outputs
697 self
.mod
.setup(m
, in_a
, in_b
)
698 if self
.in_mid
is not None:
699 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
703 # NOTE: could be done as comb
704 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
705 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
709 class FPAddAlignSingleAdd(FPState
, FPID
):
711 def __init__(self
, width
, id_wid
):
712 FPState
.__init
__(self
, "align")
713 FPID
.__init
__(self
, id_wid
)
716 self
.a1o
= self
.ospec()
719 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
722 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
724 def setup(self
, m
, i
, in_mid
):
725 """ links module to inputs and outputs
727 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
730 m
.d
.comb
+= o
.eq(mod
.o
)
732 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
735 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
737 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
739 self
.a1modo
= a1mod
.o
741 if self
.in_mid
is not None:
742 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
744 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
748 m
.next
= "normalise_1"
751 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ creates the add-stage-0 result record: z, tot and mid

        width:  passed to FPNumBase for z
        id_wid: bit-width of the mid signal
    """
    self.z = FPNumBase(width, False)
    # tot is 4 bits wider than z's mantissa width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
759 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
762 class FPAddStage0Mod
:
764 def __init__(self
, width
, id_wid
):
767 self
.i
= self
.ispec()
768 self
.o
= self
.ospec()
771 return FPNumBase2Ops(self
.width
, self
.id_wid
)
774 return FPAddStage0Data(self
.width
, self
.id_wid
)
776 def setup(self
, m
, i
):
777 """ links module to inputs and outputs
779 m
.submodules
.add0
= self
780 m
.d
.comb
+= self
.i
.eq(i
)
782 def elaborate(self
, platform
):
784 m
.submodules
.add0_in_a
= self
.i
.a
785 m
.submodules
.add0_in_b
= self
.i
.b
786 m
.submodules
.add0_out_z
= self
.o
.z
788 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
790 # store intermediate tests (and zero-extended mantissas)
791 seq
= Signal(reset_less
=True)
792 mge
= Signal(reset_less
=True)
793 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
794 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
795 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
796 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
797 am0
.eq(Cat(self
.i
.a
.m
, 0)),
798 bm0
.eq(Cat(self
.i
.b
.m
, 0))
800 # same-sign (both negative or both positive) add mantissas
803 self
.o
.tot
.eq(am0
+ bm0
),
804 self
.o
.z
.s
.eq(self
.i
.a
.s
)
806 # a mantissa greater than b, use a
809 self
.o
.tot
.eq(am0
- bm0
),
810 self
.o
.z
.s
.eq(self
.i
.a
.s
)
812 # b mantissa greater than a, use b
815 self
.o
.tot
.eq(bm0
- am0
),
816 self
.o
.z
.s
.eq(self
.i
.b
.s
)
821 class FPAddStage0(FPState
, FPID
):
822 """ First stage of add. covers same-sign (add) and subtract
823 special-casing when mantissas are greater or equal, to
824 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state

        width:  passed to FPAddStage0Mod
        id_wid: passed to FPID.__init__ and to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    # FPAddStage0Mod.__init__ takes (width, id_wid): passing width alone
    # raises TypeError as soon as this state is constructed.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
833 def setup(self
, m
, i
, in_mid
):
834 """ links module to inputs and outputs
837 if self
.in_mid
is not None:
838 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
841 # NOTE: these could be done as combinatorial (merge add0+add1)
842 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
848 class FPAddStage1Data
:
850 def __init__(self
, width
, id_wid
):
851 self
.z
= FPNumBase(width
, False)
853 self
.mid
= Signal(id_wid
, reset_less
=True)
856 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
860 class FPAddStage1Mod(FPState
):
861 """ Second stage of add: preparation for normalisation.
862 detects when tot sum is too big (tot[27] is kinda a carry bit)
865 def __init__(self
, width
, id_wid
):
868 self
.i
= self
.ispec()
869 self
.o
= self
.ospec()
872 return FPAddStage0Data(self
.width
, self
.id_wid
)
875 return FPAddStage1Data(self
.width
, self
.id_wid
)
877 def setup(self
, m
, i
):
878 """ links module to inputs and outputs
880 m
.submodules
.add1
= self
881 m
.submodules
.add1_out_overflow
= self
.o
.of
883 m
.d
.comb
+= self
.i
.eq(i
)
885 def elaborate(self
, platform
):
887 #m.submodules.norm1_in_overflow = self.in_of
888 #m.submodules.norm1_out_overflow = self.out_of
889 #m.submodules.norm1_in_z = self.in_z
890 #m.submodules.norm1_out_z = self.out_z
891 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
892 # tot[-1] (MSB) gets set when the sum overflows. shift result down
893 with m
.If(self
.i
.tot
[-1]):
895 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
896 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
897 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
898 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
899 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
900 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
902 # tot[-1] (MSB) zero case
905 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
906 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
907 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
908 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
909 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
914 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state

        width:  passed to FPAddStage1Mod and to the out_z FPNumBase
        id_wid: passed to FPID.__init__ and to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    # FPAddStage1Mod.__init__ takes (width, id_wid): passing width alone
    # raises TypeError as soon as this state is constructed.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
924 def setup(self
, m
, i
, in_mid
):
925 """ links module to inputs and outputs
929 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
931 if self
.in_mid
is not None:
932 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
935 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
936 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
937 m
.d
.sync
+= self
.norm_stb
.eq(1)
940 m
.next
= "normalise_1"
943 class FPNormaliseModSingle
:
945 def __init__(self
, width
):
947 self
.in_z
= self
.ispec()
948 self
.out_z
= self
.ospec()
951 return FPNumBase(self
.width
, False)
954 return FPNumBase(self
.width
, False)
956 def setup(self
, m
, i
):
957 """ links module to inputs and outputs
959 m
.submodules
.normalise
= self
960 m
.d
.comb
+= self
.i
.eq(i
)
962 def elaborate(self
, platform
):
965 mwid
= self
.out_z
.m_width
+2
966 pe
= PriorityEncoder(mwid
)
967 m
.submodules
.norm_pe
= pe
969 m
.submodules
.norm1_out_z
= self
.out_z
970 m
.submodules
.norm1_in_z
= self
.in_z
972 in_z
= FPNumBase(self
.width
, False)
974 m
.submodules
.norm1_insel_z
= in_z
975 m
.submodules
.norm1_insel_overflow
= in_of
977 espec
= (len(in_z
.e
), True)
978 ediff_n126
= Signal(espec
, reset_less
=True)
979 msr
= MultiShiftRMerge(mwid
, espec
)
980 m
.submodules
.multishift_r
= msr
982 m
.d
.comb
+= in_z
.eq(self
.in_z
)
983 m
.d
.comb
+= in_of
.eq(self
.in_of
)
984 # initialise out from in (overridden below)
985 m
.d
.comb
+= self
.out_z
.eq(in_z
)
986 m
.d
.comb
+= self
.out_of
.eq(in_of
)
987 # normalisation decrease condition
988 decrease
= Signal(reset_less
=True)
989 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
992 # *sigh* not entirely obvious: count leading zeros (clz)
993 # with a PriorityEncoder: to find from the MSB
994 # we reverse the order of the bits.
995 temp_m
= Signal(mwid
, reset_less
=True)
996 temp_s
= Signal(mwid
+1, reset_less
=True)
997 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
999 # cat round and guard bits back into the mantissa
1000 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1001 pe
.i
.eq(temp_m
[::-1]), # inverted
1002 clz
.eq(pe
.o
), # count zeros from MSB down
1003 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1004 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1005 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ creates the normalisation result record: roundz, z and mid

        width:  passed to FPNumBase for z
        id_wid: bit-width of the mid signal
    """
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1018 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1021 class FPNorm1ModSingle
:
1023 def __init__(self
, width
, id_wid
):
1025 self
.id_wid
= id_wid
1026 self
.i
= self
.ispec()
1027 self
.o
= self
.ospec()
1030 return FPAddStage1Data(self
.width
, self
.id_wid
)
1033 return FPNorm1Data(self
.width
, self
.id_wid
)
1035 def setup(self
, m
, i
):
1036 """ links module to inputs and outputs
1038 m
.submodules
.normalise_1
= self
1039 m
.d
.comb
+= self
.i
.eq(i
)
1041 def elaborate(self
, platform
):
1044 mwid
= self
.o
.z
.m_width
+2
1045 pe
= PriorityEncoder(mwid
)
1046 m
.submodules
.norm_pe
= pe
1049 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1051 m
.submodules
.norm1_out_z
= self
.o
.z
1052 m
.submodules
.norm1_out_overflow
= of
1053 m
.submodules
.norm1_in_z
= self
.i
.z
1054 m
.submodules
.norm1_in_overflow
= self
.i
.of
1057 m
.submodules
.norm1_insel_z
= i
.z
1058 m
.submodules
.norm1_insel_overflow
= i
.of
1060 espec
= (len(i
.z
.e
), True)
1061 ediff_n126
= Signal(espec
, reset_less
=True)
1062 msr
= MultiShiftRMerge(mwid
, espec
)
1063 m
.submodules
.multishift_r
= msr
1065 m
.d
.comb
+= i
.eq(self
.i
)
1066 # initialise out from in (overridden below)
1067 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1068 m
.d
.comb
+= of
.eq(i
.of
)
1069 # normalisation increase/decrease conditions
1070 decrease
= Signal(reset_less
=True)
1071 increase
= Signal(reset_less
=True)
1072 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1073 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1075 with m
.If(decrease
):
1076 # *sigh* not entirely obvious: count leading zeros (clz)
1077 # with a PriorityEncoder: to find from the MSB
1078 # we reverse the order of the bits.
1079 temp_m
= Signal(mwid
, reset_less
=True)
1080 temp_s
= Signal(mwid
+1, reset_less
=True)
1081 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1082 # make sure that the amount to decrease by does NOT
1083 # go below the minimum non-INF/NaN exponent
1084 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1087 # cat round and guard bits back into the mantissa
1088 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1089 pe
.i
.eq(temp_m
[::-1]), # inverted
1090 clz
.eq(limclz
), # count zeros from MSB down
1091 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1092 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1093 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1094 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1095 # overflow in bits 0..1: got shifted too (leave sticky)
1096 of
.guard
.eq(temp_s
[1]), # guard
1097 of
.round_bit
.eq(temp_s
[0]), # round
1100 with m
.Elif(increase
):
1101 temp_m
= Signal(mwid
+1, reset_less
=True)
1103 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1105 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1106 # connect multi-shifter to inp/out mantissa (and ediff)
1108 msr
.diff
.eq(ediff_n126
),
1109 self
.o
.z
.m
.eq(msr
.m
[3:]),
1110 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1111 # overflow in bits 0..1: got shifted too (leave sticky)
1112 of
.guard
.eq(temp_s
[2]), # guard
1113 of
.round_bit
.eq(temp_s
[1]), # round
1114 of
.sticky
.eq(temp_s
[0]), # sticky
1115 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1121 class FPNorm1ModMulti
:
1123 def __init__(self
, width
, single_cycle
=True):
1125 self
.in_select
= Signal(reset_less
=True)
1126 self
.in_z
= FPNumBase(width
, False)
1127 self
.in_of
= Overflow()
1128 self
.temp_z
= FPNumBase(width
, False)
1129 self
.temp_of
= Overflow()
1130 self
.out_z
= FPNumBase(width
, False)
1131 self
.out_of
= Overflow()
1133 def elaborate(self
, platform
):
1136 m
.submodules
.norm1_out_z
= self
.out_z
1137 m
.submodules
.norm1_out_overflow
= self
.out_of
1138 m
.submodules
.norm1_temp_z
= self
.temp_z
1139 m
.submodules
.norm1_temp_of
= self
.temp_of
1140 m
.submodules
.norm1_in_z
= self
.in_z
1141 m
.submodules
.norm1_in_overflow
= self
.in_of
1143 in_z
= FPNumBase(self
.width
, False)
1145 m
.submodules
.norm1_insel_z
= in_z
1146 m
.submodules
.norm1_insel_overflow
= in_of
1148 # select which of temp or in z/of to use
1149 with m
.If(self
.in_select
):
1150 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1151 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1153 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1154 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1155 # initialise out from in (overridden below)
1156 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1157 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1158 # normalisation increase/decrease conditions
1159 decrease
= Signal(reset_less
=True)
1160 increase
= Signal(reset_less
=True)
1161 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1162 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1163 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1165 with m
.If(decrease
):
1167 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1168 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1169 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1170 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1171 self
.out_of
.round_bit
.eq(0), # reset round bit
1172 self
.out_of
.m0
.eq(in_of
.guard
),
1175 with m
.Elif(increase
):
1177 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1178 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1179 self
.out_of
.guard
.eq(in_z
.m
[0]),
1180 self
.out_of
.m0
.eq(in_z
.m
[1]),
1181 self
.out_of
.round_bit
.eq(in_of
.guard
),
1182 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1188 class FPNorm1Single(FPState
, FPID
):
def __init__(self, width, id_wid, single_cycle=True):
    """ creates the "normalise_1" state around an FPNorm1ModSingle

        width:        passed to FPNorm1ModSingle and to the out_z FPNumBase
        id_wid:       passed to FPID.__init__ and to FPNorm1ModSingle
        single_cycle: accepted but not used by this constructor
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ takes (width, id_wid): passing width alone
    # raises TypeError as soon as this state is constructed.
    self.mod = FPNorm1ModSingle(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1197 def setup(self
, m
, i
, in_mid
):
1198 """ links module to inputs and outputs
1200 self
.mod
.setup(m
, i
, self
.out_z
)
1202 if self
.in_mid
is not None:
1203 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1206 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1208 def action(self
, m
):
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1": multi-cycle normalisation.

        Stays in this state, feeding out_z / out_of back through
        temp_z / temp_of, for as long as the module reports out_norm.
        Once no further shift is needed it latches the rounding flag and
        moves to "round".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        # registered feedback copies of the module outputs
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-cycle behaviour while the FSM is in "normalise_1"
        """
        # accept on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed this cycle's result back for the next iteration
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            # still shifting: acknowledge only while accepting
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" for the compact pipeline: chains
        normalisation, rounding, corrections and packing combinatorially
        and registers the packed value.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width # read by ispec/ospec/setup

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        # wire the stage input into the head of the chain, in the same way
        # every later stage is wired to its predecessor
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        self.out_z = self.pmod.ospec()

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

        self.idsync(m) # copies incoming ID to outgoing
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v) # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
class FPRoundData:
    """ data carried out of the rounding / corrections stages: an
        FPNumBase result plus the multiplex id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPRoundMod:
    """ combinatorial rounding stage.

        Copies the input through to out_z; when the input's roundz flag is
        set the mantissa is incremented, and if the mantissa was all 1s
        the exponent is incremented as well.
    """

    def __init__(self, width, id_wid):
        self.width = width # read by ispec/ospec
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module under m.submodules.roundz and connects i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        # default: pass-through (overridden below when rounding up)
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.roundz):
            m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa rounds up
            with m.If(self.i.z.m == self.i.z.m1s): # all 1s
                m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
        return m
class FPRound(FPState, FPID):
    """ FSM state "round": registers the output of FPRoundMod and moves
        on to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """ combinatorial corrections stage: passes the input through and,
        when the input number reports is_denormalised, replaces the
        exponent with the number's N127 constant.
    """

    def __init__(self, width, id_wid):
        self.width = width # read by ispec/ospec
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        # default: pass-through (exponent overridden below if denormalised)
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": registers the output of FPCorrectionsMod
        and moves on to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod.__init__ takes (width, id_wid)
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        # the stage that follows in the long pipeline is FPPack ("pack")
        m.next = "pack"
class FPPackData:
    """ data carried out of the pack stage: an FPNumOut result plus the
        multiplex id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPackMod:
    """ combinatorial pack stage: produces the output number from the
        input's sign, exponent and mantissa, or an infinity of the same
        sign when the input reports is_overflowed.
    """

    def __init__(self, width, id_wid):
        self.width = width # read by ispec/ospec
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        m.submodules.pack_in_z = self.i.z
        # the two cases are mutually exclusive: without the Else the later
        # create() assignment would always override inf()
        with m.If(self.i.z.is_overflowed):
            m.d.comb += self.o.z.inf(self.i.z.s)
        with m.Else():
            m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        return m
# NOTE(review): this holder is field-for-field identical to the FPNumOut-based
# data class defined shortly before FPPackMod; the class name used here is
# presumed to be the same one - confirm, and drop the duplicate if so.
class FPPackData:
    """ data holder: an FPNumOut result plus the multiplex id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPack(FPState, FPID):
    """ FSM state "pack": registers the packed value produced by FPPackMod
        and moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod is constructed with (width, id_wid)
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

        # FPPackMod exposes its result as `.o` (an FPPackData), and out_z
        # is an FPPackData too, so the packed value lives at `.z.v` on both
        # sides - the same access FPNormToPack uses.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM output state: copies in_z.v to out_z.z.v, raises the output
        strobe and waits for stb & ack before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """
            * state: name of this FSM state
            * in_z: source; its `.v` is read
            * out_z: destination; its `.z.v`, `.z.stb`, `.z.ack` are used
            * in_mid / out_mid: multiplex id source and destination
              (in_mid may be None, in which case no id is copied)
            * to_state: state to move to once acknowledged
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # receiver has taken the value: drop strobe and move on
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM output state: as FPPutZ, but the destination is selected from
        the out_zs array by in_mid.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """
            * state: name of this FSM state
            * in_z: source; its `.v` is read
            * out_zs: indexable collection of outputs (each with
              `.v`, `.stb`, `.ack`), indexed by in_mid
            * in_mid: index selecting the destination
            * to_state: state to move to once acknowledged
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # local copies of the selected output's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # receiver has taken the value: drop strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ input data for the adder: two width-bit operands plus the
        multiplex id.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.a, i.b and i.mid
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
class FPOpData:
    """ output data for the adder: an FPOp result plus the multiplex id.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPADDBaseMod(FPID):
    """ the adder proper: builds a chain of FPState stages and drives
        them from a single FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = [] # filled in by add_state()

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the FSM's state list and returns it, so that
            callers can keep a handle for wiring up the next stage.
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:
            # one FSM state per registered stage
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-stage-per-state pipeline.

            NOTE(review): several setup() calls below pass argument lists
            that differ from the setup() signatures of the stage classes
            defined in this file (e.g. FPNorm1Single, FPRound), and
            self.out_z is not created by __init__ (which sets self.o).
            This path looks stale relative to the compact one - confirm
            before enabling compact=False.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        # NOTE(review): operands presumed to be reachable as get.o.a and
        # get.o.b (the compact path hands get.o on as a whole) - confirm
        # against FPGet2Op.
        a = get.o.a
        b = get.o.b

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced pipeline:
            get_ops -> special cases + denorm -> align + add ->
            normalise-to-pack -> put_z
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o, sc.in_mid)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o, alm.in_mid)

        # output state for results that went through the whole pipeline
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.in_mid, self.out_mid))

        # output state fed directly from the special-cases stage
        pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
                                    sc.in_mid, self.out_mid))
1708 class FPADDBase(FPState
, FPID
):
1710 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1713 * width: bit-width of IEEE754. supported: 16, 32, 64
1714 * id_wid: an identifier that is sync-connected to the input
1715 * single_cycle: True indicates each stage to complete in 1 clock
1717 FPID
.__init
__(self
, id_wid
)
1718 FPState
.__init
__(self
, "fpadd")
1720 self
.single_cycle
= single_cycle
1721 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1722 self
.o
= self
.ospec()
1724 self
.in_t
= Trigger()
1725 self
.i
= self
.ispec()
1727 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1728 self
.in_accept
= Signal(reset_less
=True)
1729 self
.add_stb
= Signal(reset_less
=True)
1730 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1733 return self
.mod
.ispec()
1736 return self
.mod
.ospec()
1738 def setup(self
, m
, i
, add_stb
, in_mid
):
1739 m
.d
.comb
+= [self
.i
.eq(i
),
1740 self
.mod
.i
.eq(self
.i
),
1741 self
.in_mid
.eq(in_mid
),
1742 self
.mod
.in_mid
.eq(self
.in_mid
),
1743 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1744 #self.add_stb.eq(add_stb),
1745 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1746 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1747 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1748 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1749 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1750 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1753 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1754 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1755 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1756 #m.d.sync += self.in_t.stb.eq(0)
1758 m
.submodules
.fpadd
= self
.mod
1760 def action(self
, m
):
1762 # in_accept is set on incoming strobe HIGH and ack LOW.
1763 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1765 #with m.If(self.in_t.ack):
1766 # m.d.sync += self.in_t.stb.eq(0)
1767 with m
.If(~self
.z_done
):
1768 # not done: test for accepting an incoming operand pair
1769 with m
.If(self
.in_accept
):
1771 self
.add_ack
.eq(1), # acknowledge receipt...
1772 self
.in_t
.stb
.eq(1), # initiate add
1775 m
.d
.sync
+= [self
.add_ack
.eq(0),
1776 self
.in_t
.stb
.eq(0),
1780 # done: acknowledge, and write out id and value
1781 m
.d
.sync
+= [self
.add_ack
.eq(1),
1788 if self
.in_mid
is not None:
1789 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1792 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1794 # move to output state on detecting z ack
1795 with m
.If(self
.out_z
.trigger
):
1796 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1799 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1803 def __init__(self
, width
, id_wid
):
1805 self
.id_wid
= id_wid
1807 for i
in range(rs_sz
):
1809 out_z
.name
= "out_z_%d" % i
1811 self
.res
= Array(res
)
1812 self
.in_z
= FPOp(width
)
1813 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1815 def setup(self
, m
, in_z
, in_mid
):
1816 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1817 self
.in_mid
.eq(in_mid
)]
1819 def get_fragment(self
, platform
=None):
1820 """ creates the HDL code-fragment for FPAdd
1823 m
.submodules
.res_in_z
= self
.in_z
1824 m
.submodules
+= self
.res
1836 """ FPADD: stages as follows:
1842 FPAddBase---> FPAddBaseMod
1844 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1846 FPAddBase is tricky: it is both a stage and *has* stages.
1847 Connection to FPAddBaseMod therefore requires an in stb/ack
1848 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1849 needs to be the thing that raises the incoming stb.
1852 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1855 * width: bit-width of IEEE754. supported: 16, 32, 64
1856 * id_wid: an identifier that is sync-connected to the input
1857 * single_cycle: True indicates each stage to complete in 1 clock
1860 self
.id_wid
= id_wid
1861 self
.single_cycle
= single_cycle
1863 #self.out_z = FPOp(width)
1864 self
.ids
= FPID(id_wid
)
1867 for i
in range(rs_sz
):
1870 in_a
.name
= "in_a_%d" % i
1871 in_b
.name
= "in_b_%d" % i
1872 rs
.append((in_a
, in_b
))
1876 for i
in range(rs_sz
):
1878 out_z
.name
= "out_z_%d" % i
1880 self
.res
= Array(res
)
1884 def add_state(self
, state
):
1885 self
.states
.append(state
)
1888 def get_fragment(self
, platform
=None):
1889 """ creates the HDL code-fragment for FPAdd
1892 m
.submodules
+= self
.rs
1894 in_a
= self
.rs
[0][0]
1895 in_b
= self
.rs
[0][1]
1897 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1902 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1907 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1908 ab
= self
.add_state(ab
)
1909 abd
= ab
.ispec() # create an input spec object for FPADDBase
1910 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1911 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1914 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1917 with m
.FSM() as fsm
:
1919 for state
in self
.states
:
1920 with m
.State(state
.state_from
):
if __name__ == "__main__":
    # Only one design can be handed to main() per run.  The first branch
    # (full FPADD) is the active one; the second is kept as the alternative
    # for generating FPADDBase on its own.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() + \
                        alu.rs[0][1].ports() + \
                        alu.res[0].ports() + \
                        [alu.ids.in_mid, alu.ids.out_mid])
    else:
        # NOTE(review): FPADDBase.__init__ in this file creates `i`, `o`
        # and `in_t`, not `in_a` / `in_b` / `out_z`; this port list needs
        # updating before the branch can be enabled.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                        alu.in_t.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])
1941 # works... but don't use, just do "python fname.py convert -t v"
1942 #print (verilog.convert(alu, ports=[
1943 # ports=alu.in_a.ports() + \
1944 # alu.in_b.ports() + \
1945 # alu.out_z.ports())