1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
16 def __init__(self
, state_from
):
17 self
.state_from
= state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
139 def __init__(self
, width
):
140 self
.in_op
= FPOp(width
)
141 self
.out_op
= Signal(width
)
142 self
.out_decode
= Signal(reset_less
=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 #m.d.comb += self.out_op.eq(self.mod.out_op)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 #m.submodules.get_op_in = self.in_op
204 m
.submodules
.get_op1_out
= self
.o
.a
205 m
.submodules
.get_op2_out
= self
.o
.b
206 out_op1
= FPNumIn(None, self
.width
)
207 out_op2
= FPNumIn(None, self
.width
)
208 with m
.If(self
.trigger
):
210 out_op1
.decode(self
.i
.a
),
211 out_op2
.decode(self
.i
.b
),
212 self
.o
.a
.eq(out_op1
),
213 self
.o
.b
.eq(out_op2
),
218 class FPGet2Op(FPState
):
222 def __init__(self
, in_state
, out_state
, width
, id_wid
):
223 FPState
.__init
__(self
, in_state
)
224 self
.out_state
= out_state
225 self
.mod
= FPGet2OpMod(width
, id_wid
)
226 self
.o
= self
.mod
.ospec()
227 self
.in_stb
= Signal(reset_less
=True)
228 self
.out_ack
= Signal(reset_less
=True)
229 self
.out_decode
= Signal(reset_less
=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
254 def __init__(self
, width
, id_wid
, m_extra
=True):
255 self
.a
= FPNumBase(width
, m_extra
)
256 self
.b
= FPNumBase(width
, m_extra
)
257 self
.mid
= Signal(id_wid
, reset_less
=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
263 class FPAddSpecialCasesMod
:
264 """ special cases: NaNs, infs, zeros, denormalised
265 NOTE: some of these are unique to add. see "Special Operations"
266 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
269 def __init__(self
, width
, id_wid
):
272 self
.i
= self
.ispec()
273 self
.o
= self
.ospec()
274 self
.out_do_z
= Signal(reset_less
=True)
277 return FPNumBase2Ops(self
.width
, self
.id_wid
)
280 return FPPackData(self
.width
, self
.id_wid
)
282 def setup(self
, m
, i
, out_do_z
):
283 """ links module to inputs and outputs
285 m
.submodules
.specialcases
= self
286 m
.d
.comb
+= self
.i
.eq(i
)
287 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
289 def elaborate(self
, platform
):
292 m
.submodules
.sc_in_a
= self
.i
.a
293 m
.submodules
.sc_in_b
= self
.i
.b
294 m
.submodules
.sc_out_z
= self
.o
.z
297 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
300 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
302 # if a is NaN or b is NaN return NaN
303 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
304 m
.d
.comb
+= self
.out_do_z
.eq(1)
305 m
.d
.comb
+= self
.o
.z
.nan(0)
307 # XXX WEIRDNESS for FP16 non-canonical NaN handling
310 ## if a is zero and b is NaN return -b
311 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
315 ## if b is zero and a is NaN return -a
316 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
320 ## if a is -zero and b is NaN return -b
321 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
325 ## if b is -zero and a is NaN return -a
326 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
330 # if a is inf return inf (or NaN)
331 with m
.Elif(self
.i
.a
.is_inf
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
334 # if a is inf and signs don't match return NaN
335 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
336 m
.d
.comb
+= self
.o
.z
.nan(0)
338 # if b is inf return inf
339 with m
.Elif(self
.i
.b
.is_inf
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
343 # if a is zero and b zero return signed-a/b
344 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
350 # if a is zero return b
351 with m
.Elif(self
.i
.a
.is_zero
):
352 m
.d
.comb
+= self
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
356 # if b is zero return a
357 with m
.Elif(self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
362 # if a equal to -b return zero (+ve zero)
363 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
364 m
.d
.comb
+= self
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.zero(0)
367 # Denormalised Number checks
369 m
.d
.comb
+= self
.out_do_z
.eq(0)
375 def __init__(self
, id_wid
):
378 self
.in_mid
= Signal(id_wid
, reset_less
=True)
379 self
.out_mid
= Signal(id_wid
, reset_less
=True)
385 if self
.id_wid
is not None:
386 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
389 class FPAddSpecialCases(FPState
, FPID
):
390 """ special cases: NaNs, infs, zeros, denormalised
391 NOTE: some of these are unique to add. see "Special Operations"
392 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
395 def __init__(self
, width
, id_wid
):
396 FPState
.__init
__(self
, "special_cases")
397 FPID
.__init
__(self
, id_wid
)
398 self
.mod
= FPAddSpecialCasesMod(width
)
399 self
.out_z
= self
.mod
.ospec()
400 self
.out_do_z
= Signal(reset_less
=True)
402 def setup(self
, m
, in_a
, in_b
, in_mid
):
403 """ links module to inputs and outputs
405 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
406 if self
.in_mid
is not None:
407 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
411 with m
.If(self
.out_do_z
):
412 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
415 m
.next
= "denormalise"
418 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
419 """ special cases: NaNs, infs, zeros, denormalised
420 NOTE: some of these are unique to add. see "Special Operations"
421 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
424 def __init__(self
, width
, id_wid
):
425 FPState
.__init
__(self
, "special_cases")
426 FPID
.__init
__(self
, id_wid
)
427 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
428 self
.out_z
= self
.smod
.ospec()
429 self
.out_do_z
= Signal(reset_less
=True)
431 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
432 self
.o
= self
.dmod
.ospec()
434 def setup(self
, m
, i
, in_mid
):
435 """ links module to inputs and outputs
437 self
.smod
.setup(m
, i
, self
.out_do_z
)
438 self
.dmod
.setup(m
, i
)
439 if self
.in_mid
is not None:
440 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
444 with m
.If(self
.out_do_z
):
445 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
449 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
450 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
453 class FPAddDeNormMod(FPState
):
455 def __init__(self
, width
, id_wid
):
458 self
.i
= self
.ispec()
459 self
.o
= self
.ospec()
462 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 return FPNumBase2Ops(self
.width
, self
.id_wid
)
467 def setup(self
, m
, i
):
468 """ links module to inputs and outputs
470 m
.submodules
.denormalise
= self
471 m
.d
.comb
+= self
.i
.eq(i
)
473 def elaborate(self
, platform
):
475 m
.submodules
.denorm_in_a
= self
.i
.a
476 m
.submodules
.denorm_in_b
= self
.i
.b
477 m
.submodules
.denorm_out_a
= self
.o
.a
478 m
.submodules
.denorm_out_b
= self
.o
.b
479 # hmmm, don't like repeating identical code
480 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
481 with m
.If(self
.i
.a
.exp_n127
):
482 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
484 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
486 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
487 with m
.If(self
.i
.b
.exp_n127
):
488 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
490 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
495 class FPAddDeNorm(FPState
, FPID
):
497 def __init__(self
, width
, id_wid
):
498 FPState
.__init
__(self
, "denormalise")
499 FPID
.__init
__(self
, id_wid
)
500 self
.mod
= FPAddDeNormMod(width
)
501 self
.out_a
= FPNumBase(width
)
502 self
.out_b
= FPNumBase(width
)
504 def setup(self
, m
, in_a
, in_b
, in_mid
):
505 """ links module to inputs and outputs
507 self
.mod
.setup(m
, in_a
, in_b
)
508 if self
.in_mid
is not None:
509 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
513 # Denormalised Number checks
515 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
516 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
519 class FPAddAlignMultiMod(FPState
):
521 def __init__(self
, width
):
522 self
.in_a
= FPNumBase(width
)
523 self
.in_b
= FPNumBase(width
)
524 self
.out_a
= FPNumIn(None, width
)
525 self
.out_b
= FPNumIn(None, width
)
526 self
.exp_eq
= Signal(reset_less
=True)
528 def elaborate(self
, platform
):
529 # This one however (single-cycle) will do the shift
534 m
.submodules
.align_in_a
= self
.in_a
535 m
.submodules
.align_in_b
= self
.in_b
536 m
.submodules
.align_out_a
= self
.out_a
537 m
.submodules
.align_out_b
= self
.out_b
539 # NOTE: this does *not* do single-cycle multi-shifting,
540 # it *STAYS* in the align state until exponents match
542 # exponent of a greater than b: shift b down
543 m
.d
.comb
+= self
.exp_eq
.eq(0)
544 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
545 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
546 agtb
= Signal(reset_less
=True)
547 altb
= Signal(reset_less
=True)
548 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
549 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
551 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
552 # exponent of b greater than a: shift a down
554 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
555 # exponents equal: move to next stage.
557 m
.d
.comb
+= self
.exp_eq
.eq(1)
561 class FPAddAlignMulti(FPState
, FPID
):
563 def __init__(self
, width
, id_wid
):
564 FPID
.__init
__(self
, id_wid
)
565 FPState
.__init
__(self
, "align")
566 self
.mod
= FPAddAlignMultiMod(width
)
567 self
.out_a
= FPNumIn(None, width
)
568 self
.out_b
= FPNumIn(None, width
)
569 self
.exp_eq
= Signal(reset_less
=True)
571 def setup(self
, m
, in_a
, in_b
, in_mid
):
572 """ links module to inputs and outputs
574 m
.submodules
.align
= self
.mod
575 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
576 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
577 #m.d.comb += self.out_a.eq(self.mod.out_a)
578 #m.d.comb += self.out_b.eq(self.mod.out_b)
579 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
580 if self
.in_mid
is not None:
581 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
585 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
586 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
587 with m
.If(self
.exp_eq
):
593 def __init__(self
, width
, id_wid
):
594 self
.a
= FPNumIn(None, width
)
595 self
.b
= FPNumIn(None, width
)
596 self
.mid
= Signal(id_wid
, reset_less
=True)
599 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
602 class FPAddAlignSingleMod
:
604 def __init__(self
, width
, id_wid
):
607 self
.i
= self
.ispec()
608 self
.o
= self
.ospec()
611 return FPNumBase2Ops(self
.width
, self
.id_wid
)
614 return FPNumIn2Ops(self
.width
, self
.id_wid
)
616 def setup(self
, m
, i
):
617 """ links module to inputs and outputs
619 m
.submodules
.align
= self
620 m
.d
.comb
+= self
.i
.eq(i
)
622 def elaborate(self
, platform
):
623 """ Aligns A against B or B against A, depending on which has the
624 greater exponent. This is done in a *single* cycle using
625 variable-width bit-shift
627 the shifter used here is quite expensive in terms of gates.
628 Mux A or B in (and out) into temporaries, as only one of them
629 needs to be aligned against the other
633 m
.submodules
.align_in_a
= self
.i
.a
634 m
.submodules
.align_in_b
= self
.i
.b
635 m
.submodules
.align_out_a
= self
.o
.a
636 m
.submodules
.align_out_b
= self
.o
.b
638 # temporary (muxed) input and output to be shifted
639 t_inp
= FPNumBase(self
.width
)
640 t_out
= FPNumIn(None, self
.width
)
641 espec
= (len(self
.i
.a
.e
), True)
642 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
643 m
.submodules
.align_t_in
= t_inp
644 m
.submodules
.align_t_out
= t_out
645 m
.submodules
.multishift_r
= msr
647 ediff
= Signal(espec
, reset_less
=True)
648 ediffr
= Signal(espec
, reset_less
=True)
649 tdiff
= Signal(espec
, reset_less
=True)
650 elz
= Signal(reset_less
=True)
651 egz
= Signal(reset_less
=True)
653 # connect multi-shifter to t_inp/out mantissa (and tdiff)
654 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
655 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
656 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
657 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
658 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
660 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
661 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
662 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
663 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
665 # default: A-exp == B-exp, A and B untouched (fall through)
666 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
667 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
668 # only one shifter (muxed)
669 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
670 # exponent of a greater than b: shift b down
672 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
675 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
677 # exponent of b greater than a: shift a down
679 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
682 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
687 class FPAddAlignSingle(FPState
, FPID
):
689 def __init__(self
, width
, id_wid
):
690 FPState
.__init
__(self
, "align")
691 FPID
.__init
__(self
, id_wid
)
692 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
693 self
.out_a
= FPNumIn(None, width
)
694 self
.out_b
= FPNumIn(None, width
)
696 def setup(self
, m
, in_a
, in_b
, in_mid
):
697 """ links module to inputs and outputs
699 self
.mod
.setup(m
, in_a
, in_b
)
700 if self
.in_mid
is not None:
701 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
705 # NOTE: could be done as comb
706 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
707 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
711 class FPAddAlignSingleAdd(FPState
, FPID
):
713 def __init__(self
, width
, id_wid
):
714 FPState
.__init
__(self
, "align")
715 FPID
.__init
__(self
, id_wid
)
716 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
717 self
.o
= self
.mod
.ospec()
719 self
.a0mod
= FPAddStage0Mod(width
, id_wid
)
720 self
.a0o
= self
.a0mod
.ospec()
722 self
.a1mod
= FPAddStage1Mod(width
, id_wid
)
723 self
.a1o
= self
.a1mod
.ospec()
725 def setup(self
, m
, i
, in_mid
):
726 """ links module to inputs and outputs
729 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
731 self
.a0mod
.setup(m
, self
.o
)
732 m
.d
.comb
+= self
.a0o
.eq(self
.a0mod
.o
)
734 self
.a1mod
.setup(m
, self
.a0o
.tot
, self
.a0o
.z
)
736 if self
.in_mid
is not None:
737 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
741 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
742 m
.next
= "normalise_1"
745 class FPAddStage0Data
:
747 def __init__(self
, width
, id_wid
):
748 self
.z
= FPNumBase(width
, False)
749 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
750 self
.mid
= Signal(id_wid
, reset_less
=True)
753 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
756 class FPAddStage0Mod
:
758 def __init__(self
, width
, id_wid
):
761 self
.i
= self
.ispec()
762 self
.o
= self
.ospec()
765 return FPNumBase2Ops(self
.width
, self
.id_wid
)
768 return FPAddStage0Data(self
.width
, self
.id_wid
)
770 def setup(self
, m
, i
):
771 """ links module to inputs and outputs
773 m
.submodules
.add0
= self
774 m
.d
.comb
+= self
.i
.eq(i
)
776 def elaborate(self
, platform
):
778 m
.submodules
.add0_in_a
= self
.i
.a
779 m
.submodules
.add0_in_b
= self
.i
.b
780 m
.submodules
.add0_out_z
= self
.o
.z
782 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
784 # store intermediate tests (and zero-extended mantissas)
785 seq
= Signal(reset_less
=True)
786 mge
= Signal(reset_less
=True)
787 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
788 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
789 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
790 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
791 am0
.eq(Cat(self
.i
.a
.m
, 0)),
792 bm0
.eq(Cat(self
.i
.b
.m
, 0))
794 # same-sign (both negative or both positive) add mantissas
797 self
.o
.tot
.eq(am0
+ bm0
),
798 self
.o
.z
.s
.eq(self
.i
.a
.s
)
800 # a mantissa greater than b, use a
803 self
.o
.tot
.eq(am0
- bm0
),
804 self
.o
.z
.s
.eq(self
.i
.a
.s
)
806 # b mantissa greater than a, use b
809 self
.o
.tot
.eq(bm0
- am0
),
810 self
.o
.z
.s
.eq(self
.i
.b
.s
)
815 class FPAddStage0(FPState
, FPID
):
816 """ First stage of add. covers same-sign (add) and subtract
817 special-casing when mantissas are greater or equal, to
818 give greatest accuracy.
821 def __init__(self
, width
, id_wid
):
822 FPState
.__init
__(self
, "add_0")
823 FPID
.__init
__(self
, id_wid
)
824 self
.mod
= FPAddStage0Mod(width
)
825 self
.o
= self
.mod
.ospec()
827 def setup(self
, m
, in_a
, in_b
, in_mid
):
828 """ links module to inputs and outputs
830 self
.mod
.setup(m
, in_a
, in_b
)
831 if self
.in_mid
is not None:
832 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
836 # NOTE: these could be done as combinatorial (merge add0+add1)
837 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
841 class FPAddStage1Data
:
843 def __init__(self
, width
, id_wid
):
844 self
.z
= FPNumBase(width
, False)
846 self
.mid
= Signal(id_wid
, reset_less
=True)
849 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
853 class FPAddStage1Mod(FPState
):
854 """ Second stage of add: preparation for normalisation.
855 detects when tot sum is too big (tot[27] is kinda a carry bit)
858 def __init__(self
, width
, id_wid
):
861 self
.i
= self
.ispec()
862 self
.o
= self
.ospec()
865 return FPAddStage0Data(self
.width
, self
.id_wid
)
868 return FPAddStage1Data(self
.width
, self
.id_wid
)
870 def setup(self
, m
, in_tot
, in_z
):
871 """ links module to inputs and outputs
873 m
.submodules
.add1
= self
874 m
.submodules
.add1_out_overflow
= self
.o
.of
876 m
.d
.comb
+= self
.i
.z
.eq(in_z
)
877 m
.d
.comb
+= self
.i
.tot
.eq(in_tot
)
879 def elaborate(self
, platform
):
881 #m.submodules.norm1_in_overflow = self.in_of
882 #m.submodules.norm1_out_overflow = self.out_of
883 #m.submodules.norm1_in_z = self.in_z
884 #m.submodules.norm1_out_z = self.out_z
885 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
886 # tot[-1] (MSB) gets set when the sum overflows. shift result down
887 with m
.If(self
.i
.tot
[-1]):
889 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
890 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
891 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
892 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
893 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
894 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
896 # tot[-1] (MSB) zero case
899 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
900 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
901 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
902 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
903 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
908 class FPAddStage1(FPState
, FPID
):
910 def __init__(self
, width
, id_wid
):
911 FPState
.__init
__(self
, "add_1")
912 FPID
.__init
__(self
, id_wid
)
913 self
.mod
= FPAddStage1Mod(width
)
914 self
.out_z
= FPNumBase(width
, False)
915 self
.out_of
= Overflow()
916 self
.norm_stb
= Signal()
918 def setup(self
, m
, in_tot
, in_z
, in_mid
):
919 """ links module to inputs and outputs
921 self
.mod
.setup(m
, in_tot
, in_z
)
923 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
925 if self
.in_mid
is not None:
926 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
930 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
931 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
932 m
.d
.sync
+= self
.norm_stb
.eq(1)
933 m
.next
= "normalise_1"
936 class FPNormaliseModSingle
:
938 def __init__(self
, width
):
940 self
.in_z
= self
.ispec()
941 self
.out_z
= self
.ospec()
944 return FPNumBase(self
.width
, False)
947 return FPNumBase(self
.width
, False)
949 def setup(self
, m
, in_z
, out_z
):
950 """ links module to inputs and outputs
952 m
.submodules
.normalise
= self
953 m
.d
.comb
+= self
.in_z
.eq(in_z
)
954 m
.d
.comb
+= out_z
.eq(self
.out_z
)
956 def elaborate(self
, platform
):
959 mwid
= self
.out_z
.m_width
+2
960 pe
= PriorityEncoder(mwid
)
961 m
.submodules
.norm_pe
= pe
963 m
.submodules
.norm1_out_z
= self
.out_z
964 m
.submodules
.norm1_in_z
= self
.in_z
966 in_z
= FPNumBase(self
.width
, False)
968 m
.submodules
.norm1_insel_z
= in_z
969 m
.submodules
.norm1_insel_overflow
= in_of
971 espec
= (len(in_z
.e
), True)
972 ediff_n126
= Signal(espec
, reset_less
=True)
973 msr
= MultiShiftRMerge(mwid
, espec
)
974 m
.submodules
.multishift_r
= msr
976 m
.d
.comb
+= in_z
.eq(self
.in_z
)
977 m
.d
.comb
+= in_of
.eq(self
.in_of
)
978 # initialise out from in (overridden below)
979 m
.d
.comb
+= self
.out_z
.eq(in_z
)
980 m
.d
.comb
+= self
.out_of
.eq(in_of
)
981 # normalisation decrease condition
982 decrease
= Signal(reset_less
=True)
983 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
986 # *sigh* not entirely obvious: count leading zeros (clz)
987 # with a PriorityEncoder: to find from the MSB
988 # we reverse the order of the bits.
989 temp_m
= Signal(mwid
, reset_less
=True)
990 temp_s
= Signal(mwid
+1, reset_less
=True)
991 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
993 # cat round and guard bits back into the mantissa
994 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
995 pe
.i
.eq(temp_m
[::-1]), # inverted
996 clz
.eq(pe
.o
), # count zeros from MSB down
997 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
998 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
999 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1006 def __init__(self
, width
, id_wid
):
1007 self
.roundz
= Signal(reset_less
=True)
1008 self
.z
= FPNumBase(width
, False)
1009 self
.mid
= Signal(id_wid
, reset_less
=True)
1012 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1015 class FPNorm1ModSingle
:
1017 def __init__(self
, width
, id_wid
):
1019 self
.id_wid
= id_wid
1020 self
.i
= self
.ispec()
1021 self
.o
= self
.ospec()
1024 return FPAddStage1Data(self
.width
, self
.id_wid
)
1027 return FPNorm1Data(self
.width
, self
.id_wid
)
1029 def setup(self
, m
, i
, out_z
):
1030 """ links module to inputs and outputs
1032 m
.submodules
.normalise_1
= self
1034 m
.d
.comb
+= self
.i
.eq(i
)
1036 m
.d
.comb
+= out_z
.eq(self
.o
.z
)
1038 def elaborate(self
, platform
):
1041 mwid
= self
.o
.z
.m_width
+2
1042 pe
= PriorityEncoder(mwid
)
1043 m
.submodules
.norm_pe
= pe
1046 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1048 m
.submodules
.norm1_out_z
= self
.o
.z
1049 m
.submodules
.norm1_out_overflow
= of
1050 m
.submodules
.norm1_in_z
= self
.i
.z
1051 m
.submodules
.norm1_in_overflow
= self
.i
.of
1054 m
.submodules
.norm1_insel_z
= i
.z
1055 m
.submodules
.norm1_insel_overflow
= i
.of
1057 espec
= (len(i
.z
.e
), True)
1058 ediff_n126
= Signal(espec
, reset_less
=True)
1059 msr
= MultiShiftRMerge(mwid
, espec
)
1060 m
.submodules
.multishift_r
= msr
1062 m
.d
.comb
+= i
.eq(self
.i
)
1063 # initialise out from in (overridden below)
1064 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1065 m
.d
.comb
+= of
.eq(i
.of
)
1066 # normalisation increase/decrease conditions
1067 decrease
= Signal(reset_less
=True)
1068 increase
= Signal(reset_less
=True)
1069 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1070 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1072 with m
.If(decrease
):
1073 # *sigh* not entirely obvious: count leading zeros (clz)
1074 # with a PriorityEncoder: to find from the MSB
1075 # we reverse the order of the bits.
1076 temp_m
= Signal(mwid
, reset_less
=True)
1077 temp_s
= Signal(mwid
+1, reset_less
=True)
1078 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1079 # make sure that the amount to decrease by does NOT
1080 # go below the minimum non-INF/NaN exponent
1081 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1084 # cat round and guard bits back into the mantissa
1085 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1086 pe
.i
.eq(temp_m
[::-1]), # inverted
1087 clz
.eq(limclz
), # count zeros from MSB down
1088 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1089 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1090 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1091 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1092 # overflow in bits 0..1: got shifted too (leave sticky)
1093 of
.guard
.eq(temp_s
[1]), # guard
1094 of
.round_bit
.eq(temp_s
[0]), # round
1097 with m
.Elif(increase
):
1098 temp_m
= Signal(mwid
+1, reset_less
=True)
1100 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1102 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1103 # connect multi-shifter to inp/out mantissa (and ediff)
1105 msr
.diff
.eq(ediff_n126
),
1106 self
.o
.z
.m
.eq(msr
.m
[3:]),
1107 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1108 # overflow in bits 0..1: got shifted too (leave sticky)
1109 of
.guard
.eq(temp_s
[2]), # guard
1110 of
.round_bit
.eq(temp_s
[1]), # round
1111 of
.sticky
.eq(temp_s
[0]), # sticky
1112 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
class FPNorm1ModMulti:
    """Single normalisation step with a selectable input source.

    ``in_select`` chooses whether the step operates on the external
    inputs (``in_z``/``in_of``) or on the intermediates
    (``temp_z``/``temp_of``).  One evaluation moves the exponent by one
    and shifts the mantissa by one bit in the matching direction;
    ``out_norm`` is the OR of the "decrease" and "increase" conditions.
    """

    def __init__(self, width, single_cycle=True):
        """Create the input, intermediate and output records.

        * width: bit-width passed to FPNumBase
        * single_cycle: accepted for signature compatibility; not read
          anywhere in this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # elaborate() assigns to out_norm, so the Signal must exist here;
        # without it elaboration fails with AttributeError.
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """Build the combinatorial normalisation step and return the Module."""
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies holding whichever source in_select picked
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end

        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState, FPID):
    """FSM state "normalise_1" built around an FPNorm1ModSingle instance.

    Holds the normalised number (``out_z``) and a registered copy of the
    module's ``out_of.roundz`` flag (``out_roundz``).
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # NOTE(review): single_cycle is not read anywhere in this class.
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # NOTE(review): FPNormToPack.setup builds FPNorm1ModSingle with
        # (width, id_wid) and calls its setup as (m, i, out_z); the calls
        # made here use a different shape -- confirm against the module.
        self.mod = FPNorm1ModSingle(width)
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        # the multiplex id is only wired up when one is in use
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Register the rounding flag produced by the normalisation module."""
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNorm1Multi(FPState, FPID):
    """FSM state "normalise_1" that iterates FPNorm1ModMulti over clocks.

    ``temp_z``/``temp_of`` carry the intermediate result from one clock
    to the next; ``stb``/``ack`` form the handshake with the stage that
    supplies the operands.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        # NOTE(review): no setup() method is visible on FPNorm1ModMulti --
        # confirm this call resolves.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

        # the multiplex id is only wired up when one is in use
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Per-clock behaviour while the FSM is in this state."""
        # accept new input on strobe HIGH while ack is LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # latch this clock's result as the next clock's intermediate
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """FSM state chaining normalise, round, corrections and pack.

    The four modules are wired combinatorially one after another in
    setup(); action() registers the packed value and moves the FSM to
    "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        # Normalisation (chained to input in_z+in_of)
        norm = FPNorm1ModSingle(self.width, self.id_wid)
        norm_o = norm.ospec()
        norm.setup(m, i, norm_o.z)
        m.d.comb += norm_o.roundz.eq(norm.o.roundz)

        # Rounding (chained to normalisation)
        rounder = FPRoundMod(self.width, self.id_wid)
        round_o = rounder.ospec()
        rounder.setup(m, norm_o.z, norm_o.roundz)
        m.d.comb += round_o.eq(rounder.out_z)

        # Corrections (chained to rounding)
        corrector = FPCorrectionsMod(self.width, self.id_wid)
        corr_o = corrector.ospec()
        corrector.setup(m, round_o)
        m.d.comb += corr_o.eq(corrector.out_z)

        # Pack (chained to corrections); kept on self because action()
        # reads the packed value from it
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.out_z = self.pmod.ospec()
        self.pmod.setup(m, corr_o)

        # the multiplex id is only wired up when one is in use
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Register id and packed result, then go to "pack_put_z"."""
        self.idsync(m) # copies incoming ID to outgoing
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v) # outputs packed result
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """Create the record: a number ``z`` and a multiplex id ``mid``.

    * width: bit-width passed to FPNumBase
    * id_wid: bit-width of the ``mid`` signal
    """
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1306 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """Store the widths and create the input (``i``) and output records.

    * width: bit-width forwarded to the records built by ispec()/ospec()
    * id_wid: id bit-width forwarded to the same records
    """
    # width/id_wid must be set first: ispec()/ospec() read them
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1318 return FPNorm1Data(self
.width
, self
.id_wid
)
1321 return FPRoundData(self
.width
, self
.id_wid
)
def setup(self, m, in_z, roundz):
    """Register this module as ``m.submodules.roundz`` and drive its inputs.

    * in_z: number to be rounded, copied into ``self.i.z``
    * roundz: round-up flag, copied into ``self.i.roundz``
    """
    m.submodules.roundz = self

    m.d.comb += [
        self.i.z.eq(in_z),
        self.i.roundz.eq(roundz),
    ]
def elaborate(self, platform):
    """Copy the input to the output, rounding up when ``roundz`` is set."""
    m = Module()
    src = self.i
    dst = self.out_z

    # default: pass through unchanged (overridden below when rounding)
    m.d.comb += dst.eq(src)
    with m.If(src.roundz):
        m.d.comb += dst.z.m.eq(src.z.m + 1) # mantissa rounds up
        with m.If(src.z.m == src.z.m1s): # all 1s
            m.d.comb += dst.z.e.eq(src.z.e + 1) # exponent up

    return m
class FPRound(FPState, FPID):
    """FSM state "round": registers the output of an FPRoundMod."""

    def __init__(self, width, id_wid):
        """
        * width: bit-width of the number being rounded
        * id_wid: bit-width of the multiplex id
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod is constructed with (width, id_wid), as in
        # FPNormToPack.setup; passing width alone raises TypeError.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        # the multiplex id is only wired up when one is in use
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Register the rounded result and move to "corrections"."""
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """Combinatorial exponent fix-up applied after rounding.

    The input record is copied to the output; when the input number
    reports ``is_denormalised`` the output exponent is replaced by the
    input's ``N127`` value.
    """

    def __init__(self, width, id_wid):
        # ispec()/ospec() read these, so they are stored first
        self.width = width
        self.id_wid = id_wid
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """Return a fresh input record."""
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """Return a fresh output record."""
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.eq(in_z)

    def elaborate(self, platform):
        """Build the pass-through plus denormalised-exponent override."""
        m = Module()
        num_in = self.in_z.z
        num_out = self.out_z.z
        m.submodules.corr_in_z = num_in
        m.submodules.corr_out_z = num_out

        # default copy first; the override below takes precedence
        m.d.comb += self.out_z.eq(self.in_z)
        with m.If(num_in.is_denormalised):
            m.d.comb += num_out.e.eq(num_in.N127)

        return m
class FPCorrections(FPState, FPID):
    """FSM state "corrections": registers the output of FPCorrectionsMod."""

    def __init__(self, width, id_wid):
        """
        * width: bit-width of the number being corrected
        * id_wid: bit-width of the multiplex id
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod.__init__ requires both width and id_wid;
        # passing width alone raises TypeError.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        # the multiplex id is only wired up when one is in use
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Register the corrected result."""
        m.d.sync += self.out_z.eq(self.mod.out_z)
def __init__(self, width, id_wid):
    """Create the record: an output number ``z`` and a multiplex id ``mid``.

    * width: bit-width passed to FPNumOut
    * id_wid: bit-width of the ``mid`` signal
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1419 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """Store the widths and create the input (``i``) and output (``o``) records.

    * width: bit-width forwarded to the records built by ispec()/ospec()
    * id_wid: id bit-width forwarded to the same records
    """
    # width/id_wid must be set first: ispec()/ospec() read them
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1431 return FPRoundData(self
.width
, self
.id_wid
)
1434 return FPPackData(self
.width
, self
.id_wid
)
def setup(self, m, in_z):
    """ links module to inputs and outputs

        Registers this module as ``m.submodules.pack`` and copies
        ``in_z`` into the input record ``self.i``.
    """
    m.submodules.pack = self
    m.d.comb += self.i.eq(in_z)
def elaborate(self, platform):
    """Pack the input number, substituting infinity on overflow."""
    m = Module()
    num_in = self.i.z
    num_out = self.o.z
    m.submodules.pack_in_z = num_in

    with m.If(num_in.is_overflowed):
        # overflowed: emit infinity carrying the input's sign
        m.d.comb += num_out.inf(num_in.s)
    with m.Else():
        m.d.comb += num_out.create(num_in.s, num_in.e, num_in.m)

    return m
def __init__(self, width, id_wid):
    """Create the record: an output number ``z`` and a multiplex id ``mid``.

    * width: bit-width passed to FPNumOut
    * id_wid: bit-width of the ``mid`` signal
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1458 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState, FPID):
    """FSM state "pack": registers the packed value produced by FPPackMod."""

    def __init__(self, width, id_wid):
        """
        * width: bit-width of the number being packed
        * id_wid: bit-width of the multiplex id
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod is constructed with (width, id_wid), as in
        # FPNormToPack.setup; passing width alone raises TypeError.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Return a fresh input record, as defined by the pack module."""
        return self.mod.ispec()

    def ospec(self):
        """Return a fresh output record, as defined by the pack module."""
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        # the multiplex id is only wired up when one is in use
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Register the packed value and move to "pack_put_z"."""
        # The pack module exposes its result as ``o`` and the records keep
        # the number under ``z`` (same access path as FPNormToPack.action);
        # ``self.mod.out_z`` / ``self.out_z.v`` do not exist.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """FSM state that presents a result on ``out_z`` using stb/ack.

    While in this state the value (and, if used, the id) is registered
    onto the output and the strobe is raised; once strobe and ack are
    both high the strobe is dropped and the FSM moves to ``to_state``.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """
        * state: name of this FSM state
        * in_z / out_z: source and destination of the value
        * in_mid / out_mid: source and destination of the multiplex id
        * to_state: state entered after the handshake ("get_ops" if None)
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """Emit the per-clock statements for this state."""
        dst = self.out_z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
            dst.v.eq(self.in_z.v)
        ]
        handshake_done = dst.stb & dst.ack
        with m.If(handshake_done):
            m.d.sync += dst.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dst.stb.eq(1)
class FPPutZIdx(FPState):
    """FSM state that presents a result on ``out_zs[in_mid]`` using stb/ack.

    Same handshake as a single-output put state, except that the
    destination is selected from ``out_zs`` by the multiplex id.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """
        * state: name of this FSM state
        * in_z: source of the value
        * out_zs: indexable collection of destinations
        * in_mid: index selecting the destination
        * to_state: state entered after the handshake ("get_ops" if None)
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """Emit the per-clock statements for this state."""
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # destination picked by the multiplex id
        target = self.out_zs[self.in_mid]
        m.d.comb += [
            outz_stb.eq(target.stb),
            outz_ack.eq(target.ack),
        ]
        m.d.sync += [
            target.v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += target.stb.eq(1)
class FPADDBaseData:
    """Input record for the adder: operands ``a``, ``b`` and an id ``mid``."""

    def __init__(self, width, id_wid):
        """
        * width: bit-width of each operand signal
        * id_wid: bit-width of the ``mid`` signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying ``i``'s fields into this record."""
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("a", "b", "mid")]
1552 class FPADDBaseMod(FPID
):
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add base module

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    FPID.__init__(self, id_wid)
    # width/id_wid must be set before ispec() is called below
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.out_z = FPOp(width)

    # FSM states, in the order they are added by add_state()
    self.states = []
1575 return FPADDBaseData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """Append ``state`` to the FSM state list and return it.

    Returning the state lets callers create, register and keep a
    reference to it in a single expression.
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        Builds either the compact or the longer stage list (selected by
        ``self.compact``), then emits one FSM state per registered stage.
    """
    m = Module()
    m.submodules.out_z = self.out_z
    m.submodules.in_t = self.in_t
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:

        # each stage contributes its own statements under its state name
        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
1600 def get_longer_fragment(self
, m
, platform
=None):
1602 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1604 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1608 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1609 sc
.setup(m
, a
, b
, self
.in_mid
)
1611 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1612 dn
.setup(m
, a
, b
, sc
.in_mid
)
1614 if self
.single_cycle
:
1615 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1616 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1618 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1619 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1621 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1622 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1624 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1625 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1627 if self
.single_cycle
:
1628 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1629 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1631 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1632 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1634 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1635 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1637 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1638 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1640 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1641 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1643 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1644 pa
.in_mid
, self
.out_mid
))
1646 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1647 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """Register and wire the reduced set of stages.

    Stages are added in this order: get_ops, special cases + denorm,
    align + add, normalise-to-pack, and the two put-z output states.
    """
    width = self.width
    id_wid = self.id_wid

    fetch = self.add_state(FPGet2Op("get_ops", "special_cases",
                                    width, id_wid))
    fetch.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    specials = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
    specials.setup(m, fetch.o, self.in_mid)

    align_add = self.add_state(FPAddAlignSingleAdd(width, id_wid))
    align_add.setup(m, specials.o, specials.in_mid)

    norm_pack = self.add_state(FPNormToPack(width, id_wid))
    norm_pack.setup(m, align_add.a1o, align_add.in_mid)

    # output of the normal path
    self.add_state(FPPutZ("pack_put_z", norm_pack.out_z.z, self.out_z,
                          norm_pack.in_mid, self.out_mid))

    # output of the special-cases path
    self.add_state(FPPutZ("put_z", specials.out_z.z, self.out_z,
                          specials.in_mid, self.out_mid))
1673 class FPADDBase(FPState
, FPID
):
def __init__(self, width, id_wid=None, single_cycle=False):
    """ IEEE754 FP Add, wrapped as an FSM state ("fpadd")

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "fpadd")
    self.width = width
    self.single_cycle = single_cycle
    # the adder itself, which has its own stages
    self.mod = FPADDBaseMod(width, id_wid, single_cycle)

    self.in_t = Trigger()
    self.in_a  = Signal(width)
    self.in_b  = Signal(width)
    #self.out_z = FPOp(width)

    self.z_done = Signal(reset_less=True) # connects to out_z Strobe
    self.in_accept = Signal(reset_less=True)
    self.add_stb = Signal(reset_less=True)
    self.add_ack = Signal(reset=0, reset_less=True)
def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
    """Wire the wrapped adder to this state's inputs and outputs.

    * a, b: operand sources
    * add_stb: strobe announcing a new operand pair (registered here)
    * in_mid: multiplex id source
    * out_z, out_mid: result and id destinations, stored on self
    """
    self.out_z = out_z
    self.out_mid = out_mid
    m.d.comb += [self.in_a.eq(a),
                 self.in_b.eq(b),
                 self.mod.i.a.eq(self.in_a),
                 self.mod.i.b.eq(self.in_b),
                 self.in_mid.eq(in_mid),
                 self.mod.in_mid.eq(self.in_mid),
                 self.z_done.eq(self.mod.out_z.trigger),
                 #self.add_stb.eq(add_stb),
                 self.mod.in_t.stb.eq(self.in_t.stb),
                 self.in_t.ack.eq(self.mod.in_t.ack),
                 self.out_mid.eq(self.mod.out_mid),
                 self.out_z.v.eq(self.mod.out_z.v),
                 self.out_z.stb.eq(self.mod.out_z.stb),
                 self.mod.out_z.ack.eq(self.out_z.ack),
                ]

    m.d.sync += self.add_stb.eq(add_stb)
    m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
    m.d.sync += self.out_z.ack.eq(0) # likewise
    #m.d.sync += self.in_t.stb.eq(0)

    m.submodules.fpadd = self.mod
1724 def action(self
, m
):
1726 # in_accept is set on incoming strobe HIGH and ack LOW.
1727 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1729 #with m.If(self.in_t.ack):
1730 # m.d.sync += self.in_t.stb.eq(0)
1731 with m
.If(~self
.z_done
):
1732 # not done: test for accepting an incoming operand pair
1733 with m
.If(self
.in_accept
):
1735 self
.add_ack
.eq(1), # acknowledge receipt...
1736 self
.in_t
.stb
.eq(1), # initiate add
1739 m
.d
.sync
+= [self
.add_ack
.eq(0),
1740 self
.in_t
.stb
.eq(0),
1741 self
.out_z
.ack
.eq(1),
1744 # done: acknowledge, and write out id and value
1745 m
.d
.sync
+= [self
.add_ack
.eq(1),
1752 if self
.in_mid
is not None:
1753 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1756 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1758 # move to output state on detecting z ack
1759 with m
.If(self
.out_z
.trigger
):
1760 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1763 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1766 def __init__(self
, width
, id_wid
):
1768 self
.id_wid
= id_wid
1770 for i
in range(rs_sz
):
1772 out_z
.name
= "out_z_%d" % i
1774 self
.res
= Array(res
)
1775 self
.in_z
= FPOp(width
)
1776 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
def setup(self, m, in_z, in_mid):
    """Drive this object's ``in_z`` and ``in_mid`` from the given sources."""
    m.d.comb += [self.in_z.eq(in_z),
                 self.in_mid.eq(in_mid)]
1782 def get_fragment(self
, platform
=None):
1783 """ creates the HDL code-fragment for FPAdd
1786 m
.submodules
.res_in_z
= self
.in_z
1787 m
.submodules
+= self
.res
1799 """ FPADD: stages as follows:
1805 FPAddBase---> FPAddBaseMod
1807 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1809 FPAddBase is tricky: it is both a stage and *has* stages.
1810 Connection to FPAddBaseMod therefore requires an in stb/ack
1811 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1812 needs to be the thing that raises the incoming stb.
1815 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1818 * width: bit-width of IEEE754. supported: 16, 32, 64
1819 * id_wid: an identifier that is sync-connected to the input
1820 * single_cycle: True indicates each stage to complete in 1 clock
1823 self
.id_wid
= id_wid
1824 self
.single_cycle
= single_cycle
1826 #self.out_z = FPOp(width)
1827 self
.ids
= FPID(id_wid
)
1830 for i
in range(rs_sz
):
1833 in_a
.name
= "in_a_%d" % i
1834 in_b
.name
= "in_b_%d" % i
1835 rs
.append((in_a
, in_b
))
1839 for i
in range(rs_sz
):
1841 out_z
.name
= "out_z_%d" % i
1843 self
.res
= Array(res
)
def add_state(self, state):
    """Append ``state`` to the FSM state list and return it.

    Returning the state lets callers create, register and keep a
    reference to it in a single expression.
    """
    self.states.append(state)
    return state
1851 def get_fragment(self
, platform
=None):
1852 """ creates the HDL code-fragment for FPAdd
1855 m
.submodules
+= self
.rs
1857 in_a
= self
.rs
[0][0]
1858 in_b
= self
.rs
[0][1]
1860 out_z
= FPOp(self
.width
)
1861 out_mid
= Signal(self
.id_wid
, reset_less
=True)
1862 m
.submodules
.out_z
= out_z
1864 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1869 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1874 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1875 ab
= self
.add_state(ab
)
1876 ab
.setup(m
, a
, b
, getb
.out_decode
, self
.ids
.in_mid
,
1879 pz
= self
.add_state(FPPutZIdx("put_z", ab
.out_z
, self
.res
,
1882 with m
.FSM() as fsm
:
1884 for state
in self
.states
:
1885 with m
.State(state
.state_from
):
if __name__ == "__main__":
    # NOTE(review): the two builds below look like alternatives chosen by
    # a hard-coded switch, with the full FPADD build active -- confirm.
    if True:
        # full adder: operand ports, result ports and the id signals
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() + \
                         alu.rs[0][1].ports() + \
                         alu.res[0].ports() + \
                         [alu.ids.in_mid, alu.ids.out_mid])
    else:
        # adder core only
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                         alu.in_t.ports() + \
                         alu.out_z.ports() + \
                         [alu.in_mid, alu.out_mid])


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())