1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """ Remember the identifier of the state this object stands for.

        state_from: stored unchanged as self.state_from (subclasses in
                    this file pass strings such as "align" or "add_0")
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ Create the operand input, the captured value and the decode flag.

        width: handed to FPOp() and used as the shape of out_op
    """
    # incoming operand object (provides .v, .stb and .ack, see elaborate)
    self.in_op = FPOp(width)
    # width-bit signal that elaborate() loads from in_op.v
    self.out_op = Signal(width)
    # flag created without reset logic; elaborate() drives it from
    # in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 #m.d.comb += self.out_op.eq(self.mod.out_op)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 #m.submodules.get_op_in = self.in_op
204 m
.submodules
.get_op1_out
= self
.o
.a
205 m
.submodules
.get_op2_out
= self
.o
.b
206 out_op1
= FPNumIn(None, self
.width
)
207 out_op2
= FPNumIn(None, self
.width
)
208 with m
.If(self
.trigger
):
210 out_op1
.decode(self
.i
.a
),
211 out_op2
.decode(self
.i
.b
),
212 self
.o
.a
.eq(out_op1
),
213 self
.o
.b
.eq(out_op2
),
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ Set up the two-operand fetch state.

        in_state:  identifier passed on to FPState.__init__
        out_state: identifier kept as self.out_state (setup/action
                   assign it to m.next)
        width, id_wid: forwarded to the FPGet2OpMod constructor
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # build the worker module once, then derive the output record from it
    mod = FPGet2OpMod(width, id_wid)
    self.mod = mod
    self.o = mod.ospec()

    # single-bit flags, all created without reset logic
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ Bundle two FPNumBase operands with a multiplexer id.

        width:   passed to both FPNumBase constructors
        id_wid:  shape of the mid signal
        m_extra: second FPNumBase argument (default True), forwarded as-is
    """
    # operand a and operand b, built with identical parameters
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # id that travels with the operand pair; no reset logic
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
263 class FPAddSpecialCasesMod
:
264 """ special cases: NaNs, infs, zeros, denormalised
265 NOTE: some of these are unique to add. see "Special Operations"
266 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
269 def __init__(self
, width
, id_wid
):
272 self
.i
= self
.ispec()
273 self
.o
= self
.ospec()
274 self
.out_do_z
= Signal(reset_less
=True)
277 return FPNumBase2Ops(self
.width
, self
.id_wid
)
280 return FPPackData(self
.width
, self
.id_wid
)
282 def setup(self
, m
, i
, out_do_z
):
283 """ links module to inputs and outputs
285 m
.submodules
.specialcases
= self
286 m
.d
.comb
+= self
.i
.eq(i
)
287 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
289 def elaborate(self
, platform
):
292 m
.submodules
.sc_in_a
= self
.i
.a
293 m
.submodules
.sc_in_b
= self
.i
.b
294 m
.submodules
.sc_out_z
= self
.o
.z
297 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
300 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
302 # if a is NaN or b is NaN return NaN
303 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
304 m
.d
.comb
+= self
.out_do_z
.eq(1)
305 m
.d
.comb
+= self
.o
.z
.nan(0)
307 # XXX WEIRDNESS for FP16 non-canonical NaN handling
310 ## if a is zero and b is NaN return -b
311 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
315 ## if b is zero and a is NaN return -a
316 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
320 ## if a is -zero and b is NaN return -b
321 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
325 ## if b is -zero and a is NaN return -a
326 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
330 # if a is inf return inf (or NaN)
331 with m
.Elif(self
.i
.a
.is_inf
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
334 # if a is inf and signs don't match return NaN
335 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
336 m
.d
.comb
+= self
.o
.z
.nan(0)
338 # if b is inf return inf
339 with m
.Elif(self
.i
.b
.is_inf
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
343 # if a is zero and b zero return signed-a/b
344 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
350 # if a is zero return b
351 with m
.Elif(self
.i
.a
.is_zero
):
352 m
.d
.comb
+= self
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
356 # if b is zero return a
357 with m
.Elif(self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
362 # if a equal to -b return zero (+ve zero)
363 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
364 m
.d
.comb
+= self
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.zero(0)
367 # Denormalised Number checks
369 m
.d
.comb
+= self
.out_do_z
.eq(0)
375 def __init__(self
, id_wid
):
378 self
.in_mid
= Signal(id_wid
, reset_less
=True)
379 self
.out_mid
= Signal(id_wid
, reset_less
=True)
385 if self
.id_wid
is not None:
386 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
389 class FPAddSpecialCases(FPState
, FPID
):
390 """ special cases: NaNs, infs, zeros, denormalised
391 NOTE: some of these are unique to add. see "Special Operations"
392 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Set up the "special_cases" state.

        width:  operand width, forwarded to FPAddSpecialCasesMod
        id_wid: multiplexer-id width, used by FPID and forwarded to
                FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid); the previous
    # single-argument call raised TypeError as soon as this class was
    # instantiated.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output record with the same layout as the module's own output
    self.out_z = self.mod.ospec()
    # flag created without reset logic; handed to mod.setup() in setup()
    self.out_do_z = Signal(reset_less=True)
402 def setup(self
, m
, in_a
, in_b
, in_mid
):
403 """ links module to inputs and outputs
405 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
406 if self
.in_mid
is not None:
407 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
411 with m
.If(self
.out_do_z
):
412 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
415 m
.next
= "denormalise"
418 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
419 """ special cases: NaNs, infs, zeros, denormalised
420 NOTE: some of these are unique to add. see "Special Operations"
421 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Set up the combined special-cases + denormalise state.

        width, id_wid: forwarded to both FPAddSpecialCasesMod and
                       FPAddDeNormMod; id_wid also goes to FPID
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)

    # special-cases half: module, its output record, and the do_z flag
    smod = FPAddSpecialCasesMod(width, id_wid)
    self.smod = smod
    self.out_z = smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise half: module and its output record
    dmod = FPAddDeNormMod(width, id_wid)
    self.dmod = dmod
    self.o = dmod.ospec()
434 def setup(self
, m
, i
, in_mid
):
435 """ links module to inputs and outputs
437 self
.smod
.setup(m
, i
, self
.out_do_z
)
438 self
.dmod
.setup(m
, i
)
439 if self
.in_mid
is not None:
440 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
444 with m
.If(self
.out_do_z
):
445 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
449 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
450 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
453 class FPAddDeNormMod(FPState
):
455 def __init__(self
, width
, id_wid
):
458 self
.i
= self
.ispec()
459 self
.o
= self
.ospec()
462 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 return FPNumBase2Ops(self
.width
, self
.id_wid
)
467 def setup(self
, m
, i
):
468 """ links module to inputs and outputs
470 m
.submodules
.denormalise
= self
471 m
.d
.comb
+= self
.i
.eq(i
)
473 def elaborate(self
, platform
):
475 m
.submodules
.denorm_in_a
= self
.i
.a
476 m
.submodules
.denorm_in_b
= self
.i
.b
477 m
.submodules
.denorm_out_a
= self
.o
.a
478 m
.submodules
.denorm_out_b
= self
.o
.b
479 # hmmm, don't like repeating identical code
480 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
481 with m
.If(self
.i
.a
.exp_n127
):
482 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
484 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
486 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
487 with m
.If(self
.i
.b
.exp_n127
):
488 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
490 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
495 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ Set up the "denormalise" state.

        width:  operand width, forwarded to FPAddDeNormMod and FPNumBase
        id_wid: multiplexer-id width, used by FPID and forwarded to
                FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    # FPAddDeNormMod.__init__ takes (width, id_wid); calling it with
    # width alone raised TypeError on construction.
    self.mod = FPAddDeNormMod(width, id_wid)
    # registered copies of the two operands (written in action())
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
504 def setup(self
, m
, in_a
, in_b
, in_mid
):
505 """ links module to inputs and outputs
507 self
.mod
.setup(m
, in_a
, in_b
)
508 if self
.in_mid
is not None:
509 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
513 # Denormalised Number checks
515 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
516 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
519 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ Create the operand inputs/outputs and the exponents-equal flag.

        width: passed to every FPNumBase / FPNumIn created here
    """
    # operands coming in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # operands going out (FPNumIn built with None as first argument)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # flag without reset logic; elaborate() sets it to 0 by default and
    # to 1 in its "exponents equal" branch
    self.exp_eq = Signal(reset_less=True)
528 def elaborate(self
, platform
):
529 # This one however (single-cycle) will do the shift
534 m
.submodules
.align_in_a
= self
.in_a
535 m
.submodules
.align_in_b
= self
.in_b
536 m
.submodules
.align_out_a
= self
.out_a
537 m
.submodules
.align_out_b
= self
.out_b
539 # NOTE: this does *not* do single-cycle multi-shifting,
540 # it *STAYS* in the align state until exponents match
542 # exponent of a greater than b: shift b down
543 m
.d
.comb
+= self
.exp_eq
.eq(0)
544 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
545 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
546 agtb
= Signal(reset_less
=True)
547 altb
= Signal(reset_less
=True)
548 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
549 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
551 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
552 # exponent of b greater than a: shift a down
554 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
555 # exponents equal: move to next stage.
557 m
.d
.comb
+= self
.exp_eq
.eq(1)
561 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ Set up the multi-cycle "align" state.

        width:  forwarded to FPAddAlignMultiMod and to the FPNumIn outputs
        id_wid: forwarded to FPID
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")
    # worker module (takes width only)
    self.mod = FPAddAlignMultiMod(width)
    # operand registers, loaded from mod.out_a / mod.out_b in action()
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors mod.exp_eq (connected in setup()); no reset logic
    self.exp_eq = Signal(reset_less=True)
571 def setup(self
, m
, in_a
, in_b
, in_mid
):
572 """ links module to inputs and outputs
574 m
.submodules
.align
= self
.mod
575 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
576 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
577 #m.d.comb += self.out_a.eq(self.mod.out_a)
578 #m.d.comb += self.out_b.eq(self.mod.out_b)
579 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
580 if self
.in_mid
is not None:
581 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
585 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
586 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
587 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ Bundle two FPNumIn operands with a multiplexer id.

        width:  passed to both FPNumIn constructors
        id_wid: shape of the mid signal
    """
    # operand a and operand b (FPNumIn built with None as first argument)
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # id that travels with the operand pair; no reset logic
    self.mid = Signal(id_wid, reset_less=True)
599 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
602 class FPAddAlignSingleMod
:
604 def __init__(self
, width
, id_wid
):
607 self
.i
= self
.ispec()
608 self
.o
= self
.ospec()
611 return FPNumBase2Ops(self
.width
, self
.id_wid
)
614 return FPNumIn2Ops(self
.width
, self
.id_wid
)
616 def setup(self
, m
, i
):
617 """ links module to inputs and outputs
619 m
.submodules
.align
= self
620 m
.d
.comb
+= self
.i
.eq(i
)
622 def elaborate(self
, platform
):
623 """ Aligns A against B or B against A, depending on which has the
624 greater exponent. This is done in a *single* cycle using
625 variable-width bit-shift
627 the shifter used here is quite expensive in terms of gates.
628 Mux A or B in (and out) into temporaries, as only one of them
629 needs to be aligned against the other
633 m
.submodules
.align_in_a
= self
.i
.a
634 m
.submodules
.align_in_b
= self
.i
.b
635 m
.submodules
.align_out_a
= self
.o
.a
636 m
.submodules
.align_out_b
= self
.o
.b
638 # temporary (muxed) input and output to be shifted
639 t_inp
= FPNumBase(self
.width
)
640 t_out
= FPNumIn(None, self
.width
)
641 espec
= (len(self
.i
.a
.e
), True)
642 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
643 m
.submodules
.align_t_in
= t_inp
644 m
.submodules
.align_t_out
= t_out
645 m
.submodules
.multishift_r
= msr
647 ediff
= Signal(espec
, reset_less
=True)
648 ediffr
= Signal(espec
, reset_less
=True)
649 tdiff
= Signal(espec
, reset_less
=True)
650 elz
= Signal(reset_less
=True)
651 egz
= Signal(reset_less
=True)
653 # connect multi-shifter to t_inp/out mantissa (and tdiff)
654 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
655 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
656 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
657 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
658 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
660 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
661 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
662 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
663 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
665 # default: A-exp == B-exp, A and B untouched (fall through)
666 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
667 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
668 # only one shifter (muxed)
669 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
670 # exponent of a greater than b: shift b down
672 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
675 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
677 # exponent of b greater than a: shift a down
679 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
682 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
687 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ Set up the single-cycle "align" state.

        width:  forwarded to FPAddAlignSingleMod and to the FPNumIn outputs
        id_wid: forwarded to FPID and FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)
    # worker module
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # operand registers written in action()
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
696 def setup(self
, m
, in_a
, in_b
, in_mid
):
697 """ links module to inputs and outputs
699 self
.mod
.setup(m
, in_a
, in_b
)
700 if self
.in_mid
is not None:
701 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
705 # NOTE: could be done as comb
706 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
707 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
711 class FPAddAlignSingleAdd(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ Set up the merged align + add0 + add1 state.

        width, id_wid: forwarded to each of the three worker modules;
                       id_wid also goes to FPID

        For every worker module an output record of matching layout is
        obtained from that module's ospec().
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    # alignment stage
    mod = FPAddAlignSingleMod(width, id_wid)
    self.mod = mod
    self.o = mod.ospec()

    # first add stage
    a0mod = FPAddStage0Mod(width, id_wid)
    self.a0mod = a0mod
    self.a0o = a0mod.ospec()

    # second add stage
    a1mod = FPAddStage1Mod(width, id_wid)
    self.a1mod = a1mod
    self.a1o = a1mod.ospec()
725 def setup(self
, m
, i
, in_mid
):
726 """ links module to inputs and outputs
729 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
731 self
.a0mod
.setup(m
, self
.o
)
732 m
.d
.comb
+= self
.a0o
.eq(self
.a0mod
.o
)
734 self
.a1mod
.setup(m
, self
.a0o
.tot
, self
.a0o
.z
)
736 if self
.in_mid
is not None:
737 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
741 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
742 m
.next
= "normalise_1"
745 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ Create the add-stage-0 result record.

        width:  passed to FPNumBase (second argument False)
        id_wid: shape of the mid signal
    """
    z = FPNumBase(width, False)
    self.z = z
    # total is four bits wider than z's mantissa width
    self.tot = Signal(z.m_width + 4, reset_less=True)
    # id that travels with the result; no reset logic
    self.mid = Signal(id_wid, reset_less=True)
753 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
756 class FPAddStage0Mod
:
758 def __init__(self
, width
, id_wid
):
761 self
.i
= self
.ispec()
762 self
.o
= self
.ospec()
765 return FPNumBase2Ops(self
.width
, self
.id_wid
)
768 return FPAddStage0Data(self
.width
, self
.id_wid
)
770 def setup(self
, m
, i
):
771 """ links module to inputs and outputs
773 m
.submodules
.add0
= self
774 m
.d
.comb
+= self
.i
.eq(i
)
776 def elaborate(self
, platform
):
778 m
.submodules
.add0_in_a
= self
.i
.a
779 m
.submodules
.add0_in_b
= self
.i
.b
780 m
.submodules
.add0_out_z
= self
.o
.z
782 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
784 # store intermediate tests (and zero-extended mantissas)
785 seq
= Signal(reset_less
=True)
786 mge
= Signal(reset_less
=True)
787 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
788 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
789 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
790 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
791 am0
.eq(Cat(self
.i
.a
.m
, 0)),
792 bm0
.eq(Cat(self
.i
.b
.m
, 0))
794 # same-sign (both negative or both positive) add mantissas
797 self
.o
.tot
.eq(am0
+ bm0
),
798 self
.o
.z
.s
.eq(self
.i
.a
.s
)
800 # a mantissa greater than b, use a
803 self
.o
.tot
.eq(am0
- bm0
),
804 self
.o
.z
.s
.eq(self
.i
.a
.s
)
806 # b mantissa greater than a, use b
809 self
.o
.tot
.eq(bm0
- am0
),
810 self
.o
.z
.s
.eq(self
.i
.b
.s
)
815 class FPAddStage0(FPState
, FPID
):
816 """ First stage of add. covers same-sign (add) and subtract
817 special-casing when mantissas are greater or equal, to
818 give greatest accuracy.
def __init__(self, width, id_wid):
    """ Set up the "add_0" state.

        width:  operand width, forwarded to FPAddStage0Mod
        id_wid: multiplexer-id width, used by FPID and forwarded to
                FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    # FPAddStage0Mod.__init__ takes (width, id_wid); calling it with
    # width alone raised TypeError on construction.
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record with the same layout as the module's own output
    self.o = self.mod.ospec()
827 def setup(self
, m
, in_a
, in_b
, in_mid
):
828 """ links module to inputs and outputs
830 self
.mod
.setup(m
, in_a
, in_b
)
831 if self
.in_mid
is not None:
832 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
836 # NOTE: these could be done as combinatorial (merge add0+add1)
837 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
841 class FPAddStage1Data
:
843 def __init__(self
, width
, id_wid
):
844 self
.z
= FPNumBase(width
, False)
846 self
.mid
= Signal(id_wid
, reset_less
=True)
849 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
853 class FPAddStage1Mod(FPState
):
854 """ Second stage of add: preparation for normalisation.
855 detects when tot sum is too big (tot[27] is kinda a carry bit)
858 def __init__(self
, width
, id_wid
):
861 self
.i
= self
.ispec()
862 self
.o
= self
.ospec()
865 return FPAddStage0Data(self
.width
, self
.id_wid
)
868 return FPAddStage1Data(self
.width
, self
.id_wid
)
870 def setup(self
, m
, in_tot
, in_z
):
871 """ links module to inputs and outputs
873 m
.submodules
.add1
= self
874 m
.submodules
.add1_out_overflow
= self
.o
.of
876 m
.d
.comb
+= self
.i
.z
.eq(in_z
)
877 m
.d
.comb
+= self
.i
.tot
.eq(in_tot
)
879 def elaborate(self
, platform
):
881 #m.submodules.norm1_in_overflow = self.in_of
882 #m.submodules.norm1_out_overflow = self.out_of
883 #m.submodules.norm1_in_z = self.in_z
884 #m.submodules.norm1_out_z = self.out_z
885 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
886 # tot[-1] (MSB) gets set when the sum overflows. shift result down
887 with m
.If(self
.i
.tot
[-1]):
889 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
890 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
891 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
892 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
893 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
894 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
896 # tot[-1] (MSB) zero case
899 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
900 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
901 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
902 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
903 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
908 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ Set up the "add_1" state.

        width:  operand width, forwarded to FPAddStage1Mod and FPNumBase
        id_wid: multiplexer-id width, used by FPID and forwarded to
                FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    # FPAddStage1Mod.__init__ takes (width, id_wid); calling it with
    # width alone raised TypeError on construction.
    self.mod = FPAddStage1Mod(width, id_wid)
    # registered result and overflow record (written in action())
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe: setup() clears it by default, action() sets it to 1
    self.norm_stb = Signal()
918 def setup(self
, m
, in_tot
, in_z
, in_mid
):
919 """ links module to inputs and outputs
921 self
.mod
.setup(m
, in_tot
, in_z
)
923 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
925 if self
.in_mid
is not None:
926 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
930 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
931 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
932 m
.d
.sync
+= self
.norm_stb
.eq(1)
933 m
.next
= "normalise_1"
936 class FPNormaliseModSingle
:
938 def __init__(self
, width
):
940 self
.in_z
= self
.ispec()
941 self
.out_z
= self
.ospec()
944 return FPNumBase(self
.width
, False)
947 return FPNumBase(self
.width
, False)
949 def setup(self
, m
, in_z
, out_z
):
950 """ links module to inputs and outputs
952 m
.submodules
.normalise
= self
953 m
.d
.comb
+= self
.in_z
.eq(in_z
)
954 m
.d
.comb
+= out_z
.eq(self
.out_z
)
956 def elaborate(self
, platform
):
959 mwid
= self
.out_z
.m_width
+2
960 pe
= PriorityEncoder(mwid
)
961 m
.submodules
.norm_pe
= pe
963 m
.submodules
.norm1_out_z
= self
.out_z
964 m
.submodules
.norm1_in_z
= self
.in_z
966 in_z
= FPNumBase(self
.width
, False)
968 m
.submodules
.norm1_insel_z
= in_z
969 m
.submodules
.norm1_insel_overflow
= in_of
971 espec
= (len(in_z
.e
), True)
972 ediff_n126
= Signal(espec
, reset_less
=True)
973 msr
= MultiShiftRMerge(mwid
, espec
)
974 m
.submodules
.multishift_r
= msr
976 m
.d
.comb
+= in_z
.eq(self
.in_z
)
977 m
.d
.comb
+= in_of
.eq(self
.in_of
)
978 # initialise out from in (overridden below)
979 m
.d
.comb
+= self
.out_z
.eq(in_z
)
980 m
.d
.comb
+= self
.out_of
.eq(in_of
)
981 # normalisation decrease condition
982 decrease
= Signal(reset_less
=True)
983 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
986 # *sigh* not entirely obvious: count leading zeros (clz)
987 # with a PriorityEncoder: to find from the MSB
988 # we reverse the order of the bits.
989 temp_m
= Signal(mwid
, reset_less
=True)
990 temp_s
= Signal(mwid
+1, reset_less
=True)
991 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
993 # cat round and guard bits back into the mantissa
994 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
995 pe
.i
.eq(temp_m
[::-1]), # inverted
996 clz
.eq(pe
.o
), # count zeros from MSB down
997 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
998 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
999 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ Create the normalisation result record.

        width:  passed to FPNumBase (second argument False)
        id_wid: shape of the mid signal
    """
    # single-bit flag without reset logic
    self.roundz = Signal(reset_less=True)
    # the number being carried through
    self.z = FPNumBase(width, False)
    # id that travels with the result; no reset logic
    self.mid = Signal(id_wid, reset_less=True)
1012 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1015 class FPNorm1ModSingle
:
1017 def __init__(self
, width
, id_wid
):
1019 self
.id_wid
= id_wid
1020 self
.i
= self
.ispec()
1021 self
.o
= self
.ospec()
1024 return FPAddStage1Data(self
.width
, self
.id_wid
)
1027 return FPNorm1Data(self
.width
, self
.id_wid
)
1029 def setup(self
, m
, i
, out_z
):
1030 """ links module to inputs and outputs
1032 m
.submodules
.normalise_1
= self
1034 m
.d
.comb
+= self
.i
.eq(i
)
1036 m
.d
.comb
+= out_z
.eq(self
.o
.z
)
1038 def elaborate(self
, platform
):
1041 mwid
= self
.o
.z
.m_width
+2
1042 pe
= PriorityEncoder(mwid
)
1043 m
.submodules
.norm_pe
= pe
1046 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1048 m
.submodules
.norm1_out_z
= self
.o
.z
1049 m
.submodules
.norm1_out_overflow
= of
1050 m
.submodules
.norm1_in_z
= self
.i
.z
1051 m
.submodules
.norm1_in_overflow
= self
.i
.of
1054 m
.submodules
.norm1_insel_z
= i
.z
1055 m
.submodules
.norm1_insel_overflow
= i
.of
1057 espec
= (len(i
.z
.e
), True)
1058 ediff_n126
= Signal(espec
, reset_less
=True)
1059 msr
= MultiShiftRMerge(mwid
, espec
)
1060 m
.submodules
.multishift_r
= msr
1062 m
.d
.comb
+= i
.eq(self
.i
)
1063 # initialise out from in (overridden below)
1064 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1065 m
.d
.comb
+= of
.eq(i
.of
)
1066 # normalisation increase/decrease conditions
1067 decrease
= Signal(reset_less
=True)
1068 increase
= Signal(reset_less
=True)
1069 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1070 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1072 with m
.If(decrease
):
1073 # *sigh* not entirely obvious: count leading zeros (clz)
1074 # with a PriorityEncoder: to find from the MSB
1075 # we reverse the order of the bits.
1076 temp_m
= Signal(mwid
, reset_less
=True)
1077 temp_s
= Signal(mwid
+1, reset_less
=True)
1078 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1079 # make sure that the amount to decrease by does NOT
1080 # go below the minimum non-INF/NaN exponent
1081 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1084 # cat round and guard bits back into the mantissa
1085 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1086 pe
.i
.eq(temp_m
[::-1]), # inverted
1087 clz
.eq(limclz
), # count zeros from MSB down
1088 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1089 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1090 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1091 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1092 # overflow in bits 0..1: got shifted too (leave sticky)
1093 of
.guard
.eq(temp_s
[1]), # guard
1094 of
.round_bit
.eq(temp_s
[0]), # round
1097 with m
.Elif(increase
):
1098 temp_m
= Signal(mwid
+1, reset_less
=True)
1100 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1102 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1103 # connect multi-shifter to inp/out mantissa (and ediff)
1105 msr
.diff
.eq(ediff_n126
),
1106 self
.o
.z
.m
.eq(msr
.m
[3:]),
1107 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1108 # overflow in bits 0..1: got shifted too (leave sticky)
1109 of
.guard
.eq(temp_s
[2]), # guard
1110 of
.round_bit
.eq(temp_s
[1]), # round
1111 of
.sticky
.eq(temp_s
[0]), # sticky
1112 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
class FPNorm1ModMulti:
    """ Combinatorial normalisation step: one bit of adjustment per pass.

        The operand pair chosen by in_select (in_z/in_of when high,
        temp_z/temp_of otherwise) is copied to out_z/out_of and then, if
        needed, shifted by one bit with the exponent adjusted to match.
        out_norm is high whenever such an adjustment was applied.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: forwarded to FPNumBase for in_z, temp_z and out_z
            * single_cycle: accepted but not used by this class
        """
        # elaborate() reads self.width, so it has to be stored here.
        self.width = width
        self.in_select = Signal(reset_less=True)
        # elaborate() drives self.out_norm; without this attribute it
        # would raise AttributeError.
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ builds and returns the Module implementing the step
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies holding whichever source was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)

        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)

        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end

        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1": wraps FPNorm1ModSingle and registers its
        roundz flag.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: forwarded to FPNorm1ModSingle and FPNumBase
            * id_wid: forwarded to FPID
            * single_cycle: accepted but not used by this class
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # NOTE(review): FPNormToPack constructs FPNorm1ModSingle with
        # (width, id_wid) and uses its ispec/ospec interface; this
        # one-argument call and the setup()/out_of usage below may be
        # stale -- confirm against FPNorm1ModSingle.
        self.mod = FPNorm1ModSingle(width)
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the module's roundz flag and moves to the next state
        """
        # NOTE(review): restored by analogy with FPNormToPack.action,
        # which calls idsync to copy the incoming ID -- confirm.
        self.idsync(m)
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
        # NOTE(review): no transition is present in the listing; "round"
        # is the state name FPRound registers, and FPRound is the stage
        # wired after this one in get_longer_fragment -- confirm.
        m.next = "round"
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1" (multi-pass form): feeds FPNorm1ModMulti
        results back through temp_z/temp_of on every clock while out_norm
        is high.
    """

    def __init__(self, width, id_wid):
        """ * width: forwarded to FPNorm1ModMulti and FPNumBase
            * id_wid: forwarded to FPID
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti as listed in this file defines no
        # setup() method -- confirm this call resolves.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0)  # sets to zero when not in normalise_1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ one clock of the normalisation loop
        """
        # NOTE(review): restored by analogy with FPNormToPack.action,
        # which calls idsync to copy the incoming ID -- confirm.
        self.idsync(m)
        # accept a new operand when the strobe is high and ack is low
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed this pass's result back as next pass's temp input
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                # NOTE(review): the body of this branch is not present in
                # the listing; raising ack mirrors how FPADDBase.action
                # acknowledges on in_accept -- confirm.
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            # NOTE(review): transition restored; "round" is the state
            # name FPRound registers -- confirm.
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" (compact form): chains normalise, round,
        corrections and pack modules combinatorially, then registers the
        packed value in action().
    """

    def __init__(self, width, id_wid):
        """ * width: forwarded to every chained module in setup()
            * id_wid: forwarded to FPID and to every chained module
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # setup() reads both attributes.  Storing id_wid explicitly after
        # FPID.__init__ follows what FPADDBaseMod.__init__ does.
        self.width = width
        self.id_wid = id_wid

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs

            Creates self.pmod and self.out_z, which action() relies on,
            so setup() must be called first.
        """
        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        n_out = nmod.ospec()
        nmod.setup(m, i, n_out.z)
        m.d.comb += n_out.roundz.eq(nmod.o.roundz)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        r_out_z = rmod.ospec()
        rmod.setup(m, n_out.z, n_out.roundz)
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        c_out_z = cmod.ospec()
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.out_z = self.pmod.ospec()
        self.pmod.setup(m, c_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed result and moves to "pack_put_z"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v)  # outputs packed result
        m.next = "pack_put_z"
class FPRoundData:
    """ Data record: a number z (FPNumBase) plus a multiplex id (mid).
    """
    # NOTE(review): the class statement is absent from the listing.  The
    # name is taken from the FPRoundData(width, id_wid) calls elsewhere in
    # this file, whose results are accessed through .z -- confirm.

    def __init__(self, width, id_wid):
        """ * width: forwarded to FPNumBase
            * id_wid: bit-width of the mid signal
        """
        self.z = FPNumBase(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid here
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPRoundMod:
    """ Combinatorial rounding: passes the input through, adding one to
        the mantissa when the roundz flag is set.
    """
    # NOTE(review): the class statement is absent from the listing.  The
    # name is taken from the FPRoundMod(...) call sites, which use the
    # same setup(m, in_z, roundz) / ospec() / out_z interface -- confirm.

    def __init__(self, width, id_wid):
        """ * width: forwarded to the input and output records
            * id_wid: forwarded to the input and output records
        """
        self.width = width  # read by ispec()/ospec()
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input record
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, in_z, roundz):
        """ registers this module as m.submodules.roundz and connects
            its inputs
        """
        m.submodules.roundz = self

        m.d.comb += self.i.z.eq(in_z)
        m.d.comb += self.i.roundz.eq(roundz)

    def elaborate(self, platform):
        """ builds and returns the Module implementing the rounding
        """
        m = Module()
        # default: output equals input (overridden below when rounding)
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.roundz):
            m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1)  # mantissa rounds up
            # the exponent bump only applies when the +1 above wraps, so
            # this test is nested under the roundz condition
            with m.If(self.i.z.m == self.i.z.m1s):  # all 1s
                m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1)  # exponent up
        return m
class FPRound(FPState, FPID):
    """ FSM state "round": wraps FPRoundMod and registers its output.
    """

    def __init__(self, width, id_wid):
        """ * width: forwarded to FPRoundMod
            * id_wid: forwarded to FPID and FPRoundMod
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod.__init__ takes (width, id_wid); FPNormToPack already
        # passes both.  The previous one-argument call raised TypeError.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the rounded result and moves to "corrections"
        """
        # NOTE(review): restored by analogy with FPNormToPack.action,
        # which calls idsync to copy the incoming ID -- confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial exponent correction: passes the input through,
        replacing the exponent with N127 when the input is denormalised.
    """

    def __init__(self, width, id_wid):
        """ * width: forwarded to the input and output records
            * id_wid: forwarded to the input and output records
        """
        self.width = width  # read by ispec()/ospec()
        self.id_wid = id_wid
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input record
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.eq(in_z)

    def elaborate(self, platform):
        """ builds and returns the Module implementing the correction
        """
        m = Module()
        m.submodules.corr_in_z = self.in_z.z
        m.submodules.corr_out_z = self.out_z.z
        # default: output equals input (exponent overridden below)
        m.d.comb += self.out_z.eq(self.in_z)
        with m.If(self.in_z.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.in_z.z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers its
        output.
    """

    def __init__(self, width, id_wid):
        """ * width: forwarded to FPCorrectionsMod
            * id_wid: forwarded to FPID and FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod.__init__ takes (width, id_wid); the previous
        # one-argument call raised TypeError.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the corrected result and moves to the next state
        """
        # NOTE(review): restored by analogy with FPNormToPack.action,
        # which calls idsync to copy the incoming ID -- confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # NOTE(review): no transition is present in the listing; "pack"
        # is the state name FPPack registers, and FPPack is the stage
        # wired after this one in get_longer_fragment -- confirm.
        m.next = "pack"
class FPPackData:
    """ Data record: a packed number z (FPNumOut) plus a multiplex id.
    """
    # NOTE(review): the class statement is absent from the listing.  The
    # name is taken from the FPPackData(width, id_wid) call whose result
    # is used through .z.inf()/.z.create()/.z.v -- confirm.

    def __init__(self, width, id_wid):
        """ * width: forwarded to FPNumOut
            * id_wid: bit-width of the mid signal
        """
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid here
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPackMod:
    """ Combinatorial packer: emits infinity when the input has
        overflowed, otherwise builds the output from sign, exponent and
        mantissa.
    """
    # NOTE(review): the class statement is absent from the listing.  The
    # name is taken from the FPPackMod(...) call sites, which use the
    # same setup(m, in_z) / ospec() / .o interface -- confirm.

    def __init__(self, width, id_wid):
        """ * width: forwarded to the input and output records
            * id_wid: forwarded to the input and output records
        """
        self.width = width  # read by ispec()/ospec()
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input record
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ builds and returns the Module implementing the packing
        """
        m = Module()
        m.submodules.pack_in_z = self.i.z
        with m.If(self.i.z.is_overflowed):
            m.d.comb += self.o.z.inf(self.i.z.s)
        # without an Else the create() below would always override inf()
        with m.Else():
            m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        return m
1453 def __init__(self
, width
, id_wid
):
1454 self
.z
= FPNumOut(width
, False)
1455 self
.mid
= Signal(id_wid
, reset_less
=True)
1458 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState, FPID):
    """ FSM state "pack": wraps FPPackMod and registers its packed value.
    """

    def __init__(self, width, id_wid):
        """ * width: forwarded to FPPackMod
            * id_wid: forwarded to FPID and FPPackMod
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod.__init__ takes (width, id_wid); FPNormToPack already
        # passes both.  The previous one-argument call raised TypeError.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input record (delegates to the module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output record (delegates to the module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed result and moves to "pack_put_z"
        """
        # NOTE(review): restored by analogy with FPNormToPack.action,
        # which calls idsync to copy the incoming ID -- confirm.
        self.idsync(m)
        # The module's output record is self.mod.o and the packed bits
        # live under .z.v, exactly as FPNormToPack.action reads them
        # (self.pmod.o.z.v).  The previous self.mod.out_z.v and
        # self.out_z.v referred to attributes these records do not have.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and waits for the
        stb/ack handshake before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its .v is copied out
            * out_z: destination; .z.v, .z.stb and .z.ack are used
            * in_mid / out_mid: id copied across when in_mid is not None
            * to_state: state entered after the handshake
              (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these, so they have to be stored
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ drives the output value and the stb/ack handshake
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): the statement wrapping this assignment is absent
        # from the listing (a bare .eq() has no effect); the sync domain
        # is assumed because out_z.z.stb below is driven from sync --
        # confirm.
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on the out_zs entry selected by
        in_mid and waits for that entry's stb/ack handshake.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its .v is copied out
            * out_zs: indexable collection of outputs (.v, .stb, .ack)
            * in_mid: index into out_zs
            * to_state: state entered after the handshake
              (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z  # action() reads self.in_z.v
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ drives the selected output and its stb/ack handshake
        """
        # combinatorial views of the selected entry's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # NOTE(review): the statement wrapping this assignment is absent
        # from the listing (a bare .eq() has no effect); the sync domain
        # is assumed because the entry's stb below is driven from sync --
        # confirm.
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ Input record for the adder: operands a and b plus a multiplex id.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the a and b signals
            * id_wid: bit-width of the mid signal
        """
        # NOTE(review): width is stored alongside id_wid as the other
        # spec-style classes in this file do; nothing in this class reads
        # it back -- confirm it is wanted.
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.a, i.b and i.mid
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
class FPOpData:
    """ Output record for the adder: an FPOp z plus a multiplex id.
    """
    # NOTE(review): the class statement is absent from the listing.  The
    # name is taken from the FPOpData(width, id_wid) call in
    # FPADDBaseMod, whose result is used through .z and .mid -- confirm.

    def __init__(self, width, id_wid):
        """ * width: forwarded to FPOp
            * id_wid: bit-width of the mid signal
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.z and i.mid here
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
1561 class FPADDBaseMod(FPID
):
1563 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1566 * width: bit-width of IEEE754. supported: 16, 32, 64
1567 * id_wid: an identifier that is sync-connected to the input
1568 * single_cycle: True indicates each stage to complete in 1 clock
1569 * compact: True indicates a reduced number of stages
1571 FPID
.__init
__(self
, id_wid
)
1573 self
.id_wid
= id_wid
1574 self
.single_cycle
= single_cycle
1575 self
.compact
= compact
1577 self
.in_t
= Trigger()
1578 self
.i
= self
.ispec()
1579 self
.o
= self
.ospec()
1584 return FPADDBaseData(self
.width
, self
.id_wid
)
1587 return FPOpData(self
.width
, self
.id_wid
)
1589 def add_state(self
, state
):
1590 self
.states
.append(state
)
1593 def get_fragment(self
, platform
=None):
1594 """ creates the HDL code-fragment for FPAdd
1597 m
.submodules
.out_z
= self
.o
.z
1598 m
.submodules
.in_t
= self
.in_t
1600 self
.get_compact_fragment(m
, platform
)
1602 self
.get_longer_fragment(m
, platform
)
1604 with m
.FSM() as fsm
:
1606 for state
in self
.states
:
1607 with m
.State(state
.state_from
):
1612 def get_longer_fragment(self
, m
, platform
=None):
1614 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1616 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1620 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1621 sc
.setup(m
, a
, b
, self
.in_mid
)
1623 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1624 dn
.setup(m
, a
, b
, sc
.in_mid
)
1626 if self
.single_cycle
:
1627 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1628 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1630 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1631 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1633 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1634 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1636 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1637 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1639 if self
.single_cycle
:
1640 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1641 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1643 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1644 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1646 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1647 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1649 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1650 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1652 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1653 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1655 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1656 pa
.in_mid
, self
.out_mid
))
1658 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1659 pa
.in_mid
, self
.out_mid
))
1661 def get_compact_fragment(self
, m
, platform
=None):
1663 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1664 self
.width
, self
.id_wid
))
1665 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1667 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1668 sc
.setup(m
, get
.o
, self
.in_mid
)
1670 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1671 alm
.setup(m
, sc
.o
, sc
.in_mid
)
1673 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1674 n1
.setup(m
, alm
.a1o
, alm
.in_mid
)
1676 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1677 n1
.in_mid
, self
.out_mid
))
1679 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1680 sc
.in_mid
, self
.out_mid
))
1683 class FPADDBase(FPState
, FPID
):
1685 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1688 * width: bit-width of IEEE754. supported: 16, 32, 64
1689 * id_wid: an identifier that is sync-connected to the input
1690 * single_cycle: True indicates each stage to complete in 1 clock
1692 FPID
.__init
__(self
, id_wid
)
1693 FPState
.__init
__(self
, "fpadd")
1695 self
.single_cycle
= single_cycle
1696 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1697 self
.o
= self
.ospec()
1699 self
.in_t
= Trigger()
1700 self
.i
= self
.ispec()
1702 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1703 self
.in_accept
= Signal(reset_less
=True)
1704 self
.add_stb
= Signal(reset_less
=True)
1705 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1708 return self
.mod
.ispec()
1711 return self
.mod
.ospec()
1713 def setup(self
, m
, i
, add_stb
, in_mid
):
1714 m
.d
.comb
+= [self
.i
.eq(i
),
1715 self
.mod
.i
.eq(self
.i
),
1716 self
.in_mid
.eq(in_mid
),
1717 self
.mod
.in_mid
.eq(self
.in_mid
),
1718 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1719 #self.add_stb.eq(add_stb),
1720 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1721 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1722 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1723 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1724 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1725 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1728 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1729 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1730 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1731 #m.d.sync += self.in_t.stb.eq(0)
1733 m
.submodules
.fpadd
= self
.mod
1735 def action(self
, m
):
1737 # in_accept is set on incoming strobe HIGH and ack LOW.
1738 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1740 #with m.If(self.in_t.ack):
1741 # m.d.sync += self.in_t.stb.eq(0)
1742 with m
.If(~self
.z_done
):
1743 # not done: test for accepting an incoming operand pair
1744 with m
.If(self
.in_accept
):
1746 self
.add_ack
.eq(1), # acknowledge receipt...
1747 self
.in_t
.stb
.eq(1), # initiate add
1750 m
.d
.sync
+= [self
.add_ack
.eq(0),
1751 self
.in_t
.stb
.eq(0),
1755 # done: acknowledge, and write out id and value
1756 m
.d
.sync
+= [self
.add_ack
.eq(1),
1763 if self
.in_mid
is not None:
1764 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1767 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1769 # move to output state on detecting z ack
1770 with m
.If(self
.out_z
.trigger
):
1771 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1774 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1777 def __init__(self
, width
, id_wid
):
1779 self
.id_wid
= id_wid
1781 for i
in range(rs_sz
):
1783 out_z
.name
= "out_z_%d" % i
1785 self
.res
= Array(res
)
1786 self
.in_z
= FPOp(width
)
1787 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1789 def setup(self
, m
, in_z
, in_mid
):
1790 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1791 self
.in_mid
.eq(in_mid
)]
1793 def get_fragment(self
, platform
=None):
1794 """ creates the HDL code-fragment for FPAdd
1797 m
.submodules
.res_in_z
= self
.in_z
1798 m
.submodules
+= self
.res
1810 """ FPADD: stages as follows:
1816 FPAddBase---> FPAddBaseMod
1818 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1820 FPAddBase is tricky: it is both a stage and *has* stages.
1821 Connection to FPAddBaseMod therefore requires an in stb/ack
1822 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1823 needs to be the thing that raises the incoming stb.
1826 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1829 * width: bit-width of IEEE754. supported: 16, 32, 64
1830 * id_wid: an identifier that is sync-connected to the input
1831 * single_cycle: True indicates each stage to complete in 1 clock
1834 self
.id_wid
= id_wid
1835 self
.single_cycle
= single_cycle
1837 #self.out_z = FPOp(width)
1838 self
.ids
= FPID(id_wid
)
1841 for i
in range(rs_sz
):
1844 in_a
.name
= "in_a_%d" % i
1845 in_b
.name
= "in_b_%d" % i
1846 rs
.append((in_a
, in_b
))
1850 for i
in range(rs_sz
):
1852 out_z
.name
= "out_z_%d" % i
1854 self
.res
= Array(res
)
1858 def add_state(self
, state
):
1859 self
.states
.append(state
)
1862 def get_fragment(self
, platform
=None):
1863 """ creates the HDL code-fragment for FPAdd
1866 m
.submodules
+= self
.rs
1868 in_a
= self
.rs
[0][0]
1869 in_b
= self
.rs
[0][1]
1871 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1876 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1881 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1882 ab
= self
.add_state(ab
)
1883 abd
= ab
.ispec() # create an input spec object for FPADDBase
1884 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1885 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1888 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1891 with m
.FSM() as fsm
:
1893 for state
in self
.states
:
1894 with m
.State(state
.state_from
):
1900 if __name__
== "__main__":
1902 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1903 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1904 alu
.rs
[0][1].ports() + \
1905 alu
.res
[0].ports() + \
1906 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1908 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1909 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1910 alu
.in_t
.ports() + \
1911 alu
.out_z
.ports() + \
1912 [alu
.in_mid
, alu
.out_mid
])
1915 # works... but don't use, just do "python fname.py convert -t v"
1916 #print (verilog.convert(alu, ports=[
1917 # ports=alu.in_a.ports() + \
1918 # alu.in_b.ports() + \
1919 # alu.out_z.ports())