1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
16 def __init__(self
, state_from
):
17 self
.state_from
= state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
139 def __init__(self
, width
):
140 self
.in_op
= FPOp(width
)
141 self
.out_op
= Signal(width
)
142 self
.out_decode
= Signal(reset_less
=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 #m.d.comb += self.out_op.eq(self.mod.out_op)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 #m.submodules.get_op_in = self.in_op
204 m
.submodules
.get_op1_out
= self
.o
.a
205 m
.submodules
.get_op2_out
= self
.o
.b
206 out_op1
= FPNumIn(None, self
.width
)
207 out_op2
= FPNumIn(None, self
.width
)
208 with m
.If(self
.trigger
):
210 out_op1
.decode(self
.i
.a
),
211 out_op2
.decode(self
.i
.b
),
212 self
.o
.a
.eq(out_op1
),
213 self
.o
.b
.eq(out_op2
),
218 class FPGet2Op(FPState
):
222 def __init__(self
, in_state
, out_state
, width
, id_wid
):
223 FPState
.__init
__(self
, in_state
)
224 self
.out_state
= out_state
225 self
.mod
= FPGet2OpMod(width
, id_wid
)
226 self
.o
= self
.mod
.ospec()
227 self
.in_stb
= Signal(reset_less
=True)
228 self
.out_ack
= Signal(reset_less
=True)
229 self
.out_decode
= Signal(reset_less
=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
254 def __init__(self
, width
, id_wid
, m_extra
=True):
255 self
.a
= FPNumBase(width
, m_extra
)
256 self
.b
= FPNumBase(width
, m_extra
)
257 self
.mid
= Signal(id_wid
, reset_less
=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
263 class FPAddSpecialCasesMod
:
264 """ special cases: NaNs, infs, zeros, denormalised
265 NOTE: some of these are unique to add. see "Special Operations"
266 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
269 def __init__(self
, width
, id_wid
):
272 self
.i
= self
.ispec()
273 self
.o
= self
.ospec()
274 self
.out_do_z
= Signal(reset_less
=True)
277 return FPNumBase2Ops(self
.width
, self
.id_wid
)
280 return FPPackData(self
.width
, self
.id_wid
)
282 def setup(self
, m
, i
, out_do_z
):
283 """ links module to inputs and outputs
285 m
.submodules
.specialcases
= self
286 m
.d
.comb
+= self
.i
.eq(i
)
287 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
289 def elaborate(self
, platform
):
292 m
.submodules
.sc_in_a
= self
.i
.a
293 m
.submodules
.sc_in_b
= self
.i
.b
294 m
.submodules
.sc_out_z
= self
.o
.z
297 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
300 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
302 # if a is NaN or b is NaN return NaN
303 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
304 m
.d
.comb
+= self
.out_do_z
.eq(1)
305 m
.d
.comb
+= self
.o
.z
.nan(0)
307 # XXX WEIRDNESS for FP16 non-canonical NaN handling
310 ## if a is zero and b is NaN return -b
311 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
315 ## if b is zero and a is NaN return -a
316 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
320 ## if a is -zero and b is NaN return -b
321 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
325 ## if b is -zero and a is NaN return -a
326 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
330 # if a is inf return inf (or NaN)
331 with m
.Elif(self
.i
.a
.is_inf
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
334 # if a is inf and signs don't match return NaN
335 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
336 m
.d
.comb
+= self
.o
.z
.nan(0)
338 # if b is inf return inf
339 with m
.Elif(self
.i
.b
.is_inf
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
343 # if a is zero and b zero return signed-a/b
344 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
350 # if a is zero return b
351 with m
.Elif(self
.i
.a
.is_zero
):
352 m
.d
.comb
+= self
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
356 # if b is zero return a
357 with m
.Elif(self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
362 # if a equal to -b return zero (+ve zero)
363 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
364 m
.d
.comb
+= self
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.zero(0)
367 # Denormalised Number checks
369 m
.d
.comb
+= self
.out_do_z
.eq(0)
375 def __init__(self
, id_wid
):
378 self
.in_mid
= Signal(id_wid
, reset_less
=True)
379 self
.out_mid
= Signal(id_wid
, reset_less
=True)
385 if self
.id_wid
is not None:
386 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
389 class FPAddSpecialCases(FPState
, FPID
):
390 """ special cases: NaNs, infs, zeros, denormalised
391 NOTE: some of these are unique to add. see "Special Operations"
392 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state and its combinatorial module

        width and id_wid are both handed to FPAddSpecialCasesMod, whose
        constructor requires the two of them (the previous single-argument
        call raised TypeError as soon as this class was instantiated).

        NOTE(review): setup() in this class still calls
        self.mod.setup(m, in_a, in_b, self.out_do_z) and action code reads
        self.mod.out_z, neither of which matches FPAddSpecialCasesMod as
        defined in this file (setup(m, i, out_do_z), output held in .o).
        Confirm whether this class is still in use.
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
402 def setup(self
, m
, in_a
, in_b
, in_mid
):
403 """ links module to inputs and outputs
405 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
406 if self
.in_mid
is not None:
407 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
411 with m
.If(self
.out_do_z
):
412 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
415 m
.next
= "denormalise"
418 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
419 """ special cases: NaNs, infs, zeros, denormalised
420 NOTE: some of these are unique to add. see "Special Operations"
421 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
424 def __init__(self
, width
, id_wid
):
425 FPState
.__init
__(self
, "special_cases")
426 FPID
.__init
__(self
, id_wid
)
427 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
428 self
.out_z
= self
.smod
.ospec()
429 self
.out_do_z
= Signal(reset_less
=True)
431 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
432 self
.o
= self
.dmod
.ospec()
434 def setup(self
, m
, i
, in_mid
):
435 """ links module to inputs and outputs
437 self
.smod
.setup(m
, i
, self
.out_do_z
)
438 self
.dmod
.setup(m
, i
)
439 if self
.in_mid
is not None:
440 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
444 with m
.If(self
.out_do_z
):
445 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
449 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
450 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
class FPAddDeNormMod(FPState):
    """ combinatorial exponent/mantissa fix-up applied to both operands

        Each operand is copied from input to output.  When an operand's
        exp_n127 flag is set its output exponent is replaced by that
        operand's N126 value; otherwise the top bit of its output mantissa
        is forced to 1.

        NOTE(review): only the a and b fields of the output record are
        driven here; nothing in this module drives o.mid - confirm that is
        handled by the caller.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: a pair of operands plus mux id """
        return FPNumBase2Ops(self.width, self.id_wid)

    def ospec(self):
        """ output record: same layout as the input record """
        return FPNumBase2Ops(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()

        # register the four number objects as named submodules
        named_subs = (("denorm_in_a", self.i.a),
                      ("denorm_in_b", self.i.b),
                      ("denorm_out_a", self.o.a),
                      ("denorm_out_b", self.o.b))
        for sub_name, sub in named_subs:
            setattr(m.submodules, sub_name, sub)

        # identical treatment for a and for b, so do it once in a loop
        for op_in, op_out in ((self.i.a, self.o.a), (self.i.b, self.o.b)):
            m.d.comb += op_out.eq(op_in)
            with m.If(op_in.exp_n127):
                m.d.comb += op_out.e.eq(op_in.N126) # limit this exponent
            with m.Else():
                m.d.comb += op_out.m[-1].eq(1) # set top mantissa bit

        return m
495 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "denormalise" state, its module and output registers

        FPAddDeNormMod's constructor takes (width, id_wid); the previous
        call passed width only and raised TypeError on instantiation.

        NOTE(review): setup() in this class calls
        self.mod.setup(m, in_a, in_b) and the sync assignments read
        self.mod.out_a / self.mod.out_b, whereas FPAddDeNormMod in this
        file offers setup(m, i) and exposes its results as .o.a / .o.b.
        Confirm whether this class is still in use.
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
504 def setup(self
, m
, in_a
, in_b
, in_mid
):
505 """ links module to inputs and outputs
507 self
.mod
.setup(m
, in_a
, in_b
)
508 if self
.in_mid
is not None:
509 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
513 # Denormalised Number checks
515 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
516 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
519 class FPAddAlignMultiMod(FPState
):
521 def __init__(self
, width
):
522 self
.in_a
= FPNumBase(width
)
523 self
.in_b
= FPNumBase(width
)
524 self
.out_a
= FPNumIn(None, width
)
525 self
.out_b
= FPNumIn(None, width
)
526 self
.exp_eq
= Signal(reset_less
=True)
528 def elaborate(self
, platform
):
529 # This one however (single-cycle) will do the shift
534 m
.submodules
.align_in_a
= self
.in_a
535 m
.submodules
.align_in_b
= self
.in_b
536 m
.submodules
.align_out_a
= self
.out_a
537 m
.submodules
.align_out_b
= self
.out_b
539 # NOTE: this does *not* do single-cycle multi-shifting,
540 # it *STAYS* in the align state until exponents match
542 # exponent of a greater than b: shift b down
543 m
.d
.comb
+= self
.exp_eq
.eq(0)
544 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
545 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
546 agtb
= Signal(reset_less
=True)
547 altb
= Signal(reset_less
=True)
548 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
549 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
551 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
552 # exponent of b greater than a: shift a down
554 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
555 # exponents equal: move to next stage.
557 m
.d
.comb
+= self
.exp_eq
.eq(1)
561 class FPAddAlignMulti(FPState
, FPID
):
563 def __init__(self
, width
, id_wid
):
564 FPID
.__init
__(self
, id_wid
)
565 FPState
.__init
__(self
, "align")
566 self
.mod
= FPAddAlignMultiMod(width
)
567 self
.out_a
= FPNumIn(None, width
)
568 self
.out_b
= FPNumIn(None, width
)
569 self
.exp_eq
= Signal(reset_less
=True)
571 def setup(self
, m
, in_a
, in_b
, in_mid
):
572 """ links module to inputs and outputs
574 m
.submodules
.align
= self
.mod
575 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
576 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
577 #m.d.comb += self.out_a.eq(self.mod.out_a)
578 #m.d.comb += self.out_b.eq(self.mod.out_b)
579 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
580 if self
.in_mid
is not None:
581 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
585 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
586 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
587 with m
.If(self
.exp_eq
):
593 def __init__(self
, width
, id_wid
):
594 self
.a
= FPNumIn(None, width
)
595 self
.b
= FPNumIn(None, width
)
596 self
.mid
= Signal(id_wid
, reset_less
=True)
599 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
602 class FPAddAlignSingleMod
:
604 def __init__(self
, width
, id_wid
):
607 self
.i
= self
.ispec()
608 self
.o
= self
.ospec()
611 return FPNumBase2Ops(self
.width
, self
.id_wid
)
614 return FPNumIn2Ops(self
.width
, self
.id_wid
)
616 def setup(self
, m
, i
):
617 """ links module to inputs and outputs
619 m
.submodules
.align
= self
620 m
.d
.comb
+= self
.i
.eq(i
)
622 def elaborate(self
, platform
):
623 """ Aligns A against B or B against A, depending on which has the
624 greater exponent. This is done in a *single* cycle using
625 variable-width bit-shift
627 the shifter used here is quite expensive in terms of gates.
628 Mux A or B in (and out) into temporaries, as only one of them
629 needs to be aligned against the other
633 m
.submodules
.align_in_a
= self
.i
.a
634 m
.submodules
.align_in_b
= self
.i
.b
635 m
.submodules
.align_out_a
= self
.o
.a
636 m
.submodules
.align_out_b
= self
.o
.b
638 # temporary (muxed) input and output to be shifted
639 t_inp
= FPNumBase(self
.width
)
640 t_out
= FPNumIn(None, self
.width
)
641 espec
= (len(self
.i
.a
.e
), True)
642 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
643 m
.submodules
.align_t_in
= t_inp
644 m
.submodules
.align_t_out
= t_out
645 m
.submodules
.multishift_r
= msr
647 ediff
= Signal(espec
, reset_less
=True)
648 ediffr
= Signal(espec
, reset_less
=True)
649 tdiff
= Signal(espec
, reset_less
=True)
650 elz
= Signal(reset_less
=True)
651 egz
= Signal(reset_less
=True)
653 # connect multi-shifter to t_inp/out mantissa (and tdiff)
654 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
655 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
656 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
657 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
658 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
660 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
661 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
662 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
663 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
665 # default: A-exp == B-exp, A and B untouched (fall through)
666 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
667 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
668 # only one shifter (muxed)
669 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
670 # exponent of a greater than b: shift b down
672 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
675 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
677 # exponent of b greater than a: shift a down
679 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
682 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
687 class FPAddAlignSingle(FPState
, FPID
):
689 def __init__(self
, width
, id_wid
):
690 FPState
.__init
__(self
, "align")
691 FPID
.__init
__(self
, id_wid
)
692 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
693 self
.out_a
= FPNumIn(None, width
)
694 self
.out_b
= FPNumIn(None, width
)
696 def setup(self
, m
, in_a
, in_b
, in_mid
):
697 """ links module to inputs and outputs
699 self
.mod
.setup(m
, in_a
, in_b
)
700 if self
.in_mid
is not None:
701 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
705 # NOTE: could be done as comb
706 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
707 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
711 class FPAddAlignSingleAdd(FPState
, FPID
):
713 def __init__(self
, width
, id_wid
):
714 FPState
.__init
__(self
, "align")
715 FPID
.__init
__(self
, id_wid
)
718 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
719 self
.o
= self
.mod
.ospec()
721 self
.a1mod
= FPAddStage1Mod(width
, id_wid
)
722 self
.a1o
= self
.a1mod
.ospec()
724 def setup(self
, m
, i
, in_mid
):
725 """ links module to inputs and outputs
728 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
730 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
731 a0mod
.setup(m
, self
.o
)
733 m
.d
.comb
+= a0o
.eq(a0mod
.o
)
735 self
.a1mod
.setup(m
, a0o
)
737 if self
.in_mid
is not None:
738 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
742 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
743 m
.next
= "normalise_1"
class FPAddStage0Data:
    """ record passed from add stage 0 to add stage 1

        z   - partial result number (created as FPNumBase(width, False))
        tot - raw mantissa total, 4 bits wider than z's mantissa
        mid - multiplexer id, id_wid bits
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        field_pairs = ((self.z, i.z), (self.tot, i.tot), (self.mid, i.mid))
        return [dest.eq(src) for dest, src in field_pairs]
class FPAddStage0Mod:
    """ combinatorial first stage of add: produces the mantissa total

        Same-sign operands have their mantissas added.  Otherwise the
        smaller mantissa is subtracted from the larger one and the result
        takes the sign of the operand with the larger mantissa.  The
        result exponent is copied from operand a.

        NOTE(review): o.mid is not driven by this module - confirm that is
        handled by the caller.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: a pair of operands plus mux id """
        return FPNumBase2Ops(self.width, self.id_wid)

    def ospec(self):
        """ output record: partial result, mantissa total and mux id """
        return FPAddStage0Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        op_a, op_b = self.i.a, self.i.b
        res = self.o.z
        total = self.o.tot

        m.submodules.add0_in_a = op_a
        m.submodules.add0_in_b = op_b
        m.submodules.add0_out_z = res

        # result exponent is taken straight from operand a
        m.d.comb += res.e.eq(op_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(op_a.m)+1, reset_less=True)
        bm0 = Signal(len(op_b.m)+1, reset_less=True)
        m.d.comb += seq.eq(op_a.s == op_b.s)
        m.d.comb += mge.eq(op_a.m >= op_b.m)
        m.d.comb += am0.eq(Cat(op_a.m, 0))
        m.d.comb += bm0.eq(Cat(op_b.m, 0))

        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            m.d.comb += total.eq(am0 + bm0)
            m.d.comb += res.s.eq(op_a.s)
        # a mantissa greater than (or equal to) b, use a
        with m.Elif(mge):
            m.d.comb += total.eq(am0 - bm0)
            m.d.comb += res.s.eq(op_a.s)
        # b mantissa greater than a, use b
        with m.Else():
            m.d.comb += total.eq(bm0 - am0)
            m.d.comb += res.s.eq(op_b.s)

        return m
816 class FPAddStage0(FPState
, FPID
):
817 """ First stage of add. covers same-sign (add) and subtract
818 special-casing when mantissas are greater or equal, to
819 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state, its module and the output record

        FPAddStage0Mod's constructor takes (width, id_wid) with no
        defaults; the previous call passed width only, which raised
        TypeError as soon as this state was instantiated.
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    self.mod = FPAddStage0Mod(width, id_wid)
    # registered copy of the module output, same layout as mod.o
    self.o = self.mod.ospec()
828 def setup(self
, m
, i
, in_mid
):
829 """ links module to inputs and outputs
832 if self
.in_mid
is not None:
833 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
837 # NOTE: these could be done as combinatorial (merge add0+add1)
838 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
class FPAddStage1Data:
    """ record passed from add stage 1 to normalisation

        z   - result number (created as FPNumBase(width, False))
        of  - Overflow object (m0 / guard / round_bit / sticky bits)
        mid - multiplexer id, id_wid bits
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        field_pairs = ((self.z, i.z), (self.of, i.of), (self.mid, i.mid))
        return [dest.eq(src) for dest, src in field_pairs]
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.

        Splits the stage-0 total into result mantissa plus the overflow
        bits (m0, guard, round, sticky).  The MSB of tot acts as a carry:
        when it is set everything is taken one bit higher up, the two
        lowest bits are merged into sticky and the exponent goes up by one.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record: what add stage 0 produces """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output record: result number, overflow bits and mux id """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        res = self.o.z
        ovf = self.o.of

        # default: result is the incoming z, mantissa/exponent overridden below
        m.d.comb += res.eq(self.i.z)

        # tot[-1] (MSB) gets set when the sum overflows. shift result down
        with m.If(tot[-1]):
            m.d.comb += res.m.eq(tot[4:])
            m.d.comb += ovf.m0.eq(tot[4])
            m.d.comb += ovf.guard.eq(tot[3])
            m.d.comb += ovf.round_bit.eq(tot[2])
            m.d.comb += ovf.sticky.eq(tot[1] | tot[0])
            m.d.comb += res.e.eq(self.i.z.e + 1)
        # tot[-1] (MSB) zero case
        with m.Else():
            m.d.comb += res.m.eq(tot[3:])
            m.d.comb += ovf.m0.eq(tot[3])
            m.d.comb += ovf.guard.eq(tot[2])
            m.d.comb += ovf.round_bit.eq(tot[1])
            m.d.comb += ovf.sticky.eq(tot[0])

        return m
908 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state, its module and output registers

        FPAddStage1Mod's constructor takes (width, id_wid); the previous
        call passed width only and raised TypeError on instantiation.

        NOTE(review): the sync assignments in this class read
        self.mod.out_of and self.mod.out_z, whereas FPAddStage1Mod in this
        file exposes its results as .o.of and .o.z.  Confirm whether this
        class is still in use.
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
918 def setup(self
, m
, i
, in_mid
):
919 """ links module to inputs and outputs
923 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
925 if self
.in_mid
is not None:
926 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
930 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
931 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
932 m
.d
.sync
+= self
.norm_stb
.eq(1)
933 m
.next
= "normalise_1"
936 class FPNormaliseModSingle
:
938 def __init__(self
, width
):
940 self
.in_z
= self
.ispec()
941 self
.out_z
= self
.ospec()
944 return FPNumBase(self
.width
, False)
947 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """ links module to inputs and outputs

        Registers this module as the "normalise" submodule of m and
        drives its input number from i.  The input lives in self.in_z
        (created by __init__); the previous code assigned to self.i,
        an attribute this class never creates, so setup() raised
        AttributeError.

        NOTE(review): elaborate() also reads self.in_of and self.out_of,
        which the visible __init__ does not create - confirm.
    """
    m.submodules.normalise = self
    m.d.comb += self.in_z.eq(i)
955 def elaborate(self
, platform
):
958 mwid
= self
.out_z
.m_width
+2
959 pe
= PriorityEncoder(mwid
)
960 m
.submodules
.norm_pe
= pe
962 m
.submodules
.norm1_out_z
= self
.out_z
963 m
.submodules
.norm1_in_z
= self
.in_z
965 in_z
= FPNumBase(self
.width
, False)
967 m
.submodules
.norm1_insel_z
= in_z
968 m
.submodules
.norm1_insel_overflow
= in_of
970 espec
= (len(in_z
.e
), True)
971 ediff_n126
= Signal(espec
, reset_less
=True)
972 msr
= MultiShiftRMerge(mwid
, espec
)
973 m
.submodules
.multishift_r
= msr
975 m
.d
.comb
+= in_z
.eq(self
.in_z
)
976 m
.d
.comb
+= in_of
.eq(self
.in_of
)
977 # initialise out from in (overridden below)
978 m
.d
.comb
+= self
.out_z
.eq(in_z
)
979 m
.d
.comb
+= self
.out_of
.eq(in_of
)
980 # normalisation decrease condition
981 decrease
= Signal(reset_less
=True)
982 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
985 # *sigh* not entirely obvious: count leading zeros (clz)
986 # with a PriorityEncoder: to find from the MSB
987 # we reverse the order of the bits.
988 temp_m
= Signal(mwid
, reset_less
=True)
989 temp_s
= Signal(mwid
+1, reset_less
=True)
990 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
992 # cat round and guard bits back into the mantissa
993 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
994 pe
.i
.eq(temp_m
[::-1]), # inverted
995 clz
.eq(pe
.o
), # count zeros from MSB down
996 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
997 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
998 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1005 def __init__(self
, width
, id_wid
):
1006 self
.roundz
= Signal(reset_less
=True)
1007 self
.z
= FPNumBase(width
, False)
1008 self
.mid
= Signal(id_wid
, reset_less
=True)
1011 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1014 class FPNorm1ModSingle
:
1016 def __init__(self
, width
, id_wid
):
1018 self
.id_wid
= id_wid
1019 self
.i
= self
.ispec()
1020 self
.o
= self
.ospec()
1023 return FPAddStage1Data(self
.width
, self
.id_wid
)
1026 return FPNorm1Data(self
.width
, self
.id_wid
)
1028 def setup(self
, m
, i
):
1029 """ links module to inputs and outputs
1031 m
.submodules
.normalise_1
= self
1032 m
.d
.comb
+= self
.i
.eq(i
)
1034 def elaborate(self
, platform
):
1037 mwid
= self
.o
.z
.m_width
+2
1038 pe
= PriorityEncoder(mwid
)
1039 m
.submodules
.norm_pe
= pe
1042 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1044 m
.submodules
.norm1_out_z
= self
.o
.z
1045 m
.submodules
.norm1_out_overflow
= of
1046 m
.submodules
.norm1_in_z
= self
.i
.z
1047 m
.submodules
.norm1_in_overflow
= self
.i
.of
1050 m
.submodules
.norm1_insel_z
= i
.z
1051 m
.submodules
.norm1_insel_overflow
= i
.of
1053 espec
= (len(i
.z
.e
), True)
1054 ediff_n126
= Signal(espec
, reset_less
=True)
1055 msr
= MultiShiftRMerge(mwid
, espec
)
1056 m
.submodules
.multishift_r
= msr
1058 m
.d
.comb
+= i
.eq(self
.i
)
1059 # initialise out from in (overridden below)
1060 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1061 m
.d
.comb
+= of
.eq(i
.of
)
1062 # normalisation increase/decrease conditions
1063 decrease
= Signal(reset_less
=True)
1064 increase
= Signal(reset_less
=True)
1065 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1066 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1068 with m
.If(decrease
):
1069 # *sigh* not entirely obvious: count leading zeros (clz)
1070 # with a PriorityEncoder: to find from the MSB
1071 # we reverse the order of the bits.
1072 temp_m
= Signal(mwid
, reset_less
=True)
1073 temp_s
= Signal(mwid
+1, reset_less
=True)
1074 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1075 # make sure that the amount to decrease by does NOT
1076 # go below the minimum non-INF/NaN exponent
1077 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1080 # cat round and guard bits back into the mantissa
1081 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1082 pe
.i
.eq(temp_m
[::-1]), # inverted
1083 clz
.eq(limclz
), # count zeros from MSB down
1084 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1085 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1086 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1087 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1088 # overflow in bits 0..1: got shifted too (leave sticky)
1089 of
.guard
.eq(temp_s
[1]), # guard
1090 of
.round_bit
.eq(temp_s
[0]), # round
1093 with m
.Elif(increase
):
1094 temp_m
= Signal(mwid
+1, reset_less
=True)
1096 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1098 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1099 # connect multi-shifter to inp/out mantissa (and ediff)
1101 msr
.diff
.eq(ediff_n126
),
1102 self
.o
.z
.m
.eq(msr
.m
[3:]),
1103 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1104 # overflow in bits 0..1: got shifted too (leave sticky)
1105 of
.guard
.eq(temp_s
[2]), # guard
1106 of
.round_bit
.eq(temp_s
[1]), # round
1107 of
.sticky
.eq(temp_s
[0]), # sticky
1108 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1114 class FPNorm1ModMulti
:
1116 def __init__(self
, width
, single_cycle
=True):
1118 self
.in_select
= Signal(reset_less
=True)
1119 self
.in_z
= FPNumBase(width
, False)
1120 self
.in_of
= Overflow()
1121 self
.temp_z
= FPNumBase(width
, False)
1122 self
.temp_of
= Overflow()
1123 self
.out_z
= FPNumBase(width
, False)
1124 self
.out_of
= Overflow()
1126 def elaborate(self
, platform
):
1129 m
.submodules
.norm1_out_z
= self
.out_z
1130 m
.submodules
.norm1_out_overflow
= self
.out_of
1131 m
.submodules
.norm1_temp_z
= self
.temp_z
1132 m
.submodules
.norm1_temp_of
= self
.temp_of
1133 m
.submodules
.norm1_in_z
= self
.in_z
1134 m
.submodules
.norm1_in_overflow
= self
.in_of
1136 in_z
= FPNumBase(self
.width
, False)
1138 m
.submodules
.norm1_insel_z
= in_z
1139 m
.submodules
.norm1_insel_overflow
= in_of
1141 # select which of temp or in z/of to use
1142 with m
.If(self
.in_select
):
1143 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1144 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1146 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1147 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1148 # initialise out from in (overridden below)
1149 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1150 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1151 # normalisation increase/decrease conditions
1152 decrease
= Signal(reset_less
=True)
1153 increase
= Signal(reset_less
=True)
1154 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1155 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1156 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1158 with m
.If(decrease
):
1160 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1161 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1162 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1163 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1164 self
.out_of
.round_bit
.eq(0), # reset round bit
1165 self
.out_of
.m0
.eq(in_of
.guard
),
1168 with m
.Elif(increase
):
1170 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1171 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1172 self
.out_of
.guard
.eq(in_z
.m
[0]),
1173 self
.out_of
.m0
.eq(in_z
.m
[1]),
1174 self
.out_of
.round_bit
.eq(in_of
.guard
),
1175 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1181 class FPNorm1Single(FPState
, FPID
):
def __init__(self, width, id_wid, single_cycle=True):
    """ creates the "normalise_1" state, its module and output registers

        FPNorm1ModSingle's constructor takes (width, id_wid); the previous
        call passed width only and raised TypeError on instantiation.
        single_cycle is accepted for interface compatibility and is not
        used here.

        NOTE(review): setup() in this class calls
        self.mod.setup(m, i, self.out_z) and action() reads
        self.mod.out_of.roundz, whereas FPNorm1ModSingle in this file
        offers setup(m, i) and exposes roundz as .o.roundz.  Confirm
        whether this class is still in use.
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModSingle(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1190 def setup(self
, m
, i
, in_mid
):
1191 """ links module to inputs and outputs
1193 self
.mod
.setup(m
, i
, self
.out_z
)
1195 if self
.in_mid
is not None:
1196 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1198 def action(self
, m
):
1200 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1204 class FPNorm1Multi(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the multi-cycle normalisation module and its signals

        * width: passed to FPNorm1ModMulti and FPNumBase
        * id_wid: passed to FPID
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # handshake signals
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # temporaries written by action() and handed back to the module
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # results
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
def setup(self, m, in_z, in_of, norm_stb, in_mid):
    """ links module to inputs and outputs
    """
    module_links = (in_z, in_of, norm_stb,
                    self.in_accept, self.temp_z, self.temp_of,
                    self.out_z, self.out_norm)
    self.mod.setup(m, *module_links)

    m.d.comb += self.stb.eq(norm_stb)
    # sets to zero when not in normalise_1 state
    m.d.sync += self.ack.eq(0)

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
1232 def action(self
, m
):
1234 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1235 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1236 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1237 with m
.If(self
.out_norm
):
1238 with m
.If(self
.in_accept
):
1243 m
.d
.sync
+= self
.ack
.eq(0)
1245 # normalisation not required (or done).
1247 m
.d
.sync
+= self
.ack
.eq(1)
1248 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" chaining normalise, round, corrections
        and pack combinatorially, then registering the packed value
    """

    def __init__(self, width, id_wid):
        """ stores the width needed later by setup()

            * width: passed to every module created in setup()
            * id_wid: passed to FPID
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # setup() reads self.width, so it has to be kept on the instance
        self.width = width

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        # NOTE(review): mirrors the setup(m, <input>) calls of the three
        # stages below; FPNorm1ModSingle.setup is defined outside this
        # section, so confirm its signature.
        nmod.setup(m, i)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        rmod.setup(m, n_out)
        r_out_z = rmod.ospec()
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        cmod.setup(m, r_out_z)
        c_out_z = cmod.ospec()
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.pmod.setup(m, c_out_z)
        self.out_z = self.pmod.ospec()

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed result and moves on to "pack_put_z"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v)  # outputs packed result
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ creates the data fields: a number "z" and an id "mid"

        * width: passed to FPNumBase
        * id_wid: bit-width of the mid Signal
    """
    self.mid = Signal(id_wid, reset_less=True)
    self.z = FPNumBase(width, False)
1302 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ creates the input and output data for the module

        * width: handed to the input/output data constructors
        * id_wid: handed to the input/output data constructors
    """
    # the spec constructors called below read self.width and
    # self.id_wid, so both have to be stored first
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1314 return FPNorm1Data(self
.width
, self
.id_wid
)
1317 return FPRoundData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this module in m as submodule "roundz" and drives
        self.i combinatorially from i
    """
    drive_input = self.i.eq(i)
    m.submodules.roundz = self
    m.d.comb += drive_input
def elaborate(self, platform):
    """ creates the rounding logic

        out_z defaults to a copy of the input.  When the input's roundz
        flag is set the mantissa is incremented; if the mantissa was
        equal to m1s the exponent is incremented as well.

        * platform: not used by this method

        returns the Module holding the logic
    """
    m = Module()
    i, o = self.i, self.out_z

    m.d.comb += o.eq(i)
    with m.If(i.roundz):
        m.d.comb += o.z.m.eq(i.z.m + 1)  # mantissa rounds up
        with m.If(i.z.m == i.z.m1s):  # all 1s
            m.d.comb += o.z.e.eq(i.z.e + 1)  # exponent up

    return m
class FPRound(FPState, FPID):
    """ FSM state "round", wrapping an FPRoundMod
    """

    def __init__(self, width, id_wid):
        """ creates the rounding module and the state's output

            * width: passed to FPRoundMod
            * id_wid: passed to FPID and FPRoundMod
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the rounded value and moves on to "corrections"
        """
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ combinatorial stage replacing the exponent of a denormalised
        number with the number's N127 value
    """

    def __init__(self, width, id_wid):
        """ creates the input and output data for the module

            * width: handed to FPRoundData
            * id_wid: handed to FPRoundData
        """
        # ispec()/ospec() read both attributes, so store them first
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data object
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the corrections logic

            * platform: not used by this method

            returns the Module holding the logic
        """
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        # default: pass the input through unchanged
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections", wrapping an FPCorrectionsMod
    """

    def __init__(self, width, id_wid):
        """ creates the corrections module and the state's output

            * width: passed to FPCorrectionsMod
            * id_wid: passed to FPID and FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data object, as defined by the module
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output data object, as defined by the module
        """
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the corrected value
        """
        # NOTE(review): no m.next transition is visible for this state;
        # confirm against the complete file.
        m.d.sync += self.out_z.eq(self.mod.out_z)
def __init__(self, width, id_wid):
    """ creates the data fields: an output number "z" and an id "mid"

        * width: passed to FPNumOut
        * id_wid: bit-width of the mid Signal
    """
    self.mid = Signal(id_wid, reset_less=True)
    self.z = FPNumOut(width, False)
1419 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ creates the input and output data for the module

        * width: handed to the input/output data constructors
        * id_wid: handed to the input/output data constructors
    """
    # the spec constructors called below read self.width and
    # self.id_wid, so both have to be stored first
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1431 return FPRoundData(self
.width
, self
.id_wid
)
1434 return FPPackData(self
.width
, self
.id_wid
)
def setup(self, m, in_z):
    """ links module to inputs and outputs

        registers this module in m as submodule "pack" and drives
        self.i combinatorially from in_z
    """
    drive_input = self.i.eq(in_z)
    m.submodules.pack = self
    m.d.comb += drive_input
def elaborate(self, platform):
    """ creates the packing logic

        An overflowed input produces inf(sign); every other input is
        packed from its sign, exponent and mantissa with create().

        * platform: not used by this method

        returns the Module holding the logic
    """
    m = Module()
    z = self.i.z
    m.submodules.pack_in_z = z
    with m.If(z.is_overflowed):
        m.d.comb += self.o.z.inf(z.s)
    with m.Else():
        # kept in the else-arm: an unconditional create() placed after
        # the If would take priority over inf()
        m.d.comb += self.o.z.create(z.s, z.e, z.m)
    return m
def __init__(self, width, id_wid):
    """ creates the data fields: an output number "z" and an id "mid"

        * width: passed to FPNumOut
        * id_wid: bit-width of the mid Signal
    """
    self.mid = Signal(id_wid, reset_less=True)
    self.z = FPNumOut(width, False)
1458 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState, FPID):
    """ FSM state "pack", wrapping an FPPackMod
    """

    def __init__(self, width, id_wid):
        """ creates the pack module and the state's output

            * width: passed to FPPackMod
            * id_wid: passed to FPID and FPPackMod
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data object, as defined by the module
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output data object, as defined by the module
        """
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed value and moves on to "pack_put_z"
        """
        # FPPackMod exposes its result as "o"; the value sits in o.z.v,
        # and out_z (created by ospec) has the same layout
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z with a stb/ack handshake
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ records the connections used by action()

            * state: name of this FSM state
            * in_z: source; action() reads in_z.v
            * out_z: destination; action() uses out_z.z.v/.stb/.ack
            * in_mid: incoming id, or None to skip the id copy
            * out_mid: outgoing id
            * to_state: state entered after the handshake
                        (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ copies id and value, then raises stb until it is acknowledged
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): sync chosen to match the other assignments in
        # this state; confirm the intended domain.
        m.d.sync += self.out_z.z.v.eq(self.in_z.v)

        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # must be the else-arm: an unconditional assignment here
            # would take priority over the clear above
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on out_zs[in_mid] with a stb/ack
        handshake
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ records the connections used by action()

            * state: name of this FSM state
            * in_z: source; action() reads in_z.v
            * out_zs: indexable collection of outputs with v/stb/ack
            * in_mid: index into out_zs
            * to_state: state entered after the handshake
                        (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ copies the value to the selected output, then raises its stb
            until it is acknowledged
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # NOTE(review): sync chosen to match the other assignments in
        # this state; confirm the intended domain.
        m.d.sync += self.out_zs[self.in_mid].v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # must be the else-arm: an unconditional assignment here
            # would take priority over the clear above
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ input data of the adder: operands a and b plus an id
    """

    def __init__(self, width, id_wid):
        """ creates the operand and id signals

            * width: bit-width of the a and b Signals
            * id_wid: bit-width of the mid Signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying a, b and mid from i
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ creates the data fields: an FPOp "z" and an id "mid"

        * width: passed to FPOp
        * id_wid: bit-width of the mid Signal
    """
    self.mid = Signal(id_wid, reset_less=True)
    self.z = FPOp(width)
1558 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1561 class FPADDBaseMod(FPID
):
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    FPID.__init__(self, id_wid)
    # ispec()/ospec() and the fragment builders read self.width
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    # filled by add_state(), walked when the FSM is built
    self.states = []
1584 return FPADDBaseData(self
.width
, self
.id_wid
)
1587 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ appends state to the list of FSM states

        returns the same state object, so that callers can write
        "x = self.add_state(X(...))" and then call x.setup(...)
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        * platform: forwarded to the fragment builders

        returns the Module containing the stages and their FSM
    """
    m = Module()
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t

    # build exactly one pipeline: both builders register states named
    # "get_ops", "pack_put_z" and "put_z"
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:
        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
1612 def get_longer_fragment(self
, m
, platform
=None):
1614 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1616 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1620 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1621 sc
.setup(m
, a
, b
, self
.in_mid
)
1623 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1624 dn
.setup(m
, a
, b
, sc
.in_mid
)
1626 if self
.single_cycle
:
1627 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1628 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1630 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1631 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1633 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1634 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1636 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1637 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1639 if self
.single_cycle
:
1640 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1641 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1643 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1644 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1646 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1647 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1649 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1650 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1652 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1653 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1655 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1656 pa
.in_mid
, self
.out_mid
))
1658 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1659 pa
.in_mid
, self
.out_mid
))
def get_compact_fragment(self, m, platform=None):
    """ builds the reduced-stage pipeline and registers its states

        order: get_ops -> special cases/denorm -> align+add ->
        normalise-to-pack, followed by two FPPutZ output states

        * m: Module the stages are set up in
        * platform: not used by this method
    """
    dims = (self.width, self.id_wid)

    fetch = FPGet2Op("get_ops", "special_cases", *dims)
    fetch = self.add_state(fetch)
    fetch.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    specials = self.add_state(FPAddSpecialCasesDeNorm(*dims))
    specials.setup(m, fetch.o, self.in_mid)

    align_add = self.add_state(FPAddAlignSingleAdd(*dims))
    align_add.setup(m, specials.o, specials.in_mid)

    norm_pack = self.add_state(FPNormToPack(*dims))
    norm_pack.setup(m, align_add.a1o, align_add.in_mid)

    # result of the normal path
    self.add_state(FPPutZ("pack_put_z", norm_pack.out_z.z, self.o,
                          norm_pack.in_mid, self.out_mid))

    # result taken directly from the special-cases stage
    self.add_state(FPPutZ("put_z", specials.out_z.z, self.o,
                          specials.in_mid, self.out_mid))
1683 class FPADDBase(FPState
, FPID
):
def __init__(self, width, id_wid=None, single_cycle=False):
    """ IEEE754 FP Add, as an FSM state named "fpadd"

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "fpadd")

    self.single_cycle = single_cycle
    # the wrapped adder; ispec()/ospec() delegate to it
    self.mod = FPADDBaseMod(width, id_wid, single_cycle)
    self.o = self.ospec()

    self.in_t = Trigger()
    self.i = self.ispec()

    # handshake bookkeeping
    self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
    self.in_accept = Signal(reset_less=True)
    self.add_stb = Signal(reset_less=True)
    self.add_ack = Signal(reset=0, reset_less=True)
1708 return self
.mod
.ispec()
1711 return self
.mod
.ospec()
def setup(self, m, i, add_stb, in_mid):
    """ links the wrapped adder to this state's inputs and outputs

        * m: Module the connections are added to
        * i: input data, copied into self.i and on into the adder
        * add_stb: incoming strobe, registered into self.add_stb
        * in_mid: incoming id
    """
    mod = self.mod

    # inputs: this state -> wrapped adder
    m.d.comb += [
        self.i.eq(i),
        mod.i.eq(self.i),
        self.in_mid.eq(in_mid),
        mod.in_mid.eq(self.in_mid),
    ]
    m.d.comb += self.z_done.eq(mod.o.z.trigger)

    # input trigger: stb goes in, ack comes back
    m.d.comb += [
        mod.in_t.stb.eq(self.in_t.stb),
        self.in_t.ack.eq(mod.in_t.ack),
    ]

    # outputs: wrapped adder -> this state (ack in the other direction)
    m.d.comb += [
        self.o.mid.eq(mod.o.mid),
        self.o.z.v.eq(mod.o.z.v),
        self.o.z.stb.eq(mod.o.z.stb),
        mod.o.z.ack.eq(self.o.z.ack),
    ]

    m.d.sync += [
        self.add_stb.eq(add_stb),
        self.add_ack.eq(0),  # sets to zero when not in active state
        self.o.z.ack.eq(0),  # likewise
    ]

    m.submodules.fpadd = mod
1735 def action(self
, m
):
1737 # in_accept is set on incoming strobe HIGH and ack LOW.
1738 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1740 #with m.If(self.in_t.ack):
1741 # m.d.sync += self.in_t.stb.eq(0)
1742 with m
.If(~self
.z_done
):
1743 # not done: test for accepting an incoming operand pair
1744 with m
.If(self
.in_accept
):
1746 self
.add_ack
.eq(1), # acknowledge receipt...
1747 self
.in_t
.stb
.eq(1), # initiate add
1750 m
.d
.sync
+= [self
.add_ack
.eq(0),
1751 self
.in_t
.stb
.eq(0),
1755 # done: acknowledge, and write out id and value
1756 m
.d
.sync
+= [self
.add_ack
.eq(1),
1763 if self
.in_mid
is not None:
1764 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1767 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1769 # move to output state on detecting z ack
1770 with m
.If(self
.out_z
.trigger
):
1771 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1774 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1777 def __init__(self
, width
, id_wid
):
1779 self
.id_wid
= id_wid
1781 for i
in range(rs_sz
):
1783 out_z
.name
= "out_z_%d" % i
1785 self
.res
= Array(res
)
1786 self
.in_z
= FPOp(width
)
1787 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
def setup(self, m, in_z, in_mid):
    """ drives self.in_z and self.in_mid combinatorially from the
        given in_z and in_mid
    """
    m.d.comb += self.in_z.eq(in_z)
    m.d.comb += self.in_mid.eq(in_mid)
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        * platform: not used by this method

        returns the Module with in_z and every entry of res registered
        as submodules
    """
    m = Module()
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res
    return m
1810 """ FPADD: stages as follows:
1816 FPAddBase---> FPAddBaseMod
1818 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1820 FPAddBase is tricky: it is both a stage and *has* stages.
1821 Connection to FPAddBaseMod therefore requires an in stb/ack
1822 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1823 needs to be the thing that raises the incoming stb.
1826 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1829 * width: bit-width of IEEE754. supported: 16, 32, 64
1830 * id_wid: an identifier that is sync-connected to the input
1831 * single_cycle: True indicates each stage to complete in 1 clock
1834 self
.id_wid
= id_wid
1835 self
.single_cycle
= single_cycle
1837 #self.out_z = FPOp(width)
1838 self
.ids
= FPID(id_wid
)
1841 for i
in range(rs_sz
):
1844 in_a
.name
= "in_a_%d" % i
1845 in_b
.name
= "in_b_%d" % i
1846 rs
.append((in_a
, in_b
))
1850 for i
in range(rs_sz
):
1852 out_z
.name
= "out_z_%d" % i
1854 self
.res
= Array(res
)
def add_state(self, state):
    """ appends state to the list of FSM states

        returns the same state object, so that callers can write
        "x = self.add_state(x)" and keep using x
    """
    self.states.append(state)
    return state
1862 def get_fragment(self
, platform
=None):
1863 """ creates the HDL code-fragment for FPAdd
1866 m
.submodules
+= self
.rs
1868 in_a
= self
.rs
[0][0]
1869 in_b
= self
.rs
[0][1]
1871 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1876 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1881 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1882 ab
= self
.add_state(ab
)
1883 abd
= ab
.ispec() # create an input spec object for FPADDBase
1884 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1885 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1888 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1891 with m
.FSM() as fsm
:
1893 for state
in self
.states
:
1894 with m
.State(state
.state_from
):
1900 if __name__
== "__main__":
1902 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1903 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1904 alu
.rs
[0][1].ports() + \
1905 alu
.res
[0].ports() + \
1906 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1908 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1909 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1910 alu
.in_t
.ports() + \
1911 alu
.out_z
.ports() + \
1912 [alu
.in_mid
, alu
.out_mid
])
1915 # works... but don't use, just do "python fname.py convert -t v"
1916 #print (verilog.convert(alu, ports=[
1917 # ports=alu.in_a.ports() + \
1918 # alu.in_b.ports() + \
1919 # alu.out_z.ports())