1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
17 def __init__(self
, state_from
):
18 self
.state_from
= state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
140 def __init__(self
, width
):
141 self
.in_op
= FPOp(width
)
142 self
.out_op
= Signal(width
)
143 self
.out_decode
= Signal(reset_less
=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op):
    """ links module to inputs and outputs

        m:     module being built; self.mod is registered on it as a
               submodule under the name held in self.state_from
        in_op: operand wired (combinatorially) into self.mod.in_op
    """
    getter = self.mod
    # submodule name is dynamic (the state name), hence setattr
    setattr(m.submodules, self.state_from, getter)
    m.d.comb += [
        getter.in_op.eq(in_op),
        # mirror the module's decode flag so action() can test it
        self.out_decode.eq(getter.out_decode),
    ]
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ state that fetches two operands via an FPGet2OpMod

        in_state:  name of this state (handed to FPState.__init__)
        out_state: name of the state to move to afterwards
        width:     passed to FPGet2OpMod
        id_wid:    passed to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    get_ops = FPGet2OpMod(width, id_wid)
    self.mod = get_ops
    # registered copy of the module output: same spec as the module's
    self.o = get_ops.ospec()
    self.out_state = out_state
    # single-bit flags, all created without reset
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
def setup(self, m, i, in_stb, in_ack):
    """ links module to inputs and outputs

        m:      module being built; self.mod becomes submodule "get_ops"
        i:      input data, wired into self.mod.i
        in_stb: incoming strobe, wired into self.mod.stb
        in_ack: driven from self.mod.ack
    """
    get_ops = self.mod
    setattr(m.submodules, "get_ops", get_ops)
    # everything here is combinatorial; statement order is preserved
    m.d.comb += [
        get_ops.i.eq(i),
        get_ops.stb.eq(in_stb),
        self.out_ack.eq(get_ops.ack),
        self.out_decode.eq(get_ops.trigger),
        in_ack.eq(get_ops.ack),
    ]
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
254 def __init__(self
, width
, id_wid
, m_extra
=True):
255 self
.a
= FPNumBase(width
, m_extra
)
256 self
.b
= FPNumBase(width
, m_extra
)
257 self
.mid
= Signal(id_wid
, reset_less
=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
265 def __init__(self
, width
, id_wid
):
266 self
.out_do_z
= Signal(reset_less
=True)
267 self
.a
= FPNumBase(width
, True)
268 self
.b
= FPNumBase(width
, True)
269 self
.z
= FPNumOut(width
, False)
270 self
.mid
= Signal(id_wid
, reset_less
=True)
273 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
274 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
276 class FPAddSpecialCasesMod
:
277 """ special cases: NaNs, infs, zeros, denormalised
278 NOTE: some of these are unique to add. see "Special Operations"
279 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
282 def __init__(self
, width
, id_wid
):
285 self
.i
= self
.ispec()
286 self
.o
= self
.ospec()
289 return FPNumBase2Ops(self
.width
, self
.id_wid
)
292 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this module on m as submodule "specialcases" and
        wires i (combinatorially) into self.i
    """
    setattr(m.submodules, "specialcases", self)
    link_input = self.i.eq(i)
    m.d.comb += link_input
300 def elaborate(self
, platform
):
303 m
.submodules
.sc_in_a
= self
.i
.a
304 m
.submodules
.sc_in_b
= self
.i
.b
305 m
.submodules
.sc_out_z
= self
.o
.z
308 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
311 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
313 # if a is NaN or b is NaN return NaN
314 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
315 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
316 m
.d
.comb
+= self
.o
.z
.nan(0)
318 # XXX WEIRDNESS for FP16 non-canonical NaN handling
321 ## if a is zero and b is NaN return -b
322 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
323 # m.d.comb += self.o.out_do_z.eq(1)
324 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
326 ## if b is zero and a is NaN return -a
327 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
328 # m.d.comb += self.o.out_do_z.eq(1)
329 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
331 ## if a is -zero and b is NaN return -b
332 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
333 # m.d.comb += self.o.out_do_z.eq(1)
334 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
336 ## if b is -zero and a is NaN return -a
337 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
338 # m.d.comb += self.o.out_do_z.eq(1)
339 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
341 # if a is inf return inf (or NaN)
342 with m
.Elif(self
.i
.a
.is_inf
):
343 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
344 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
345 # if a is inf and signs don't match return NaN
346 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
347 m
.d
.comb
+= self
.o
.z
.nan(0)
349 # if b is inf return inf
350 with m
.Elif(self
.i
.b
.is_inf
):
351 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
352 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
354 # if a is zero and b zero return signed-a/b
355 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
356 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
361 # if a is zero return b
362 with m
.Elif(self
.i
.a
.is_zero
):
363 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
364 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
367 # if b is zero return a
368 with m
.Elif(self
.i
.b
.is_zero
):
369 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
370 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
373 # if a equal to -b return zero (+ve zero)
374 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
375 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
376 m
.d
.comb
+= self
.o
.z
.zero(0)
378 # Denormalised Number checks
380 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
382 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
388 def __init__(self
, id_wid
):
391 self
.in_mid
= Signal(id_wid
, reset_less
=True)
392 self
.out_mid
= Signal(id_wid
, reset_less
=True)
398 if self
.id_wid
is not None:
399 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
402 class FPAddSpecialCases(FPState
):
403 """ special cases: NaNs, infs, zeros, denormalised
404 NOTE: some of these are unique to add. see "Special Operations"
405 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state and its combinatorial module

        width:  passed to FPAddSpecialCasesMod
        id_wid: passed to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid) and neither
    # has a default: id_wid must be forwarded, as
    # FPAddSpecialCasesDeNorm already does.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: module being built
        i: input data, handed to FPAddSpecialCasesMod.setup
    """
    # FPAddSpecialCasesMod.setup only accepts (m, i); out_do_z is
    # therefore picked up from the module's output record instead of
    # being passed in (same wiring as FPAddSpecialCasesDeNorm.setup).
    self.mod.setup(m, i)
    m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
    # the module exposes its result as .o and the number lives in .z
    m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
    m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
423 with m
.If(self
.out_do_z
):
426 m
.next
= "denormalise"
429 class FPAddSpecialCasesDeNorm(FPState
):
430 """ special cases: NaNs, infs, zeros, denormalised
431 NOTE: some of these are unique to add. see "Special Operations"
432 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ combined special-cases and denormalise state

        width:  passed to both FPAddSpecialCasesMod and FPAddDeNormMod
        id_wid: passed to both FPAddSpecialCasesMod and FPAddDeNormMod
    """
    FPState.__init__(self, "special_cases")

    # special-cases half: module, registered output, "result ready" flag
    special = FPAddSpecialCasesMod(width, id_wid)
    self.smod = special
    self.out_z = special.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise half: module and registered output
    denorm = FPAddDeNormMod(width, id_wid)
    self.dmod = denorm
    self.o = denorm.ospec()
444 def setup(self
, m
, i
):
445 """ links module to inputs and outputs
447 self
.smod
.setup(m
, i
)
448 self
.dmod
.setup(m
, i
)
449 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
452 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
453 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
455 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
458 with m
.If(self
.out_do_z
):
464 class FPAddDeNormMod(FPState
):
466 def __init__(self
, width
, id_wid
):
469 self
.i
= self
.ispec()
470 self
.o
= self
.ospec()
473 return FPNumBase2Ops(self
.width
, self
.id_wid
)
476 return FPNumBase2Ops(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this module on m as submodule "denormalise" and
        wires i (combinatorially) into self.i
    """
    setattr(m.submodules, "denormalise", self)
    link_input = self.i.eq(i)
    m.d.comb += link_input
484 def elaborate(self
, platform
):
486 m
.submodules
.denorm_in_a
= self
.i
.a
487 m
.submodules
.denorm_in_b
= self
.i
.b
488 m
.submodules
.denorm_out_a
= self
.o
.a
489 m
.submodules
.denorm_out_b
= self
.o
.b
490 # hmmm, don't like repeating identical code
491 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
492 with m
.If(self
.i
.a
.exp_n127
):
493 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
495 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
497 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
498 with m
.If(self
.i
.b
.exp_n127
):
499 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
501 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
503 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
508 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ creates the "denormalise" state and its combinatorial module

        width:  passed to FPAddDeNormMod and to the two FPNumBase outputs
        id_wid: passed to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ requires (width, id_wid); calling it with
    # width alone raises TypeError.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
516 def setup(self
, m
, i
):
517 """ links module to inputs and outputs
521 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
522 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
525 # Denormalised Number checks
529 class FPAddAlignMultiMod(FPState
):
531 def __init__(self
, width
):
532 self
.in_a
= FPNumBase(width
)
533 self
.in_b
= FPNumBase(width
)
534 self
.out_a
= FPNumIn(None, width
)
535 self
.out_b
= FPNumIn(None, width
)
536 self
.exp_eq
= Signal(reset_less
=True)
538 def elaborate(self
, platform
):
539 # This one however (single-cycle) will do the shift
544 m
.submodules
.align_in_a
= self
.in_a
545 m
.submodules
.align_in_b
= self
.in_b
546 m
.submodules
.align_out_a
= self
.out_a
547 m
.submodules
.align_out_b
= self
.out_b
549 # NOTE: this does *not* do single-cycle multi-shifting,
550 # it *STAYS* in the align state until exponents match
552 # exponent of a greater than b: shift b down
553 m
.d
.comb
+= self
.exp_eq
.eq(0)
554 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
555 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
556 agtb
= Signal(reset_less
=True)
557 altb
= Signal(reset_less
=True)
558 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
559 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
561 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
562 # exponent of b greater than a: shift a down
564 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
565 # exponents equal: move to next stage.
567 m
.d
.comb
+= self
.exp_eq
.eq(1)
571 class FPAddAlignMulti(FPState
):
573 def __init__(self
, width
, id_wid
):
574 FPState
.__init
__(self
, "align")
575 self
.mod
= FPAddAlignMultiMod(width
)
576 self
.out_a
= FPNumIn(None, width
)
577 self
.out_b
= FPNumIn(None, width
)
578 self
.exp_eq
= Signal(reset_less
=True)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        m:          module being built; self.mod becomes submodule "align"
        in_a, in_b: operands wired into the alignment module
    """
    aligner = self.mod
    setattr(m.submodules, "align", aligner)
    m.d.comb += [
        aligner.in_a.eq(in_a),
        aligner.in_b.eq(in_b),
        self.exp_eq.eq(aligner.exp_eq),
    ]
    # out_a/out_b are registered (sync), not combinatorial
    m.d.sync += [
        self.out_a.eq(aligner.out_a),
        self.out_b.eq(aligner.out_b),
    ]
593 with m
.If(self
.exp_eq
):
599 def __init__(self
, width
, id_wid
):
600 self
.a
= FPNumIn(None, width
)
601 self
.b
= FPNumIn(None, width
)
602 self
.mid
= Signal(id_wid
, reset_less
=True)
605 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
608 class FPAddAlignSingleMod
:
610 def __init__(self
, width
, id_wid
):
613 self
.i
= self
.ispec()
614 self
.o
= self
.ospec()
617 return FPNumBase2Ops(self
.width
, self
.id_wid
)
620 return FPNumIn2Ops(self
.width
, self
.id_wid
)
622 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this module on m as submodule "align" and wires i
        (combinatorially) into self.i
    """
    setattr(m.submodules, "align", self)
    link_input = self.i.eq(i)
    m.d.comb += link_input
631 def elaborate(self
, platform
):
632 """ Aligns A against B or B against A, depending on which has the
633 greater exponent. This is done in a *single* cycle using
634 variable-width bit-shift
636 the shifter used here is quite expensive in terms of gates.
637 Mux A or B in (and out) into temporaries, as only one of them
638 needs to be aligned against the other
642 m
.submodules
.align_in_a
= self
.i
.a
643 m
.submodules
.align_in_b
= self
.i
.b
644 m
.submodules
.align_out_a
= self
.o
.a
645 m
.submodules
.align_out_b
= self
.o
.b
647 # temporary (muxed) input and output to be shifted
648 t_inp
= FPNumBase(self
.width
)
649 t_out
= FPNumIn(None, self
.width
)
650 espec
= (len(self
.i
.a
.e
), True)
651 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
652 m
.submodules
.align_t_in
= t_inp
653 m
.submodules
.align_t_out
= t_out
654 m
.submodules
.multishift_r
= msr
656 ediff
= Signal(espec
, reset_less
=True)
657 ediffr
= Signal(espec
, reset_less
=True)
658 tdiff
= Signal(espec
, reset_less
=True)
659 elz
= Signal(reset_less
=True)
660 egz
= Signal(reset_less
=True)
662 # connect multi-shifter to t_inp/out mantissa (and tdiff)
663 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
664 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
665 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
666 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
667 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
669 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
670 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
671 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
672 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
674 # default: A-exp == B-exp, A and B untouched (fall through)
675 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
676 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
677 # only one shifter (muxed)
678 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
679 # exponent of a greater than b: shift b down
681 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
684 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
686 # exponent of b greater than a: shift a down
688 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
691 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
694 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
699 class FPAddAlignSingle(FPState
):
701 def __init__(self
, width
, id_wid
):
702 FPState
.__init
__(self
, "align")
703 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
704 self
.out_a
= FPNumIn(None, width
)
705 self
.out_b
= FPNumIn(None, width
)
707 def setup(self
, m
, i
):
708 """ links module to inputs and outputs
712 # NOTE: could be done as comb
713 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
714 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
720 class FPAddAlignSingleAdd(FPState
):
722 def __init__(self
, width
, id_wid
):
723 FPState
.__init
__(self
, "align")
726 self
.a1o
= self
.ospec()
729 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
732 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
734 def setup(self
, m
, i
):
735 """ links module to inputs and outputs
738 # chain AddAlignSingle, AddStage0 and AddStage1
739 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
740 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
741 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
743 chain
= StageChain([mod
, a0mod
, a1mod
])
746 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
749 m
.next
= "normalise_1"
752 class FPAddStage0Data
:
754 def __init__(self
, width
, id_wid
):
755 self
.z
= FPNumBase(width
, False)
756 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
757 self
.mid
= Signal(id_wid
, reset_less
=True)
760 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
763 class FPAddStage0Mod
:
765 def __init__(self
, width
, id_wid
):
768 self
.i
= self
.ispec()
769 self
.o
= self
.ospec()
772 return FPNumBase2Ops(self
.width
, self
.id_wid
)
775 return FPAddStage0Data(self
.width
, self
.id_wid
)
777 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this module on m as submodule "add0" and wires i
        (combinatorially) into self.i
    """
    setattr(m.submodules, "add0", self)
    link_input = self.i.eq(i)
    m.d.comb += link_input
786 def elaborate(self
, platform
):
788 m
.submodules
.add0_in_a
= self
.i
.a
789 m
.submodules
.add0_in_b
= self
.i
.b
790 m
.submodules
.add0_out_z
= self
.o
.z
792 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
793 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
795 # store intermediate tests (and zero-extended mantissas)
796 seq
= Signal(reset_less
=True)
797 mge
= Signal(reset_less
=True)
798 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
799 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
800 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
801 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
802 am0
.eq(Cat(self
.i
.a
.m
, 0)),
803 bm0
.eq(Cat(self
.i
.b
.m
, 0))
805 # same-sign (both negative or both positive) add mantissas
808 self
.o
.tot
.eq(am0
+ bm0
),
809 self
.o
.z
.s
.eq(self
.i
.a
.s
)
811 # a mantissa greater than b, use a
814 self
.o
.tot
.eq(am0
- bm0
),
815 self
.o
.z
.s
.eq(self
.i
.a
.s
)
817 # b mantissa greater than a, use b
820 self
.o
.tot
.eq(bm0
- am0
),
821 self
.o
.z
.s
.eq(self
.i
.b
.s
)
826 class FPAddStage0(FPState
):
827 """ First stage of add. covers same-sign (add) and subtract
828 special-casing when mantissas are greater or equal, to
829 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state and its combinatorial module

        width:  passed to FPAddStage0Mod
        id_wid: passed to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ requires (width, id_wid); calling it with
    # width alone raises TypeError.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
837 def setup(self
, m
, i
):
838 """ links module to inputs and outputs
842 # NOTE: these could be done as combinatorial (merge add0+add1)
843 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
849 class FPAddStage1Data
:
851 def __init__(self
, width
, id_wid
):
852 self
.z
= FPNumBase(width
, False)
854 self
.mid
= Signal(id_wid
, reset_less
=True)
857 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
861 class FPAddStage1Mod(FPState
):
862 """ Second stage of add: preparation for normalisation.
863 detects when tot sum is too big (tot[27] is kinda a carry bit)
866 def __init__(self
, width
, id_wid
):
869 self
.i
= self
.ispec()
870 self
.o
= self
.ospec()
873 return FPAddStage0Data(self
.width
, self
.id_wid
)
876 return FPAddStage1Data(self
.width
, self
.id_wid
)
878 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this module ("add1") and its output overflow object
        ("add1_out_overflow") as submodules of m, then wires i
        (combinatorially) into self.i
    """
    subs = m.submodules
    setattr(subs, "add1", self)
    setattr(subs, "add1_out_overflow", self.o.of)

    link_input = self.i.eq(i)
    m.d.comb += link_input
889 def elaborate(self
, platform
):
891 #m.submodules.norm1_in_overflow = self.in_of
892 #m.submodules.norm1_out_overflow = self.out_of
893 #m.submodules.norm1_in_z = self.in_z
894 #m.submodules.norm1_out_z = self.out_z
895 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
896 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
897 # tot[-1] (MSB) gets set when the sum overflows. shift result down
898 with m
.If(self
.i
.tot
[-1]):
900 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
901 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
902 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
903 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
904 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
905 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
907 # tot[-1] (MSB) zero case
910 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
911 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
912 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
913 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
914 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
919 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state and its combinatorial module

        width:  passed to FPAddStage1Mod and to the FPNumBase output
        id_wid: passed to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ requires (width, id_wid); calling it with
    # width alone raises TypeError.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
928 def setup(self
, m
, i
):
929 """ links module to inputs and outputs
933 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
935 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
936 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
937 m
.d
.sync
+= self
.norm_stb
.eq(1)
940 m
.next
= "normalise_1"
943 class FPNormaliseModSingle
:
945 def __init__(self
, width
):
947 self
.in_z
= self
.ispec()
948 self
.out_z
= self
.ospec()
951 return FPNumBase(self
.width
, False)
954 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this module on m as submodule "normalise" and wires i
        (combinatorially) into the module input
    """
    m.submodules.normalise = self
    # this class stores its input as self.in_z (see __init__ and
    # elaborate); it has no self.i attribute, so connecting to self.i
    # raised AttributeError.
    m.d.comb += self.in_z.eq(i)
962 def elaborate(self
, platform
):
965 mwid
= self
.out_z
.m_width
+2
966 pe
= PriorityEncoder(mwid
)
967 m
.submodules
.norm_pe
= pe
969 m
.submodules
.norm1_out_z
= self
.out_z
970 m
.submodules
.norm1_in_z
= self
.in_z
972 in_z
= FPNumBase(self
.width
, False)
974 m
.submodules
.norm1_insel_z
= in_z
975 m
.submodules
.norm1_insel_overflow
= in_of
977 espec
= (len(in_z
.e
), True)
978 ediff_n126
= Signal(espec
, reset_less
=True)
979 msr
= MultiShiftRMerge(mwid
, espec
)
980 m
.submodules
.multishift_r
= msr
982 m
.d
.comb
+= in_z
.eq(self
.in_z
)
983 m
.d
.comb
+= in_of
.eq(self
.in_of
)
984 # initialise out from in (overridden below)
985 m
.d
.comb
+= self
.out_z
.eq(in_z
)
986 m
.d
.comb
+= self
.out_of
.eq(in_of
)
987 # normalisation decrease condition
988 decrease
= Signal(reset_less
=True)
989 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
992 # *sigh* not entirely obvious: count leading zeros (clz)
993 # with a PriorityEncoder: to find from the MSB
994 # we reverse the order of the bits.
995 temp_m
= Signal(mwid
, reset_less
=True)
996 temp_s
= Signal(mwid
+1, reset_less
=True)
997 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
999 # cat round and guard bits back into the mantissa
1000 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1001 pe
.i
.eq(temp_m
[::-1]), # inverted
1002 clz
.eq(pe
.o
), # count zeros from MSB down
1003 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1004 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1005 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1012 def __init__(self
, width
, id_wid
):
1013 self
.roundz
= Signal(reset_less
=True)
1014 self
.z
= FPNumBase(width
, False)
1015 self
.mid
= Signal(id_wid
, reset_less
=True)
1018 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1021 class FPNorm1ModSingle
:
1023 def __init__(self
, width
, id_wid
):
1025 self
.id_wid
= id_wid
1026 self
.i
= self
.ispec()
1027 self
.o
= self
.ospec()
1030 return FPAddStage1Data(self
.width
, self
.id_wid
)
1033 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this module on m as submodule "normalise_1" and
        wires i (combinatorially) into self.i
    """
    setattr(m.submodules, "normalise_1", self)
    link_input = self.i.eq(i)
    m.d.comb += link_input
1041 def process(self
, i
):
1044 def elaborate(self
, platform
):
1047 mwid
= self
.o
.z
.m_width
+2
1048 pe
= PriorityEncoder(mwid
)
1049 m
.submodules
.norm_pe
= pe
1052 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1054 m
.submodules
.norm1_out_z
= self
.o
.z
1055 m
.submodules
.norm1_out_overflow
= of
1056 m
.submodules
.norm1_in_z
= self
.i
.z
1057 m
.submodules
.norm1_in_overflow
= self
.i
.of
1060 m
.submodules
.norm1_insel_z
= i
.z
1061 m
.submodules
.norm1_insel_overflow
= i
.of
1063 espec
= (len(i
.z
.e
), True)
1064 ediff_n126
= Signal(espec
, reset_less
=True)
1065 msr
= MultiShiftRMerge(mwid
, espec
)
1066 m
.submodules
.multishift_r
= msr
1068 m
.d
.comb
+= i
.eq(self
.i
)
1069 # initialise out from in (overridden below)
1070 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1071 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1072 m
.d
.comb
+= of
.eq(i
.of
)
1073 # normalisation increase/decrease conditions
1074 decrease
= Signal(reset_less
=True)
1075 increase
= Signal(reset_less
=True)
1076 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1077 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1079 with m
.If(decrease
):
1080 # *sigh* not entirely obvious: count leading zeros (clz)
1081 # with a PriorityEncoder: to find from the MSB
1082 # we reverse the order of the bits.
1083 temp_m
= Signal(mwid
, reset_less
=True)
1084 temp_s
= Signal(mwid
+1, reset_less
=True)
1085 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1086 # make sure that the amount to decrease by does NOT
1087 # go below the minimum non-INF/NaN exponent
1088 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1091 # cat round and guard bits back into the mantissa
1092 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1093 pe
.i
.eq(temp_m
[::-1]), # inverted
1094 clz
.eq(limclz
), # count zeros from MSB down
1095 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1096 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1097 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1098 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1099 # overflow in bits 0..1: got shifted too (leave sticky)
1100 of
.guard
.eq(temp_s
[1]), # guard
1101 of
.round_bit
.eq(temp_s
[0]), # round
1104 with m
.Elif(increase
):
1105 temp_m
= Signal(mwid
+1, reset_less
=True)
1107 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1109 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1110 # connect multi-shifter to inp/out mantissa (and ediff)
1112 msr
.diff
.eq(ediff_n126
),
1113 self
.o
.z
.m
.eq(msr
.m
[3:]),
1114 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1115 # overflow in bits 0..1: got shifted too (leave sticky)
1116 of
.guard
.eq(temp_s
[2]), # guard
1117 of
.round_bit
.eq(temp_s
[1]), # round
1118 of
.sticky
.eq(temp_s
[0]), # sticky
1119 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1125 class FPNorm1ModMulti
:
1127 def __init__(self
, width
, single_cycle
=True):
1129 self
.in_select
= Signal(reset_less
=True)
1130 self
.in_z
= FPNumBase(width
, False)
1131 self
.in_of
= Overflow()
1132 self
.temp_z
= FPNumBase(width
, False)
1133 self
.temp_of
= Overflow()
1134 self
.out_z
= FPNumBase(width
, False)
1135 self
.out_of
= Overflow()
1137 def elaborate(self
, platform
):
1140 m
.submodules
.norm1_out_z
= self
.out_z
1141 m
.submodules
.norm1_out_overflow
= self
.out_of
1142 m
.submodules
.norm1_temp_z
= self
.temp_z
1143 m
.submodules
.norm1_temp_of
= self
.temp_of
1144 m
.submodules
.norm1_in_z
= self
.in_z
1145 m
.submodules
.norm1_in_overflow
= self
.in_of
1147 in_z
= FPNumBase(self
.width
, False)
1149 m
.submodules
.norm1_insel_z
= in_z
1150 m
.submodules
.norm1_insel_overflow
= in_of
1152 # select which of temp or in z/of to use
1153 with m
.If(self
.in_select
):
1154 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1155 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1157 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1158 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1159 # initialise out from in (overridden below)
1160 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1161 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1162 # normalisation increase/decrease conditions
1163 decrease
= Signal(reset_less
=True)
1164 increase
= Signal(reset_less
=True)
1165 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1166 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1167 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1169 with m
.If(decrease
):
1171 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1172 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1173 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1174 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1175 self
.out_of
.round_bit
.eq(0), # reset round bit
1176 self
.out_of
.m0
.eq(in_of
.guard
),
1179 with m
.Elif(increase
):
1181 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1182 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1183 self
.out_of
.guard
.eq(in_z
.m
[0]),
1184 self
.out_of
.m0
.eq(in_z
.m
[1]),
1185 self
.out_of
.round_bit
.eq(in_of
.guard
),
1186 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1192 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ creates the "normalise_1" state and its combinatorial module

        width:        passed to FPNorm1ModSingle and the FPNumBase output
        id_wid:       passed to FPNorm1ModSingle
        single_cycle: accepted but not used in this constructor
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle.__init__ requires (width, id_wid); calling it
    # with width alone raises TypeError.
    self.mod = FPNorm1ModSingle(width, id_wid)
    # ospec() delegates to self.mod, so self.mod must be set first
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1202 return self
.mod
.ispec()
1205 return self
.mod
.ospec()
1207 def setup(self
, m
, i
):
1208 """ links module to inputs and outputs
1210 self
.mod
.setup(m
, i
)
1212 def action(self
, m
):
1216 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """Create the multi-cycle "normalise_1" state.

    * width: passed to FPNorm1ModMulti and to the FPNumBase holders
    * id_wid: not read by this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # handshake and control signals
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # holding registers written from the module's outputs in action()
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # results handed on by this state
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
def setup(self, m, in_z, in_of, norm_stb):
    """Link the normalisation module to this state's inputs and outputs.

    * m: the module being built
    * in_z, in_of: number and overflow inputs, passed through to the module
    * norm_stb: incoming strobe; also copied combinatorially to self.stb
    """
    # the module receives its inputs followed by this state's accept
    # flag, holding registers and outputs, in that order
    module_args = (in_z, in_of, norm_stb,
                   self.in_accept, self.temp_z, self.temp_of,
                   self.out_z, self.out_norm)
    self.mod.setup(m, *module_args)

    m.d.comb += self.stb.eq(norm_stb)
    # sets to zero when not in normalise_1 state
    m.d.sync += self.ack.eq(0)
1240 def action(self
, m
):
1241 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1242 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1243 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1244 with m
.If(self
.out_norm
):
1245 with m
.If(self
.in_accept
):
1250 m
.d
.sync
+= self
.ack
.eq(0)
1252 # normalisation not required (or done).
1254 m
.d
.sync
+= self
.ack
.eq(1)
1255 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1258 class FPNormToPack(FPState
):
1260 def __init__(self
, width
, id_wid
):
1261 FPState
.__init
__(self
, "normalise_1")
1262 self
.id_wid
= id_wid
1266 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1269 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1271 def setup(self
, m
, i
):
1272 """ links module to inputs and outputs
1275 # Normalisation, Rounding Corrections, Pack - in a chain
1276 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1277 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1278 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1279 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1280 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1282 self
.out_z
= pmod
.ospec()
1284 m
.d
.sync
+= self
.out_z
.mid
.eq(pmod
.o
.mid
)
1285 m
.d
.sync
+= self
.out_z
.z
.v
.eq(pmod
.o
.z
.v
) # outputs packed result
1287 def action(self
, m
):
1288 m
.next
= "pack_put_z"
1293 def __init__(self
, width
, id_wid
):
1294 self
.z
= FPNumBase(width
, False)
1295 self
.mid
= Signal(id_wid
, reset_less
=True)
1298 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1303 def __init__(self
, width
, id_wid
):
1305 self
.id_wid
= id_wid
1306 self
.i
= self
.ispec()
1307 self
.out_z
= self
.ospec()
1310 return FPNorm1Data(self
.width
, self
.id_wid
)
1313 return FPRoundData(self
.width
, self
.id_wid
)
1315 def process(self
, i
):
def setup(self, m, i):
    """Register this module on ``m`` as submodule ``roundz`` and copy
    ``i`` combinatorially into the module's input record.
    """
    m.submodules.roundz = self
    copy_input = self.i.eq(i)
    m.d.comb += copy_input
def elaborate(self, platform):
    """Build the combinatorial rounding logic.

    The output record defaults to a copy of the input.  When the input's
    ``roundz`` flag is set the mantissa is incremented, and if the
    mantissa was already equal to ``m1s`` the exponent is incremented too.
    """
    m = Module()
    src = self.i
    dst = self.out_z

    # default: pass the input through (selectively overridden below)
    m.d.comb += dst.eq(src)
    with m.If(src.roundz):
        m.d.comb += dst.z.m.eq(src.z.m + 1)  # mantissa rounds up
        with m.If(src.z.m == src.z.m1s):  # all 1s
            m.d.comb += dst.z.e.eq(src.z.e + 1)  # exponent up
    return m
1332 class FPRound(FPState
):
def __init__(self, width, id_wid):
    """Create the "round" state.

    * width: passed to FPRoundMod
    * id_wid: width of the multiplex-id, passed to FPRoundMod
    """
    FPState.__init__(self, "round")
    # The rounding module's constructor takes (width, id_wid); calling it
    # with width alone raises TypeError, so id_wid is forwarded.
    self.mod = FPRoundMod(width, id_wid)
    self.out_z = self.ospec()  # output record, shaped by the module's ospec
1340 return self
.mod
.ispec()
1343 return self
.mod
.ospec()
1345 def setup(self
, m
, i
):
1346 """ links module to inputs and outputs
1348 self
.mod
.setup(m
, i
)
1351 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1352 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1354 def action(self
, m
):
1355 m
.next
= "corrections"
1358 class FPCorrectionsMod
:
1360 def __init__(self
, width
, id_wid
):
1362 self
.id_wid
= id_wid
1363 self
.i
= self
.ispec()
1364 self
.out_z
= self
.ospec()
1367 return FPRoundData(self
.width
, self
.id_wid
)
1370 return FPRoundData(self
.width
, self
.id_wid
)
1372 def process(self
, i
):
def setup(self, m, i):
    """Register this module on ``m`` as submodule ``corrections`` and
    copy ``i`` combinatorially into the module's input record.
    """
    m.submodules.corrections = self
    copy_input = self.i.eq(i)
    m.d.comb += copy_input
def elaborate(self, platform):
    """Build the combinatorial corrections logic.

    The output record defaults to a copy of the input.  When the input
    number reports ``is_denormalised`` the output exponent is replaced
    by the input number's ``N127`` value.
    """
    m = Module()
    src = self.i
    dst = self.out_z

    # the input and output numbers are registered as submodules
    m.submodules.corr_in_z = src.z
    m.submodules.corr_out_z = dst.z

    # default: pass the input through (exponent overridden below)
    m.d.comb += dst.eq(src)
    with m.If(src.z.is_denormalised):
        m.d.comb += dst.z.e.eq(src.z.N127)
    return m
1391 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """Create the "corrections" state.

    * width: passed to FPCorrectionsMod
    * id_wid: width of the multiplex-id, passed to FPCorrectionsMod
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod.__init__ requires both width and id_wid; passing
    # width alone raises TypeError.
    self.mod = FPCorrectionsMod(width, id_wid)
    self.out_z = self.ospec()  # output record, shaped by the module's ospec
1399 return self
.mod
.ispec()
1402 return self
.mod
.ospec()
def setup(self, m, in_z):
    """Link the corrections module to its input and register its output.

    * m: the module being built
    * in_z: input record handed to the corrections module
    """
    self.mod.setup(m, in_z)

    m.d.sync += self.out_z.eq(self.mod.out_z)
    # FPCorrectionsMod publishes its result as ``out_z`` and has no ``o``
    # attribute, so the id is read from ``out_z``.
    m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)
1412 def action(self
, m
):
1418 def __init__(self
, width
, id_wid
):
1419 self
.z
= FPNumOut(width
, False)
1420 self
.mid
= Signal(id_wid
, reset_less
=True)
1423 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1428 def __init__(self
, width
, id_wid
):
1430 self
.id_wid
= id_wid
1431 self
.i
= self
.ispec()
1432 self
.o
= self
.ospec()
1435 return FPRoundData(self
.width
, self
.id_wid
)
1438 return FPPackData(self
.width
, self
.id_wid
)
1440 def process(self
, i
):
def setup(self, m, in_z):
    """Register this module on ``m`` as submodule ``pack`` and copy
    ``in_z`` combinatorially into the module's input record.
    """
    m.submodules.pack = self
    copy_input = self.i.eq(in_z)
    m.d.comb += copy_input
def elaborate(self, platform):
    """Build the combinatorial packing logic.

    The id is copied straight from input to output.  The output number
    is driven by ``inf(sign)`` when the input reports ``is_overflowed``,
    and otherwise by ``create(sign, exponent, mantissa)``.
    """
    m = Module()
    num = self.i.z
    packed = self.o.z

    m.submodules.pack_in_z = num
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(num.is_overflowed):
        m.d.comb += packed.inf(num.s)
    with m.Else():
        m.d.comb += packed.create(num.s, num.e, num.m)
    return m
1461 def __init__(self
, width
, id_wid
):
1462 self
.z
= FPNumOut(width
, False)
1463 self
.mid
= Signal(id_wid
, reset_less
=True)
1466 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """FSM state "pack": wraps FPPackMod and registers its result.

    The next state is always "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """
        * width: passed to FPPackMod
        * id_wid: width of the multiplex-id, passed to FPPackMod
        """
        FPState.__init__(self, "pack")
        # the pack module's constructor takes (width, id_wid); calling it
        # with width alone raises TypeError
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Input record spec: the pack module's."""
        return self.mod.ispec()

    def ospec(self):
        """Output record spec: the pack module's."""
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # The pack module publishes its result as ``o`` (a record with a
        # number ``z`` and an id ``mid``), and self.out_z has the same
        # shape, so the packed value lives at ``.z.v`` on both sides.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """Unconditionally advance the FSM to "pack_put_z"."""
        m.next = "pack_put_z"
1494 class FPPutZ(FPState
):
1496 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1497 FPState
.__init
__(self
, state
)
1498 if to_state
is None:
1499 to_state
= "get_ops"
1500 self
.to_state
= to_state
1503 self
.in_mid
= in_mid
1504 self
.out_mid
= out_mid
1506 def action(self
, m
):
1507 if self
.in_mid
is not None:
1508 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1510 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1512 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1513 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1514 m
.next
= self
.to_state
1516 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1519 class FPPutZIdx(FPState
):
1521 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1522 FPState
.__init
__(self
, state
)
1523 if to_state
is None:
1524 to_state
= "get_ops"
1525 self
.to_state
= to_state
1527 self
.out_zs
= out_zs
1528 self
.in_mid
= in_mid
1530 def action(self
, m
):
1531 outz_stb
= Signal(reset_less
=True)
1532 outz_ack
= Signal(reset_less
=True)
1533 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1534 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1537 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1539 with m
.If(outz_stb
& outz_ack
):
1540 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1541 m
.next
= self
.to_state
1543 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1545 class FPADDBaseData
:
1547 def __init__(self
, width
, id_wid
):
1549 self
.id_wid
= id_wid
1550 self
.a
= Signal(width
)
1551 self
.b
= Signal(width
)
1552 self
.mid
= Signal(id_wid
, reset_less
=True)
1555 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
1559 def __init__(self
, width
, id_wid
):
1560 self
.z
= FPOp(width
)
1561 self
.mid
= Signal(id_wid
, reset_less
=True)
1564 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1569 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1572 * width: bit-width of IEEE754. supported: 16, 32, 64
1573 * id_wid: an identifier that is sync-connected to the input
1574 * single_cycle: True indicates each stage to complete in 1 clock
1575 * compact: True indicates a reduced number of stages
1578 self
.id_wid
= id_wid
1579 self
.single_cycle
= single_cycle
1580 self
.compact
= compact
1582 self
.in_t
= Trigger()
1583 self
.i
= self
.ispec()
1584 self
.o
= self
.ospec()
1589 return FPADDBaseData(self
.width
, self
.id_wid
)
1592 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """Append ``state`` to ``self.states`` and return it.

    Callers assign the result and immediately call ``setup`` on it, so
    the state object must be handed back.
    """
    self.states.append(state)
    return state
1598 def get_fragment(self
, platform
=None):
1599 """ creates the HDL code-fragment for FPAdd
1602 m
.submodules
.out_z
= self
.o
.z
1603 m
.submodules
.in_t
= self
.in_t
1605 self
.get_compact_fragment(m
, platform
)
1607 self
.get_longer_fragment(m
, platform
)
1609 with m
.FSM() as fsm
:
1611 for state
in self
.states
:
1612 with m
.State(state
.state_from
):
1617 def get_longer_fragment(self
, m
, platform
=None):
1619 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1621 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1625 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1626 sc
.setup(m
, a
, b
, self
.in_mid
)
1628 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1629 dn
.setup(m
, a
, b
, sc
.in_mid
)
1631 if self
.single_cycle
:
1632 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1633 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1635 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1636 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1638 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1639 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1641 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1642 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1644 if self
.single_cycle
:
1645 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1646 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1648 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1649 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1651 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1652 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1654 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1655 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1657 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1658 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1660 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1661 pa
.in_mid
, self
.out_mid
))
1663 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1664 pa
.in_mid
, self
.out_mid
))
1666 def get_compact_fragment(self
, m
, platform
=None):
1668 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1669 self
.width
, self
.id_wid
))
1670 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1672 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1675 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1678 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1679 n1
.setup(m
, alm
.a1o
)
1681 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1682 n1
.out_z
.mid
, self
.o
.mid
))
1684 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1685 sc
.o
.mid
, self
.o
.mid
))
1688 class FPADDBase(FPState
):
1690 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1693 * width: bit-width of IEEE754. supported: 16, 32, 64
1694 * id_wid: an identifier that is sync-connected to the input
1695 * single_cycle: True indicates each stage to complete in 1 clock
1697 FPState
.__init
__(self
, "fpadd")
1699 self
.single_cycle
= single_cycle
1700 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1701 self
.o
= self
.ospec()
1703 self
.in_t
= Trigger()
1704 self
.i
= self
.ispec()
1706 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1707 self
.in_accept
= Signal(reset_less
=True)
1708 self
.add_stb
= Signal(reset_less
=True)
1709 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1712 return self
.mod
.ispec()
1715 return self
.mod
.ospec()
def setup(self, m, i, add_stb, in_mid):
    """Wire this state to the inner adder module.

    * m: the module being built
    * i: input record, copied into self.i and on into the inner module
    * add_stb: strobe registered into self.add_stb
    * in_mid: not read by this method
    """
    inner = self.mod
    m.d.comb += [
        self.i.eq(i),
        inner.i.eq(self.i),
        self.z_done.eq(inner.o.z.trigger),
        # input handshake: stb goes in to the inner module, ack comes back
        inner.in_t.stb.eq(self.in_t.stb),
        self.in_t.ack.eq(inner.in_t.ack),
        # result, id and output strobe are mirrored from the inner module;
        # the output ack travels the other way
        self.o.mid.eq(inner.o.mid),
        self.o.z.v.eq(inner.o.z.v),
        self.o.z.stb.eq(inner.o.z.stb),
        inner.o.z.ack.eq(self.o.z.ack),
    ]

    m.d.sync += self.add_stb.eq(add_stb)
    m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
    m.d.sync += self.o.z.ack.eq(0)  # likewise

    m.submodules.fpadd = inner
1737 def action(self
, m
):
1739 # in_accept is set on incoming strobe HIGH and ack LOW.
1740 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1742 #with m.If(self.in_t.ack):
1743 # m.d.sync += self.in_t.stb.eq(0)
1744 with m
.If(~self
.z_done
):
1745 # not done: test for accepting an incoming operand pair
1746 with m
.If(self
.in_accept
):
1748 self
.add_ack
.eq(1), # acknowledge receipt...
1749 self
.in_t
.stb
.eq(1), # initiate add
1752 m
.d
.sync
+= [self
.add_ack
.eq(0),
1753 self
.in_t
.stb
.eq(0),
1757 # done: acknowledge, and write out id and value
1758 m
.d
.sync
+= [self
.add_ack
.eq(1),
1765 if self
.in_mid
is not None:
1766 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1769 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1771 # move to output state on detecting z ack
1772 with m
.If(self
.out_z
.trigger
):
1773 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1776 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1780 def __init__(self
, width
, id_wid
):
1782 self
.id_wid
= id_wid
1784 for i
in range(rs_sz
):
1786 out_z
.name
= "out_z_%d" % i
1788 self
.res
= Array(res
)
1789 self
.in_z
= FPOp(width
)
1790 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
def setup(self, m, in_z, in_mid):
    """Combinatorially copy ``in_z`` and ``in_mid`` into this object's
    own ``in_z`` / ``in_mid``.
    """
    copy_z = self.in_z.eq(in_z)
    copy_mid = self.in_mid.eq(in_mid)
    m.d.comb += [copy_z, copy_mid]
1796 def get_fragment(self
, platform
=None):
1797 """ creates the HDL code-fragment for FPAdd
1800 m
.submodules
.res_in_z
= self
.in_z
1801 m
.submodules
+= self
.res
1813 """ FPADD: stages as follows:
1819 FPAddBase---> FPAddBaseMod
1821 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1823 FPAddBase is tricky: it is both a stage and *has* stages.
1824 Connection to FPAddBaseMod therefore requires an in stb/ack
1825 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1826 needs to be the thing that raises the incoming stb.
1829 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1832 * width: bit-width of IEEE754. supported: 16, 32, 64
1833 * id_wid: an identifier that is sync-connected to the input
1834 * single_cycle: True indicates each stage to complete in 1 clock
1837 self
.id_wid
= id_wid
1838 self
.single_cycle
= single_cycle
1840 #self.out_z = FPOp(width)
1841 self
.ids
= FPID(id_wid
)
1844 for i
in range(rs_sz
):
1847 in_a
.name
= "in_a_%d" % i
1848 in_b
.name
= "in_b_%d" % i
1849 rs
.append((in_a
, in_b
))
1853 for i
in range(rs_sz
):
1855 out_z
.name
= "out_z_%d" % i
1857 self
.res
= Array(res
)
def add_state(self, state):
    """Append ``state`` to ``self.states`` and return it.

    Callers assign the result (e.g. the operand-fetch states) and use it
    afterwards, so the state object must be handed back.
    """
    self.states.append(state)
    return state
1865 def get_fragment(self
, platform
=None):
1866 """ creates the HDL code-fragment for FPAdd
1869 m
.submodules
+= self
.rs
1871 in_a
= self
.rs
[0][0]
1872 in_b
= self
.rs
[0][1]
1874 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1879 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1884 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1885 ab
= self
.add_state(ab
)
1886 abd
= ab
.ispec() # create an input spec object for FPADDBase
1887 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1888 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1891 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1894 with m
.FSM() as fsm
:
1896 for state
in self
.states
:
1897 with m
.State(state
.state_from
):
1903 if __name__
== "__main__":
1905 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1906 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1907 alu
.rs
[0][1].ports() + \
1908 alu
.res
[0].ports() + \
1909 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1911 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1912 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1913 alu
.in_t
.ports() + \
1914 alu
.out_z
.ports() + \
1915 [alu
.in_mid
, alu
.out_mid
])
1918 # works... but don't use, just do "python fname.py convert -t v"
1919 #print (verilog.convert(alu, ports=[
1920 # ports=alu.in_a.ports() + \
1921 # alu.in_b.ports() + \
1922 # alu.out_z.ports())