1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
)
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """ Record the identifier of the state this object stands for.

        state_from: stored unchanged on self.state_from (subclasses in
                    this file pass strings such as "align" or "add_0").
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ Create the operand input, the captured-operand output and the
        decode flag.

        width: forwarded to FPOp and used as the shape of out_op.
    """
    # operand input (an FPOp; elaborate reads its .ack, .stb and .v)
    self.in_op = FPOp(width)
    # output signal that receives in_op.v
    self.out_op = Signal(width)
    # single-bit flag, created without a reset
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ Build the two-operand fetch state.

        in_state:  identifier handed to FPState.__init__
        out_state: stored on self.out_state (used as the next state)
        width:     forwarded to FPGet2OpMod
        id_wid:    forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # worker module plus a second output record of the same spec
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # single-bit handshake/decode flags, all created without a reset
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
def setup(self, m, i, in_stb, in_ack):
    """ Attach the worker module to m and wire up its handshake.

        m:      module receiving the submodule and the comb statements
        i:      input record assigned to self.mod.i
        in_stb: strobe driven into self.mod.stb
        in_ack: signal driven from self.mod.ack
    """
    worker = self.mod
    m.submodules.get_ops = worker

    # data input first, then the single-bit handshake wiring
    m.d.comb += worker.i.eq(i)
    m.d.comb += [
        worker.stb.eq(in_stb),
        self.out_ack.eq(worker.ack),
        self.out_decode.eq(worker.trigger),
        in_ack.eq(worker.ack),
    ]
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ Create a record holding two FPNumBase operands and a mid signal.

        width:   forwarded to both FPNumBase operands
        id_wid:  shape of the mid signal
        m_extra: forwarded to both FPNumBase operands (default True)
    """
    # operands
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # mid, created without a reset
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ Create a record holding operands a/b, result z, the raw output
        word oz, the out_do_z flag and mid.

        width:  forwarded to the FPNumBase/FPNumOut members and used as
                the shape of oz
        id_wid: shape of the mid signal
    """
    # operands
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    # result number
    self.z = FPNumOut(width, False)
    # raw output word and its single-bit flag
    self.oz = Signal(width, reset_less=True)
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
274 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
275 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
278 class FPAddSpecialCasesMod
:
279 """ special cases: NaNs, infs, zeros, denormalised
280 NOTE: some of these are unique to add. see "Special Operations"
281 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
284 def __init__(self
, width
, id_wid
):
287 self
.i
= self
.ispec()
288 self
.o
= self
.ospec()
291 return FPNumBase2Ops(self
.width
, self
.id_wid
)
294 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ Register this object as submodule "specialcases" of m and
        combinatorially drive self.i from i.
    """
    setattr(m.submodules, "specialcases", self)
    m.d.comb += self.i.eq(i)
302 def process(self
, i
):
305 def elaborate(self
, platform
):
308 m
.submodules
.sc_in_a
= self
.i
.a
309 m
.submodules
.sc_in_b
= self
.i
.b
310 m
.submodules
.sc_out_z
= self
.o
.z
313 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
316 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
318 # if a is NaN or b is NaN return NaN
319 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
320 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
321 m
.d
.comb
+= self
.o
.z
.nan(0)
323 # XXX WEIRDNESS for FP16 non-canonical NaN handling
326 ## if a is zero and b is NaN return -b
327 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
328 # m.d.comb += self.o.out_do_z.eq(1)
329 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
331 ## if b is zero and a is NaN return -a
332 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
333 # m.d.comb += self.o.out_do_z.eq(1)
334 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
336 ## if a is -zero and b is NaN return -b
337 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
338 # m.d.comb += self.o.out_do_z.eq(1)
339 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
341 ## if b is -zero and a is NaN return -a
342 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
343 # m.d.comb += self.o.out_do_z.eq(1)
344 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
346 # if a is inf return inf (or NaN)
347 with m
.Elif(self
.i
.a
.is_inf
):
348 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
349 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
350 # if a is inf and signs don't match return NaN
351 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
352 m
.d
.comb
+= self
.o
.z
.nan(0)
354 # if b is inf return inf
355 with m
.Elif(self
.i
.b
.is_inf
):
356 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
359 # if a is zero and b zero return signed-a/b
360 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
361 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
362 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
366 # if a is zero return b
367 with m
.Elif(self
.i
.a
.is_zero
):
368 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
369 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
372 # if b is zero return a
373 with m
.Elif(self
.i
.b
.is_zero
):
374 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
375 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
378 # if a equal to -b return zero (+ve zero)
379 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
380 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
381 m
.d
.comb
+= self
.o
.z
.zero(0)
383 # Denormalised Number checks next, so pass a/b data through
385 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
386 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
387 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
389 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
390 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
396 def __init__(self
, id_wid
):
399 self
.in_mid
= Signal(id_wid
, reset_less
=True)
400 self
.out_mid
= Signal(id_wid
, reset_less
=True)
406 if self
.id_wid
is not None:
407 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
410 class FPAddSpecialCases(FPState
):
411 """ special cases: NaNs, infs, zeros, denormalised
412 NOTE: some of these are unique to add. see "Special Operations"
413 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Build the "special_cases" state.

        width:  forwarded to FPAddSpecialCasesMod
        id_wid: forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid); passing only
    # width raised TypeError before any hardware was described.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
422 def setup(self
, m
, i
):
423 """ links module to inputs and outputs
425 self
.mod
.setup(m
, i
, self
.out_do_z
)
426 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
427 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
431 with m
.If(self
.out_do_z
):
434 m
.next
= "denormalise"
437 class FPAddSpecialCasesDeNorm(FPState
):
438 """ special cases: NaNs, infs, zeros, denormalised
439 NOTE: some of these are unique to add. see "Special Operations"
440 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Build the combined special-cases + denormalise state.

        width, id_wid: forwarded to both FPAddSpecialCasesMod and
                       FPAddDeNormMod.
    """
    FPState.__init__(self, "special_cases")

    # the two worker modules (setup chains smod into dmod)
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.dmod = FPAddDeNormMod(width, id_wid)

    # output record, obtained from this object's own ospec()
    self.o = self.ospec()
450 return self
.smod
.ispec()
453 return self
.dmod
.ospec()
455 def setup(self
, m
, i
):
456 """ links module to inputs and outputs
458 # these only needed for break-out (early-out)
459 # out_z = self.smod.ospec()
460 # out_do_z = Signal(reset_less=True)
461 self
.smod
.setup(m
, i
)
462 self
.dmod
.setup(m
, self
.smod
.o
)
463 #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
465 # out_do_z=True, only needed for early-out (split pipeline)
466 #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
467 #m.d.sync += out_z.mid.eq(self.smod.o.mid) # (and mid)
470 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
472 def process(self
, i
):
476 #with m.If(self.out_do_z):
482 class FPAddDeNormMod(FPState
):
484 def __init__(self
, width
, id_wid
):
487 self
.i
= self
.ispec()
488 self
.o
= self
.ospec()
491 return FPSCData(self
.width
, self
.id_wid
)
494 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ Register this object as submodule "denormalise" of m and
        combinatorially drive self.i from i.
    """
    setattr(m.submodules, "denormalise", self)
    m.d.comb += self.i.eq(i)
502 def elaborate(self
, platform
):
504 m
.submodules
.denorm_in_a
= self
.i
.a
505 m
.submodules
.denorm_in_b
= self
.i
.b
506 m
.submodules
.denorm_out_a
= self
.o
.a
507 m
.submodules
.denorm_out_b
= self
.o
.b
509 with m
.If(~self
.i
.out_do_z
):
510 # XXX hmmm, don't like repeating identical code
511 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
512 with m
.If(self
.i
.a
.exp_n127
):
513 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
515 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
517 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
518 with m
.If(self
.i
.b
.exp_n127
):
519 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
521 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
523 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
524 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
525 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
526 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
531 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ Build the "denormalise" state.

        width:  forwarded to FPAddDeNormMod and to the two FPNumBase
                outputs
        id_wid: forwarded to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid); passing only width
    # raised TypeError.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
539 def setup(self
, m
, i
):
540 """ links module to inputs and outputs
544 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
545 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
548 # Denormalised Number checks
552 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ Create the input/output operand pairs and the exp_eq flag.

        width: forwarded to every FPNumBase / FPNumIn member.
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # single-bit flag; elaborate drives it to 0 by default and to 1
    # in the exponents-equal case
    self.exp_eq = Signal(reset_less=True)
561 def elaborate(self
, platform
):
562 # This one however (single-cycle) will do the shift
567 m
.submodules
.align_in_a
= self
.in_a
568 m
.submodules
.align_in_b
= self
.in_b
569 m
.submodules
.align_out_a
= self
.out_a
570 m
.submodules
.align_out_b
= self
.out_b
572 # NOTE: this does *not* do single-cycle multi-shifting,
573 # it *STAYS* in the align state until exponents match
575 # exponent of a greater than b: shift b down
576 m
.d
.comb
+= self
.exp_eq
.eq(0)
577 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
578 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
579 agtb
= Signal(reset_less
=True)
580 altb
= Signal(reset_less
=True)
581 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
582 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
584 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
585 # exponent of b greater than a: shift a down
587 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
588 # exponents equal: move to next stage.
590 m
.d
.comb
+= self
.exp_eq
.eq(1)
594 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ Build the "align" state around FPAddAlignMultiMod.

        width:  forwarded to the worker module and the FPNumIn outputs
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)

    # registered copies of the worker's outputs (setup assigns them in
    # the sync domain)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b):
    """ Attach the align worker to m and connect operands and results.

        m:          module receiving the submodule and statements
        in_a, in_b: operands driven combinatorially into the worker
    """
    worker = self.mod
    m.submodules.align = worker

    # operands in, exponent-equal flag out: combinatorial
    m.d.comb += worker.in_a.eq(in_a)
    m.d.comb += worker.in_b.eq(in_b)
    m.d.comb += self.exp_eq.eq(worker.exp_eq)

    # out_a/out_b are latched in the sync domain (a combinatorial
    # connection was tried previously and left disabled)
    m.d.sync += self.out_a.eq(worker.out_a)
    m.d.sync += self.out_b.eq(worker.out_b)
616 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ Create a record holding FPNumIn operands a/b, result z, the
        out_do_z flag, the raw output word oz and mid.

        width:  forwarded to the FPNumIn/FPNumOut members and used as
                the shape of oz
        id_wid: shape of the mid signal
    """
    # operands
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # result number
    self.z = FPNumOut(width, False)
    # single-bit flag and raw output word
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
631 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
632 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
635 class FPAddAlignSingleMod
:
637 def __init__(self
, width
, id_wid
):
640 self
.i
= self
.ispec()
641 self
.o
= self
.ospec()
644 return FPSCData(self
.width
, self
.id_wid
)
647 return FPNumIn2Ops(self
.width
, self
.id_wid
)
649 def process(self
, i
):
def setup(self, m, i):
    """ Register this object as submodule "align" of m and
        combinatorially drive self.i from i.
    """
    setattr(m.submodules, "align", self)
    m.d.comb += self.i.eq(i)
658 def elaborate(self
, platform
):
659 """ Aligns A against B or B against A, depending on which has the
660 greater exponent. This is done in a *single* cycle using
661 variable-width bit-shift
663 the shifter used here is quite expensive in terms of gates.
664 Mux A or B in (and out) into temporaries, as only one of them
665 needs to be aligned against the other
669 m
.submodules
.align_in_a
= self
.i
.a
670 m
.submodules
.align_in_b
= self
.i
.b
671 m
.submodules
.align_out_a
= self
.o
.a
672 m
.submodules
.align_out_b
= self
.o
.b
674 # temporary (muxed) input and output to be shifted
675 t_inp
= FPNumBase(self
.width
)
676 t_out
= FPNumIn(None, self
.width
)
677 espec
= (len(self
.i
.a
.e
), True)
678 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
679 m
.submodules
.align_t_in
= t_inp
680 m
.submodules
.align_t_out
= t_out
681 m
.submodules
.multishift_r
= msr
683 ediff
= Signal(espec
, reset_less
=True)
684 ediffr
= Signal(espec
, reset_less
=True)
685 tdiff
= Signal(espec
, reset_less
=True)
686 elz
= Signal(reset_less
=True)
687 egz
= Signal(reset_less
=True)
689 # connect multi-shifter to t_inp/out mantissa (and tdiff)
690 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
691 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
692 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
693 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
694 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
696 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
697 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
698 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
699 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
701 # default: A-exp == B-exp, A and B untouched (fall through)
702 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
703 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
704 # only one shifter (muxed)
705 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
706 # exponent of a greater than b: shift b down
707 with m
.If(~self
.i
.out_do_z
):
709 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
712 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
714 # exponent of b greater than a: shift a down
716 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
719 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
722 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
723 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
724 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
725 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
730 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ Build the "align" state around FPAddAlignSingleMod.

        width:  forwarded to the worker module and the FPNumIn outputs
        id_wid: forwarded to the worker module
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # registered copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
738 def setup(self
, m
, i
):
739 """ links module to inputs and outputs
743 # NOTE: could be done as comb
744 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
745 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
751 class FPAddAlignSingleAdd(FPState
):
753 def __init__(self
, width
, id_wid
):
754 FPState
.__init
__(self
, "align")
757 self
.a1o
= self
.ospec()
760 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
763 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
765 def setup(self
, m
, i
):
766 """ links module to inputs and outputs
769 # chain AddAlignSingle, AddStage0 and AddStage1
770 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
771 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
772 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
774 chain
= StageChain([mod
, a0mod
, a1mod
])
777 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
779 def process(self
, i
):
783 m
.next
= "normalise_1"
786 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ Create the record produced by the first add stage.

        width:  forwarded to FPNumBase and used as the shape of oz
        id_wid: shape of the mid signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # tot is four bits wider than z's mantissa width
    self.tot = Signal(4 + self.z.m_width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
796 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
797 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
800 class FPAddStage0Mod
:
802 def __init__(self
, width
, id_wid
):
805 self
.i
= self
.ispec()
806 self
.o
= self
.ospec()
809 return FPSCData(self
.width
, self
.id_wid
)
812 return FPAddStage0Data(self
.width
, self
.id_wid
)
814 def process(self
, i
):
def setup(self, m, i):
    """ Register this object as submodule "add0" of m and
        combinatorially drive self.i from i.
    """
    setattr(m.submodules, "add0", self)
    m.d.comb += self.i.eq(i)
823 def elaborate(self
, platform
):
825 m
.submodules
.add0_in_a
= self
.i
.a
826 m
.submodules
.add0_in_b
= self
.i
.b
827 m
.submodules
.add0_out_z
= self
.o
.z
829 # store intermediate tests (and zero-extended mantissas)
830 seq
= Signal(reset_less
=True)
831 mge
= Signal(reset_less
=True)
832 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
833 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
834 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
835 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
836 am0
.eq(Cat(self
.i
.a
.m
, 0)),
837 bm0
.eq(Cat(self
.i
.b
.m
, 0))
839 # same-sign (both negative or both positive) add mantissas
840 with m
.If(~self
.i
.out_do_z
):
841 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
844 self
.o
.tot
.eq(am0
+ bm0
),
845 self
.o
.z
.s
.eq(self
.i
.a
.s
)
847 # a mantissa greater than b, use a
850 self
.o
.tot
.eq(am0
- bm0
),
851 self
.o
.z
.s
.eq(self
.i
.a
.s
)
853 # b mantissa greater than a, use b
856 self
.o
.tot
.eq(bm0
- am0
),
857 self
.o
.z
.s
.eq(self
.i
.b
.s
)
860 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
861 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
862 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
866 class FPAddStage0(FPState
):
867 """ First stage of add. covers same-sign (add) and subtract
868 special-casing when mantissas are greater or equal, to
869 give greatest accuracy.
def __init__(self, width, id_wid):
    """ Build the "add_0" state.

        width:  forwarded to FPAddStage0Mod
        id_wid: forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid); passing only width
    # raised TypeError.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
877 def setup(self
, m
, i
):
878 """ links module to inputs and outputs
882 # NOTE: these could be done as combinatorial (merge add0+add1)
883 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
889 class FPAddStage1Data
:
891 def __init__(self
, width
, id_wid
):
892 self
.z
= FPNumBase(width
, False)
893 self
.out_do_z
= Signal(reset_less
=True)
894 self
.oz
= Signal(width
, reset_less
=True)
896 self
.mid
= Signal(id_wid
, reset_less
=True)
899 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
900 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
904 class FPAddStage1Mod(FPState
):
905 """ Second stage of add: preparation for normalisation.
906 detects when tot sum is too big (tot[27] is kinda a carry bit)
909 def __init__(self
, width
, id_wid
):
912 self
.i
= self
.ispec()
913 self
.o
= self
.ospec()
916 return FPAddStage0Data(self
.width
, self
.id_wid
)
919 return FPAddStage1Data(self
.width
, self
.id_wid
)
921 def process(self
, i
):
924 def setup(self
, m
, i
):
925 """ links module to inputs and outputs
927 m
.submodules
.add1
= self
928 m
.submodules
.add1_out_overflow
= self
.o
.of
930 m
.d
.comb
+= self
.i
.eq(i
)
932 def elaborate(self
, platform
):
934 #m.submodules.norm1_in_overflow = self.in_of
935 #m.submodules.norm1_out_overflow = self.out_of
936 #m.submodules.norm1_in_z = self.in_z
937 #m.submodules.norm1_out_z = self.out_z
938 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
939 # tot[-1] (MSB) gets set when the sum overflows. shift result down
940 with m
.If(~self
.i
.out_do_z
):
941 with m
.If(self
.i
.tot
[-1]):
943 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
944 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
945 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
946 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
947 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
948 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
950 # tot[-1] (MSB) zero case
953 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
954 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
955 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
956 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
957 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
960 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
961 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
962 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
967 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ Build the "add_1" state.

        width:  forwarded to FPAddStage1Mod and to the out_z FPNumBase
        id_wid: forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid); passing only width
    # raised TypeError.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe; setup clears it and then sets it in the sync domain
    self.norm_stb = Signal()
976 def setup(self
, m
, i
):
977 """ links module to inputs and outputs
981 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
983 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
984 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
985 m
.d
.sync
+= self
.norm_stb
.eq(1)
988 m
.next
= "normalise_1"
991 class FPNormaliseModSingle
:
993 def __init__(self
, width
):
995 self
.in_z
= self
.ispec()
996 self
.out_z
= self
.ospec()
999 return FPNumBase(self
.width
, False)
1002 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """ Register this object as submodule "normalise" of m and
        combinatorially drive the module input from i.
    """
    m.submodules.normalise = self
    # The constructor creates self.in_z (from ispec()), not self.i, so
    # the input record to drive is in_z; assigning to self.i raised
    # AttributeError.
    # NOTE(review): confirm no caller relies on a separately attached .i
    m.d.comb += self.in_z.eq(i)
1010 def elaborate(self
, platform
):
1013 mwid
= self
.out_z
.m_width
+2
1014 pe
= PriorityEncoder(mwid
)
1015 m
.submodules
.norm_pe
= pe
1017 m
.submodules
.norm1_out_z
= self
.out_z
1018 m
.submodules
.norm1_in_z
= self
.in_z
1020 in_z
= FPNumBase(self
.width
, False)
1022 m
.submodules
.norm1_insel_z
= in_z
1023 m
.submodules
.norm1_insel_overflow
= in_of
1025 espec
= (len(in_z
.e
), True)
1026 ediff_n126
= Signal(espec
, reset_less
=True)
1027 msr
= MultiShiftRMerge(mwid
, espec
)
1028 m
.submodules
.multishift_r
= msr
1030 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1031 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1032 # initialise out from in (overridden below)
1033 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1034 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1035 # normalisation decrease condition
1036 decrease
= Signal(reset_less
=True)
1037 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1039 with m
.If(decrease
):
1040 # *sigh* not entirely obvious: count leading zeros (clz)
1041 # with a PriorityEncoder: to find from the MSB
1042 # we reverse the order of the bits.
1043 temp_m
= Signal(mwid
, reset_less
=True)
1044 temp_s
= Signal(mwid
+1, reset_less
=True)
1045 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1047 # cat round and guard bits back into the mantissa
1048 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1049 pe
.i
.eq(temp_m
[::-1]), # inverted
1050 clz
.eq(pe
.o
), # count zeros from MSB down
1051 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1052 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1053 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ Create a record holding roundz, z, out_do_z, oz and mid.

        width:  forwarded to FPNumBase and used as the shape of oz
        id_wid: shape of the mid signal
    """
    # single-bit flag, created without a reset
    self.roundz = Signal(reset_less=True)
    # result number
    self.z = FPNumBase(width, False)
    # single-bit flag and raw output word
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1068 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1069 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1072 class FPNorm1ModSingle
:
1074 def __init__(self
, width
, id_wid
):
1076 self
.id_wid
= id_wid
1077 self
.i
= self
.ispec()
1078 self
.o
= self
.ospec()
1081 return FPAddStage1Data(self
.width
, self
.id_wid
)
1084 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ Register this object as submodule "normalise_1" of m and
        combinatorially drive self.i from i.
    """
    setattr(m.submodules, "normalise_1", self)
    m.d.comb += self.i.eq(i)
1092 def process(self
, i
):
1095 def elaborate(self
, platform
):
1098 mwid
= self
.o
.z
.m_width
+2
1099 pe
= PriorityEncoder(mwid
)
1100 m
.submodules
.norm_pe
= pe
1103 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1105 m
.submodules
.norm1_out_z
= self
.o
.z
1106 m
.submodules
.norm1_out_overflow
= of
1107 m
.submodules
.norm1_in_z
= self
.i
.z
1108 m
.submodules
.norm1_in_overflow
= self
.i
.of
1111 m
.submodules
.norm1_insel_z
= i
.z
1112 m
.submodules
.norm1_insel_overflow
= i
.of
1114 espec
= (len(i
.z
.e
), True)
1115 ediff_n126
= Signal(espec
, reset_less
=True)
1116 msr
= MultiShiftRMerge(mwid
, espec
)
1117 m
.submodules
.multishift_r
= msr
1119 m
.d
.comb
+= i
.eq(self
.i
)
1120 # initialise out from in (overridden below)
1121 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1122 m
.d
.comb
+= of
.eq(i
.of
)
1123 # normalisation increase/decrease conditions
1124 decrease
= Signal(reset_less
=True)
1125 increase
= Signal(reset_less
=True)
1126 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1127 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1129 with m
.If(~self
.i
.out_do_z
):
1130 with m
.If(decrease
):
1131 # *sigh* not entirely obvious: count leading zeros (clz)
1132 # with a PriorityEncoder: to find from the MSB
1133 # we reverse the order of the bits.
1134 temp_m
= Signal(mwid
, reset_less
=True)
1135 temp_s
= Signal(mwid
+1, reset_less
=True)
1136 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1137 # make sure that the amount to decrease by does NOT
1138 # go below the minimum non-INF/NaN exponent
1139 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1142 # cat round and guard bits back into the mantissa
1143 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1144 pe
.i
.eq(temp_m
[::-1]), # inverted
1145 clz
.eq(limclz
), # count zeros from MSB down
1146 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1147 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1148 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1149 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1150 # overflow in bits 0..1: got shifted too (leave sticky)
1151 of
.guard
.eq(temp_s
[1]), # guard
1152 of
.round_bit
.eq(temp_s
[0]), # round
1155 with m
.Elif(increase
):
1156 temp_m
= Signal(mwid
+1, reset_less
=True)
1158 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1160 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1161 # connect multi-shifter to inp/out mantissa (and ediff)
1163 msr
.diff
.eq(ediff_n126
),
1164 self
.o
.z
.m
.eq(msr
.m
[3:]),
1165 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1166 # overflow in bits 0..1: got shifted too (leave sticky)
1167 of
.guard
.eq(temp_s
[2]), # guard
1168 of
.round_bit
.eq(temp_s
[1]), # round
1169 of
.sticky
.eq(temp_s
[0]), # sticky
1170 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1173 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1174 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1175 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
class FPNorm1ModMulti:
    """ combinatorial single-step normalisation.

        Selects either (in_z, in_of) or (temp_z, temp_of) through in_select
        and, when required, moves the mantissa by exactly one bit: up with
        the exponent decremented ("decrease"), or down with the exponent
        incremented ("increase").  out_norm is high while either condition
        holds (the original comment marks it "loop-end").

        * width: bit-width handed to FPNumBase
        * single_cycle: accepted but not read by this class
    """

    def __init__(self, width, single_cycle=True):
        self.width = width  # read back by elaborate()
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # elaborate() drives out_norm, so it has to exist on the instance
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end

        # the overrides stay in the comb domain: they replace the comb
        # defaults assigned to out_z / out_of just above
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit),
            ]

        return m
1247 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ builds the "normalise_1" state around a FPNorm1ModSingle

        * width: bit-width passed to the module and to FPNumBase
        * id_wid: width of the id, passed on to the module
        * single_cycle: accepted but not read here
    """
    FPState.__init__(self, "normalise_1")
    # id_wid is forwarded so this matches the
    # FPNorm1ModSingle(width, id_wid) construction used by FPNormToPack
    self.mod = FPNorm1ModSingle(width, id_wid)
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1257 return self
.mod
.ispec()
1260 return self
.mod
.ospec()
1262 def setup(self
, m
, i
):
1263 """ links module to inputs and outputs
1265 self
.mod
.setup(m
, i
)
1267 def action(self
, m
):
1271 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ builds the multi-cycle "normalise_1" state

        * width: bit-width passed to FPNorm1ModMulti and FPNumBase
        * id_wid: accepted but not read here
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1285 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1286 """ links module to inputs and outputs
1288 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1289 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1290 self
.out_z
, self
.out_norm
)
1292 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1293 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1295 def action(self
, m
):
1296 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1297 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1298 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1299 with m
.If(self
.out_norm
):
1300 with m
.If(self
.in_accept
):
1305 m
.d
.sync
+= self
.ack
.eq(0)
1307 # normalisation not required (or done).
1309 m
.d
.sync
+= self
.ack
.eq(1)
1310 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1313 class FPNormToPack(FPState
):
def __init__(self, width, id_wid):
    """ builds the combined normalise-to-pack state ("normalise_1")

        * width: bit-width, stored for ispec/ospec/setup
        * id_wid: id width, stored for ispec/ospec/setup
    """
    FPState.__init__(self, "normalise_1")
    self.id_wid = id_wid
    # ispec(), ospec() and setup() all read self.width
    self.width = width
1321 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1324 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1326 def setup(self
, m
, i
):
1327 """ links module to inputs and outputs
1330 # Normalisation, Rounding Corrections, Pack - in a chain
1331 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1332 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1333 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1334 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1335 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1337 self
.out_z
= pmod
.ospec()
1339 m
.d
.sync
+= self
.out_z
.mid
.eq(pmod
.o
.mid
)
1340 m
.d
.sync
+= self
.out_z
.z
.v
.eq(pmod
.o
.z
.v
) # outputs packed result
1342 def process(self
, i
):
def action(self, m):
    """ FSM transition: always proceed to the "pack_put_z" state
    """
    m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ data record: number z, bypass flag/value (out_do_z, oz) and id

        * width: bit-width of z and of the oz value
        * id_wid: bit-width of mid
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1358 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
def __init__(self, width, id_wid):
    """ rounding module: creates its input (i) and output (out_z) records

        * width, id_wid: stored, then used by ispec()/ospec()
    """
    # must be set before ispec()/ospec() run: both read self.width
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1371 return FPNorm1Data(self
.width
, self
.id_wid
)
1374 return FPRoundData(self
.width
, self
.id_wid
)
1376 def process(self
, i
):
1379 def setup(self
, m
, i
):
1380 m
.submodules
.roundz
= self
1381 m
.d
.comb
+= self
.i
.eq(i
)
def elaborate(self, platform):
    """ rounds the mantissa up when roundz is set, unless out_do_z
        asks for the input to be passed through untouched.
    """
    m = Module()
    m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
    with m.If(~self.i.out_do_z):
        with m.If(self.i.roundz):
            m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1)  # mantissa up
            # kept under the roundz branch: the exponent bump pairs
            # with the "+ 1" above rolling an all-ones mantissa over
            with m.If(self.i.z.m == self.i.z.m1s):  # all 1s
                m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1)  # exponent up

    return m
class FPRound(FPState):
    """ "round" state: registers the output of a FPRoundMod, then moves
        the FSM on to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod's constructor takes (width, id_wid)
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z; it has no "o" attribute
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1421 class FPCorrectionsMod
:
def __init__(self, width, id_wid):
    """ corrections module: creates its input (i) and output (out_z)

        * width, id_wid: stored, then used by ispec()/ospec()
    """
    # must be set before ispec()/ospec() run: both read self.width
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1430 return FPRoundData(self
.width
, self
.id_wid
)
1433 return FPRoundData(self
.width
, self
.id_wid
)
1435 def process(self
, i
):
1438 def setup(self
, m
, i
):
1439 """ links module to inputs and outputs
1441 m
.submodules
.corrections
= self
1442 m
.d
.comb
+= self
.i
.eq(i
)
def elaborate(self, platform):
    """ passes the input through, replacing the exponent with N127 when
        the number is flagged denormalised (and out_do_z is clear).
    """
    m = Module()
    m.submodules.corr_in_z = self.i.z
    m.submodules.corr_out_z = self.out_z.z
    m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
    with m.If(~self.i.out_do_z):
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)

    return m
1455 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """ builds the "corrections" state around a FPCorrectionsMod

        * width, id_wid: passed on to the module
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod.__init__ takes (width, id_wid)
    self.mod = FPCorrectionsMod(width, id_wid)
    self.out_z = self.ospec()
1463 return self
.mod
.ispec()
1466 return self
.mod
.ospec()
1468 def setup(self
, m
, in_z
):
1469 """ links module to inputs and outputs
1471 self
.mod
.setup(m
, in_z
)
1473 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1474 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1476 def action(self
, m
):
def __init__(self, width, id_wid):
    """ data record: output number z plus id

        * width: bit-width handed to FPNumOut
        * id_wid: bit-width of mid
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1487 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ pack module: creates its input (i) and output (o) records

        * width, id_wid: stored, then used by ispec()/ospec()
    """
    # must be set before ispec()/ospec() run: both read self.width
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1499 return FPRoundData(self
.width
, self
.id_wid
)
1502 return FPPackData(self
.width
, self
.id_wid
)
1504 def process(self
, i
):
1507 def setup(self
, m
, in_z
):
1508 """ links module to inputs and outputs
1510 m
.submodules
.pack
= self
1511 m
.d
.comb
+= self
.i
.eq(in_z
)
def elaborate(self, platform):
    """ produces the output value: infinity on overflow, otherwise a
        number built from the input's sign/exponent/mantissa; when
        out_do_z is set the supplied oz value is output instead.
    """
    m = Module()
    m.submodules.pack_in_z = self.i.z
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(~self.i.out_do_z):
        with m.If(self.i.z.is_overflowed):
            m.d.comb += self.o.z.inf(self.i.z.s)
        with m.Else():
            m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    with m.Else():
        m.d.comb += self.o.z.v.eq(self.i.oz)

    return m
class FPPack(FPState):
    """ "pack" state: registers the output of a FPPackMod, then moves
        the FSM on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod's constructor takes (width, id_wid)
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod's result lives in "o" (a FPPackData: z and mid), and
        # the packed value is z.v - same access FPNormToPack uses
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
1552 class FPPutZ(FPState
):
def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
    """ output state: presents in_z on out_z

        * state: name of this FSM state
        * in_z, out_z: source and destination, read by action()
        * in_mid, out_mid: id source and destination (in_mid may be None)
        * to_state: state to move to afterwards, "get_ops" when None
    """
    FPState.__init__(self, state)
    if to_state is None:
        to_state = "get_ops"
    self.to_state = to_state
    # action() reads both of these back from the instance
    self.in_z = in_z
    self.out_z = out_z
    self.in_mid = in_mid
    self.out_mid = out_mid
1564 def action(self
, m
):
1565 if self
.in_mid
is not None:
1566 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1568 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1570 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1571 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1572 m
.next
= self
.to_state
1574 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1577 class FPPutZIdx(FPState
):
def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
    """ output state: presents in_z on the out_zs entry picked by in_mid

        * state: name of this FSM state
        * in_z: source value, read by action()
        * out_zs: indexable collection of outputs
        * in_mid: index into out_zs
        * to_state: state to move to afterwards, "get_ops" when None
    """
    FPState.__init__(self, state)
    if to_state is None:
        to_state = "get_ops"
    self.to_state = to_state
    self.in_z = in_z  # action() reads self.in_z.v
    self.out_zs = out_zs
    self.in_mid = in_mid
1588 def action(self
, m
):
1589 outz_stb
= Signal(reset_less
=True)
1590 outz_ack
= Signal(reset_less
=True)
1591 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1592 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1595 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1597 with m
.If(outz_stb
& outz_ack
):
1598 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1599 m
.next
= self
.to_state
1601 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
class FPADDBaseData:
    """ input record: the two operands a and b, plus the id (mid)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this record
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ data record: an FPOp (z) plus id

        * width: bit-width handed to FPOp
        * id_wid: bit-width of mid
    """
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)
1622 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ FP add state-machine module

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    # width/id_wid must be set before ispec()/ospec() run: both read them
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    self.states = []  # appended to by add_state()
1647 return FPADDBaseData(self
.width
, self
.id_wid
)
1650 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ records state in self.states and hands it back, so that callers
        can write "x = self.add_state(...)" and then use x directly.
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        Builds the stage list (compact or longer variant, chosen by
        self.compact) and then emits one FSM state per registered stage.
    """
    m = Module()
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                # NOTE(review): loop body was absent from the extracted
                # text; action(m) is the per-state hook every FPState
                # subclass here defines - confirm against the repository
                state.action(m)

    return m
1675 def get_longer_fragment(self
, m
, platform
=None):
1677 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1679 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1683 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1684 sc
.setup(m
, a
, b
, self
.in_mid
)
1686 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1687 dn
.setup(m
, a
, b
, sc
.in_mid
)
1689 if self
.single_cycle
:
1690 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1691 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1693 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1694 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1696 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1697 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1699 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1700 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1702 if self
.single_cycle
:
1703 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1704 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1706 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1707 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1709 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1710 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1712 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1713 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1715 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1716 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1718 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1719 pa
.in_mid
, self
.out_mid
))
1721 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1722 pa
.in_mid
, self
.out_mid
))
1724 def get_compact_fragment(self
, m
, platform
=None):
1726 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1727 self
.width
, self
.id_wid
))
1728 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1730 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1733 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1736 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1737 n1
.setup(m
, alm
.a1o
)
1739 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1740 n1
.out_z
.mid
, self
.o
.mid
))
1742 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1743 # sc.o.mid, self.o.mid))
1746 class FPADDBase(FPState
):
def __init__(self, width, id_wid=None, single_cycle=False):
    """ "fpadd" state wrapping a FPADDBaseMod

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPState.__init__(self, "fpadd")
    self.width = width
    self.single_cycle = single_cycle
    self.mod = FPADDBaseMod(width, id_wid, single_cycle)
    self.o = self.ospec()

    self.in_t = Trigger()
    self.i = self.ispec()

    self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
    self.in_accept = Signal(reset_less=True)
    self.add_stb = Signal(reset_less=True)
    self.add_ack = Signal(reset=0, reset_less=True)
1770 return self
.mod
.ispec()
1773 return self
.mod
.ospec()
def setup(self, m, i, add_stb, in_mid):
    """ links the wrapped FPADDBaseMod to this state's ports

        * i: input data, copied into self.i and on into the module
        * add_stb: incoming strobe, registered into self.add_stb
        * in_mid: accepted but not read here
    """
    m.d.comb += [self.i.eq(i),
                 self.mod.i.eq(self.i),
                 self.z_done.eq(self.mod.o.z.trigger),
                 #self.add_stb.eq(add_stb),
                 self.mod.in_t.stb.eq(self.in_t.stb),
                 self.in_t.ack.eq(self.mod.in_t.ack),
                 self.o.mid.eq(self.mod.o.mid),
                 self.o.z.v.eq(self.mod.o.z.v),
                 self.o.z.stb.eq(self.mod.o.z.stb),
                 self.mod.o.z.ack.eq(self.o.z.ack),
                ]

    m.d.sync += self.add_stb.eq(add_stb)
    m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
    m.d.sync += self.o.z.ack.eq(0)  # likewise
    #m.d.sync += self.in_t.stb.eq(0)

    m.submodules.fpadd = self.mod
1795 def action(self
, m
):
1797 # in_accept is set on incoming strobe HIGH and ack LOW.
1798 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1800 #with m.If(self.in_t.ack):
1801 # m.d.sync += self.in_t.stb.eq(0)
1802 with m
.If(~self
.z_done
):
1803 # not done: test for accepting an incoming operand pair
1804 with m
.If(self
.in_accept
):
1806 self
.add_ack
.eq(1), # acknowledge receipt...
1807 self
.in_t
.stb
.eq(1), # initiate add
1810 m
.d
.sync
+= [self
.add_ack
.eq(0),
1811 self
.in_t
.stb
.eq(0),
1815 # done: acknowledge, and write out id and value
1816 m
.d
.sync
+= [self
.add_ack
.eq(1),
1823 if self
.in_mid
is not None:
1824 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1827 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1829 # move to output state on detecting z ack
1830 with m
.If(self
.out_z
.trigger
):
1831 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1834 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
def __init__(self, width, id_wid):
    """ stage input record: operands a and b, plus id

        * width: bit-width of a and b
        * id_wid: bit-width of mid
    """
    self.a = Signal(width)
    self.b = Signal(width)
    self.mid = Signal(id_wid, reset_less=True)
1843 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
class FPADDStageOut:
    """ stage output record: result z plus id
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this record
        """
        return [self.z.eq(i.z), self.mid.eq(i.mid)]
1855 # matches the format of FPADDStageOut, allows eq function to do assignments
class PlaceHolder:
    """ empty attribute container: callers attach whatever fields they
        need (e.g. z and mid) after construction.
    """
    pass
1859 class FPAddBaseStage
:
1860 def __init__(self
, width
, id_wid
):
1862 self
.id_wid
= id_wid
1865 return FPADDStageIn(self
.width
, self
.id_wid
)
1868 return FPADDStageOut(self
.width
, self
.id_wid
)
1870 def process(self
, i
):
1877 class FPADDBasePipe(ControlBase
):
1878 def __init__(self
, width
, id_wid
):
1879 ControlBase
.__init
__(self
)
1881 def elaborate(self
, platform
):
1883 stage1
= FPAddBaseStage(width
, id_wid
)
1884 m
.d
.comb
+= self
.connect([stage1
])
1889 def __init__(self
, width
, id_wid
):
1891 self
.id_wid
= id_wid
1893 for i
in range(rs_sz
):
1895 out_z
.name
= "out_z_%d" % i
1897 self
.res
= Array(res
)
1898 self
.in_z
= FPOp(width
)
1899 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1901 def setup(self
, m
, in_z
, in_mid
):
1902 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1903 self
.in_mid
.eq(in_mid
)]
1905 def get_fragment(self
, platform
=None):
1906 """ creates the HDL code-fragment for FPAdd
1909 m
.submodules
.res_in_z
= self
.in_z
1910 m
.submodules
+= self
.res
1922 """ FPADD: stages as follows:
1928 FPAddBase---> FPAddBaseMod
1930 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1932 FPAddBase is tricky: it is both a stage and *has* stages.
1933 Connection to FPAddBaseMod therefore requires an in stb/ack
1934 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1935 needs to be the thing that raises the incoming stb.
1938 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1941 * width: bit-width of IEEE754. supported: 16, 32, 64
1942 * id_wid: an identifier that is sync-connected to the input
1943 * single_cycle: True indicates each stage to complete in 1 clock
1946 self
.id_wid
= id_wid
1947 self
.single_cycle
= single_cycle
1949 #self.out_z = FPOp(width)
1950 self
.ids
= FPID(id_wid
)
1953 for i
in range(rs_sz
):
1956 in_a
.name
= "in_a_%d" % i
1957 in_b
.name
= "in_b_%d" % i
1958 rs
.append((in_a
, in_b
))
1962 for i
in range(rs_sz
):
1964 out_z
.name
= "out_z_%d" % i
1966 self
.res
= Array(res
)
def add_state(self, state):
    """ records state in self.states and hands it back, so that callers
        can write "x = self.add_state(...)" and then use x directly.
    """
    self.states.append(state)
    return state
1974 def get_fragment(self
, platform
=None):
1975 """ creates the HDL code-fragment for FPAdd
1978 m
.submodules
+= self
.rs
1980 in_a
= self
.rs
[0][0]
1981 in_b
= self
.rs
[0][1]
1983 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1988 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1993 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1994 ab
= self
.add_state(ab
)
1995 abd
= ab
.ispec() # create an input spec object for FPADDBase
1996 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1997 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
2000 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
2003 with m
.FSM() as fsm
:
2005 for state
in self
.states
:
2006 with m
.State(state
.state_from
):
2012 if __name__
== "__main__":
2014 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
2015 main(alu
, ports
=alu
.rs
[0][0].ports() + \
2016 alu
.rs
[0][1].ports() + \
2017 alu
.res
[0].ports() + \
2018 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
2020 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
2021 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
2022 alu
.in_t
.ports() + \
2023 alu
.out_z
.ports() + \
2024 [alu
.in_mid
, alu
.out_mid
])
2027 # works... but don't use, just do "python fname.py convert -t v"
2028 #print (verilog.convert(alu, ports=[
2029 # ports=alu.in_a.ports() + \
2030 # alu.in_b.ports() + \
2031 # alu.out_z.ports())