1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
, UnbufferedPipeline
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """ store the identifier this state object is known by

        state_from: kept verbatim on the instance.  Subclasses in this
        file pass strings such as "align" or "add_0"; presumably this is
        the FSM state name -- confirm against the code that builds the FSM.
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
140 def __init__(self
, width
):
141 self
.in_op
= FPOp(width
)
142 self
.out_op
= Signal(width
)
143 self
.out_decode
= Signal(reset_less
=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ build the two-operand fetch state

        in_state:  forwarded to FPState.__init__ (stored as state_from)
        out_state: value assigned to m.next by setup() once out_decode
                   is asserted
        width, id_wid: forwarded unchanged to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # output record has the same layout as the module's own output
    self.o = self.mod.ospec()
    # single-bit handshake/decode signals; setup() drives out_ack and
    # out_decode combinatorially from self.mod
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ create a record of two FP operands plus an id tag

        width:   passed as first argument to FPNumBase for both operands
        id_wid:  bit-width of the mid signal
        m_extra: passed as second argument to FPNumBase
                 (NOTE(review): meaning defined in fpbase -- presumably
                 "extra mantissa bits"; confirm there)
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # id tag carried alongside the operands (another class in this file
    # labels its own mid signal "multiplex id")
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ create the special-cases data record: operands a and b, a
        candidate result z, and the early-result bookkeeping signals

        width:  operand width, also the width of the oz signal
        id_wid: bit-width of the mid signal
    """
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    self.z = FPNumOut(width, False)
    # oz: width-bit copy of a result value (the special-cases module
    # in this file drives it from z.v)
    self.oz = Signal(width, reset_less=True)
    # out_do_z: set to 1 by the special-cases module when a special case
    # matched; later stages gate their work on ~out_do_z
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
274 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
275 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
278 class FPAddSpecialCasesMod
:
279 """ special cases: NaNs, infs, zeros, denormalised
280 NOTE: some of these are unique to add. see "Special Operations"
281 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
284 def __init__(self
, width
, id_wid
):
287 self
.i
= self
.ispec()
288 self
.o
= self
.ospec()
291 return FPNumBase2Ops(self
.width
, self
.id_wid
)
294 return FPSCData(self
.width
, self
.id_wid
)
296 def setup(self
, m
, i
):
297 """ links module to inputs and outputs
299 m
.submodules
.specialcases
= self
300 m
.d
.comb
+= self
.i
.eq(i
)
302 def elaborate(self
, platform
):
305 m
.submodules
.sc_in_a
= self
.i
.a
306 m
.submodules
.sc_in_b
= self
.i
.b
307 m
.submodules
.sc_out_z
= self
.o
.z
310 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
313 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
315 # if a is NaN or b is NaN return NaN
316 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
317 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
318 m
.d
.comb
+= self
.o
.z
.nan(0)
320 # XXX WEIRDNESS for FP16 non-canonical NaN handling
323 ## if a is zero and b is NaN return -b
324 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
325 # m.d.comb += self.o.out_do_z.eq(1)
326 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
328 ## if b is zero and a is NaN return -a
329 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
330 # m.d.comb += self.o.out_do_z.eq(1)
331 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
333 ## if a is -zero and b is NaN return -b
334 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
335 # m.d.comb += self.o.out_do_z.eq(1)
336 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
338 ## if b is -zero and a is NaN return -a
339 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
340 # m.d.comb += self.o.out_do_z.eq(1)
341 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
343 # if a is inf return inf (or NaN)
344 with m
.Elif(self
.i
.a
.is_inf
):
345 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
347 # if a is inf and signs don't match return NaN
348 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
349 m
.d
.comb
+= self
.o
.z
.nan(0)
351 # if b is inf return inf
352 with m
.Elif(self
.i
.b
.is_inf
):
353 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
354 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
356 # if a is zero and b zero return signed-a/b
357 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
363 # if a is zero return b
364 with m
.Elif(self
.i
.a
.is_zero
):
365 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
366 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
369 # if b is zero return a
370 with m
.Elif(self
.i
.b
.is_zero
):
371 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
372 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
375 # if a equal to -b return zero (+ve zero)
376 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
377 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
378 m
.d
.comb
+= self
.o
.z
.zero(0)
380 # Denormalised Number checks next, so pass a/b data through
382 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
383 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
384 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
386 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
387 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
393 def __init__(self
, id_wid
):
396 self
.in_mid
= Signal(id_wid
, reset_less
=True)
397 self
.out_mid
= Signal(id_wid
, reset_less
=True)
403 if self
.id_wid
is not None:
404 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
407 class FPAddSpecialCases(FPState
):
408 """ special cases: NaNs, infs, zeros, denormalised
409 NOTE: some of these are unique to add. see "Special Operations"
410 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ build the "special_cases" state and its combinatorial module

        width:  operand width, forwarded to FPAddSpecialCasesMod
        id_wid: id-tag width, forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid) with no default
    # for id_wid; passing width alone raised TypeError at construction.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # registered copy of the module's output record
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
419 def setup(self
, m
, i
):
420 """ links module to inputs and outputs
422 self
.mod
.setup(m
, i
, self
.out_do_z
)
423 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
424 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
428 with m
.If(self
.out_do_z
):
431 m
.next
= "denormalise"
434 class FPAddSpecialCasesDeNorm(FPState
):
435 """ special cases: NaNs, infs, zeros, denormalised
436 NOTE: some of these are unique to add. see "Special Operations"
437 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ build the combined special-cases + denormalise state

        width, id_wid: forwarded unchanged to both sub-modules
    """
    FPState.__init__(self, "special_cases")
    # smod feeds dmod: setup() connects smod.o to dmod's input
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.dmod = FPAddDeNormMod(width, id_wid)
    # registered output record; setup() assigns dmod.o to it on sync
    self.o = self.ospec()
447 return self
.smod
.ispec()
450 return self
.dmod
.ospec()
452 def setup(self
, m
, i
):
453 """ links module to inputs and outputs
455 # these only needed for break-out (early-out)
456 # out_z = self.smod.ospec()
457 # out_do_z = Signal(reset_less=True)
458 self
.smod
.setup(m
, i
)
459 self
.dmod
.setup(m
, self
.smod
.o
)
460 #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
462 # out_do_z=True, only needed for early-out (split pipeline)
463 #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
464 #m.d.sync += out_z.mid.eq(self.smod.o.mid) # (and mid)
467 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
469 def process(self
, i
):
473 #with m.If(self.out_do_z):
479 class FPAddDeNormMod(FPState
):
481 def __init__(self
, width
, id_wid
):
484 self
.i
= self
.ispec()
485 self
.o
= self
.ospec()
488 return FPSCData(self
.width
, self
.id_wid
)
491 return FPSCData(self
.width
, self
.id_wid
)
493 def setup(self
, m
, i
):
494 """ links module to inputs and outputs
496 m
.submodules
.denormalise
= self
497 m
.d
.comb
+= self
.i
.eq(i
)
499 def elaborate(self
, platform
):
501 m
.submodules
.denorm_in_a
= self
.i
.a
502 m
.submodules
.denorm_in_b
= self
.i
.b
503 m
.submodules
.denorm_out_a
= self
.o
.a
504 m
.submodules
.denorm_out_b
= self
.o
.b
506 with m
.If(~self
.i
.out_do_z
):
507 # XXX hmmm, don't like repeating identical code
508 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
509 with m
.If(self
.i
.a
.exp_n127
):
510 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
512 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
514 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
515 with m
.If(self
.i
.b
.exp_n127
):
516 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
518 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
520 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
521 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
522 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
523 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
528 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ build the "denormalise" state and its combinatorial module

        width:  operand width, forwarded to FPAddDeNormMod and FPNumBase
        id_wid: id-tag width, forwarded to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raised TypeError at construction.
    self.mod = FPAddDeNormMod(width, id_wid)
    # registered copies of the (denormalised) operands
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
536 def setup(self
, m
, i
):
537 """ links module to inputs and outputs
541 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
542 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
545 # Denormalised Number checks
549 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ create the operand ports for the multi-cycle align module

        width: passed to FPNumBase / FPNumIn for every operand port

        NOTE(review): FPState.__init__ is not called here although the
        class derives from FPState -- confirm that is intentional.
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # elaborate() defaults this to 0 and sets it to 1 in the
    # "exponents equal" case
    self.exp_eq = Signal(reset_less=True)
558 def elaborate(self
, platform
):
559 # This one however (single-cycle) will do the shift
564 m
.submodules
.align_in_a
= self
.in_a
565 m
.submodules
.align_in_b
= self
.in_b
566 m
.submodules
.align_out_a
= self
.out_a
567 m
.submodules
.align_out_b
= self
.out_b
569 # NOTE: this does *not* do single-cycle multi-shifting,
570 # it *STAYS* in the align state until exponents match
572 # exponent of a greater than b: shift b down
573 m
.d
.comb
+= self
.exp_eq
.eq(0)
574 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
575 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
576 agtb
= Signal(reset_less
=True)
577 altb
= Signal(reset_less
=True)
578 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
579 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
581 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
582 # exponent of b greater than a: shift a down
584 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
585 # exponents equal: move to next stage.
587 m
.d
.comb
+= self
.exp_eq
.eq(1)
591 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ build the multi-cycle "align" state

        width:  forwarded to FPAddAlignMultiMod and to the FPNumIn outputs
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # registered copies of the module outputs (assigned on sync in setup)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # combinatorial mirror of self.mod.exp_eq (wired in setup)
    self.exp_eq = Signal(reset_less=True)
600 def setup(self
, m
, in_a
, in_b
):
601 """ links module to inputs and outputs
603 m
.submodules
.align
= self
.mod
604 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
605 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
606 #m.d.comb += self.out_a.eq(self.mod.out_a)
607 #m.d.comb += self.out_b.eq(self.mod.out_b)
608 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
609 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
610 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
613 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ create a record of two FPNumIn operands, a candidate result z
        and the early-result bookkeeping signals

        width:  operand width, also the width of the oz signal
        id_wid: bit-width of the mid signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.z = FPNumOut(width, False)
    # single-bit flag passed through unchanged by the align module
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
628 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
629 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
632 class FPAddAlignSingleMod
:
634 def __init__(self
, width
, id_wid
):
637 self
.i
= self
.ispec()
638 self
.o
= self
.ospec()
641 return FPSCData(self
.width
, self
.id_wid
)
644 return FPNumIn2Ops(self
.width
, self
.id_wid
)
646 def process(self
, i
):
649 def setup(self
, m
, i
):
650 """ links module to inputs and outputs
652 m
.submodules
.align
= self
653 m
.d
.comb
+= self
.i
.eq(i
)
655 def elaborate(self
, platform
):
656 """ Aligns A against B or B against A, depending on which has the
657 greater exponent. This is done in a *single* cycle using
658 variable-width bit-shift
660 the shifter used here is quite expensive in terms of gates.
661 Mux A or B in (and out) into temporaries, as only one of them
662 needs to be aligned against the other
666 m
.submodules
.align_in_a
= self
.i
.a
667 m
.submodules
.align_in_b
= self
.i
.b
668 m
.submodules
.align_out_a
= self
.o
.a
669 m
.submodules
.align_out_b
= self
.o
.b
671 # temporary (muxed) input and output to be shifted
672 t_inp
= FPNumBase(self
.width
)
673 t_out
= FPNumIn(None, self
.width
)
674 espec
= (len(self
.i
.a
.e
), True)
675 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
676 m
.submodules
.align_t_in
= t_inp
677 m
.submodules
.align_t_out
= t_out
678 m
.submodules
.multishift_r
= msr
680 ediff
= Signal(espec
, reset_less
=True)
681 ediffr
= Signal(espec
, reset_less
=True)
682 tdiff
= Signal(espec
, reset_less
=True)
683 elz
= Signal(reset_less
=True)
684 egz
= Signal(reset_less
=True)
686 # connect multi-shifter to t_inp/out mantissa (and tdiff)
687 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
688 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
689 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
690 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
691 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
693 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
694 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
695 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
696 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
698 # default: A-exp == B-exp, A and B untouched (fall through)
699 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
700 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
701 # only one shifter (muxed)
702 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
703 # exponent of a greater than b: shift b down
704 with m
.If(~self
.i
.out_do_z
):
706 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
709 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
711 # exponent of b greater than a: shift a down
713 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
716 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
719 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
720 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
721 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
722 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
727 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ build the single-cycle "align" state

        width, id_wid: forwarded to FPAddAlignSingleMod; width is also
        used for the two FPNumIn output registers
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # registered copies of the aligned operands (assigned on sync in setup)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
735 def setup(self
, m
, i
):
736 """ links module to inputs and outputs
740 # NOTE: could be done as comb
741 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
742 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
748 class FPAddAlignSingleAdd(FPState
):
750 def __init__(self
, width
, id_wid
):
751 FPState
.__init
__(self
, "align")
754 self
.a1o
= self
.ospec()
757 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
760 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
762 def setup(self
, m
, i
):
763 """ links module to inputs and outputs
766 # chain AddAlignSingle, AddStage0 and AddStage1
767 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
768 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
769 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
771 chain
= StageChain([mod
, a0mod
, a1mod
])
774 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
776 def process(self
, i
):
780 m
.next
= "normalise_1"
783 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ create the add-stage-0 output record

        width:  operand width, also the width of the oz signal
        id_wid: bit-width of the mid signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # mantissa sum/difference: four bits wider than z's mantissa.  The
    # following stage tests tot[-1] (MSB) to detect overflow of the sum.
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
793 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
794 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
797 class FPAddStage0Mod
:
799 def __init__(self
, width
, id_wid
):
802 self
.i
= self
.ispec()
803 self
.o
= self
.ospec()
806 return FPSCData(self
.width
, self
.id_wid
)
809 return FPAddStage0Data(self
.width
, self
.id_wid
)
811 def process(self
, i
):
814 def setup(self
, m
, i
):
815 """ links module to inputs and outputs
817 m
.submodules
.add0
= self
818 m
.d
.comb
+= self
.i
.eq(i
)
820 def elaborate(self
, platform
):
822 m
.submodules
.add0_in_a
= self
.i
.a
823 m
.submodules
.add0_in_b
= self
.i
.b
824 m
.submodules
.add0_out_z
= self
.o
.z
826 # store intermediate tests (and zero-extended mantissas)
827 seq
= Signal(reset_less
=True)
828 mge
= Signal(reset_less
=True)
829 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
830 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
831 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
832 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
833 am0
.eq(Cat(self
.i
.a
.m
, 0)),
834 bm0
.eq(Cat(self
.i
.b
.m
, 0))
836 # same-sign (both negative or both positive) add mantissas
837 with m
.If(~self
.i
.out_do_z
):
838 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
841 self
.o
.tot
.eq(am0
+ bm0
),
842 self
.o
.z
.s
.eq(self
.i
.a
.s
)
844 # a mantissa greater than b, use a
847 self
.o
.tot
.eq(am0
- bm0
),
848 self
.o
.z
.s
.eq(self
.i
.a
.s
)
850 # b mantissa greater than a, use b
853 self
.o
.tot
.eq(bm0
- am0
),
854 self
.o
.z
.s
.eq(self
.i
.b
.s
)
857 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
858 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
859 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
863 class FPAddStage0(FPState
):
864 """ First stage of add. covers same-sign (add) and subtract
865 special-casing when mantissas are greater or equal, to
866 give greatest accuracy.
def __init__(self, width, id_wid):
    """ build the "add_0" state and its combinatorial module

        width:  operand width, forwarded to FPAddStage0Mod
        id_wid: id-tag width, forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raised TypeError at construction.
    self.mod = FPAddStage0Mod(width, id_wid)
    # registered copy of the module's output record
    self.o = self.mod.ospec()
874 def setup(self
, m
, i
):
875 """ links module to inputs and outputs
879 # NOTE: these could be done as combinatorial (merge add0+add1)
880 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
886 class FPAddStage1Data
:
888 def __init__(self
, width
, id_wid
):
889 self
.z
= FPNumBase(width
, False)
890 self
.out_do_z
= Signal(reset_less
=True)
891 self
.oz
= Signal(width
, reset_less
=True)
893 self
.mid
= Signal(id_wid
, reset_less
=True)
896 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
897 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
901 class FPAddStage1Mod(FPState
):
902 """ Second stage of add: preparation for normalisation.
903 detects when tot sum is too big (tot[27] is kinda a carry bit)
906 def __init__(self
, width
, id_wid
):
909 self
.i
= self
.ispec()
910 self
.o
= self
.ospec()
913 return FPAddStage0Data(self
.width
, self
.id_wid
)
916 return FPAddStage1Data(self
.width
, self
.id_wid
)
918 def process(self
, i
):
921 def setup(self
, m
, i
):
922 """ links module to inputs and outputs
924 m
.submodules
.add1
= self
925 m
.submodules
.add1_out_overflow
= self
.o
.of
927 m
.d
.comb
+= self
.i
.eq(i
)
929 def elaborate(self
, platform
):
931 #m.submodules.norm1_in_overflow = self.in_of
932 #m.submodules.norm1_out_overflow = self.out_of
933 #m.submodules.norm1_in_z = self.in_z
934 #m.submodules.norm1_out_z = self.out_z
935 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
936 # tot[-1] (MSB) gets set when the sum overflows. shift result down
937 with m
.If(~self
.i
.out_do_z
):
938 with m
.If(self
.i
.tot
[-1]):
940 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
941 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
942 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
943 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
944 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
945 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
947 # tot[-1] (MSB) zero case
950 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
951 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
952 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
953 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
954 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
957 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
958 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
959 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
964 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ build the "add_1" state and its combinatorial module

        width:  operand width, forwarded to FPAddStage1Mod and FPNumBase
        id_wid: id-tag width, forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid) with no default for
    # id_wid; passing width alone raised TypeError at construction.
    self.mod = FPAddStage1Mod(width, id_wid)
    # registered result and overflow bits
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe: setup() clears it by default and sets it to 1 when the
    # stage result is latched
    self.norm_stb = Signal()
973 def setup(self
, m
, i
):
974 """ links module to inputs and outputs
978 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
980 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
981 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
982 m
.d
.sync
+= self
.norm_stb
.eq(1)
985 m
.next
= "normalise_1"
988 class FPNormaliseModSingle
:
990 def __init__(self
, width
):
992 self
.in_z
= self
.ispec()
993 self
.out_z
= self
.ospec()
996 return FPNumBase(self
.width
, False)
999 return FPNumBase(self
.width
, False)
1001 def setup(self
, m
, i
):
1002 """ links module to inputs and outputs
1004 m
.submodules
.normalise
= self
1005 m
.d
.comb
+= self
.i
.eq(i
)
1007 def elaborate(self
, platform
):
1010 mwid
= self
.out_z
.m_width
+2
1011 pe
= PriorityEncoder(mwid
)
1012 m
.submodules
.norm_pe
= pe
1014 m
.submodules
.norm1_out_z
= self
.out_z
1015 m
.submodules
.norm1_in_z
= self
.in_z
1017 in_z
= FPNumBase(self
.width
, False)
1019 m
.submodules
.norm1_insel_z
= in_z
1020 m
.submodules
.norm1_insel_overflow
= in_of
1022 espec
= (len(in_z
.e
), True)
1023 ediff_n126
= Signal(espec
, reset_less
=True)
1024 msr
= MultiShiftRMerge(mwid
, espec
)
1025 m
.submodules
.multishift_r
= msr
1027 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1028 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1029 # initialise out from in (overridden below)
1030 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1031 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1032 # normalisation decrease condition
1033 decrease
= Signal(reset_less
=True)
1034 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1036 with m
.If(decrease
):
1037 # *sigh* not entirely obvious: count leading zeros (clz)
1038 # with a PriorityEncoder: to find from the MSB
1039 # we reverse the order of the bits.
1040 temp_m
= Signal(mwid
, reset_less
=True)
1041 temp_s
= Signal(mwid
+1, reset_less
=True)
1042 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1044 # cat round and guard bits back into the mantissa
1045 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1046 pe
.i
.eq(temp_m
[::-1]), # inverted
1047 clz
.eq(pe
.o
), # count zeros from MSB down
1048 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1049 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1050 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ create the normalisation output record

        width:  operand width, also the width of the oz signal
        id_wid: bit-width of the mid signal
    """
    # single-bit rounding flag; the normalise module drives it from the
    # overflow object's roundz
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1065 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1066 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1069 class FPNorm1ModSingle
:
1071 def __init__(self
, width
, id_wid
):
1073 self
.id_wid
= id_wid
1074 self
.i
= self
.ispec()
1075 self
.o
= self
.ospec()
1078 return FPAddStage1Data(self
.width
, self
.id_wid
)
1081 return FPNorm1Data(self
.width
, self
.id_wid
)
1083 def setup(self
, m
, i
):
1084 """ links module to inputs and outputs
1086 m
.submodules
.normalise_1
= self
1087 m
.d
.comb
+= self
.i
.eq(i
)
1089 def process(self
, i
):
1092 def elaborate(self
, platform
):
1095 mwid
= self
.o
.z
.m_width
+2
1096 pe
= PriorityEncoder(mwid
)
1097 m
.submodules
.norm_pe
= pe
1100 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1102 m
.submodules
.norm1_out_z
= self
.o
.z
1103 m
.submodules
.norm1_out_overflow
= of
1104 m
.submodules
.norm1_in_z
= self
.i
.z
1105 m
.submodules
.norm1_in_overflow
= self
.i
.of
1108 m
.submodules
.norm1_insel_z
= i
.z
1109 m
.submodules
.norm1_insel_overflow
= i
.of
1111 espec
= (len(i
.z
.e
), True)
1112 ediff_n126
= Signal(espec
, reset_less
=True)
1113 msr
= MultiShiftRMerge(mwid
, espec
)
1114 m
.submodules
.multishift_r
= msr
1116 m
.d
.comb
+= i
.eq(self
.i
)
1117 # initialise out from in (overridden below)
1118 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1119 m
.d
.comb
+= of
.eq(i
.of
)
1120 # normalisation increase/decrease conditions
1121 decrease
= Signal(reset_less
=True)
1122 increase
= Signal(reset_less
=True)
1123 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1124 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1126 with m
.If(~self
.i
.out_do_z
):
1127 with m
.If(decrease
):
1128 # *sigh* not entirely obvious: count leading zeros (clz)
1129 # with a PriorityEncoder: to find from the MSB
1130 # we reverse the order of the bits.
1131 temp_m
= Signal(mwid
, reset_less
=True)
1132 temp_s
= Signal(mwid
+1, reset_less
=True)
1133 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1134 # make sure that the amount to decrease by does NOT
1135 # go below the minimum non-INF/NaN exponent
1136 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1139 # cat round and guard bits back into the mantissa
1140 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1141 pe
.i
.eq(temp_m
[::-1]), # inverted
1142 clz
.eq(limclz
), # count zeros from MSB down
1143 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1144 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1145 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1146 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1147 # overflow in bits 0..1: got shifted too (leave sticky)
1148 of
.guard
.eq(temp_s
[1]), # guard
1149 of
.round_bit
.eq(temp_s
[0]), # round
1152 with m
.Elif(increase
):
1153 temp_m
= Signal(mwid
+1, reset_less
=True)
1155 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1157 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1158 # connect multi-shifter to inp/out mantissa (and ediff)
1160 msr
.diff
.eq(ediff_n126
),
1161 self
.o
.z
.m
.eq(msr
.m
[3:]),
1162 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1163 # overflow in bits 0..1: got shifted too (leave sticky)
1164 of
.guard
.eq(temp_s
[2]), # guard
1165 of
.round_bit
.eq(temp_s
[1]), # round
1166 of
.sticky
.eq(temp_s
[0]), # sticky
1167 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1170 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1171 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1172 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
class FPNorm1ModMulti:
    """Single-step normaliser: moves the exponent of z by one per pass.

    The value worked on comes either from ``in_z``/``in_of`` or from the
    ``temp_z``/``temp_of`` pair, chosen by ``in_select``.  ``out_norm`` is
    high whenever this pass had to adjust the number.
    """

    def __init__(self, width, single_cycle=True):
        """Create the input, temporary and output records.

        * width: bit-width handed to FPNumBase
        * single_cycle: accepted but not read anywhere in this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # elaborate() drives out_norm, so it must exist before elaboration;
        # previously it was never created and elaborate() raised
        # AttributeError.
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """Build the combinatorial normalisation step; returns the Module."""
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies that hold whichever source in_select picked
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),        # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),       # shift mantissa UP
                # must follow the whole-mantissa assignment so it wins
                self.out_z.m[0].eq(in_of.guard),    # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),        # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),       # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState):
    """FSM state "normalise_1" wrapping the single-cycle normaliser module."""

    def __init__(self, width, id_wid, single_cycle=True):
        """Create the wrapped module and the output records.

        * width: bit-width passed to the module and to FPNumBase
        * id_wid: identifier width, forwarded to the module
        * single_cycle: accepted but not read in this class
        """
        FPState.__init__(self, "normalise_1")
        # the module is built with (width, id_wid) elsewhere in this file
        # (see FPNormToPack.setup); omitting id_wid here raised TypeError
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """Input spec: whatever the wrapped module declares."""
        return self.mod.ispec()

    def ospec(self):
        """Output spec: whatever the wrapped module declares."""
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """Advance the FSM to the "round" state."""
        m.next = "round"
class FPNorm1Multi(FPState):
    """FSM state "normalise_1" built around FPNorm1ModMulti.

    While ``out_norm`` is high the state keeps feeding the module output
    back through ``temp_z``/``temp_of``; once it drops the FSM moves on
    to "round".
    """

    def __init__(self, width, id_wid):
        super().__init__("normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """Wire the wrapped module to this state's signals.

        NOTE(review): relies on the module offering a ``setup`` method
        taking these nine arguments - confirm against FPNorm1ModMulti.
        """
        self.mod.setup(
            m, in_z, in_of, norm_stb,
            self.in_accept, self.temp_z, self.temp_of,
            self.out_z, self.out_norm,
        )

        m.d.comb += self.stb.eq(norm_stb)
        # default: ack is zero whenever we are not in normalise_1
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """Per-cycle behaviour while the FSM sits in this state."""
        # accept when strobe is up and we have not acknowledged yet
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # latch this pass's result for the next iteration
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
class FPNormToPack(FPState):
    """FSM state chaining normalise, round, corrections and pack."""

    def __init__(self, width, id_wid):
        super().__init__("normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        # same input spec as Norm1ModSingle
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        # same output spec as FPPackMod
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """Build the four-module chain and register its packed output."""
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_types = (FPNorm1ModSingle, FPRoundMod,
                       FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_types]
        packer = stages[-1]
        chain = StageChain(stages)
        chain.setup(m, i)
        self.out_z = packer.ospec()

        m.d.sync += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.v.eq(packer.o.z.v),  # outputs packed result
        ]

    def process(self, i):
        return self.o

    def action(self, m):
        """Advance the FSM to "pack_put_z"."""
        m.next = "pack_put_z"
class FPRoundData:
    """Record passed between the round/corrections/pack modules."""

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying z, out_do_z, oz and mid from i."""
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
class FPRoundMod:
    """Combinatorial rounding: bumps the mantissa when roundz is set."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """Register as submodule ``roundz`` and connect the input record."""
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """Build the rounding logic; returns the Module."""
        m = Module()
        inp = self.i
        out = self.out_z
        m.d.comb += out.eq(inp)  # copies mid, z, out_do_z
        with m.If(~inp.out_do_z):
            with m.If(inp.roundz):
                m.d.comb += out.z.m.eq(inp.z.m + 1)  # mantissa up
                with m.If(inp.z.m == inp.z.m1s):  # all 1s
                    m.d.comb += out.z.e.eq(inp.z.e + 1)  # exponent up

        return m
1392 class FPRound(FPState
):
1394 def __init__(self
, width
, id_wid
):
1395 FPState
.__init
__(self
, "round")
1396 self
.mod
= FPRoundMod(width
)
1397 self
.out_z
= self
.ospec()
1400 return self
.mod
.ispec()
1403 return self
.mod
.ospec()
1405 def setup(self
, m
, i
):
1406 """ links module to inputs and outputs
1408 self
.mod
.setup(m
, i
)
1411 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1412 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1414 def action(self
, m
):
1415 m
.next
= "corrections"
class FPCorrectionsMod:
    """Combinatorial fix-up: forces the exponent to N127 for denormals."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """Register as submodule ``corrections`` and connect the input."""
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """Build the correction logic; returns the Module."""
        m = Module()
        inp = self.i
        out = self.out_z
        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = out.z
        m.d.comb += out.eq(inp)  # copies mid, z, out_do_z
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                m.d.comb += out.z.e.eq(inp.z.N127)

        return m
class FPCorrections(FPState):
    """FSM state "corrections" wrapping FPCorrectionsMod."""

    def __init__(self, width, id_wid):
        """Create the wrapped module and the registered output record.

        * width: bit-width forwarded to the module
        * id_wid: identifier width forwarded to the module
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires id_wid; it was not passed
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Input spec: whatever the wrapped module declares."""
        return self.mod.ispec()

    def ospec(self):
        """Output spec: whatever the wrapped module declares."""
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the module's output record is ``out_z`` (it has no ``o``)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """Advance the FSM to the "pack" state."""
        m.next = "pack"
class FPPackData:
    """Output record of the pack stage: packed number plus identifier."""

    def __init__(self, width, id_wid):
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying z and mid from i."""
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]
class FPPackMod:
    """Combinatorial pack stage producing the final encoded result."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, in_z):
        """Register as submodule ``pack`` and connect the input record."""
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """Build the packing logic; returns the Module."""
        m = Module()
        src = self.i
        dst = self.o
        m.submodules.pack_in_z = src.z
        m.d.comb += dst.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_overflowed):
                m.d.comb += dst.z.inf(src.z.s)
            with m.Else():
                m.d.comb += dst.z.create(src.z.s, src.z.e, src.z.m)
        with m.Else():
            # out_do_z set: pass the pre-computed oz straight through
            m.d.comb += dst.z.v.eq(src.oz)

        return m
class FPPack(FPState):
    """FSM state "pack" wrapping FPPackMod."""

    def __init__(self, width, id_wid):
        """Create the wrapped module and the registered output record.

        * width: bit-width forwarded to the module
        * id_wid: identifier width forwarded to the module
        """
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ requires id_wid; it was not passed
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Input spec: whatever the wrapped module declares."""
        return self.mod.ispec()

    def ospec(self):
        """Output spec: whatever the wrapped module declares."""
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module's output record is ``o`` and the packed bits live at
        # ``.z.v`` (same access FPNormToPack uses); the old ``out_z.v`` /
        # ``mod.out_z.v`` attributes do not exist
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """Advance the FSM to "pack_put_z"."""
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """FSM state that presents a result on out_z and waits for its ack."""

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        super().__init__(state)
        # fall back to the start of the FSM when no successor is given
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """Copy id and value out, then hand-shake on stb/ack."""
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        port = self.out_z.z
        m.d.sync += port.v.eq(self.in_z.v)
        with m.If(port.stb & port.ack):
            # receiver took the value: drop strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
class FPPutZIdx(FPState):
    """Like FPPutZ, but the output port is picked from out_zs by in_mid."""

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        super().__init__(state)
        # fall back to the start of the FSM when no successor is given
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """Drive the selected output port and hand-shake on stb/ack."""
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        selected = self.out_zs[self.in_mid]
        m.d.comb += [
            outz_stb.eq(selected.stb),
            outz_ack.eq(selected.ack),
        ]
        m.d.sync += selected.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # receiver took the value: drop strobe and move on
            m.d.sync += selected.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += selected.stb.eq(1)
class FPADDBaseData:
    """Input record of the adder: operands a and b plus an identifier."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying a, b and mid from i."""
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("a", "b", "mid")]
class FPOpData:
    """Output record of the adder: an FPOp result plus an identifier."""

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying z and mid from i."""
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]
1624 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1627 * width: bit-width of IEEE754. supported: 16, 32, 64
1628 * id_wid: an identifier that is sync-connected to the input
1629 * single_cycle: True indicates each stage to complete in 1 clock
1630 * compact: True indicates a reduced number of stages
1633 self
.id_wid
= id_wid
1634 self
.single_cycle
= single_cycle
1635 self
.compact
= compact
1637 self
.in_t
= Trigger()
1638 self
.i
= self
.ispec()
1639 self
.o
= self
.ospec()
1644 return FPADDBaseData(self
.width
, self
.id_wid
)
1647 return FPOpData(self
.width
, self
.id_wid
)
1649 def add_state(self
, state
):
1650 self
.states
.append(state
)
1653 def get_fragment(self
, platform
=None):
1654 """ creates the HDL code-fragment for FPAdd
1657 m
.submodules
.out_z
= self
.o
.z
1658 m
.submodules
.in_t
= self
.in_t
1660 self
.get_compact_fragment(m
, platform
)
1662 self
.get_longer_fragment(m
, platform
)
1664 with m
.FSM() as fsm
:
1666 for state
in self
.states
:
1667 with m
.State(state
.state_from
):
1672 def get_longer_fragment(self
, m
, platform
=None):
1674 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1676 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1680 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1681 sc
.setup(m
, a
, b
, self
.in_mid
)
1683 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1684 dn
.setup(m
, a
, b
, sc
.in_mid
)
1686 if self
.single_cycle
:
1687 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1688 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1690 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1691 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1693 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1694 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1696 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1697 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1699 if self
.single_cycle
:
1700 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1701 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1703 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1704 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1706 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1707 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1709 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1710 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1712 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1713 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1715 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1716 pa
.in_mid
, self
.out_mid
))
1718 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1719 pa
.in_mid
, self
.out_mid
))
1721 def get_compact_fragment(self
, m
, platform
=None):
1723 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1724 self
.width
, self
.id_wid
))
1725 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1727 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1730 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1733 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1734 n1
.setup(m
, alm
.a1o
)
1736 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1737 n1
.out_z
.mid
, self
.o
.mid
))
1739 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1740 # sc.o.mid, self.o.mid))
1743 class FPADDBase(FPState
):
1745 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1748 * width: bit-width of IEEE754. supported: 16, 32, 64
1749 * id_wid: an identifier that is sync-connected to the input
1750 * single_cycle: True indicates each stage to complete in 1 clock
1752 FPState
.__init
__(self
, "fpadd")
1754 self
.single_cycle
= single_cycle
1755 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1756 self
.o
= self
.ospec()
1758 self
.in_t
= Trigger()
1759 self
.i
= self
.ispec()
1761 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1762 self
.in_accept
= Signal(reset_less
=True)
1763 self
.add_stb
= Signal(reset_less
=True)
1764 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1767 return self
.mod
.ispec()
1770 return self
.mod
.ospec()
1772 def setup(self
, m
, i
, add_stb
, in_mid
):
1773 m
.d
.comb
+= [self
.i
.eq(i
),
1774 self
.mod
.i
.eq(self
.i
),
1775 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1776 #self.add_stb.eq(add_stb),
1777 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1778 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1779 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1780 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1781 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1782 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1785 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1786 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1787 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1788 #m.d.sync += self.in_t.stb.eq(0)
1790 m
.submodules
.fpadd
= self
.mod
1792 def action(self
, m
):
1794 # in_accept is set on incoming strobe HIGH and ack LOW.
1795 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1797 #with m.If(self.in_t.ack):
1798 # m.d.sync += self.in_t.stb.eq(0)
1799 with m
.If(~self
.z_done
):
1800 # not done: test for accepting an incoming operand pair
1801 with m
.If(self
.in_accept
):
1803 self
.add_ack
.eq(1), # acknowledge receipt...
1804 self
.in_t
.stb
.eq(1), # initiate add
1807 m
.d
.sync
+= [self
.add_ack
.eq(0),
1808 self
.in_t
.stb
.eq(0),
1812 # done: acknowledge, and write out id and value
1813 m
.d
.sync
+= [self
.add_ack
.eq(1),
1820 if self
.in_mid
is not None:
1821 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1824 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1826 # move to output state on detecting z ack
1827 with m
.If(self
.out_z
.trigger
):
1828 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1831 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
class FPADDStageIn:
    """Pipeline-stage input: operands a and b plus an identifier."""

    def __init__(self, width, id_wid):
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying a, b and mid from i."""
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("a", "b", "mid")]
class FPADDStageOut:
    """Pipeline-stage output: result z plus an identifier."""

    def __init__(self, width, id_wid):
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying z and mid from i."""
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]
# mirrors the attribute layout of FPADDStageOut so that an eq() call can
# assign from it
class PlaceHolder:
    """Bare attribute container with no behaviour of its own."""
class FPAddBaseStage:
    """Pipeline stage description: input/output specs plus the transform."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        return FPADDStageIn(self.width, self.id_wid)

    def ospec(self):
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """Return a placeholder carrying a + b as z and the unchanged mid."""
        result = PlaceHolder()
        result.z = i.a + i.b
        result.mid = i.mid
        return result
class FPADDBasePipe:
    """Thin wrapper exposing an UnbufferedPipeline over FPAddBaseStage."""

    def __init__(self, width, id_wid):
        self.pipe = UnbufferedPipeline(FPAddBaseStage(width, id_wid))

    def elaborate(self, platform):
        """Delegate elaboration to the wrapped pipeline."""
        pipeline = self.pipe
        return pipeline.elaborate(platform)

    def ports(self):
        """Delegate the port list to the wrapped pipeline."""
        pipeline = self.pipe
        return pipeline.ports()
1886 def __init__(self
, width
, id_wid
):
1888 self
.id_wid
= id_wid
1890 for i
in range(rs_sz
):
1892 out_z
.name
= "out_z_%d" % i
1894 self
.res
= Array(res
)
1895 self
.in_z
= FPOp(width
)
1896 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1898 def setup(self
, m
, in_z
, in_mid
):
1899 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1900 self
.in_mid
.eq(in_mid
)]
1902 def get_fragment(self
, platform
=None):
1903 """ creates the HDL code-fragment for FPAdd
1906 m
.submodules
.res_in_z
= self
.in_z
1907 m
.submodules
+= self
.res
1919 """ FPADD: stages as follows:
1925 FPAddBase---> FPAddBaseMod
1927 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1929 FPAddBase is tricky: it is both a stage and *has* stages.
1930 Connection to FPAddBaseMod therefore requires an in stb/ack
1931 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1932 needs to be the thing that raises the incoming stb.
1935 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1938 * width: bit-width of IEEE754. supported: 16, 32, 64
1939 * id_wid: an identifier that is sync-connected to the input
1940 * single_cycle: True indicates each stage to complete in 1 clock
1943 self
.id_wid
= id_wid
1944 self
.single_cycle
= single_cycle
1946 #self.out_z = FPOp(width)
1947 self
.ids
= FPID(id_wid
)
1950 for i
in range(rs_sz
):
1953 in_a
.name
= "in_a_%d" % i
1954 in_b
.name
= "in_b_%d" % i
1955 rs
.append((in_a
, in_b
))
1959 for i
in range(rs_sz
):
1961 out_z
.name
= "out_z_%d" % i
1963 self
.res
= Array(res
)
1967 def add_state(self
, state
):
1968 self
.states
.append(state
)
1971 def get_fragment(self
, platform
=None):
1972 """ creates the HDL code-fragment for FPAdd
1975 m
.submodules
+= self
.rs
1977 in_a
= self
.rs
[0][0]
1978 in_b
= self
.rs
[0][1]
1980 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1985 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1990 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1991 ab
= self
.add_state(ab
)
1992 abd
= ab
.ispec() # create an input spec object for FPADDBase
1993 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1994 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1997 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
2000 with m
.FSM() as fsm
:
2002 for state
in self
.states
:
2003 with m
.State(state
.state_from
):
2009 if __name__
== "__main__":
2011 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
2012 main(alu
, ports
=alu
.rs
[0][0].ports() + \
2013 alu
.rs
[0][1].ports() + \
2014 alu
.res
[0].ports() + \
2015 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
2017 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
2018 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
2019 alu
.in_t
.ports() + \
2020 alu
.out_z
.ports() + \
2021 [alu
.in_mid
, alu
.out_mid
])
2024 # works... but don't use, just do "python fname.py convert -t v"
2025 #print (verilog.convert(alu, ports=[
2026 # ports=alu.in_a.ports() + \
2027 # alu.in_b.ports() + \
2028 # alu.out_z.ports())