1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
17 def __init__(self
, state_from
):
18 self
.state_from
= state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
140 def __init__(self
, width
):
141 self
.in_op
= FPOp(width
)
142 self
.out_op
= Signal(width
)
143 self
.out_decode
= Signal(reset_less
=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
222 def __init__(self
, in_state
, out_state
, width
, id_wid
):
223 FPState
.__init
__(self
, in_state
)
224 self
.out_state
= out_state
225 self
.mod
= FPGet2OpMod(width
, id_wid
)
226 self
.o
= self
.mod
.ospec()
227 self
.in_stb
= Signal(reset_less
=True)
228 self
.out_ack
= Signal(reset_less
=True)
229 self
.out_decode
= Signal(reset_less
=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
254 def __init__(self
, width
, id_wid
, m_extra
=True):
255 self
.a
= FPNumBase(width
, m_extra
)
256 self
.b
= FPNumBase(width
, m_extra
)
257 self
.mid
= Signal(id_wid
, reset_less
=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
263 class FPAddSpecialCasesMod
:
264 """ special cases: NaNs, infs, zeros, denormalised
265 NOTE: some of these are unique to add. see "Special Operations"
266 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
269 def __init__(self
, width
, id_wid
):
272 self
.i
= self
.ispec()
273 self
.o
= self
.ospec()
274 self
.out_do_z
= Signal(reset_less
=True)
277 return FPNumBase2Ops(self
.width
, self
.id_wid
)
280 return FPPackData(self
.width
, self
.id_wid
)
282 def setup(self
, m
, i
, out_do_z
):
283 """ links module to inputs and outputs
285 m
.submodules
.specialcases
= self
286 m
.d
.comb
+= self
.i
.eq(i
)
287 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
289 def elaborate(self
, platform
):
292 m
.submodules
.sc_in_a
= self
.i
.a
293 m
.submodules
.sc_in_b
= self
.i
.b
294 m
.submodules
.sc_out_z
= self
.o
.z
297 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
300 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
302 # if a is NaN or b is NaN return NaN
303 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
304 m
.d
.comb
+= self
.out_do_z
.eq(1)
305 m
.d
.comb
+= self
.o
.z
.nan(0)
307 # XXX WEIRDNESS for FP16 non-canonical NaN handling
310 ## if a is zero and b is NaN return -b
311 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
315 ## if b is zero and a is NaN return -a
316 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
320 ## if a is -zero and b is NaN return -b
321 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
325 ## if b is -zero and a is NaN return -a
326 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
330 # if a is inf return inf (or NaN)
331 with m
.Elif(self
.i
.a
.is_inf
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
334 # if a is inf and signs don't match return NaN
335 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
336 m
.d
.comb
+= self
.o
.z
.nan(0)
338 # if b is inf return inf
339 with m
.Elif(self
.i
.b
.is_inf
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
343 # if a is zero and b zero return signed-a/b
344 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
350 # if a is zero return b
351 with m
.Elif(self
.i
.a
.is_zero
):
352 m
.d
.comb
+= self
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
356 # if b is zero return a
357 with m
.Elif(self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
362 # if a equal to -b return zero (+ve zero)
363 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
364 m
.d
.comb
+= self
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.zero(0)
367 # Denormalised Number checks
369 m
.d
.comb
+= self
.out_do_z
.eq(0)
371 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
377 def __init__(self
, id_wid
):
380 self
.in_mid
= Signal(id_wid
, reset_less
=True)
381 self
.out_mid
= Signal(id_wid
, reset_less
=True)
387 if self
.id_wid
is not None:
388 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
391 class FPAddSpecialCases(FPState
):
392 """ special cases: NaNs, infs, zeros, denormalised
393 NOTE: some of these are unique to add. see "Special Operations"
394 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
397 def __init__(self
, width
, id_wid
):
398 FPState
.__init
__(self
, "special_cases")
399 self
.mod
= FPAddSpecialCasesMod(width
)
400 self
.out_z
= self
.mod
.ospec()
401 self
.out_do_z
= Signal(reset_less
=True)
403 def setup(self
, m
, i
):
404 """ links module to inputs and outputs
406 self
.mod
.setup(m
, i
, self
.out_do_z
)
407 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
408 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
412 with m
.If(self
.out_do_z
):
415 m
.next
= "denormalise"
418 class FPAddSpecialCasesDeNorm(FPState
):
419 """ special cases: NaNs, infs, zeros, denormalised
420 NOTE: some of these are unique to add. see "Special Operations"
421 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
424 def __init__(self
, width
, id_wid
):
425 FPState
.__init
__(self
, "special_cases")
426 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
427 self
.out_z
= self
.smod
.ospec()
428 self
.out_do_z
= Signal(reset_less
=True)
430 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
431 self
.o
= self
.dmod
.ospec()
433 def setup(self
, m
, i
):
434 """ links module to inputs and outputs
436 self
.smod
.setup(m
, i
, self
.out_do_z
)
437 self
.dmod
.setup(m
, i
)
440 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
441 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
443 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
446 with m
.If(self
.out_do_z
):
452 class FPAddDeNormMod(FPState
):
454 def __init__(self
, width
, id_wid
):
457 self
.i
= self
.ispec()
458 self
.o
= self
.ospec()
461 return FPNumBase2Ops(self
.width
, self
.id_wid
)
464 return FPNumBase2Ops(self
.width
, self
.id_wid
)
466 def setup(self
, m
, i
):
467 """ links module to inputs and outputs
469 m
.submodules
.denormalise
= self
470 m
.d
.comb
+= self
.i
.eq(i
)
472 def elaborate(self
, platform
):
474 m
.submodules
.denorm_in_a
= self
.i
.a
475 m
.submodules
.denorm_in_b
= self
.i
.b
476 m
.submodules
.denorm_out_a
= self
.o
.a
477 m
.submodules
.denorm_out_b
= self
.o
.b
478 # hmmm, don't like repeating identical code
479 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
480 with m
.If(self
.i
.a
.exp_n127
):
481 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
483 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
485 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
486 with m
.If(self
.i
.b
.exp_n127
):
487 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
489 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
491 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
496 class FPAddDeNorm(FPState
):
498 def __init__(self
, width
, id_wid
):
499 FPState
.__init
__(self
, "denormalise")
500 self
.mod
= FPAddDeNormMod(width
)
501 self
.out_a
= FPNumBase(width
)
502 self
.out_b
= FPNumBase(width
)
504 def setup(self
, m
, i
):
505 """ links module to inputs and outputs
509 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
510 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
513 # Denormalised Number checks
517 class FPAddAlignMultiMod(FPState
):
519 def __init__(self
, width
):
520 self
.in_a
= FPNumBase(width
)
521 self
.in_b
= FPNumBase(width
)
522 self
.out_a
= FPNumIn(None, width
)
523 self
.out_b
= FPNumIn(None, width
)
524 self
.exp_eq
= Signal(reset_less
=True)
526 def elaborate(self
, platform
):
527 # This one however (single-cycle) will do the shift
532 m
.submodules
.align_in_a
= self
.in_a
533 m
.submodules
.align_in_b
= self
.in_b
534 m
.submodules
.align_out_a
= self
.out_a
535 m
.submodules
.align_out_b
= self
.out_b
537 # NOTE: this does *not* do single-cycle multi-shifting,
538 # it *STAYS* in the align state until exponents match
540 # exponent of a greater than b: shift b down
541 m
.d
.comb
+= self
.exp_eq
.eq(0)
542 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
543 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
544 agtb
= Signal(reset_less
=True)
545 altb
= Signal(reset_less
=True)
546 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
547 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
549 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
550 # exponent of b greater than a: shift a down
552 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
553 # exponents equal: move to next stage.
555 m
.d
.comb
+= self
.exp_eq
.eq(1)
559 class FPAddAlignMulti(FPState
):
561 def __init__(self
, width
, id_wid
):
562 FPState
.__init
__(self
, "align")
563 self
.mod
= FPAddAlignMultiMod(width
)
564 self
.out_a
= FPNumIn(None, width
)
565 self
.out_b
= FPNumIn(None, width
)
566 self
.exp_eq
= Signal(reset_less
=True)
568 def setup(self
, m
, in_a
, in_b
):
569 """ links module to inputs and outputs
571 m
.submodules
.align
= self
.mod
572 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
573 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
574 #m.d.comb += self.out_a.eq(self.mod.out_a)
575 #m.d.comb += self.out_b.eq(self.mod.out_b)
576 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
577 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
578 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
581 with m
.If(self
.exp_eq
):
587 def __init__(self
, width
, id_wid
):
588 self
.a
= FPNumIn(None, width
)
589 self
.b
= FPNumIn(None, width
)
590 self
.mid
= Signal(id_wid
, reset_less
=True)
593 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
596 class FPAddAlignSingleMod
:
598 def __init__(self
, width
, id_wid
):
601 self
.i
= self
.ispec()
602 self
.o
= self
.ospec()
605 return FPNumBase2Ops(self
.width
, self
.id_wid
)
608 return FPNumIn2Ops(self
.width
, self
.id_wid
)
610 def process(self
, i
):
613 def setup(self
, m
, i
):
614 """ links module to inputs and outputs
616 m
.submodules
.align
= self
617 m
.d
.comb
+= self
.i
.eq(i
)
619 def elaborate(self
, platform
):
620 """ Aligns A against B or B against A, depending on which has the
621 greater exponent. This is done in a *single* cycle using
622 variable-width bit-shift
624 the shifter used here is quite expensive in terms of gates.
625 Mux A or B in (and out) into temporaries, as only one of them
626 needs to be aligned against the other
630 m
.submodules
.align_in_a
= self
.i
.a
631 m
.submodules
.align_in_b
= self
.i
.b
632 m
.submodules
.align_out_a
= self
.o
.a
633 m
.submodules
.align_out_b
= self
.o
.b
635 # temporary (muxed) input and output to be shifted
636 t_inp
= FPNumBase(self
.width
)
637 t_out
= FPNumIn(None, self
.width
)
638 espec
= (len(self
.i
.a
.e
), True)
639 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
640 m
.submodules
.align_t_in
= t_inp
641 m
.submodules
.align_t_out
= t_out
642 m
.submodules
.multishift_r
= msr
644 ediff
= Signal(espec
, reset_less
=True)
645 ediffr
= Signal(espec
, reset_less
=True)
646 tdiff
= Signal(espec
, reset_less
=True)
647 elz
= Signal(reset_less
=True)
648 egz
= Signal(reset_less
=True)
650 # connect multi-shifter to t_inp/out mantissa (and tdiff)
651 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
652 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
653 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
654 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
655 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
657 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
658 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
659 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
660 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
662 # default: A-exp == B-exp, A and B untouched (fall through)
663 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
664 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
665 # only one shifter (muxed)
666 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
667 # exponent of a greater than b: shift b down
669 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
672 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
674 # exponent of b greater than a: shift a down
676 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
679 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
682 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
687 class FPAddAlignSingle(FPState
):
689 def __init__(self
, width
, id_wid
):
690 FPState
.__init
__(self
, "align")
691 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
692 self
.out_a
= FPNumIn(None, width
)
693 self
.out_b
= FPNumIn(None, width
)
695 def setup(self
, m
, i
):
696 """ links module to inputs and outputs
700 # NOTE: could be done as comb
701 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
702 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
708 class FPAddAlignSingleAdd(FPState
):
710 def __init__(self
, width
, id_wid
):
711 FPState
.__init
__(self
, "align")
714 self
.a1o
= self
.ospec()
717 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
720 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
722 def setup(self
, m
, i
):
723 """ links module to inputs and outputs
726 # chain AddAlignSingle, AddStage0 and AddStage1
727 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
728 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
729 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
731 chain
= StageChain([mod
, a0mod
, a1mod
])
734 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
737 m
.next
= "normalise_1"
740 class FPAddStage0Data
:
742 def __init__(self
, width
, id_wid
):
743 self
.z
= FPNumBase(width
, False)
744 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
745 self
.mid
= Signal(id_wid
, reset_less
=True)
748 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
751 class FPAddStage0Mod
:
753 def __init__(self
, width
, id_wid
):
756 self
.i
= self
.ispec()
757 self
.o
= self
.ospec()
760 return FPNumBase2Ops(self
.width
, self
.id_wid
)
763 return FPAddStage0Data(self
.width
, self
.id_wid
)
765 def process(self
, i
):
768 def setup(self
, m
, i
):
769 """ links module to inputs and outputs
771 m
.submodules
.add0
= self
772 m
.d
.comb
+= self
.i
.eq(i
)
774 def elaborate(self
, platform
):
776 m
.submodules
.add0_in_a
= self
.i
.a
777 m
.submodules
.add0_in_b
= self
.i
.b
778 m
.submodules
.add0_out_z
= self
.o
.z
780 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
781 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
783 # store intermediate tests (and zero-extended mantissas)
784 seq
= Signal(reset_less
=True)
785 mge
= Signal(reset_less
=True)
786 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
787 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
788 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
789 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
790 am0
.eq(Cat(self
.i
.a
.m
, 0)),
791 bm0
.eq(Cat(self
.i
.b
.m
, 0))
793 # same-sign (both negative or both positive) add mantissas
796 self
.o
.tot
.eq(am0
+ bm0
),
797 self
.o
.z
.s
.eq(self
.i
.a
.s
)
799 # a mantissa greater than b, use a
802 self
.o
.tot
.eq(am0
- bm0
),
803 self
.o
.z
.s
.eq(self
.i
.a
.s
)
805 # b mantissa greater than a, use b
808 self
.o
.tot
.eq(bm0
- am0
),
809 self
.o
.z
.s
.eq(self
.i
.b
.s
)
814 class FPAddStage0(FPState
):
815 """ First stage of add. covers same-sign (add) and subtract
816 special-casing when mantissas are greater or equal, to
817 give greatest accuracy.
820 def __init__(self
, width
, id_wid
):
821 FPState
.__init
__(self
, "add_0")
822 self
.mod
= FPAddStage0Mod(width
)
823 self
.o
= self
.mod
.ospec()
825 def setup(self
, m
, i
):
826 """ links module to inputs and outputs
830 # NOTE: these could be done as combinatorial (merge add0+add1)
831 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
837 class FPAddStage1Data
:
839 def __init__(self
, width
, id_wid
):
840 self
.z
= FPNumBase(width
, False)
842 self
.mid
= Signal(id_wid
, reset_less
=True)
845 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
849 class FPAddStage1Mod(FPState
):
850 """ Second stage of add: preparation for normalisation.
851 detects when tot sum is too big (tot[27] is kinda a carry bit)
854 def __init__(self
, width
, id_wid
):
857 self
.i
= self
.ispec()
858 self
.o
= self
.ospec()
861 return FPAddStage0Data(self
.width
, self
.id_wid
)
864 return FPAddStage1Data(self
.width
, self
.id_wid
)
866 def process(self
, i
):
869 def setup(self
, m
, i
):
870 """ links module to inputs and outputs
872 m
.submodules
.add1
= self
873 m
.submodules
.add1_out_overflow
= self
.o
.of
875 m
.d
.comb
+= self
.i
.eq(i
)
877 def elaborate(self
, platform
):
879 #m.submodules.norm1_in_overflow = self.in_of
880 #m.submodules.norm1_out_overflow = self.out_of
881 #m.submodules.norm1_in_z = self.in_z
882 #m.submodules.norm1_out_z = self.out_z
883 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
884 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
885 # tot[-1] (MSB) gets set when the sum overflows. shift result down
886 with m
.If(self
.i
.tot
[-1]):
888 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
889 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
890 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
891 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
892 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
893 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
895 # tot[-1] (MSB) zero case
898 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
899 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
900 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
901 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
902 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
907 class FPAddStage1(FPState
):
909 def __init__(self
, width
, id_wid
):
910 FPState
.__init
__(self
, "add_1")
911 self
.mod
= FPAddStage1Mod(width
)
912 self
.out_z
= FPNumBase(width
, False)
913 self
.out_of
= Overflow()
914 self
.norm_stb
= Signal()
916 def setup(self
, m
, i
):
917 """ links module to inputs and outputs
921 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
923 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
924 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
925 m
.d
.sync
+= self
.norm_stb
.eq(1)
928 m
.next
= "normalise_1"
931 class FPNormaliseModSingle
:
933 def __init__(self
, width
):
935 self
.in_z
= self
.ispec()
936 self
.out_z
= self
.ospec()
939 return FPNumBase(self
.width
, False)
942 return FPNumBase(self
.width
, False)
944 def setup(self
, m
, i
):
945 """ links module to inputs and outputs
947 m
.submodules
.normalise
= self
948 m
.d
.comb
+= self
.i
.eq(i
)
950 def elaborate(self
, platform
):
953 mwid
= self
.out_z
.m_width
+2
954 pe
= PriorityEncoder(mwid
)
955 m
.submodules
.norm_pe
= pe
957 m
.submodules
.norm1_out_z
= self
.out_z
958 m
.submodules
.norm1_in_z
= self
.in_z
960 in_z
= FPNumBase(self
.width
, False)
962 m
.submodules
.norm1_insel_z
= in_z
963 m
.submodules
.norm1_insel_overflow
= in_of
965 espec
= (len(in_z
.e
), True)
966 ediff_n126
= Signal(espec
, reset_less
=True)
967 msr
= MultiShiftRMerge(mwid
, espec
)
968 m
.submodules
.multishift_r
= msr
970 m
.d
.comb
+= in_z
.eq(self
.in_z
)
971 m
.d
.comb
+= in_of
.eq(self
.in_of
)
972 # initialise out from in (overridden below)
973 m
.d
.comb
+= self
.out_z
.eq(in_z
)
974 m
.d
.comb
+= self
.out_of
.eq(in_of
)
975 # normalisation decrease condition
976 decrease
= Signal(reset_less
=True)
977 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
980 # *sigh* not entirely obvious: count leading zeros (clz)
981 # with a PriorityEncoder: to find from the MSB
982 # we reverse the order of the bits.
983 temp_m
= Signal(mwid
, reset_less
=True)
984 temp_s
= Signal(mwid
+1, reset_less
=True)
985 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
987 # cat round and guard bits back into the mantissa
988 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
989 pe
.i
.eq(temp_m
[::-1]), # inverted
990 clz
.eq(pe
.o
), # count zeros from MSB down
991 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
992 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
993 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1000 def __init__(self
, width
, id_wid
):
1001 self
.roundz
= Signal(reset_less
=True)
1002 self
.z
= FPNumBase(width
, False)
1003 self
.mid
= Signal(id_wid
, reset_less
=True)
1006 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1009 class FPNorm1ModSingle
:
1011 def __init__(self
, width
, id_wid
):
1013 self
.id_wid
= id_wid
1014 self
.i
= self
.ispec()
1015 self
.o
= self
.ospec()
1018 return FPAddStage1Data(self
.width
, self
.id_wid
)
1021 return FPNorm1Data(self
.width
, self
.id_wid
)
1023 def setup(self
, m
, i
):
1024 """ links module to inputs and outputs
1026 m
.submodules
.normalise_1
= self
1027 m
.d
.comb
+= self
.i
.eq(i
)
1029 def process(self
, i
):
1032 def elaborate(self
, platform
):
1035 mwid
= self
.o
.z
.m_width
+2
1036 pe
= PriorityEncoder(mwid
)
1037 m
.submodules
.norm_pe
= pe
1040 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1042 m
.submodules
.norm1_out_z
= self
.o
.z
1043 m
.submodules
.norm1_out_overflow
= of
1044 m
.submodules
.norm1_in_z
= self
.i
.z
1045 m
.submodules
.norm1_in_overflow
= self
.i
.of
1048 m
.submodules
.norm1_insel_z
= i
.z
1049 m
.submodules
.norm1_insel_overflow
= i
.of
1051 espec
= (len(i
.z
.e
), True)
1052 ediff_n126
= Signal(espec
, reset_less
=True)
1053 msr
= MultiShiftRMerge(mwid
, espec
)
1054 m
.submodules
.multishift_r
= msr
1056 m
.d
.comb
+= i
.eq(self
.i
)
1057 # initialise out from in (overridden below)
1058 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1059 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1060 m
.d
.comb
+= of
.eq(i
.of
)
1061 # normalisation increase/decrease conditions
1062 decrease
= Signal(reset_less
=True)
1063 increase
= Signal(reset_less
=True)
1064 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1065 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1067 with m
.If(decrease
):
1068 # *sigh* not entirely obvious: count leading zeros (clz)
1069 # with a PriorityEncoder: to find from the MSB
1070 # we reverse the order of the bits.
1071 temp_m
= Signal(mwid
, reset_less
=True)
1072 temp_s
= Signal(mwid
+1, reset_less
=True)
1073 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1074 # make sure that the amount to decrease by does NOT
1075 # go below the minimum non-INF/NaN exponent
1076 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1079 # cat round and guard bits back into the mantissa
1080 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1081 pe
.i
.eq(temp_m
[::-1]), # inverted
1082 clz
.eq(limclz
), # count zeros from MSB down
1083 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1084 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1085 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1086 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1087 # overflow in bits 0..1: got shifted too (leave sticky)
1088 of
.guard
.eq(temp_s
[1]), # guard
1089 of
.round_bit
.eq(temp_s
[0]), # round
1092 with m
.Elif(increase
):
1093 temp_m
= Signal(mwid
+1, reset_less
=True)
1095 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1097 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1098 # connect multi-shifter to inp/out mantissa (and ediff)
1100 msr
.diff
.eq(ediff_n126
),
1101 self
.o
.z
.m
.eq(msr
.m
[3:]),
1102 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1103 # overflow in bits 0..1: got shifted too (leave sticky)
1104 of
.guard
.eq(temp_s
[2]), # guard
1105 of
.round_bit
.eq(temp_s
[1]), # round
1106 of
.sticky
.eq(temp_s
[0]), # sticky
1107 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1113 class FPNorm1ModMulti
:
1115 def __init__(self
, width
, single_cycle
=True):
1117 self
.in_select
= Signal(reset_less
=True)
1118 self
.in_z
= FPNumBase(width
, False)
1119 self
.in_of
= Overflow()
1120 self
.temp_z
= FPNumBase(width
, False)
1121 self
.temp_of
= Overflow()
1122 self
.out_z
= FPNumBase(width
, False)
1123 self
.out_of
= Overflow()
1125 def elaborate(self
, platform
):
1128 m
.submodules
.norm1_out_z
= self
.out_z
1129 m
.submodules
.norm1_out_overflow
= self
.out_of
1130 m
.submodules
.norm1_temp_z
= self
.temp_z
1131 m
.submodules
.norm1_temp_of
= self
.temp_of
1132 m
.submodules
.norm1_in_z
= self
.in_z
1133 m
.submodules
.norm1_in_overflow
= self
.in_of
1135 in_z
= FPNumBase(self
.width
, False)
1137 m
.submodules
.norm1_insel_z
= in_z
1138 m
.submodules
.norm1_insel_overflow
= in_of
1140 # select which of temp or in z/of to use
1141 with m
.If(self
.in_select
):
1142 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1143 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1145 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1146 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1147 # initialise out from in (overridden below)
1148 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1149 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1150 # normalisation increase/decrease conditions
1151 decrease
= Signal(reset_less
=True)
1152 increase
= Signal(reset_less
=True)
1153 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1154 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1155 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1157 with m
.If(decrease
):
1159 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1160 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1161 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1162 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1163 self
.out_of
.round_bit
.eq(0), # reset round bit
1164 self
.out_of
.m0
.eq(in_of
.guard
),
1167 with m
.Elif(increase
):
1169 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1170 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1171 self
.out_of
.guard
.eq(in_z
.m
[0]),
1172 self
.out_of
.m0
.eq(in_z
.m
[1]),
1173 self
.out_of
.round_bit
.eq(in_of
.guard
),
1174 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1180 class FPNorm1Single(FPState
):
1182 def __init__(self
, width
, id_wid
, single_cycle
=True):
1183 FPState
.__init
__(self
, "normalise_1")
1184 self
.mod
= FPNorm1ModSingle(width
)
1185 self
.o
= self
.ospec()
1186 self
.out_z
= FPNumBase(width
, False)
1187 self
.out_roundz
= Signal(reset_less
=True)
1190 return self
.mod
.ispec()
1193 return self
.mod
.ospec()
1195 def setup(self
, m
, i
):
1196 """ links module to inputs and outputs
1198 self
.mod
.setup(m
, i
)
1200 def action(self
, m
):
1204 class FPNorm1Multi(FPState
):
1206 def __init__(self
, width
, id_wid
):
1207 FPState
.__init
__(self
, "normalise_1")
1208 self
.mod
= FPNorm1ModMulti(width
)
1209 self
.stb
= Signal(reset_less
=True)
1210 self
.ack
= Signal(reset
=0, reset_less
=True)
1211 self
.out_norm
= Signal(reset_less
=True)
1212 self
.in_accept
= Signal(reset_less
=True)
1213 self
.temp_z
= FPNumBase(width
)
1214 self
.temp_of
= Overflow()
1215 self
.out_z
= FPNumBase(width
)
1216 self
.out_roundz
= Signal(reset_less
=True)
1218 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1219 """ links module to inputs and outputs
1221 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1222 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1223 self
.out_z
, self
.out_norm
)
1225 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1226 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1228 def action(self
, m
):
1229 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1230 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1231 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1232 with m
.If(self
.out_norm
):
1233 with m
.If(self
.in_accept
):
1238 m
.d
.sync
+= self
.ack
.eq(0)
1240 # normalisation not required (or done).
1242 m
.d
.sync
+= self
.ack
.eq(1)
1243 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1246 class FPNormToPack(FPState
):
1248 def __init__(self
, width
, id_wid
):
1249 FPState
.__init
__(self
, "normalise_1")
1250 self
.id_wid
= id_wid
1254 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1257 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1259 def setup(self
, m
, i
):
1260 """ links module to inputs and outputs
1263 # Normalisation, Rounding Corrections, Pack - in a chain
1264 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1265 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1266 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1267 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1268 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1270 self
.out_z
= pmod
.ospec()
1272 m
.d
.sync
+= self
.out_z
.mid
.eq(pmod
.o
.mid
)
1273 m
.d
.sync
+= self
.out_z
.z
.v
.eq(pmod
.o
.z
.v
) # outputs packed result
1275 def action(self
, m
):
1276 m
.next
= "pack_put_z"
1281 def __init__(self
, width
, id_wid
):
1282 self
.z
= FPNumBase(width
, False)
1283 self
.mid
= Signal(id_wid
, reset_less
=True)
1286 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1291 def __init__(self
, width
, id_wid
):
1293 self
.id_wid
= id_wid
1294 self
.i
= self
.ispec()
1295 self
.out_z
= self
.ospec()
1298 return FPNorm1Data(self
.width
, self
.id_wid
)
1301 return FPRoundData(self
.width
, self
.id_wid
)
1303 def process(self
, i
):
1306 def setup(self
, m
, i
):
1307 m
.submodules
.roundz
= self
1308 m
.d
.comb
+= self
.i
.eq(i
)
1310 def elaborate(self
, platform
):
1312 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1313 with m
.If(self
.i
.roundz
):
1314 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1315 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1316 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1320 class FPRound(FPState
):
1322 def __init__(self
, width
, id_wid
):
1323 FPState
.__init
__(self
, "round")
1324 self
.mod
= FPRoundMod(width
)
1325 self
.out_z
= self
.ospec()
1328 return self
.mod
.ispec()
1331 return self
.mod
.ospec()
1333 def setup(self
, m
, i
):
1334 """ links module to inputs and outputs
1336 self
.mod
.setup(m
, i
)
1339 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1340 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1342 def action(self
, m
):
1343 m
.next
= "corrections"
1346 class FPCorrectionsMod
:
1348 def __init__(self
, width
, id_wid
):
1350 self
.id_wid
= id_wid
1351 self
.i
= self
.ispec()
1352 self
.out_z
= self
.ospec()
1355 return FPRoundData(self
.width
, self
.id_wid
)
1358 return FPRoundData(self
.width
, self
.id_wid
)
1360 def process(self
, i
):
1363 def setup(self
, m
, i
):
1364 """ links module to inputs and outputs
1366 m
.submodules
.corrections
= self
1367 m
.d
.comb
+= self
.i
.eq(i
)
1369 def elaborate(self
, platform
):
1371 m
.submodules
.corr_in_z
= self
.i
.z
1372 m
.submodules
.corr_out_z
= self
.out_z
.z
1373 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1374 with m
.If(self
.i
.z
.is_denormalised
):
1375 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1379 class FPCorrections(FPState
):
1381 def __init__(self
, width
, id_wid
):
1382 FPState
.__init
__(self
, "corrections")
1383 self
.mod
= FPCorrectionsMod(width
)
1384 self
.out_z
= self
.ospec()
1387 return self
.mod
.ispec()
1390 return self
.mod
.ospec()
1392 def setup(self
, m
, in_z
):
1393 """ links module to inputs and outputs
1395 self
.mod
.setup(m
, in_z
)
1397 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1398 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1400 def action(self
, m
):
1406 def __init__(self
, width
, id_wid
):
1407 self
.z
= FPNumOut(width
, False)
1408 self
.mid
= Signal(id_wid
, reset_less
=True)
1411 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1416 def __init__(self
, width
, id_wid
):
1418 self
.id_wid
= id_wid
1419 self
.i
= self
.ispec()
1420 self
.o
= self
.ospec()
1423 return FPRoundData(self
.width
, self
.id_wid
)
1426 return FPPackData(self
.width
, self
.id_wid
)
1428 def process(self
, i
):
1431 def setup(self
, m
, in_z
):
1432 """ links module to inputs and outputs
1434 m
.submodules
.pack
= self
1435 m
.d
.comb
+= self
.i
.eq(in_z
)
1437 def elaborate(self
, platform
):
1439 m
.submodules
.pack_in_z
= self
.i
.z
1440 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1441 with m
.If(self
.i
.z
.is_overflowed
):
1442 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1444 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1449 def __init__(self
, width
, id_wid
):
1450 self
.z
= FPNumOut(width
, False)
1451 self
.mid
= Signal(id_wid
, reset_less
=True)
1454 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1457 class FPPack(FPState
):
1459 def __init__(self
, width
, id_wid
):
1460 FPState
.__init
__(self
, "pack")
1461 self
.mod
= FPPackMod(width
)
1462 self
.out_z
= self
.ospec()
1465 return self
.mod
.ispec()
1468 return self
.mod
.ospec()
1470 def setup(self
, m
, in_z
):
1471 """ links module to inputs and outputs
1473 self
.mod
.setup(m
, in_z
)
1475 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1476 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1478 def action(self
, m
):
1479 m
.next
= "pack_put_z"
1482 class FPPutZ(FPState
):
1484 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1485 FPState
.__init
__(self
, state
)
1486 if to_state
is None:
1487 to_state
= "get_ops"
1488 self
.to_state
= to_state
1491 self
.in_mid
= in_mid
1492 self
.out_mid
= out_mid
1494 def action(self
, m
):
1495 if self
.in_mid
is not None:
1496 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1498 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1500 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1501 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1502 m
.next
= self
.to_state
1504 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1507 class FPPutZIdx(FPState
):
1509 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1510 FPState
.__init
__(self
, state
)
1511 if to_state
is None:
1512 to_state
= "get_ops"
1513 self
.to_state
= to_state
1515 self
.out_zs
= out_zs
1516 self
.in_mid
= in_mid
1518 def action(self
, m
):
1519 outz_stb
= Signal(reset_less
=True)
1520 outz_ack
= Signal(reset_less
=True)
1521 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1522 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1525 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1527 with m
.If(outz_stb
& outz_ack
):
1528 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1529 m
.next
= self
.to_state
1531 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1533 class FPADDBaseData
:
1535 def __init__(self
, width
, id_wid
):
1537 self
.id_wid
= id_wid
1538 self
.a
= Signal(width
)
1539 self
.b
= Signal(width
)
1540 self
.mid
= Signal(id_wid
, reset_less
=True)
1543 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
1547 def __init__(self
, width
, id_wid
):
1548 self
.z
= FPOp(width
)
1549 self
.mid
= Signal(id_wid
, reset_less
=True)
1552 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1557 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1560 * width: bit-width of IEEE754. supported: 16, 32, 64
1561 * id_wid: an identifier that is sync-connected to the input
1562 * single_cycle: True indicates each stage to complete in 1 clock
1563 * compact: True indicates a reduced number of stages
1566 self
.id_wid
= id_wid
1567 self
.single_cycle
= single_cycle
1568 self
.compact
= compact
1570 self
.in_t
= Trigger()
1571 self
.i
= self
.ispec()
1572 self
.o
= self
.ospec()
1577 return FPADDBaseData(self
.width
, self
.id_wid
)
1580 return FPOpData(self
.width
, self
.id_wid
)
1582 def add_state(self
, state
):
1583 self
.states
.append(state
)
1586 def get_fragment(self
, platform
=None):
1587 """ creates the HDL code-fragment for FPAdd
1590 m
.submodules
.out_z
= self
.o
.z
1591 m
.submodules
.in_t
= self
.in_t
1593 self
.get_compact_fragment(m
, platform
)
1595 self
.get_longer_fragment(m
, platform
)
1597 with m
.FSM() as fsm
:
1599 for state
in self
.states
:
1600 with m
.State(state
.state_from
):
1605 def get_longer_fragment(self
, m
, platform
=None):
1607 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1609 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1613 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1614 sc
.setup(m
, a
, b
, self
.in_mid
)
1616 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1617 dn
.setup(m
, a
, b
, sc
.in_mid
)
1619 if self
.single_cycle
:
1620 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1621 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1623 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1624 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1626 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1627 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1629 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1630 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1632 if self
.single_cycle
:
1633 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1634 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1636 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1637 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1639 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1640 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1642 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1643 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1645 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1646 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1648 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1649 pa
.in_mid
, self
.out_mid
))
1651 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1652 pa
.in_mid
, self
.out_mid
))
1654 def get_compact_fragment(self
, m
, platform
=None):
1656 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1657 self
.width
, self
.id_wid
))
1658 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1660 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1663 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1666 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1667 n1
.setup(m
, alm
.a1o
)
1669 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1670 n1
.out_z
.mid
, self
.o
.mid
))
1672 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1673 sc
.o
.mid
, self
.o
.mid
))
1676 class FPADDBase(FPState
):
1678 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1681 * width: bit-width of IEEE754. supported: 16, 32, 64
1682 * id_wid: an identifier that is sync-connected to the input
1683 * single_cycle: True indicates each stage to complete in 1 clock
1685 FPState
.__init
__(self
, "fpadd")
1687 self
.single_cycle
= single_cycle
1688 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1689 self
.o
= self
.ospec()
1691 self
.in_t
= Trigger()
1692 self
.i
= self
.ispec()
1694 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1695 self
.in_accept
= Signal(reset_less
=True)
1696 self
.add_stb
= Signal(reset_less
=True)
1697 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1700 return self
.mod
.ispec()
1703 return self
.mod
.ospec()
1705 def setup(self
, m
, i
, add_stb
, in_mid
):
1706 m
.d
.comb
+= [self
.i
.eq(i
),
1707 self
.mod
.i
.eq(self
.i
),
1708 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1709 #self.add_stb.eq(add_stb),
1710 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1711 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1712 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1713 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1714 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1715 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1718 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1719 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1720 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1721 #m.d.sync += self.in_t.stb.eq(0)
1723 m
.submodules
.fpadd
= self
.mod
1725 def action(self
, m
):
1727 # in_accept is set on incoming strobe HIGH and ack LOW.
1728 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1730 #with m.If(self.in_t.ack):
1731 # m.d.sync += self.in_t.stb.eq(0)
1732 with m
.If(~self
.z_done
):
1733 # not done: test for accepting an incoming operand pair
1734 with m
.If(self
.in_accept
):
1736 self
.add_ack
.eq(1), # acknowledge receipt...
1737 self
.in_t
.stb
.eq(1), # initiate add
1740 m
.d
.sync
+= [self
.add_ack
.eq(0),
1741 self
.in_t
.stb
.eq(0),
1745 # done: acknowledge, and write out id and value
1746 m
.d
.sync
+= [self
.add_ack
.eq(1),
1753 if self
.in_mid
is not None:
1754 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1757 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1759 # move to output state on detecting z ack
1760 with m
.If(self
.out_z
.trigger
):
1761 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1764 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1768 def __init__(self
, width
, id_wid
):
1770 self
.id_wid
= id_wid
1772 for i
in range(rs_sz
):
1774 out_z
.name
= "out_z_%d" % i
1776 self
.res
= Array(res
)
1777 self
.in_z
= FPOp(width
)
1778 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1780 def setup(self
, m
, in_z
, in_mid
):
1781 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1782 self
.in_mid
.eq(in_mid
)]
1784 def get_fragment(self
, platform
=None):
1785 """ creates the HDL code-fragment for FPAdd
1788 m
.submodules
.res_in_z
= self
.in_z
1789 m
.submodules
+= self
.res
1801 """ FPADD: stages as follows:
1807 FPAddBase---> FPAddBaseMod
1809 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1811 FPAddBase is tricky: it is both a stage and *has* stages.
1812 Connection to FPAddBaseMod therefore requires an in stb/ack
1813 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1814 needs to be the thing that raises the incoming stb.
1817 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1820 * width: bit-width of IEEE754. supported: 16, 32, 64
1821 * id_wid: an identifier that is sync-connected to the input
1822 * single_cycle: True indicates each stage to complete in 1 clock
1825 self
.id_wid
= id_wid
1826 self
.single_cycle
= single_cycle
1828 #self.out_z = FPOp(width)
1829 self
.ids
= FPID(id_wid
)
1832 for i
in range(rs_sz
):
1835 in_a
.name
= "in_a_%d" % i
1836 in_b
.name
= "in_b_%d" % i
1837 rs
.append((in_a
, in_b
))
1841 for i
in range(rs_sz
):
1843 out_z
.name
= "out_z_%d" % i
1845 self
.res
= Array(res
)
1849 def add_state(self
, state
):
1850 self
.states
.append(state
)
1853 def get_fragment(self
, platform
=None):
1854 """ creates the HDL code-fragment for FPAdd
1857 m
.submodules
+= self
.rs
1859 in_a
= self
.rs
[0][0]
1860 in_b
= self
.rs
[0][1]
1862 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1867 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1872 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1873 ab
= self
.add_state(ab
)
1874 abd
= ab
.ispec() # create an input spec object for FPADDBase
1875 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1876 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1879 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1882 with m
.FSM() as fsm
:
1884 for state
in self
.states
:
1885 with m
.State(state
.state_from
):
1891 if __name__
== "__main__":
1893 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1894 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1895 alu
.rs
[0][1].ports() + \
1896 alu
.res
[0].ports() + \
1897 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1899 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1900 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1901 alu
.in_t
.ports() + \
1902 alu
.out_z
.ports() + \
1903 [alu
.in_mid
, alu
.out_mid
])
1906 # works... but don't use, just do "python fname.py convert -t v"
1907 #print (verilog.convert(alu, ports=[
1908 # ports=alu.in_a.ports() + \
1909 # alu.in_b.ports() + \
1910 # alu.out_z.ports())