1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """ Remember the name this state was created with.

        state_from: stored unchanged on the instance; subclasses in this
        file pass string literals such as "align" or "add_0".
    """
    # no validation or conversion is applied to the value
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ Create the operand port, the captured value and the decode flag.

        width: passed to FPOp() for the input port and used as the
        shape of the out_op Signal.
    """
    # input side: an FPOp instance (its v/stb/ack members are read by
    # the elaborate method of this class)
    self.in_op = FPOp(width)

    # output side: plain value Signal plus a one-bit flag that is
    # created without a reset
    self.out_op = Signal(width)
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ Build the two-operand fetch state.

        in_state:  name handed to FPState.__init__
        out_state: stored on the instance as out_state
        width:     forwarded to FPGet2OpMod
        id_wid:    forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # the worker module, and an output record of the same spec as the
    # module's own output
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # one-bit handshake / status signals, all created reset-less
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ Container for two FPNumBase operands (a, b) and an id signal.

        width:   forwarded to both FPNumBase constructors
        id_wid:  shape of the mid Signal
        m_extra: forwarded to both FPNumBase constructors (default True)
    """
    # the two operands are built with identical arguments
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)

    # id that accompanies the operand pair; created reset-less
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
263 class FPAddSpecialCasesMod
:
264 """ special cases: NaNs, infs, zeros, denormalised
265 NOTE: some of these are unique to add. see "Special Operations"
266 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
269 def __init__(self
, width
, id_wid
):
272 self
.i
= self
.ispec()
273 self
.o
= self
.ospec()
274 self
.out_do_z
= Signal(reset_less
=True)
277 return FPNumBase2Ops(self
.width
, self
.id_wid
)
280 return FPPackData(self
.width
, self
.id_wid
)
282 def setup(self
, m
, i
, out_do_z
):
283 """ links module to inputs and outputs
285 m
.submodules
.specialcases
= self
286 m
.d
.comb
+= self
.i
.eq(i
)
287 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
289 def elaborate(self
, platform
):
292 m
.submodules
.sc_in_a
= self
.i
.a
293 m
.submodules
.sc_in_b
= self
.i
.b
294 m
.submodules
.sc_out_z
= self
.o
.z
297 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
300 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
302 # if a is NaN or b is NaN return NaN
303 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
304 m
.d
.comb
+= self
.out_do_z
.eq(1)
305 m
.d
.comb
+= self
.o
.z
.nan(0)
307 # XXX WEIRDNESS for FP16 non-canonical NaN handling
310 ## if a is zero and b is NaN return -b
311 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
315 ## if b is zero and a is NaN return -a
316 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
320 ## if a is -zero and b is NaN return -b
321 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
325 ## if b is -zero and a is NaN return -a
326 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
330 # if a is inf return inf (or NaN)
331 with m
.Elif(self
.i
.a
.is_inf
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
334 # if a is inf and signs don't match return NaN
335 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
336 m
.d
.comb
+= self
.o
.z
.nan(0)
338 # if b is inf return inf
339 with m
.Elif(self
.i
.b
.is_inf
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
343 # if a is zero and b zero return signed-a/b
344 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
350 # if a is zero return b
351 with m
.Elif(self
.i
.a
.is_zero
):
352 m
.d
.comb
+= self
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
356 # if b is zero return a
357 with m
.Elif(self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
362 # if a equal to -b return zero (+ve zero)
363 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
364 m
.d
.comb
+= self
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.zero(0)
367 # Denormalised Number checks
369 m
.d
.comb
+= self
.out_do_z
.eq(0)
371 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
377 def __init__(self
, id_wid
):
380 self
.in_mid
= Signal(id_wid
, reset_less
=True)
381 self
.out_mid
= Signal(id_wid
, reset_less
=True)
387 if self
.id_wid
is not None:
388 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
391 class FPAddSpecialCases(FPState
):
392 """ special cases: NaNs, infs, zeros, denormalised
393 NOTE: some of these are unique to add. see "Special Operations"
394 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Set up the "special_cases" state and the module it wraps.

        width:  forwarded to FPAddSpecialCasesMod
        id_wid: forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")

    # FPAddSpecialCasesMod.__init__ requires both width and id_wid.
    # The previous call passed width only, so constructing this state
    # failed with a TypeError before anything else could happen.
    self.mod = FPAddSpecialCasesMod(width, id_wid)

    # output record with the same spec as the module's output
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
403 def setup(self
, m
, i
):
404 """ links module to inputs and outputs
406 self
.mod
.setup(m
, i
, self
.out_do_z
)
407 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
408 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
412 with m
.If(self
.out_do_z
):
415 m
.next
= "denormalise"
418 class FPAddSpecialCasesDeNorm(FPState
):
419 """ special cases: NaNs, infs, zeros, denormalised
420 NOTE: some of these are unique to add. see "Special Operations"
421 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Set up the combined special-cases + denormalise state.

        width:  forwarded to FPAddSpecialCasesMod and FPAddDeNormMod
        id_wid: forwarded to FPAddSpecialCasesMod and FPAddDeNormMod

        The state registers itself under the name "special_cases".
    """
    FPState.__init__(self, "special_cases")

    # special-cases module, an output record of its spec, and the
    # one-bit out_do_z flag
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise module and an output record of its spec
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
433 def setup(self
, m
, i
):
434 """ links module to inputs and outputs
436 self
.smod
.setup(m
, i
, self
.out_do_z
)
437 self
.dmod
.setup(m
, i
)
440 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
441 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
443 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
446 with m
.If(self
.out_do_z
):
452 class FPAddDeNormMod(FPState
):
454 def __init__(self
, width
, id_wid
):
457 self
.i
= self
.ispec()
458 self
.o
= self
.ospec()
461 return FPNumBase2Ops(self
.width
, self
.id_wid
)
464 return FPNumBase2Ops(self
.width
, self
.id_wid
)
466 def setup(self
, m
, i
):
467 """ links module to inputs and outputs
469 m
.submodules
.denormalise
= self
470 m
.d
.comb
+= self
.i
.eq(i
)
472 def elaborate(self
, platform
):
474 m
.submodules
.denorm_in_a
= self
.i
.a
475 m
.submodules
.denorm_in_b
= self
.i
.b
476 m
.submodules
.denorm_out_a
= self
.o
.a
477 m
.submodules
.denorm_out_b
= self
.o
.b
478 # hmmm, don't like repeating identical code
479 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
480 with m
.If(self
.i
.a
.exp_n127
):
481 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
483 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
485 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
486 with m
.If(self
.i
.b
.exp_n127
):
487 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
489 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
491 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
496 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ Set up the "denormalise" state and the module it wraps.

        width:  forwarded to FPAddDeNormMod and to both FPNumBase outputs
        id_wid: forwarded to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")

    # FPAddDeNormMod.__init__ requires (width, id_wid).  The previous
    # call omitted id_wid, which raised a TypeError on construction.
    self.mod = FPAddDeNormMod(width, id_wid)

    # holders for the two operands produced by this state
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
504 def setup(self
, m
, i
):
505 """ links module to inputs and outputs
509 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
510 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
513 # Denormalised Number checks
517 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ Create the input/output operand pairs and the exp_eq flag.

        width: forwarded to every FPNumBase / FPNumIn constructed here.
    """
    # inputs are FPNumBase instances
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # outputs are FPNumIn instances, built with None as first argument
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # one-bit flag, created reset-less
    self.exp_eq = Signal(reset_less=True)
526 def elaborate(self
, platform
):
527 # This one however (single-cycle) will do the shift
532 m
.submodules
.align_in_a
= self
.in_a
533 m
.submodules
.align_in_b
= self
.in_b
534 m
.submodules
.align_out_a
= self
.out_a
535 m
.submodules
.align_out_b
= self
.out_b
537 # NOTE: this does *not* do single-cycle multi-shifting,
538 # it *STAYS* in the align state until exponents match
540 # exponent of a greater than b: shift b down
541 m
.d
.comb
+= self
.exp_eq
.eq(0)
542 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
543 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
544 agtb
= Signal(reset_less
=True)
545 altb
= Signal(reset_less
=True)
546 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
547 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
549 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
550 # exponent of b greater than a: shift a down
552 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
553 # exponents equal: move to next stage.
555 m
.d
.comb
+= self
.exp_eq
.eq(1)
559 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ Set up the "align" state around an FPAddAlignMultiMod.

        width:  forwarded to FPAddAlignMultiMod and to both FPNumIn outputs
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)

    # operand holders owned by the state itself
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # one-bit flag, created reset-less
    self.exp_eq = Signal(reset_less=True)
568 def setup(self
, m
, in_a
, in_b
):
569 """ links module to inputs and outputs
571 m
.submodules
.align
= self
.mod
572 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
573 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
574 #m.d.comb += self.out_a.eq(self.mod.out_a)
575 #m.d.comb += self.out_b.eq(self.mod.out_b)
576 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
577 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
578 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
581 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ Container for two FPNumIn operands (a, b) and an id signal.

        width:  forwarded to both FPNumIn constructors
        id_wid: shape of the mid Signal
    """
    # both operands are built the same way, with None as first argument
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)

    # id that accompanies the operand pair; created reset-less
    self.mid = Signal(id_wid, reset_less=True)
593 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
596 class FPAddAlignSingleMod
:
598 def __init__(self
, width
, id_wid
):
601 self
.i
= self
.ispec()
602 self
.o
= self
.ospec()
605 return FPNumBase2Ops(self
.width
, self
.id_wid
)
608 return FPNumIn2Ops(self
.width
, self
.id_wid
)
610 def process(self
, i
):
613 def setup(self
, m
, i
):
614 """ links module to inputs and outputs
616 m
.submodules
.align
= self
617 m
.d
.comb
+= self
.i
.eq(i
)
619 def elaborate(self
, platform
):
620 """ Aligns A against B or B against A, depending on which has the
621 greater exponent. This is done in a *single* cycle using
622 variable-width bit-shift
624 the shifter used here is quite expensive in terms of gates.
625 Mux A or B in (and out) into temporaries, as only one of them
626 needs to be aligned against the other
630 m
.submodules
.align_in_a
= self
.i
.a
631 m
.submodules
.align_in_b
= self
.i
.b
632 m
.submodules
.align_out_a
= self
.o
.a
633 m
.submodules
.align_out_b
= self
.o
.b
635 # temporary (muxed) input and output to be shifted
636 t_inp
= FPNumBase(self
.width
)
637 t_out
= FPNumIn(None, self
.width
)
638 espec
= (len(self
.i
.a
.e
), True)
639 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
640 m
.submodules
.align_t_in
= t_inp
641 m
.submodules
.align_t_out
= t_out
642 m
.submodules
.multishift_r
= msr
644 ediff
= Signal(espec
, reset_less
=True)
645 ediffr
= Signal(espec
, reset_less
=True)
646 tdiff
= Signal(espec
, reset_less
=True)
647 elz
= Signal(reset_less
=True)
648 egz
= Signal(reset_less
=True)
650 # connect multi-shifter to t_inp/out mantissa (and tdiff)
651 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
652 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
653 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
654 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
655 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
657 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
658 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
659 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
660 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
662 # default: A-exp == B-exp, A and B untouched (fall through)
663 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
664 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
665 # only one shifter (muxed)
666 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
667 # exponent of a greater than b: shift b down
669 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
672 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
674 # exponent of b greater than a: shift a down
676 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
679 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
682 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
687 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ Set up the "align" state around an FPAddAlignSingleMod.

        width:  forwarded to FPAddAlignSingleMod and to both FPNumIn outputs
        id_wid: forwarded to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # operand holders owned by the state itself
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
695 def setup(self
, m
, i
):
696 """ links module to inputs and outputs
700 # NOTE: could be done as comb
701 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
702 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
708 class FPAddAlignSingleAdd(FPState
):
710 def __init__(self
, width
, id_wid
):
711 FPState
.__init
__(self
, "align")
714 self
.a1o
= self
.ospec()
717 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
720 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
722 def setup(self
, m
, i
):
723 """ links module to inputs and outputs
726 # chain AddAlignSingle, AddStage0 and AddStage1
727 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
728 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
729 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
730 self
.a1modo
= a1mod
.o
732 chain
= StageChain([mod
, a0mod
, a1mod
])
735 m
.d
.sync
+= self
.a1o
.eq(self
.a1modo
)
738 m
.next
= "normalise_1"
741 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ Data record holding z, the tot signal and the id signal.

        width:  forwarded to FPNumBase for z
        id_wid: shape of the mid Signal
    """
    # z is built with False as its second argument
    self.z = FPNumBase(width, False)

    # tot is four bits wider than z.m_width, so z must exist first
    self.tot = Signal(self.z.m_width + 4, reset_less=True)

    self.mid = Signal(id_wid, reset_less=True)
749 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
752 class FPAddStage0Mod
:
754 def __init__(self
, width
, id_wid
):
757 self
.i
= self
.ispec()
758 self
.o
= self
.ospec()
761 return FPNumBase2Ops(self
.width
, self
.id_wid
)
764 return FPAddStage0Data(self
.width
, self
.id_wid
)
766 def process(self
, i
):
769 def setup(self
, m
, i
):
770 """ links module to inputs and outputs
772 m
.submodules
.add0
= self
773 m
.d
.comb
+= self
.i
.eq(i
)
775 def elaborate(self
, platform
):
777 m
.submodules
.add0_in_a
= self
.i
.a
778 m
.submodules
.add0_in_b
= self
.i
.b
779 m
.submodules
.add0_out_z
= self
.o
.z
781 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
782 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
784 # store intermediate tests (and zero-extended mantissas)
785 seq
= Signal(reset_less
=True)
786 mge
= Signal(reset_less
=True)
787 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
788 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
789 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
790 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
791 am0
.eq(Cat(self
.i
.a
.m
, 0)),
792 bm0
.eq(Cat(self
.i
.b
.m
, 0))
794 # same-sign (both negative or both positive) add mantissas
797 self
.o
.tot
.eq(am0
+ bm0
),
798 self
.o
.z
.s
.eq(self
.i
.a
.s
)
800 # a mantissa greater than b, use a
803 self
.o
.tot
.eq(am0
- bm0
),
804 self
.o
.z
.s
.eq(self
.i
.a
.s
)
806 # b mantissa greater than a, use b
809 self
.o
.tot
.eq(bm0
- am0
),
810 self
.o
.z
.s
.eq(self
.i
.b
.s
)
815 class FPAddStage0(FPState
):
816 """ First stage of add. covers same-sign (add) and subtract
817 special-casing when mantissas are greater or equal, to
818 give greatest accuracy.
def __init__(self, width, id_wid):
    """ Set up the "add_0" state and the module it wraps.

        width:  forwarded to FPAddStage0Mod
        id_wid: forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")

    # FPAddStage0Mod.__init__ requires (width, id_wid).  The previous
    # call omitted id_wid, which raised a TypeError on construction.
    self.mod = FPAddStage0Mod(width, id_wid)

    # output record with the same spec as the module's output
    self.o = self.mod.ospec()
826 def setup(self
, m
, i
):
827 """ links module to inputs and outputs
831 # NOTE: these could be done as combinatorial (merge add0+add1)
832 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
838 class FPAddStage1Data
:
840 def __init__(self
, width
, id_wid
):
841 self
.z
= FPNumBase(width
, False)
843 self
.mid
= Signal(id_wid
, reset_less
=True)
846 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
850 class FPAddStage1Mod(FPState
):
851 """ Second stage of add: preparation for normalisation.
852 detects when tot sum is too big (tot[27] is kinda a carry bit)
855 def __init__(self
, width
, id_wid
):
858 self
.i
= self
.ispec()
859 self
.o
= self
.ospec()
862 return FPAddStage0Data(self
.width
, self
.id_wid
)
865 return FPAddStage1Data(self
.width
, self
.id_wid
)
867 def process(self
, i
):
870 def setup(self
, m
, i
):
871 """ links module to inputs and outputs
873 m
.submodules
.add1
= self
874 m
.submodules
.add1_out_overflow
= self
.o
.of
876 m
.d
.comb
+= self
.i
.eq(i
)
878 def elaborate(self
, platform
):
880 #m.submodules.norm1_in_overflow = self.in_of
881 #m.submodules.norm1_out_overflow = self.out_of
882 #m.submodules.norm1_in_z = self.in_z
883 #m.submodules.norm1_out_z = self.out_z
884 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
885 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
886 # tot[-1] (MSB) gets set when the sum overflows. shift result down
887 with m
.If(self
.i
.tot
[-1]):
889 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
890 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
891 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
892 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
893 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
894 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
896 # tot[-1] (MSB) zero case
899 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
900 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
901 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
902 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
903 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
908 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ Set up the "add_1" state and the module it wraps.

        width:  forwarded to FPAddStage1Mod and to FPNumBase for out_z
        id_wid: forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")

    # FPAddStage1Mod.__init__ requires (width, id_wid).  The previous
    # call omitted id_wid, which raised a TypeError on construction.
    self.mod = FPAddStage1Mod(width, id_wid)

    # outputs of the state: the number, its Overflow companion and a
    # strobe Signal (the strobe keeps the default reset behaviour)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
917 def setup(self
, m
, i
):
918 """ links module to inputs and outputs
922 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
924 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
925 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
926 m
.d
.sync
+= self
.norm_stb
.eq(1)
929 m
.next
= "normalise_1"
932 class FPNormaliseModSingle
:
934 def __init__(self
, width
):
936 self
.in_z
= self
.ispec()
937 self
.out_z
= self
.ospec()
940 return FPNumBase(self
.width
, False)
943 return FPNumBase(self
.width
, False)
945 def setup(self
, m
, i
):
946 """ links module to inputs and outputs
948 m
.submodules
.normalise
= self
949 m
.d
.comb
+= self
.i
.eq(i
)
951 def elaborate(self
, platform
):
954 mwid
= self
.out_z
.m_width
+2
955 pe
= PriorityEncoder(mwid
)
956 m
.submodules
.norm_pe
= pe
958 m
.submodules
.norm1_out_z
= self
.out_z
959 m
.submodules
.norm1_in_z
= self
.in_z
961 in_z
= FPNumBase(self
.width
, False)
963 m
.submodules
.norm1_insel_z
= in_z
964 m
.submodules
.norm1_insel_overflow
= in_of
966 espec
= (len(in_z
.e
), True)
967 ediff_n126
= Signal(espec
, reset_less
=True)
968 msr
= MultiShiftRMerge(mwid
, espec
)
969 m
.submodules
.multishift_r
= msr
971 m
.d
.comb
+= in_z
.eq(self
.in_z
)
972 m
.d
.comb
+= in_of
.eq(self
.in_of
)
973 # initialise out from in (overridden below)
974 m
.d
.comb
+= self
.out_z
.eq(in_z
)
975 m
.d
.comb
+= self
.out_of
.eq(in_of
)
976 # normalisation decrease condition
977 decrease
= Signal(reset_less
=True)
978 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
981 # *sigh* not entirely obvious: count leading zeros (clz)
982 # with a PriorityEncoder: to find from the MSB
983 # we reverse the order of the bits.
984 temp_m
= Signal(mwid
, reset_less
=True)
985 temp_s
= Signal(mwid
+1, reset_less
=True)
986 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
988 # cat round and guard bits back into the mantissa
989 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
990 pe
.i
.eq(temp_m
[::-1]), # inverted
991 clz
.eq(pe
.o
), # count zeros from MSB down
992 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
993 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
994 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ Data record holding the roundz flag, z and the id signal.

        width:  forwarded to FPNumBase for z
        id_wid: shape of the mid Signal
    """
    # one-bit flag, created reset-less
    self.roundz = Signal(reset_less=True)

    # z is built with False as its second argument
    self.z = FPNumBase(width, False)

    self.mid = Signal(id_wid, reset_less=True)
1007 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1010 class FPNorm1ModSingle
:
1012 def __init__(self
, width
, id_wid
):
1014 self
.id_wid
= id_wid
1015 self
.i
= self
.ispec()
1016 self
.o
= self
.ospec()
1019 return FPAddStage1Data(self
.width
, self
.id_wid
)
1022 return FPNorm1Data(self
.width
, self
.id_wid
)
1024 def setup(self
, m
, i
):
1025 """ links module to inputs and outputs
1027 m
.submodules
.normalise_1
= self
1028 m
.d
.comb
+= self
.i
.eq(i
)
1030 def process(self
, i
):
1033 def elaborate(self
, platform
):
1036 mwid
= self
.o
.z
.m_width
+2
1037 pe
= PriorityEncoder(mwid
)
1038 m
.submodules
.norm_pe
= pe
1041 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1043 m
.submodules
.norm1_out_z
= self
.o
.z
1044 m
.submodules
.norm1_out_overflow
= of
1045 m
.submodules
.norm1_in_z
= self
.i
.z
1046 m
.submodules
.norm1_in_overflow
= self
.i
.of
1049 m
.submodules
.norm1_insel_z
= i
.z
1050 m
.submodules
.norm1_insel_overflow
= i
.of
1052 espec
= (len(i
.z
.e
), True)
1053 ediff_n126
= Signal(espec
, reset_less
=True)
1054 msr
= MultiShiftRMerge(mwid
, espec
)
1055 m
.submodules
.multishift_r
= msr
1057 m
.d
.comb
+= i
.eq(self
.i
)
1058 # initialise out from in (overridden below)
1059 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1060 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1061 m
.d
.comb
+= of
.eq(i
.of
)
1062 # normalisation increase/decrease conditions
1063 decrease
= Signal(reset_less
=True)
1064 increase
= Signal(reset_less
=True)
1065 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1066 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1068 with m
.If(decrease
):
1069 # *sigh* not entirely obvious: count leading zeros (clz)
1070 # with a PriorityEncoder: to find from the MSB
1071 # we reverse the order of the bits.
1072 temp_m
= Signal(mwid
, reset_less
=True)
1073 temp_s
= Signal(mwid
+1, reset_less
=True)
1074 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1075 # make sure that the amount to decrease by does NOT
1076 # go below the minimum non-INF/NaN exponent
1077 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1080 # cat round and guard bits back into the mantissa
1081 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1082 pe
.i
.eq(temp_m
[::-1]), # inverted
1083 clz
.eq(limclz
), # count zeros from MSB down
1084 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1085 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1086 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1087 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1088 # overflow in bits 0..1: got shifted too (leave sticky)
1089 of
.guard
.eq(temp_s
[1]), # guard
1090 of
.round_bit
.eq(temp_s
[0]), # round
1093 with m
.Elif(increase
):
1094 temp_m
= Signal(mwid
+1, reset_less
=True)
1096 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1098 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1099 # connect multi-shifter to inp/out mantissa (and ediff)
1101 msr
.diff
.eq(ediff_n126
),
1102 self
.o
.z
.m
.eq(msr
.m
[3:]),
1103 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1104 # overflow in bits 0..1: got shifted too (leave sticky)
1105 of
.guard
.eq(temp_s
[2]), # guard
1106 of
.round_bit
.eq(temp_s
[1]), # round
1107 of
.sticky
.eq(temp_s
[0]), # sticky
1108 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1114 class FPNorm1ModMulti
:
1116 def __init__(self
, width
, single_cycle
=True):
1118 self
.in_select
= Signal(reset_less
=True)
1119 self
.in_z
= FPNumBase(width
, False)
1120 self
.in_of
= Overflow()
1121 self
.temp_z
= FPNumBase(width
, False)
1122 self
.temp_of
= Overflow()
1123 self
.out_z
= FPNumBase(width
, False)
1124 self
.out_of
= Overflow()
1126 def elaborate(self
, platform
):
1129 m
.submodules
.norm1_out_z
= self
.out_z
1130 m
.submodules
.norm1_out_overflow
= self
.out_of
1131 m
.submodules
.norm1_temp_z
= self
.temp_z
1132 m
.submodules
.norm1_temp_of
= self
.temp_of
1133 m
.submodules
.norm1_in_z
= self
.in_z
1134 m
.submodules
.norm1_in_overflow
= self
.in_of
1136 in_z
= FPNumBase(self
.width
, False)
1138 m
.submodules
.norm1_insel_z
= in_z
1139 m
.submodules
.norm1_insel_overflow
= in_of
1141 # select which of temp or in z/of to use
1142 with m
.If(self
.in_select
):
1143 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1144 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1146 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1147 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1148 # initialise out from in (overridden below)
1149 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1150 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1151 # normalisation increase/decrease conditions
1152 decrease
= Signal(reset_less
=True)
1153 increase
= Signal(reset_less
=True)
1154 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1155 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1156 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1158 with m
.If(decrease
):
1160 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1161 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1162 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1163 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1164 self
.out_of
.round_bit
.eq(0), # reset round bit
1165 self
.out_of
.m0
.eq(in_of
.guard
),
1168 with m
.Elif(increase
):
1170 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1171 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1172 self
.out_of
.guard
.eq(in_z
.m
[0]),
1173 self
.out_of
.m0
.eq(in_z
.m
[1]),
1174 self
.out_of
.round_bit
.eq(in_of
.guard
),
1175 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1181 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ Set up the "normalise_1" state and the module it wraps.

        width:        forwarded to FPNorm1ModSingle and to FPNumBase
        id_wid:       forwarded to FPNorm1ModSingle
        single_cycle: accepted for interface compatibility; not used here
    """
    FPState.__init__(self, "normalise_1")

    # FPNorm1ModSingle.__init__ requires (width, id_wid).  The previous
    # call omitted id_wid, which raised a TypeError on construction.
    self.mod = FPNorm1ModSingle(width, id_wid)

    # ospec() delegates to self.mod, so self.mod has to be assigned first
    self.o = self.ospec()

    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1191 return self
.mod
.ispec()
1194 return self
.mod
.ospec()
1196 def setup(self
, m
, i
):
1197 """ links module to inputs and outputs
1199 self
.mod
.setup(m
, i
)
1201 def action(self
, m
):
1205 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ Set up the multi-cycle "normalise_1" state.

        width:  forwarded to FPNorm1ModMulti and to the FPNumBase holders
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # handshake and control signals; all reset-less, ack additionally
    # has an explicit reset value of 0
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # temporary number/overflow pair
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # final outputs of the state
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1219 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1220 """ links module to inputs and outputs
1222 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1223 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1224 self
.out_z
, self
.out_norm
)
1226 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1227 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1229 def action(self
, m
):
1230 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1231 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1232 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1233 with m
.If(self
.out_norm
):
1234 with m
.If(self
.in_accept
):
1239 m
.d
.sync
+= self
.ack
.eq(0)
1241 # normalisation not required (or done).
1243 m
.d
.sync
+= self
.ack
.eq(1)
1244 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" of the compact pipeline.

        Builds the normalise / round / corrections / pack modules as one
        chain and registers the packed result into self.out_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        # ispec(), ospec() and setup() all read self.width: it must be stored
        self.width = width

    def ispec(self):
        """ input data layout (that of the first module in the chain)
        """
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        """ output data layout (that of the last module in the chain)
        """
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs

            * m: the Module that the chained stages are added to
            * i: input data, expected to match ispec()
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # without this call neither `chain` nor `i` is ever used.
        # NOTE(review): assumes StageChain provides setup(m, i) like the
        # individual stage modules do - confirm against example_buf_pipe
        chain.setup(m, i)
        self.out_z = pmod.ospec()

        # register the last module's combinatorial output
        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(pmod.o.z.v) # outputs packed result

    def action(self, m):
        """ FSM transition: always moves on to "pack_put_z"
        """
        m.next = "pack_put_z"
1282 def __init__(self
, width
, id_wid
):
1283 self
.z
= FPNumBase(width
, False)
1284 self
.mid
= Signal(id_wid
, reset_less
=True)
1287 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1292 def __init__(self
, width
, id_wid
):
1294 self
.id_wid
= id_wid
1295 self
.i
= self
.ispec()
1296 self
.out_z
= self
.ospec()
1299 return FPNorm1Data(self
.width
, self
.id_wid
)
1302 return FPRoundData(self
.width
, self
.id_wid
)
1304 def process(self
, i
):
1307 def setup(self
, m
, i
):
1308 m
.submodules
.roundz
= self
1309 m
.d
.comb
+= self
.i
.eq(i
)
def elaborate(self, platform):
    """ creates the combinatorial rounding logic

        out_z defaults to a copy of the input; when the input's roundz
        flag is set the mantissa is incremented, and if the mantissa was
        already all 1s the exponent is incremented as well.
    """
    m = Module()  # `m` is used throughout, so it has to be created here
    m.d.comb += self.out_z.eq(self.i)
    with m.If(self.i.roundz):
        m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa rounds up
        with m.If(self.i.z.m == self.i.z.m1s): # all 1s
            m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
    return m
1321 class FPRound(FPState
):
1323 def __init__(self
, width
, id_wid
):
1324 FPState
.__init
__(self
, "round")
1325 self
.mod
= FPRoundMod(width
)
1326 self
.out_z
= self
.ospec()
1329 return self
.mod
.ispec()
1332 return self
.mod
.ospec()
1334 def setup(self
, m
, i
):
1335 """ links module to inputs and outputs
1337 self
.mod
.setup(m
, i
)
1340 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1341 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1343 def action(self
, m
):
1344 m
.next
= "corrections"
1347 class FPCorrectionsMod
:
1349 def __init__(self
, width
, id_wid
):
1351 self
.id_wid
= id_wid
1352 self
.i
= self
.ispec()
1353 self
.out_z
= self
.ospec()
1356 return FPRoundData(self
.width
, self
.id_wid
)
1359 return FPRoundData(self
.width
, self
.id_wid
)
1361 def process(self
, i
):
1364 def setup(self
, m
, i
):
1365 """ links module to inputs and outputs
1367 m
.submodules
.corrections
= self
1368 m
.d
.comb
+= self
.i
.eq(i
)
def elaborate(self, platform):
    """ creates the combinatorial corrections logic

        out_z defaults to a copy of the input; when the input number
        reports is_denormalised, the output exponent is replaced with
        the input's N127 value.
    """
    m = Module()  # `m` is used throughout, so it has to be created here
    m.submodules.corr_in_z = self.i.z
    m.submodules.corr_out_z = self.out_z.z
    m.d.comb += self.out_z.eq(self.i)
    with m.If(self.i.z.is_denormalised):
        m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
    return m
1380 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """ FSM state "corrections", wrapping an FPCorrectionsMod

        * width: passed through to FPCorrectionsMod
        * id_wid: passed through to FPCorrectionsMod
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod.__init__ takes (width, id_wid): leaving id_wid
    # out raises TypeError on construction
    self.mod = FPCorrectionsMod(width, id_wid)
    self.out_z = self.ospec()
1388 return self
.mod
.ispec()
1391 return self
.mod
.ospec()
def setup(self, m, in_z):
    """ links module to inputs and outputs

        * m: the Module that the corrections module is added to
        * in_z: input data, handed straight to the module's own setup()
    """
    self.mod.setup(m, in_z)

    # register the module's combinatorial output.  FPCorrectionsMod
    # names its output "out_z" (it has no "o" attribute), so the id is
    # read from there as well.
    m.d.sync += self.out_z.eq(self.mod.out_z)
    m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)
1401 def action(self
, m
):
1407 def __init__(self
, width
, id_wid
):
1408 self
.z
= FPNumOut(width
, False)
1409 self
.mid
= Signal(id_wid
, reset_less
=True)
1412 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1417 def __init__(self
, width
, id_wid
):
1419 self
.id_wid
= id_wid
1420 self
.i
= self
.ispec()
1421 self
.o
= self
.ospec()
1424 return FPRoundData(self
.width
, self
.id_wid
)
1427 return FPPackData(self
.width
, self
.id_wid
)
1429 def process(self
, i
):
1432 def setup(self
, m
, in_z
):
1433 """ links module to inputs and outputs
1435 m
.submodules
.pack
= self
1436 m
.d
.comb
+= self
.i
.eq(in_z
)
def elaborate(self, platform):
    """ creates the combinatorial packing logic

        The id is passed through unchanged.  If the input number reports
        is_overflowed the output is set with inf() from the input sign,
        otherwise it is built with create() from the input sign,
        exponent and mantissa.
    """
    m = Module()  # `m` is used throughout, so it has to be created here
    m.submodules.pack_in_z = self.i.z
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(self.i.z.is_overflowed):
        m.d.comb += self.o.z.inf(self.i.z.s)
    # must be an Else: an unconditional create() placed after the If
    # would take priority and make the overflow branch dead
    with m.Else():
        m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    return m
1450 def __init__(self
, width
, id_wid
):
1451 self
.z
= FPNumOut(width
, False)
1452 self
.mid
= Signal(id_wid
, reset_less
=True)
1455 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """ FSM state "pack": wraps an FPPackMod and registers its output
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod is built with (width, id_wid) everywhere else in
        # this file; passing only width fails on construction
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data layout: whatever the wrapped module expects
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data layout: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the pack module exposes its result as "o" (with .z and .mid),
        # and out_z has the same layout because it comes from ospec()
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM transition: always moves on to "pack_put_z"
        """
        m.next = "pack_put_z"
1483 class FPPutZ(FPState
):
1485 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1486 FPState
.__init
__(self
, state
)
1487 if to_state
is None:
1488 to_state
= "get_ops"
1489 self
.to_state
= to_state
1492 self
.in_mid
= in_mid
1493 self
.out_mid
= out_mid
1495 def action(self
, m
):
1496 if self
.in_mid
is not None:
1497 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1499 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1501 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1502 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1503 m
.next
= self
.to_state
1505 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1508 class FPPutZIdx(FPState
):
1510 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1511 FPState
.__init
__(self
, state
)
1512 if to_state
is None:
1513 to_state
= "get_ops"
1514 self
.to_state
= to_state
1516 self
.out_zs
= out_zs
1517 self
.in_mid
= in_mid
1519 def action(self
, m
):
1520 outz_stb
= Signal(reset_less
=True)
1521 outz_ack
= Signal(reset_less
=True)
1522 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1523 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1526 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1528 with m
.If(outz_stb
& outz_ack
):
1529 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1530 m
.next
= self
.to_state
1532 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
class FPADDBaseData:
    """ input record for the adder: two operands plus an identifier
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the a and b operand Signals
            * id_wid: bit-width of the mid Signal
        """
        # kept for parity with the other data/module classes, which
        # store both construction parameters
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
1548 def __init__(self
, width
, id_wid
):
1549 self
.z
= FPOp(width
)
1550 self
.mid
= Signal(id_wid
, reset_less
=True)
1553 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1558 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1561 * width: bit-width of IEEE754. supported: 16, 32, 64
1562 * id_wid: an identifier that is sync-connected to the input
1563 * single_cycle: True indicates each stage to complete in 1 clock
1564 * compact: True indicates a reduced number of stages
1567 self
.id_wid
= id_wid
1568 self
.single_cycle
= single_cycle
1569 self
.compact
= compact
1571 self
.in_t
= Trigger()
1572 self
.i
= self
.ispec()
1573 self
.o
= self
.ospec()
1578 return FPADDBaseData(self
.width
, self
.id_wid
)
1581 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ appends state to self.states and hands it back

        The return value matters: callers write
        "x = self.add_state(SomeState(...))" and then call x.setup(...).
    """
    self.states.append(state)
    return state
1587 def get_fragment(self
, platform
=None):
1588 """ creates the HDL code-fragment for FPAdd
1591 m
.submodules
.out_z
= self
.o
.z
1592 m
.submodules
.in_t
= self
.in_t
1594 self
.get_compact_fragment(m
, platform
)
1596 self
.get_longer_fragment(m
, platform
)
1598 with m
.FSM() as fsm
:
1600 for state
in self
.states
:
1601 with m
.State(state
.state_from
):
1606 def get_longer_fragment(self
, m
, platform
=None):
1608 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1610 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1614 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1615 sc
.setup(m
, a
, b
, self
.in_mid
)
1617 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1618 dn
.setup(m
, a
, b
, sc
.in_mid
)
1620 if self
.single_cycle
:
1621 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1622 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1624 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1625 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1627 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1628 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1630 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1631 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1633 if self
.single_cycle
:
1634 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1635 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1637 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1638 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1640 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1641 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1643 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1644 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1646 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1647 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1649 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1650 pa
.in_mid
, self
.out_mid
))
1652 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1653 pa
.in_mid
, self
.out_mid
))
1655 def get_compact_fragment(self
, m
, platform
=None):
1657 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1658 self
.width
, self
.id_wid
))
1659 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1661 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1664 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1667 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1668 n1
.setup(m
, alm
.a1o
)
1670 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1671 n1
.out_z
.mid
, self
.o
.mid
))
1673 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1674 sc
.o
.mid
, self
.o
.mid
))
1677 class FPADDBase(FPState
):
1679 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1682 * width: bit-width of IEEE754. supported: 16, 32, 64
1683 * id_wid: an identifier that is sync-connected to the input
1684 * single_cycle: True indicates each stage to complete in 1 clock
1686 FPState
.__init
__(self
, "fpadd")
1688 self
.single_cycle
= single_cycle
1689 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1690 self
.o
= self
.ospec()
1692 self
.in_t
= Trigger()
1693 self
.i
= self
.ispec()
1695 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1696 self
.in_accept
= Signal(reset_less
=True)
1697 self
.add_stb
= Signal(reset_less
=True)
1698 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1701 return self
.mod
.ispec()
1704 return self
.mod
.ospec()
1706 def setup(self
, m
, i
, add_stb
, in_mid
):
1707 m
.d
.comb
+= [self
.i
.eq(i
),
1708 self
.mod
.i
.eq(self
.i
),
1709 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1710 #self.add_stb.eq(add_stb),
1711 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1712 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1713 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1714 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1715 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1716 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1719 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1720 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1721 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1722 #m.d.sync += self.in_t.stb.eq(0)
1724 m
.submodules
.fpadd
= self
.mod
1726 def action(self
, m
):
1728 # in_accept is set on incoming strobe HIGH and ack LOW.
1729 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1731 #with m.If(self.in_t.ack):
1732 # m.d.sync += self.in_t.stb.eq(0)
1733 with m
.If(~self
.z_done
):
1734 # not done: test for accepting an incoming operand pair
1735 with m
.If(self
.in_accept
):
1737 self
.add_ack
.eq(1), # acknowledge receipt...
1738 self
.in_t
.stb
.eq(1), # initiate add
1741 m
.d
.sync
+= [self
.add_ack
.eq(0),
1742 self
.in_t
.stb
.eq(0),
1746 # done: acknowledge, and write out id and value
1747 m
.d
.sync
+= [self
.add_ack
.eq(1),
1754 if self
.in_mid
is not None:
1755 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1758 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1760 # move to output state on detecting z ack
1761 with m
.If(self
.out_z
.trigger
):
1762 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1765 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1769 def __init__(self
, width
, id_wid
):
1771 self
.id_wid
= id_wid
1773 for i
in range(rs_sz
):
1775 out_z
.name
= "out_z_%d" % i
1777 self
.res
= Array(res
)
1778 self
.in_z
= FPOp(width
)
1779 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1781 def setup(self
, m
, in_z
, in_mid
):
1782 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1783 self
.in_mid
.eq(in_mid
)]
1785 def get_fragment(self
, platform
=None):
1786 """ creates the HDL code-fragment for FPAdd
1789 m
.submodules
.res_in_z
= self
.in_z
1790 m
.submodules
+= self
.res
1802 """ FPADD: stages as follows:
1808 FPAddBase---> FPAddBaseMod
1810 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1812 FPAddBase is tricky: it is both a stage and *has* stages.
1813 Connection to FPAddBaseMod therefore requires an in stb/ack
1814 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1815 needs to be the thing that raises the incoming stb.
1818 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1821 * width: bit-width of IEEE754. supported: 16, 32, 64
1822 * id_wid: an identifier that is sync-connected to the input
1823 * single_cycle: True indicates each stage to complete in 1 clock
1826 self
.id_wid
= id_wid
1827 self
.single_cycle
= single_cycle
1829 #self.out_z = FPOp(width)
1830 self
.ids
= FPID(id_wid
)
1833 for i
in range(rs_sz
):
1836 in_a
.name
= "in_a_%d" % i
1837 in_b
.name
= "in_b_%d" % i
1838 rs
.append((in_a
, in_b
))
1842 for i
in range(rs_sz
):
1844 out_z
.name
= "out_z_%d" % i
1846 self
.res
= Array(res
)
def add_state(self, state):
    """ appends state to self.states and hands it back

        The return value matters: get_fragment does
        "ab = self.add_state(ab)" and then calls ab.ispec().
    """
    self.states.append(state)
    return state
1854 def get_fragment(self
, platform
=None):
1855 """ creates the HDL code-fragment for FPAdd
1858 m
.submodules
+= self
.rs
1860 in_a
= self
.rs
[0][0]
1861 in_b
= self
.rs
[0][1]
1863 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1868 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1873 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1874 ab
= self
.add_state(ab
)
1875 abd
= ab
.ispec() # create an input spec object for FPADDBase
1876 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1877 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1880 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1883 with m
.FSM() as fsm
:
1885 for state
in self
.states
:
1886 with m
.State(state
.state_from
):
1892 if __name__
== "__main__":
1894 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1895 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1896 alu
.rs
[0][1].ports() + \
1897 alu
.res
[0].ports() + \
1898 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1900 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1901 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1902 alu
.in_t
.ports() + \
1903 alu
.out_z
.ports() + \
1904 [alu
.in_mid
, alu
.out_mid
])
1907 # works... but don't use, just do "python fname.py convert -t v"
1908 #print (verilog.convert(alu, ports=[
1909 # ports=alu.in_a.ports() + \
1910 # alu.in_b.ports() + \
1911 # alu.out_z.ports())