291b56feb3e369636d91c04284abca7b35de2bca
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember the state identifier this object is associated with.

    :param state_from: value stored as ``self.state_from``; subclasses in
        this file pass state-name strings such as ``"align"``.
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.out_op
= FPOps(width
, num_ops
)
94 def elaborate(self
, platform
):
97 pe
= PriorityEncoder(self
.num_rows
)
98 m
.submodules
.selector
= pe
99 m
.submodules
.out_op
= self
.out_op
100 m
.submodules
+= self
.rs
102 # connect priority encoder
104 for i
in range(self
.num_rows
):
105 in_ready
.append(self
.rs
[i
].ready
)
106 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
107 m
.d
.comb
+= self
.out_op
.stb
.eq(pe
.n
) # strobe-out when encoder active
110 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
111 for i
in range(self
.num_rows
):
112 with m
.If(pe
.o
== Const(i
, (self
.mmax
, False))):
113 for j
in range(self
.num_ops
):
114 m
.d
.sync
+= self
.out_op
.v
[j
].eq(self
.rs
[i
].out_op
[j
])
119 for i
in range(self
.num_rows
):
121 res
+= inop
.in_op
+ [inop
.stb
]
122 return self
.out_op
.ports() + res
+ [self
.ack
+ self
.stb
]
def __init__(self, width):
    """Create the operand ports of this module.

    :param width: passed to ``FPOp`` and used as the width of ``out_op``.
    """
    # incoming operand bundle; elaborate() reads its .ack, .stb and .v
    self.in_op = FPOp(width)
    # plain signal that elaborate() loads from in_op.v
    self.out_op = Signal(width)
    # driven in elaborate() from in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
131 def elaborate(self
, platform
):
133 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
134 m
.submodules
.get_op_in
= self
.in_op
135 #m.submodules.get_op_out = self.out_op
136 with m
.If(self
.out_decode
):
138 self
.out_op
.eq(self
.in_op
.v
),
143 class FPGetOp(FPState
):
147 def __init__(self
, in_state
, out_state
, in_op
, width
):
148 FPState
.__init
__(self
, in_state
)
149 self
.out_state
= out_state
150 self
.mod
= FPGetOpMod(width
)
152 self
.out_op
= Signal(width
)
153 self
.out_decode
= Signal(reset_less
=True)
155 def setup(self
, m
, in_op
):
156 """ links module to inputs and outputs
158 setattr(m
.submodules
, self
.state_from
, self
.mod
)
159 m
.d
.comb
+= self
.mod
.in_op
.copy(in_op
)
160 #m.d.comb += self.out_op.eq(self.mod.out_op)
161 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
164 with m
.If(self
.out_decode
):
165 m
.next
= self
.out_state
167 self
.in_op
.ack
.eq(0),
168 self
.out_op
.eq(self
.mod
.out_op
)
171 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
174 class FPGet2OpMod(Trigger
):
def __init__(self, width):
    """Create two raw operand inputs and their decoded counterparts.

    :param width: bit width of ``in_op1``/``in_op2``; also handed to the
        two ``FPNumIn`` outputs.
    """
    Trigger.__init__(self)

    # raw operands
    self.in_op1 = Signal(width, reset_less=True)
    self.in_op2 = Signal(width, reset_less=True)

    # elaborate() decodes in_op1/in_op2 into these when self.trigger is set
    self.out_op1 = FPNumIn(None, width)
    self.out_op2 = FPNumIn(None, width)
182 def elaborate(self
, platform
):
183 m
= Trigger
.elaborate(self
, platform
)
184 #m.submodules.get_op_in = self.in_op
185 m
.submodules
.get_op1_out
= self
.out_op1
186 m
.submodules
.get_op2_out
= self
.out_op2
187 with m
.If(self
.trigger
):
189 self
.out_op1
.decode(self
.in_op1
),
190 self
.out_op2
.decode(self
.in_op2
),
195 class FPGet2Op(FPState
):
199 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
):
200 FPState
.__init
__(self
, in_state
)
201 self
.out_state
= out_state
202 self
.mod
= FPGet2OpMod(width
)
205 self
.out_op1
= FPNumIn(None, width
)
206 self
.out_op2
= FPNumIn(None, width
)
207 self
.in_stb
= Signal(reset_less
=True)
208 self
.out_ack
= Signal(reset_less
=True)
209 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op1, in_op2, in_stb, in_ack):
    """Link the operand-fetch module to its inputs and outputs.

    Registers ``self.mod`` as the ``get_ops`` submodule of ``m`` and adds
    the combinatorial connections between that module, this object and the
    caller-supplied signals.

    :param m: module being built
    :param in_op1: source for ``self.mod.in_op1``
    :param in_op2: source for ``self.mod.in_op2``
    :param in_stb: source for ``self.mod.stb``
    :param in_ack: signal driven from ``self.mod.ack``
    """
    fetch = self.mod
    m.submodules.get_ops = fetch
    m.d.comb += [
        # operands and strobe flow into the fetch module
        fetch.in_op1.eq(in_op1),
        fetch.in_op2.eq(in_op2),
        fetch.stb.eq(in_stb),
        # ack / trigger are mirrored out of it
        self.out_ack.eq(fetch.ack),
        self.out_decode.eq(fetch.trigger),
        in_ack.eq(fetch.ack),
    ]
223 with m
.If(self
.out_decode
):
224 m
.next
= self
.out_state
227 #self.out_op1.v.eq(self.mod.out_op1.v),
228 #self.out_op2.v.eq(self.mod.out_op2.v),
229 self
.out_op1
.copy(self
.mod
.out_op1
),
230 self
.out_op2
.copy(self
.mod
.out_op2
)
233 m
.d
.sync
+= self
.mod
.ack
.eq(1)
236 class FPAddSpecialCasesMod
:
237 """ special cases: NaNs, infs, zeros, denormalised
238 NOTE: some of these are unique to add. see "Special Operations"
239 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width):
    """Create the operand inputs and the special-case result outputs.

    :param width: passed to every ``FPNumBase``/``FPNumOut`` created here.
    """
    # the two operands examined by elaborate()
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # result for the special cases, and the flag elaborate() sets to 1
    # when one of them applies (0 otherwise)
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, in_a, in_b, out_do_z):
    """Link this module to its inputs and outputs.

    Adds ``self`` to ``m`` as the ``specialcases`` submodule and wires the
    operands in and the "do z" flag out, all combinatorially.

    :param m: module being built
    :param in_a: source copied into ``self.in_a``
    :param in_b: source copied into ``self.in_b``
    :param out_do_z: signal driven from ``self.out_do_z``
    """
    m.submodules.specialcases = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
        out_do_z.eq(self.out_do_z),
    ]
256 def elaborate(self
, platform
):
259 m
.submodules
.sc_in_a
= self
.in_a
260 m
.submodules
.sc_in_b
= self
.in_b
261 m
.submodules
.sc_out_z
= self
.out_z
264 m
.d
.comb
+= s_nomatch
.eq(self
.in_a
.s
!= self
.in_b
.s
)
267 m
.d
.comb
+= m_match
.eq(self
.in_a
.m
== self
.in_b
.m
)
269 # if a is NaN or b is NaN return NaN
270 with m
.If(self
.in_a
.is_nan | self
.in_b
.is_nan
):
271 m
.d
.comb
+= self
.out_do_z
.eq(1)
272 m
.d
.comb
+= self
.out_z
.nan(0)
274 # XXX WEIRDNESS for FP16 non-canonical NaN handling
277 ## if a is zero and b is NaN return -b
278 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
279 # m.d.comb += self.out_do_z.eq(1)
280 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
282 ## if b is zero and a is NaN return -a
283 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
284 # m.d.comb += self.out_do_z.eq(1)
285 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
287 ## if a is -zero and b is NaN return -b
288 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
289 # m.d.comb += self.out_do_z.eq(1)
290 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
292 ## if b is -zero and a is NaN return -a
293 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
294 # m.d.comb += self.out_do_z.eq(1)
295 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
297 # if a is inf return inf (or NaN)
298 with m
.Elif(self
.in_a
.is_inf
):
299 m
.d
.comb
+= self
.out_do_z
.eq(1)
300 m
.d
.comb
+= self
.out_z
.inf(self
.in_a
.s
)
301 # if a is inf and signs don't match return NaN
302 with m
.If(self
.in_b
.exp_128
& s_nomatch
):
303 m
.d
.comb
+= self
.out_z
.nan(0)
305 # if b is inf return inf
306 with m
.Elif(self
.in_b
.is_inf
):
307 m
.d
.comb
+= self
.out_do_z
.eq(1)
308 m
.d
.comb
+= self
.out_z
.inf(self
.in_b
.s
)
310 # if a is zero and b zero return signed-a/b
311 with m
.Elif(self
.in_a
.is_zero
& self
.in_b
.is_zero
):
312 m
.d
.comb
+= self
.out_do_z
.eq(1)
313 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
& self
.in_b
.s
,
317 # if a is zero return b
318 with m
.Elif(self
.in_a
.is_zero
):
319 m
.d
.comb
+= self
.out_do_z
.eq(1)
320 m
.d
.comb
+= self
.out_z
.create(self
.in_b
.s
, self
.in_b
.e
,
323 # if b is zero return a
324 with m
.Elif(self
.in_b
.is_zero
):
325 m
.d
.comb
+= self
.out_do_z
.eq(1)
326 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
, self
.in_a
.e
,
329 # if a equal to -b return zero (+ve zero)
330 with m
.Elif(s_nomatch
& m_match
& (self
.in_a
.e
== self
.in_b
.e
)):
331 m
.d
.comb
+= self
.out_do_z
.eq(1)
332 m
.d
.comb
+= self
.out_z
.zero(0)
334 # Denormalised Number checks
336 m
.d
.comb
+= self
.out_do_z
.eq(0)
342 def __init__(self
, id_wid
):
345 self
.in_mid
= Signal(id_wid
, reset_less
=True)
346 self
.out_mid
= Signal(id_wid
, reset_less
=True)
352 if self
.id_wid
is not None:
353 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
356 class FPAddSpecialCases(FPState
, FPID
):
357 """ special cases: NaNs, infs, zeros, denormalised
358 NOTE: some of these are unique to add. see "Special Operations"
359 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the "special_cases" state.

    :param width: passed to the special-cases module and to ``out_z``
    :param id_wid: passed to ``FPID.__init__``
    """
    # both bases are initialised explicitly, state name first
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)

    self.mod = FPAddSpecialCasesMod(width)
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """Link the special-cases module to its inputs and outputs.

    :param m: module being built
    :param in_a: first operand, forwarded to ``self.mod.setup``
    :param in_b: second operand, forwarded to ``self.mod.setup``
    :param in_mid: source for ``self.in_mid`` (only connected when this
        object has an ``in_mid``)
    """
    self.mod.setup(m, in_a, in_b, self.out_do_z)

    # nothing more to connect when no ID signal exists on this object
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
378 with m
.If(self
.out_do_z
):
379 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
382 m
.next
= "denormalise"
385 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
386 """ special cases: NaNs, infs, zeros, denormalised
387 NOTE: some of these are unique to add. see "Special Operations"
388 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
391 def __init__(self
, width
, id_wid
):
392 FPState
.__init
__(self
, "special_cases")
393 FPID
.__init
__(self
, id_wid
)
394 self
.smod
= FPAddSpecialCasesMod(width
)
395 self
.out_z
= FPNumOut(width
, False)
396 self
.out_do_z
= Signal(reset_less
=True)
398 self
.dmod
= FPAddDeNormMod(width
)
399 self
.out_a
= FPNumBase(width
)
400 self
.out_b
= FPNumBase(width
)
def setup(self, m, in_a, in_b, in_mid):
    """Link both sub-modules to the incoming operands.

    The special-cases module (``smod``) and the denormalise module
    (``dmod``) are each set up on the same ``in_a``/``in_b`` pair.

    :param m: module being built
    :param in_a: first operand
    :param in_b: second operand
    :param in_mid: source for ``self.in_mid`` (only connected when this
        object has an ``in_mid``)
    """
    self.smod.setup(m, in_a, in_b, self.out_do_z)
    self.dmod.setup(m, in_a, in_b)

    # nothing more to connect when no ID signal exists on this object
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
412 with m
.If(self
.out_do_z
):
413 m
.d
.sync
+= self
.out_z
.v
.eq(self
.smod
.out_z
.v
) # only take output
417 m
.d
.sync
+= self
.out_a
.copy(self
.dmod
.out_a
)
418 m
.d
.sync
+= self
.out_b
.copy(self
.dmod
.out_b
)
421 class FPAddDeNormMod(FPState
):
def __init__(self, width):
    """Create the operand inputs and the adjusted operand outputs.

    :param width: passed to every ``FPNumBase`` created here.
    """
    # operands as received
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # operands as driven (combinatorially) by elaborate()
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
def setup(self, m, in_a, in_b):
    """Link this module to its inputs.

    Adds ``self`` to ``m`` as the ``denormalise`` submodule and copies the
    two operands into ``self.in_a``/``self.in_b`` combinatorially.

    :param m: module being built
    :param in_a: source copied into ``self.in_a``
    :param in_b: source copied into ``self.in_b``
    """
    m.submodules.denormalise = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
    ]
436 def elaborate(self
, platform
):
438 m
.submodules
.denorm_in_a
= self
.in_a
439 m
.submodules
.denorm_in_b
= self
.in_b
440 m
.submodules
.denorm_out_a
= self
.out_a
441 m
.submodules
.denorm_out_b
= self
.out_b
442 # hmmm, don't like repeating identical code
443 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
444 with m
.If(self
.in_a
.exp_n127
):
445 m
.d
.comb
+= self
.out_a
.e
.eq(self
.in_a
.N126
) # limit a exponent
447 m
.d
.comb
+= self
.out_a
.m
[-1].eq(1) # set top mantissa bit
449 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
450 with m
.If(self
.in_b
.exp_n127
):
451 m
.d
.comb
+= self
.out_b
.e
.eq(self
.in_b
.N126
) # limit a exponent
453 m
.d
.comb
+= self
.out_b
.m
[-1].eq(1) # set top mantissa bit
458 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the "denormalise" state.

    :param width: passed to the denormalise module and to the outputs
    :param id_wid: passed to ``FPID.__init__``
    """
    # both bases are initialised explicitly, state name first
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)

    self.mod = FPAddDeNormMod(width)

    # this state's own copies of the module's out_a/out_b
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
def setup(self, m, in_a, in_b, in_mid):
    """Link the denormalise module to its inputs.

    :param m: module being built
    :param in_a: first operand, forwarded to ``self.mod.setup``
    :param in_b: second operand, forwarded to ``self.mod.setup``
    :param in_mid: source for ``self.in_mid`` (only connected when this
        object has an ``in_mid``)
    """
    self.mod.setup(m, in_a, in_b)

    # nothing more to connect when no ID signal exists on this object
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
476 # Denormalised Number checks
478 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
479 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
482 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the operand ports and the exponents-equal flag.

    :param width: passed to every ``FPNumBase``/``FPNumIn`` created here.
    """
    # operands as received
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # operands as driven by elaborate()
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # flag driven by elaborate(): defaults to 0, set to 1 in the
    # "exponents equal" case
    self.exp_eq = Signal(reset_less=True)
491 def elaborate(self
, platform
):
492 # This one however (single-cycle) will do the shift
497 m
.submodules
.align_in_a
= self
.in_a
498 m
.submodules
.align_in_b
= self
.in_b
499 m
.submodules
.align_out_a
= self
.out_a
500 m
.submodules
.align_out_b
= self
.out_b
502 # NOTE: this does *not* do single-cycle multi-shifting,
503 # it *STAYS* in the align state until exponents match
505 # exponent of a greater than b: shift b down
506 m
.d
.comb
+= self
.exp_eq
.eq(0)
507 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
508 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
509 agtb
= Signal(reset_less
=True)
510 altb
= Signal(reset_less
=True)
511 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
512 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
514 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
515 # exponent of b greater than a: shift a down
517 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
518 # exponents equal: move to next stage.
520 m
.d
.comb
+= self
.exp_eq
.eq(1)
524 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the multi-cycle "align" state.

    :param width: passed to the align module and to the outputs
    :param id_wid: passed to ``FPID.__init__``
    """
    # note: FPID is initialised before FPState in this class
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")

    self.mod = FPAddAlignMultiMod(width)

    # this state's own copies of the module's out_a/out_b
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # mirrors self.mod.exp_eq (connected in setup)
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """Link the align module to its inputs and outputs.

    Registers ``self.mod`` as the ``align`` submodule of ``m``, feeds it
    the two operands and mirrors its ``exp_eq`` flag, all combinatorially.

    :param m: module being built
    :param in_a: source copied into ``self.mod.in_a``
    :param in_b: source copied into ``self.mod.in_b``
    :param in_mid: source for ``self.in_mid`` (only connected when this
        object has an ``in_mid``)
    """
    aligner = self.mod
    m.submodules.align = aligner

    # out_a/out_b are deliberately not driven here: the combinatorial
    # copies from aligner.out_a/out_b are disabled in this method
    m.d.comb += [
        aligner.in_a.copy(in_a),
        aligner.in_b.copy(in_b),
        self.exp_eq.eq(aligner.exp_eq),
    ]

    if self.in_mid is not None:
        m.d.comb += self.in_mid.eq(in_mid)
548 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
549 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
550 with m
.If(self
.exp_eq
):
554 class FPAddAlignSingleMod
:
556 def __init__(self
, width
):
558 self
.in_a
= FPNumBase(width
)
559 self
.in_b
= FPNumBase(width
)
560 self
.out_a
= FPNumIn(None, width
)
561 self
.out_b
= FPNumIn(None, width
)
def setup(self, m, in_a, in_b):
    """Link this module to its inputs.

    Adds ``self`` to ``m`` as the ``align`` submodule and copies the two
    operands into ``self.in_a``/``self.in_b`` combinatorially.

    :param m: module being built
    :param in_a: source copied into ``self.in_a``
    :param in_b: source copied into ``self.in_b``
    """
    m.submodules.align = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
    ]
570 def elaborate(self
, platform
):
571 """ Aligns A against B or B against A, depending on which has the
572 greater exponent. This is done in a *single* cycle using
573 variable-width bit-shift
575 the shifter used here is quite expensive in terms of gates.
576 Mux A or B in (and out) into temporaries, as only one of them
577 needs to be aligned against the other
581 m
.submodules
.align_in_a
= self
.in_a
582 m
.submodules
.align_in_b
= self
.in_b
583 m
.submodules
.align_out_a
= self
.out_a
584 m
.submodules
.align_out_b
= self
.out_b
586 # temporary (muxed) input and output to be shifted
587 t_inp
= FPNumBase(self
.width
)
588 t_out
= FPNumIn(None, self
.width
)
589 espec
= (len(self
.in_a
.e
), True)
590 msr
= MultiShiftRMerge(self
.in_a
.m_width
, espec
)
591 m
.submodules
.align_t_in
= t_inp
592 m
.submodules
.align_t_out
= t_out
593 m
.submodules
.multishift_r
= msr
595 ediff
= Signal(espec
, reset_less
=True)
596 ediffr
= Signal(espec
, reset_less
=True)
597 tdiff
= Signal(espec
, reset_less
=True)
598 elz
= Signal(reset_less
=True)
599 egz
= Signal(reset_less
=True)
601 # connect multi-shifter to t_inp/out mantissa (and tdiff)
602 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
603 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
604 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
605 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
606 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
608 m
.d
.comb
+= ediff
.eq(self
.in_a
.e
- self
.in_b
.e
)
609 m
.d
.comb
+= ediffr
.eq(self
.in_b
.e
- self
.in_a
.e
)
610 m
.d
.comb
+= elz
.eq(self
.in_a
.e
< self
.in_b
.e
)
611 m
.d
.comb
+= egz
.eq(self
.in_a
.e
> self
.in_b
.e
)
613 # default: A-exp == B-exp, A and B untouched (fall through)
614 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
615 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
616 # only one shifter (muxed)
617 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
618 # exponent of a greater than b: shift b down
620 m
.d
.comb
+= [t_inp
.copy(self
.in_b
),
622 self
.out_b
.copy(t_out
),
623 self
.out_b
.s
.eq(self
.in_b
.s
), # whoops forgot sign
625 # exponent of b greater than a: shift a down
627 m
.d
.comb
+= [t_inp
.copy(self
.in_a
),
629 self
.out_a
.copy(t_out
),
630 self
.out_a
.s
.eq(self
.in_a
.s
), # whoops forgot sign
635 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the single-cycle "align" state.

    :param width: passed to the align module and to the outputs
    :param id_wid: passed to ``FPID.__init__``
    """
    # both bases are initialised explicitly, state name first
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    self.mod = FPAddAlignSingleMod(width)

    # this state's own copies of the module's out_a/out_b
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
def setup(self, m, in_a, in_b, in_mid):
    """Link the align module to its inputs.

    :param m: module being built
    :param in_a: first operand, forwarded to ``self.mod.setup``
    :param in_b: second operand, forwarded to ``self.mod.setup``
    :param in_mid: source for ``self.in_mid`` (only connected when this
        object has an ``in_mid``)
    """
    self.mod.setup(m, in_a, in_b)

    # nothing more to connect when no ID signal exists on this object
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
653 # NOTE: could be done as comb
654 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
655 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
659 class FPAddAlignSingleAdd(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the combined align + add0 + add1 state (named "align").

    :param width: passed to the three sub-modules and to every
        ``FPNumIn``/``FPNumBase`` created here
    :param id_wid: passed to ``FPID.__init__``
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    # alignment stage and its outputs
    self.mod = FPAddAlignSingleMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # add stage 0 and its outputs.  a0_out_z is created exactly once:
    # building it a second time after out_tot only left an unused
    # FPNumBase behind, since out_tot needs nothing but its m_width.
    self.a0mod = FPAddStage0Mod(width)
    self.a0_out_z = FPNumBase(width, False)
    # total is 4 bits wider than the mantissa of a0_out_z
    self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)

    # add stage 1 and its outputs
    self.a1mod = FPAddStage1Mod(width)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
677 def setup(self
, m
, in_a
, in_b
, in_mid
):
678 """ links module to inputs and outputs
680 self
.mod
.setup(m
, in_a
, in_b
)
681 m
.d
.comb
+= self
.out_a
.copy(self
.mod
.out_a
)
682 m
.d
.comb
+= self
.out_b
.copy(self
.mod
.out_b
)
684 self
.a0mod
.setup(m
, self
.out_a
, self
.out_b
)
685 m
.d
.comb
+= self
.a0_out_z
.copy(self
.a0mod
.out_z
)
686 m
.d
.comb
+= self
.out_tot
.eq(self
.a0mod
.out_tot
)
688 self
.a1mod
.setup(m
, self
.out_tot
, self
.a0_out_z
)
690 if self
.in_mid
is not None:
691 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
695 m
.d
.sync
+= self
.out_of
.copy(self
.a1mod
.out_of
)
696 m
.d
.sync
+= self
.out_z
.copy(self
.a1mod
.out_z
)
697 m
.next
= "normalise_1"
700 class FPAddStage0Mod
:
def __init__(self, width):
    """Create the operand inputs and the sum outputs of add stage 0.

    :param width: passed to every ``FPNumBase`` created here.
    """
    # operands
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.in_z = FPNumBase(width, False)

    # result number, plus the raw total which is 4 bits wider than the
    # mantissa of out_z
    self.out_z = FPNumBase(width, False)
    self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
def setup(self, m, in_a, in_b):
    """Link this module to its inputs.

    Adds ``self`` to ``m`` as the ``add0`` submodule and copies the two
    operands into ``self.in_a``/``self.in_b`` combinatorially.

    :param m: module being built
    :param in_a: source copied into ``self.in_a``
    :param in_b: source copied into ``self.in_b``
    """
    m.submodules.add0 = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
    ]
716 def elaborate(self
, platform
):
718 m
.submodules
.add0_in_a
= self
.in_a
719 m
.submodules
.add0_in_b
= self
.in_b
720 m
.submodules
.add0_out_z
= self
.out_z
722 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_a
.e
)
724 # store intermediate tests (and zero-extended mantissas)
725 seq
= Signal(reset_less
=True)
726 mge
= Signal(reset_less
=True)
727 am0
= Signal(len(self
.in_a
.m
)+1, reset_less
=True)
728 bm0
= Signal(len(self
.in_b
.m
)+1, reset_less
=True)
729 m
.d
.comb
+= [seq
.eq(self
.in_a
.s
== self
.in_b
.s
),
730 mge
.eq(self
.in_a
.m
>= self
.in_b
.m
),
731 am0
.eq(Cat(self
.in_a
.m
, 0)),
732 bm0
.eq(Cat(self
.in_b
.m
, 0))
734 # same-sign (both negative or both positive) add mantissas
737 self
.out_tot
.eq(am0
+ bm0
),
738 self
.out_z
.s
.eq(self
.in_a
.s
)
740 # a mantissa greater than b, use a
743 self
.out_tot
.eq(am0
- bm0
),
744 self
.out_z
.s
.eq(self
.in_a
.s
)
746 # b mantissa greater than a, use b
749 self
.out_tot
.eq(bm0
- am0
),
750 self
.out_z
.s
.eq(self
.in_b
.s
)
755 class FPAddStage0(FPState
, FPID
):
756 """ First stage of add. covers same-sign (add) and subtract
757 special-casing when mantissas are greater or equal, to
758 give greatest accuracy.
def __init__(self, width, id_wid):
    """Build the "add_0" state.

    :param width: passed to the stage-0 module and to ``out_z``
    :param id_wid: passed to ``FPID.__init__``
    """
    # both bases are initialised explicitly, state name first
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)

    self.mod = FPAddStage0Mod(width)

    # this state's own copies of the module outputs; the total is 4 bits
    # wider than the mantissa of out_z
    self.out_z = FPNumBase(width, False)
    self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """Link the stage-0 module to its inputs.

    :param m: module being built
    :param in_a: first operand, forwarded to ``self.mod.setup``
    :param in_b: second operand, forwarded to ``self.mod.setup``
    :param in_mid: source for ``self.in_mid`` (only connected when this
        object has an ``in_mid``)
    """
    self.mod.setup(m, in_a, in_b)

    # nothing more to connect when no ID signal exists on this object
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
777 # NOTE: these could be done as combinatorial (merge add0+add1)
778 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
779 m
.d
.sync
+= self
.out_tot
.eq(self
.mod
.out_tot
)
783 class FPAddStage1Mod(FPState
):
784 """ Second stage of add: preparation for normalisation.
785 detects when tot sum is too big (tot[27] is kinda a carry bit)
def __init__(self, width):
    """Create the ports of add stage 1.

    :param width: passed to every ``FPNumBase`` created here.
    """
    self.out_norm = Signal(reset_less=True)

    # inputs: the number from stage 0 and its raw total, which is 4 bits
    # wider than the mantissa of in_z
    self.in_z = FPNumBase(width, False)
    self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)

    # outputs: the adjusted number and its overflow/rounding bits
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
795 def setup(self
, m
, in_tot
, in_z
):
796 """ links module to inputs and outputs
798 m
.submodules
.add1
= self
799 m
.submodules
.add1_out_overflow
= self
.out_of
801 m
.d
.comb
+= self
.in_z
.copy(in_z
)
802 m
.d
.comb
+= self
.in_tot
.eq(in_tot
)
804 def elaborate(self
, platform
):
806 #m.submodules.norm1_in_overflow = self.in_of
807 #m.submodules.norm1_out_overflow = self.out_of
808 #m.submodules.norm1_in_z = self.in_z
809 #m.submodules.norm1_out_z = self.out_z
810 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
811 # tot[27] gets set when the sum overflows. shift result down
812 with m
.If(self
.in_tot
[-1]):
814 self
.out_z
.m
.eq(self
.in_tot
[4:]),
815 self
.out_of
.m0
.eq(self
.in_tot
[4]),
816 self
.out_of
.guard
.eq(self
.in_tot
[3]),
817 self
.out_of
.round_bit
.eq(self
.in_tot
[2]),
818 self
.out_of
.sticky
.eq(self
.in_tot
[1] | self
.in_tot
[0]),
819 self
.out_z
.e
.eq(self
.in_z
.e
+ 1)
824 self
.out_z
.m
.eq(self
.in_tot
[3:]),
825 self
.out_of
.m0
.eq(self
.in_tot
[3]),
826 self
.out_of
.guard
.eq(self
.in_tot
[2]),
827 self
.out_of
.round_bit
.eq(self
.in_tot
[1]),
828 self
.out_of
.sticky
.eq(self
.in_tot
[0])
833 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the "add_1" state.

    :param width: passed to the stage-1 module and to ``out_z``
    :param id_wid: passed to ``FPID.__init__``
    """
    # both bases are initialised explicitly, state name first
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)

    self.mod = FPAddStage1Mod(width)

    # this state's own copies of the module outputs
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()

    # strobe towards normalisation; unlike the other flags in this file it
    # is created without reset_less
    self.norm_stb = Signal()
843 def setup(self
, m
, in_tot
, in_z
, in_mid
):
844 """ links module to inputs and outputs
846 self
.mod
.setup(m
, in_tot
, in_z
)
848 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
850 if self
.in_mid
is not None:
851 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
855 m
.d
.sync
+= self
.out_of
.copy(self
.mod
.out_of
)
856 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
857 m
.d
.sync
+= self
.norm_stb
.eq(1)
858 m
.next
= "normalise_1"
861 class FPNorm1ModSingle
:
863 def __init__(self
, width
):
865 self
.out_norm
= Signal(reset_less
=True)
866 self
.in_z
= FPNumBase(width
, False)
867 self
.in_of
= Overflow()
868 self
.out_z
= FPNumBase(width
, False)
869 self
.out_of
= Overflow()
871 def setup(self
, m
, in_z
, in_of
, out_z
):
872 """ links module to inputs and outputs
874 m
.submodules
.normalise_1
= self
876 m
.d
.comb
+= self
.in_z
.copy(in_z
)
877 m
.d
.comb
+= self
.in_of
.copy(in_of
)
879 m
.d
.comb
+= out_z
.copy(self
.out_z
)
881 def elaborate(self
, platform
):
884 mwid
= self
.out_z
.m_width
+2
885 pe
= PriorityEncoder(mwid
)
886 m
.submodules
.norm_pe
= pe
888 m
.submodules
.norm1_out_z
= self
.out_z
889 m
.submodules
.norm1_out_overflow
= self
.out_of
890 m
.submodules
.norm1_in_z
= self
.in_z
891 m
.submodules
.norm1_in_overflow
= self
.in_of
893 in_z
= FPNumBase(self
.width
, False)
895 m
.submodules
.norm1_insel_z
= in_z
896 m
.submodules
.norm1_insel_overflow
= in_of
898 espec
= (len(in_z
.e
), True)
899 ediff_n126
= Signal(espec
, reset_less
=True)
900 msr
= MultiShiftRMerge(mwid
, espec
)
901 m
.submodules
.multishift_r
= msr
903 m
.d
.comb
+= in_z
.copy(self
.in_z
)
904 m
.d
.comb
+= in_of
.copy(self
.in_of
)
905 # initialise out from in (overridden below)
906 m
.d
.comb
+= self
.out_z
.copy(in_z
)
907 m
.d
.comb
+= self
.out_of
.copy(in_of
)
908 # normalisation increase/decrease conditions
909 decrease
= Signal(reset_less
=True)
910 increase
= Signal(reset_less
=True)
911 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
912 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
915 # *sigh* not entirely obvious: count leading zeros (clz)
916 # with a PriorityEncoder: to find from the MSB
917 # we reverse the order of the bits.
918 temp_m
= Signal(mwid
, reset_less
=True)
919 temp_s
= Signal(mwid
+1, reset_less
=True)
920 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
921 # make sure that the amount to decrease by does NOT
922 # go below the minimum non-INF/NaN exponent
923 limclz
= Mux(in_z
.exp_sub_n126
> pe
.o
, pe
.o
,
926 # cat round and guard bits back into the mantissa
927 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
928 pe
.i
.eq(temp_m
[::-1]), # inverted
929 clz
.eq(limclz
), # count zeros from MSB down
930 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
931 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
932 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
933 self
.out_of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
934 # overflow in bits 0..1: got shifted too (leave sticky)
935 self
.out_of
.guard
.eq(temp_s
[1]), # guard
936 self
.out_of
.round_bit
.eq(temp_s
[0]), # round
939 with m
.Elif(increase
):
940 temp_m
= Signal(mwid
+1, reset_less
=True)
942 temp_m
.eq(Cat(in_of
.sticky
, in_of
.round_bit
, in_of
.guard
,
944 ediff_n126
.eq(in_z
.N126
- in_z
.e
),
945 # connect multi-shifter to inp/out mantissa (and ediff)
947 msr
.diff
.eq(ediff_n126
),
948 self
.out_z
.m
.eq(msr
.m
[3:]),
949 self
.out_of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
950 # overflow in bits 0..1: got shifted too (leave sticky)
951 self
.out_of
.guard
.eq(temp_s
[2]), # guard
952 self
.out_of
.round_bit
.eq(temp_s
[1]), # round
953 self
.out_of
.sticky
.eq(temp_s
[0]), # sticky
954 self
.out_z
.e
.eq(in_z
.e
+ ediff_n126
),
960 class FPNorm1ModMulti
:
962 def __init__(self
, width
, single_cycle
=True):
964 self
.in_select
= Signal(reset_less
=True)
965 self
.out_norm
= Signal(reset_less
=True)
966 self
.in_z
= FPNumBase(width
, False)
967 self
.in_of
= Overflow()
968 self
.temp_z
= FPNumBase(width
, False)
969 self
.temp_of
= Overflow()
970 self
.out_z
= FPNumBase(width
, False)
971 self
.out_of
= Overflow()
973 def elaborate(self
, platform
):
976 m
.submodules
.norm1_out_z
= self
.out_z
977 m
.submodules
.norm1_out_overflow
= self
.out_of
978 m
.submodules
.norm1_temp_z
= self
.temp_z
979 m
.submodules
.norm1_temp_of
= self
.temp_of
980 m
.submodules
.norm1_in_z
= self
.in_z
981 m
.submodules
.norm1_in_overflow
= self
.in_of
983 in_z
= FPNumBase(self
.width
, False)
985 m
.submodules
.norm1_insel_z
= in_z
986 m
.submodules
.norm1_insel_overflow
= in_of
988 # select which of temp or in z/of to use
989 with m
.If(self
.in_select
):
990 m
.d
.comb
+= in_z
.copy(self
.in_z
)
991 m
.d
.comb
+= in_of
.copy(self
.in_of
)
993 m
.d
.comb
+= in_z
.copy(self
.temp_z
)
994 m
.d
.comb
+= in_of
.copy(self
.temp_of
)
995 # initialise out from in (overridden below)
996 m
.d
.comb
+= self
.out_z
.copy(in_z
)
997 m
.d
.comb
+= self
.out_of
.copy(in_of
)
998 # normalisation increase/decrease conditions
999 decrease
= Signal(reset_less
=True)
1000 increase
= Signal(reset_less
=True)
1001 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1002 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1003 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1005 with m
.If(decrease
):
1007 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1008 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1009 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1010 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1011 self
.out_of
.round_bit
.eq(0), # reset round bit
1012 self
.out_of
.m0
.eq(in_of
.guard
),
1015 with m
.Elif(increase
):
1017 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1018 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1019 self
.out_of
.guard
.eq(in_z
.m
[0]),
1020 self
.out_of
.m0
.eq(in_z
.m
[1]),
1021 self
.out_of
.round_bit
.eq(in_of
.guard
),
1022 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1028 class FPNorm1Single(FPState
, FPID
):
def __init__(self, width, id_wid, single_cycle=True):
    """Build the single-cycle "normalise_1" state.

    :param width: passed to the normalisation module and to ``out_z``
    :param id_wid: passed to ``FPID.__init__``
    :param single_cycle: accepted but not used by this constructor
    """
    # note: FPID is initialised before FPState in this class
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")

    self.mod = FPNorm1ModSingle(width)

    self.out_norm = Signal(reset_less=True)
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1038 def setup(self
, m
, in_z
, in_of
, in_mid
):
1039 """ links module to inputs and outputs
1041 self
.mod
.setup(m
, in_z
, in_of
, self
.out_z
)
1043 if self
.in_mid
is not None:
1044 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1046 def action(self
, m
):
1048 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1052 class FPNorm1Multi(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the multi-cycle "normalise_1" state.

    :param width: passed to the normalisation module and to the
        ``FPNumBase`` members
    :param id_wid: passed to ``FPID.__init__``
    """
    # note: FPID is initialised before FPState in this class
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")

    self.mod = FPNorm1ModMulti(width)

    # handshake and control flags
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # intermediate number / overflow bits (written in action())
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # outputs
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1067 def setup(self
, m
, in_z
, in_of
, norm_stb
, in_mid
):
1068 """ links module to inputs and outputs
1070 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1071 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1072 self
.out_z
, self
.out_norm
)
1074 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1075 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1077 if self
.in_mid
is not None:
1078 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1080 def action(self
, m
):
1082 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1083 m
.d
.sync
+= self
.temp_of
.copy(self
.mod
.out_of
)
1084 m
.d
.sync
+= self
.temp_z
.copy(self
.out_z
)
1085 with m
.If(self
.out_norm
):
1086 with m
.If(self
.in_accept
):
1091 m
.d
.sync
+= self
.ack
.eq(0)
1093 # normalisation not required (or done).
1095 m
.d
.sync
+= self
.ack
.eq(1)
1096 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1099 class FPNormToPack(FPState
, FPID
):
1101 def __init__(self
, width
, id_wid
):
1102 FPID
.__init
__(self
, id_wid
)
1103 FPState
.__init
__(self
, "normalise_1")
1106 def setup(self
, m
, in_z
, in_of
, in_mid
):
1107 """ links module to inputs and outputs
1110 # Normalisation (chained to input in_z+in_of)
1111 nmod
= FPNorm1ModSingle(self
.width
)
1112 n_out_z
= FPNumBase(self
.width
)
1113 n_out_roundz
= Signal(reset_less
=True)
1114 nmod
.setup(m
, in_z
, in_of
, n_out_z
)
1116 # Rounding (chained to normalisation)
1117 rmod
= FPRoundMod(self
.width
)
1118 r_out_z
= FPNumBase(self
.width
)
1119 rmod
.setup(m
, n_out_z
, n_out_roundz
)
1120 m
.d
.comb
+= n_out_roundz
.eq(nmod
.out_of
.roundz
)
1121 m
.d
.comb
+= r_out_z
.copy(rmod
.out_z
)
1123 # Corrections (chained to rounding)
1124 cmod
= FPCorrectionsMod(self
.width
)
1125 c_out_z
= FPNumBase(self
.width
)
1126 cmod
.setup(m
, r_out_z
)
1127 m
.d
.comb
+= c_out_z
.copy(cmod
.out_z
)
1129 # Pack (chained to corrections)
1130 self
.pmod
= FPPackMod(self
.width
)
1131 self
.out_z
= FPNumBase(self
.width
)
1132 self
.pmod
.setup(m
, c_out_z
)
1135 if self
.in_mid
is not None:
1136 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1138 def action(self
, m
):
1139 self
.idsync(m
) # copies incoming ID to outgoing
1140 m
.d
.sync
+= self
.out_z
.v
.eq(self
.pmod
.out_z
.v
) # outputs packed result
1141 m
.next
= "pack_put_z"
1146 def __init__(self
, width
):
1147 self
.in_roundz
= Signal(reset_less
=True)
1148 self
.in_z
= FPNumBase(width
, False)
1149 self
.out_z
= FPNumBase(width
, False)
1151 def setup(self
, m
, in_z
, roundz
):
1152 m
.submodules
.roundz
= self
1154 m
.d
.comb
+= self
.in_z
.copy(in_z
)
1155 m
.d
.comb
+= self
.in_roundz
.eq(roundz
)
1157 def elaborate(self
, platform
):
1159 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
1160 with m
.If(self
.in_roundz
):
1161 m
.d
.comb
+= self
.out_z
.m
.eq(self
.in_z
.m
+ 1) # mantissa rounds up
1162 with m
.If(self
.in_z
.m
== self
.in_z
.m1s
): # all 1s
1163 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_z
.e
+ 1) # exponent up
1167 class FPRound(FPState
, FPID
):
1169 def __init__(self
, width
, id_wid
):
1170 FPState
.__init
__(self
, "round")
1171 FPID
.__init
__(self
, id_wid
)
1172 self
.mod
= FPRoundMod(width
)
1173 self
.out_z
= FPNumBase(width
)
1175 def setup(self
, m
, in_z
, roundz
, in_mid
):
1176 """ links module to inputs and outputs
1178 self
.mod
.setup(m
, in_z
, roundz
)
1180 if self
.in_mid
is not None:
1181 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1183 def action(self
, m
):
1185 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
1186 m
.next
= "corrections"
1189 class FPCorrectionsMod
:
1191 def __init__(self
, width
):
1192 self
.in_z
= FPNumOut(width
, False)
1193 self
.out_z
= FPNumOut(width
, False)
1195 def setup(self
, m
, in_z
):
1196 """ links module to inputs and outputs
1198 m
.submodules
.corrections
= self
1199 m
.d
.comb
+= self
.in_z
.copy(in_z
)
1201 def elaborate(self
, platform
):
1203 m
.submodules
.corr_in_z
= self
.in_z
1204 m
.submodules
.corr_out_z
= self
.out_z
1205 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
1206 with m
.If(self
.in_z
.is_denormalised
):
1207 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_z
.N127
)
1211 class FPCorrections(FPState
, FPID
):
1213 def __init__(self
, width
, id_wid
):
1214 FPState
.__init
__(self
, "corrections")
1215 FPID
.__init
__(self
, id_wid
)
1216 self
.mod
= FPCorrectionsMod(width
)
1217 self
.out_z
= FPNumBase(width
)
1219 def setup(self
, m
, in_z
, in_mid
):
1220 """ links module to inputs and outputs
1222 self
.mod
.setup(m
, in_z
)
1223 if self
.in_mid
is not None:
1224 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1226 def action(self
, m
):
1228 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
1234 def __init__(self
, width
):
1235 self
.in_z
= FPNumOut(width
, False)
1236 self
.out_z
= FPNumOut(width
, False)
1238 def setup(self
, m
, in_z
):
1239 """ links module to inputs and outputs
1241 m
.submodules
.pack
= self
1242 m
.d
.comb
+= self
.in_z
.copy(in_z
)
1244 def elaborate(self
, platform
):
1246 m
.submodules
.pack_in_z
= self
.in_z
1247 with m
.If(self
.in_z
.is_overflowed
):
1248 m
.d
.comb
+= self
.out_z
.inf(self
.in_z
.s
)
1250 m
.d
.comb
+= self
.out_z
.create(self
.in_z
.s
, self
.in_z
.e
, self
.in_z
.m
)
1254 class FPPack(FPState
, FPID
):
1256 def __init__(self
, width
, id_wid
):
1257 FPState
.__init
__(self
, "pack")
1258 FPID
.__init
__(self
, id_wid
)
1259 self
.mod
= FPPackMod(width
)
1260 self
.out_z
= FPNumOut(width
, False)
1262 def setup(self
, m
, in_z
, in_mid
):
1263 """ links module to inputs and outputs
1265 self
.mod
.setup(m
, in_z
)
1266 if self
.in_mid
is not None:
1267 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1269 def action(self
, m
):
1271 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1272 m
.next
= "pack_put_z"
1275 class FPPutZ(FPState
):
1277 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1278 FPState
.__init
__(self
, state
)
1279 if to_state
is None:
1280 to_state
= "get_ops"
1281 self
.to_state
= to_state
1284 self
.in_mid
= in_mid
1285 self
.out_mid
= out_mid
1287 def action(self
, m
):
1288 if self
.in_mid
is not None:
1289 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1291 self
.out_z
.v
.eq(self
.in_z
.v
)
1293 with m
.If(self
.out_z
.stb
& self
.out_z
.ack
):
1294 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1295 m
.next
= self
.to_state
1297 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1300 class FPPutZIdx(FPState
):
1302 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1303 FPState
.__init
__(self
, state
)
1304 if to_state
is None:
1305 to_state
= "get_ops"
1306 self
.to_state
= to_state
1308 self
.out_zs
= out_zs
1309 self
.in_mid
= in_mid
1311 def action(self
, m
):
1312 outz_stb
= Signal(reset_less
=True)
1313 outz_ack
= Signal(reset_less
=True)
1314 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1315 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1318 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1320 with m
.If(outz_stb
& outz_ack
):
1321 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1322 m
.next
= self
.to_state
1324 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1327 class FPADDBaseMod(FPID
):
1329 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1332 * width: bit-width of IEEE754. supported: 16, 32, 64
1333 * id_wid: an identifier that is sync-connected to the input
1334 * single_cycle: True indicates each stage to complete in 1 clock
1335 * compact: True indicates a reduced number of stages
1337 FPID
.__init
__(self
, id_wid
)
1339 self
.single_cycle
= single_cycle
1340 self
.compact
= compact
1342 self
.in_t
= Trigger()
1343 self
.in_a
= Signal(width
)
1344 self
.in_b
= Signal(width
)
1345 self
.out_z
= FPOp(width
)
1349 def add_state(self
, state
):
1350 self
.states
.append(state
)
1353 def get_fragment(self
, platform
=None):
1354 """ creates the HDL code-fragment for FPAdd
1357 m
.submodules
.out_z
= self
.out_z
1358 m
.submodules
.in_t
= self
.in_t
1360 self
.get_compact_fragment(m
, platform
)
1362 self
.get_longer_fragment(m
, platform
)
1364 with m
.FSM() as fsm
:
1366 for state
in self
.states
:
1367 with m
.State(state
.state_from
):
1372 def get_longer_fragment(self
, m
, platform
=None):
1374 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1375 self
.in_a
, self
.in_b
, self
.width
))
1376 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1380 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1381 sc
.setup(m
, a
, b
, self
.in_mid
)
1383 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1384 dn
.setup(m
, a
, b
, sc
.in_mid
)
1386 if self
.single_cycle
:
1387 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1388 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1390 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1391 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1393 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1394 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1396 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1397 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1399 if self
.single_cycle
:
1400 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1401 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1403 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1404 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1406 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1407 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1409 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1410 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1412 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1413 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1415 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1416 pa
.in_mid
, self
.out_mid
))
1418 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1419 pa
.in_mid
, self
.out_mid
))
1421 def get_compact_fragment(self
, m
, platform
=None):
1423 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1424 self
.in_a
, self
.in_b
, self
.width
))
1425 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1429 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1430 sc
.setup(m
, a
, b
, self
.in_mid
)
1432 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1433 alm
.setup(m
, sc
.out_a
, sc
.out_b
, sc
.in_mid
)
1435 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1436 n1
.setup(m
, alm
.out_z
, alm
.out_of
, alm
.in_mid
)
1438 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
, self
.out_z
,
1439 n1
.in_mid
, self
.out_mid
))
1441 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1442 sc
.in_mid
, self
.out_mid
))
1445 class FPADDBase(FPState
, FPID
):
1447 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1450 * width: bit-width of IEEE754. supported: 16, 32, 64
1451 * id_wid: an identifier that is sync-connected to the input
1452 * single_cycle: True indicates each stage to complete in 1 clock
1454 FPID
.__init
__(self
, id_wid
)
1455 FPState
.__init
__(self
, "fpadd")
1457 self
.single_cycle
= single_cycle
1458 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1460 self
.in_t
= Trigger()
1461 self
.in_a
= Signal(width
)
1462 self
.in_b
= Signal(width
)
1463 #self.out_z = FPOp(width)
1465 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1466 self
.in_accept
= Signal(reset_less
=True)
1467 self
.add_stb
= Signal(reset_less
=True)
1468 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1470 def setup(self
, m
, a
, b
, add_stb
, in_mid
, out_z
, out_mid
):
1472 self
.out_mid
= out_mid
1473 m
.d
.comb
+= [self
.in_a
.eq(a
),
1475 self
.mod
.in_a
.eq(self
.in_a
),
1476 self
.mod
.in_b
.eq(self
.in_b
),
1477 self
.in_mid
.eq(in_mid
),
1478 self
.mod
.in_mid
.eq(self
.in_mid
),
1479 self
.z_done
.eq(self
.mod
.out_z
.trigger
),
1480 #self.add_stb.eq(add_stb),
1481 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1482 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1483 self
.out_mid
.eq(self
.mod
.out_mid
),
1484 self
.out_z
.v
.eq(self
.mod
.out_z
.v
),
1485 self
.out_z
.stb
.eq(self
.mod
.out_z
.stb
),
1486 self
.mod
.out_z
.ack
.eq(self
.out_z
.ack
),
1489 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1490 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1491 m
.d
.sync
+= self
.out_z
.ack
.eq(0) # likewise
1492 #m.d.sync += self.in_t.stb.eq(0)
1494 m
.submodules
.fpadd
= self
.mod
1496 def action(self
, m
):
1498 # in_accept is set on incoming strobe HIGH and ack LOW.
1499 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1501 #with m.If(self.in_t.ack):
1502 # m.d.sync += self.in_t.stb.eq(0)
1503 with m
.If(~self
.z_done
):
1504 # not done: test for accepting an incoming operand pair
1505 with m
.If(self
.in_accept
):
1507 self
.add_ack
.eq(1), # acknowledge receipt...
1508 self
.in_t
.stb
.eq(1), # initiate add
1511 m
.d
.sync
+= [self
.add_ack
.eq(0),
1512 self
.in_t
.stb
.eq(0),
1513 self
.out_z
.ack
.eq(1),
1516 # done: acknowledge, and write out id and value
1517 m
.d
.sync
+= [self
.add_ack
.eq(1),
1524 if self
.in_mid
is not None:
1525 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1528 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1530 # move to output state on detecting z ack
1531 with m
.If(self
.out_z
.trigger
):
1532 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1535 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1538 def __init__(self
, width
, id_wid
):
1540 self
.id_wid
= id_wid
1542 for i
in range(rs_sz
):
1544 out_z
.name
= "out_z_%d" % i
1546 self
.res
= Array(res
)
1547 self
.in_z
= FPOp(width
)
1548 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1550 def setup(self
, m
, in_z
, in_mid
):
1551 m
.d
.comb
+= [self
.in_z
.copy(in_z
),
1552 self
.in_mid
.eq(in_mid
)]
1554 def get_fragment(self
, platform
=None):
1555 """ creates the HDL code-fragment for FPAdd
1558 m
.submodules
.res_in_z
= self
.in_z
1559 m
.submodules
+= self
.res
1571 """ FPADD: stages as follows:
1577 FPAddBase---> FPAddBaseMod
1579 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1581 FPAddBase is tricky: it is both a stage and *has* stages.
1582 Connection to FPAddBaseMod therefore requires an in stb/ack
1583 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1584 needs to be the thing that raises the incoming stb.
1587 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1590 * width: bit-width of IEEE754. supported: 16, 32, 64
1591 * id_wid: an identifier that is sync-connected to the input
1592 * single_cycle: True indicates each stage to complete in 1 clock
1595 self
.id_wid
= id_wid
1596 self
.single_cycle
= single_cycle
1598 #self.out_z = FPOp(width)
1599 self
.ids
= FPID(id_wid
)
1602 for i
in range(rs_sz
):
1605 in_a
.name
= "in_a_%d" % i
1606 in_b
.name
= "in_b_%d" % i
1607 rs
.append((in_a
, in_b
))
1611 for i
in range(rs_sz
):
1613 out_z
.name
= "out_z_%d" % i
1615 self
.res
= Array(res
)
1619 def add_state(self
, state
):
1620 self
.states
.append(state
)
1623 def get_fragment(self
, platform
=None):
1624 """ creates the HDL code-fragment for FPAdd
1627 m
.submodules
+= self
.rs
1629 in_a
= self
.rs
[0][0]
1630 in_b
= self
.rs
[0][1]
1632 out_z
= FPOp(self
.width
)
1633 out_mid
= Signal(self
.id_wid
, reset_less
=True)
1634 m
.submodules
.out_z
= out_z
1636 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1641 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1646 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1647 ab
= self
.add_state(ab
)
1648 ab
.setup(m
, a
, b
, getb
.out_decode
, self
.ids
.in_mid
,
1651 pz
= self
.add_state(FPPutZIdx("put_z", ab
.out_z
, self
.res
,
1654 with m
.FSM() as fsm
:
1656 for state
in self
.states
:
1657 with m
.State(state
.state_from
):
1663 if __name__
== "__main__":
1665 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1666 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1667 alu
.rs
[0][1].ports() + \
1668 alu
.res
[0].ports() + \
1669 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1671 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1672 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1673 alu
.in_t
.ports() + \
1674 alu
.out_z
.ports() + \
1675 [alu
.in_mid
, alu
.out_mid
])
1678 # works... but don't use, just do "python fname.py convert -t v"
1679 #print (verilog.convert(alu, ports=[
1680 # ports=alu.in_a.ports() + \
1681 # alu.in_b.ports() + \
1682 # alu.out_z.ports())