1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """ records the identifier of the state this object stands for

        state_from: stored unchanged as self.state_from (subclasses in
                    this file pass strings such as "align" or "add_0")
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
61 class InputGroup(Trigger
):
62 def __init__(self
, width
, num_ops
=2, num_rows
=4):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
66 self
.num_rows
= num_rows
67 self
.mmax
= int(log(self
.num_rows
) / log(2))
69 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
70 for i
in range(num_rows
):
71 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
74 for i
in range(num_ops
):
75 outops
.append(Signal(width
, reset_less
=True))
78 def elaborate(self
, platform
):
79 m
= Trigger
.elaborate(self
, platform
)
80 pe
= PriorityEncoder(self
.num_rows
)
81 m
.submodules
.selector
= pe
83 # connect priority encoder
85 for i
in range(self
.num_rows
):
86 in_ready
.append(self
.rs
[i
].ready
)
87 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
88 m
.d
.comb
+= self
.stb
.eq(pe
.n
) # strobe-out valid when encoder is active
91 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
92 for i
in range(self
.num_rows
):
93 with m
.If(pe
.o
== Const(i
, (self
.mmax
, False))):
94 for j
in range(self
.num_ops
):
95 m
.d
.sync
+= self
.out_op
[j
].eq(self
.rs
[i
].out_op
[j
])
100 for i
in range(self
.num_rows
):
102 res
+= inop
.in_op
+ [inop
.stb
]
103 return self
.out_op
+ res
#+ [self.ack + self.stb]
def __init__(self, width):
    """ creates the ports of the single-operand fetch module

        width: handed to FPOp for the incoming operand and used as the
               width of the out_op Signal
    """
    # incoming operand
    self.in_op = FPOp(width)

    # outgoing value and the flag that accompanies it
    self.out_op = Signal(width)
    self.out_decode = Signal(reset_less=True)
111 def elaborate(self
, platform
):
113 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
114 m
.submodules
.get_op_in
= self
.in_op
115 #m.submodules.get_op_out = self.out_op
116 with m
.If(self
.out_decode
):
118 self
.out_op
.eq(self
.in_op
.v
),
123 class FPGetOp(FPState
):
127 def __init__(self
, in_state
, out_state
, in_op
, width
):
128 FPState
.__init
__(self
, in_state
)
129 self
.out_state
= out_state
130 self
.mod
= FPGetOpMod(width
)
132 self
.out_op
= Signal(width
)
133 self
.out_decode
= Signal(reset_less
=True)
135 def setup(self
, m
, in_op
):
136 """ links module to inputs and outputs
138 setattr(m
.submodules
, self
.state_from
, self
.mod
)
139 m
.d
.comb
+= self
.mod
.in_op
.copy(in_op
)
140 #m.d.comb += self.out_op.eq(self.mod.out_op)
141 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
144 with m
.If(self
.out_decode
):
145 m
.next
= self
.out_state
147 self
.in_op
.ack
.eq(0),
148 self
.out_op
.eq(self
.mod
.out_op
)
151 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
154 class FPGet2OpMod(Trigger
):
def __init__(self, width):
    """ creates the ports of the two-operand fetch module

        width: width of the two raw input Signals; also passed to the
               FPNumIn instances that hold the outputs
    """
    Trigger.__init__(self)

    # raw operands coming in
    self.in_op1 = Signal(width, reset_less=True)
    self.in_op2 = Signal(width, reset_less=True)

    # FPNumIn containers going out
    self.out_op1 = FPNumIn(None, width)
    self.out_op2 = FPNumIn(None, width)
162 def elaborate(self
, platform
):
163 m
= Trigger
.elaborate(self
, platform
)
164 #m.submodules.get_op_in = self.in_op
165 m
.submodules
.get_op1_out
= self
.out_op1
166 m
.submodules
.get_op2_out
= self
.out_op2
167 with m
.If(self
.trigger
):
169 self
.out_op1
.decode(self
.in_op1
),
170 self
.out_op2
.decode(self
.in_op2
),
175 class FPGet2Op(FPState
):
179 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
):
180 FPState
.__init
__(self
, in_state
)
181 self
.out_state
= out_state
182 self
.mod
= FPGet2OpMod(width
)
185 self
.out_op1
= FPNumIn(None, width
)
186 self
.out_op2
= FPNumIn(None, width
)
187 self
.in_stb
= Signal(reset_less
=True)
188 self
.out_ack
= Signal(reset_less
=True)
189 self
.out_decode
= Signal(reset_less
=True)
191 def setup(self
, m
, in_op1
, in_op2
, in_stb
, in_ack
):
192 """ links module to inputs and outputs
194 m
.submodules
.get_ops
= self
.mod
195 m
.d
.comb
+= self
.mod
.in_op1
.eq(in_op1
)
196 m
.d
.comb
+= self
.mod
.in_op2
.eq(in_op2
)
197 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
198 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
199 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
200 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
203 with m
.If(self
.out_decode
):
204 m
.next
= self
.out_state
207 #self.out_op1.v.eq(self.mod.out_op1.v),
208 #self.out_op2.v.eq(self.mod.out_op2.v),
209 self
.out_op1
.copy(self
.mod
.out_op1
),
210 self
.out_op2
.copy(self
.mod
.out_op2
)
213 m
.d
.sync
+= self
.mod
.ack
.eq(1)
216 class FPAddSpecialCasesMod
:
217 """ special cases: NaNs, infs, zeros, denormalised
218 NOTE: some of these are unique to add. see "Special Operations"
219 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width):
    """ creates the ports of the special-cases module

        width: passed to every FPNumBase / FPNumOut created here
    """
    # the two operands under inspection
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # result value, plus the flag saying whether that result is to be used
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
228 def setup(self
, m
, in_a
, in_b
, out_do_z
):
229 """ links module to inputs and outputs
231 m
.submodules
.specialcases
= self
232 m
.d
.comb
+= self
.in_a
.copy(in_a
)
233 m
.d
.comb
+= self
.in_b
.copy(in_b
)
234 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
236 def elaborate(self
, platform
):
239 m
.submodules
.sc_in_a
= self
.in_a
240 m
.submodules
.sc_in_b
= self
.in_b
241 m
.submodules
.sc_out_z
= self
.out_z
244 m
.d
.comb
+= s_nomatch
.eq(self
.in_a
.s
!= self
.in_b
.s
)
247 m
.d
.comb
+= m_match
.eq(self
.in_a
.m
== self
.in_b
.m
)
249 # if a is NaN or b is NaN return NaN
250 with m
.If(self
.in_a
.is_nan | self
.in_b
.is_nan
):
251 m
.d
.comb
+= self
.out_do_z
.eq(1)
252 m
.d
.comb
+= self
.out_z
.nan(0)
254 # XXX WEIRDNESS for FP16 non-canonical NaN handling
257 ## if a is zero and b is NaN return -b
258 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
259 # m.d.comb += self.out_do_z.eq(1)
260 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
262 ## if b is zero and a is NaN return -a
263 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
264 # m.d.comb += self.out_do_z.eq(1)
265 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
267 ## if a is -zero and b is NaN return -b
268 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
269 # m.d.comb += self.out_do_z.eq(1)
270 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
272 ## if b is -zero and a is NaN return -a
273 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
274 # m.d.comb += self.out_do_z.eq(1)
275 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
277 # if a is inf return inf (or NaN)
278 with m
.Elif(self
.in_a
.is_inf
):
279 m
.d
.comb
+= self
.out_do_z
.eq(1)
280 m
.d
.comb
+= self
.out_z
.inf(self
.in_a
.s
)
281 # if a is inf and signs don't match return NaN
282 with m
.If(self
.in_b
.exp_128
& s_nomatch
):
283 m
.d
.comb
+= self
.out_z
.nan(0)
285 # if b is inf return inf
286 with m
.Elif(self
.in_b
.is_inf
):
287 m
.d
.comb
+= self
.out_do_z
.eq(1)
288 m
.d
.comb
+= self
.out_z
.inf(self
.in_b
.s
)
290 # if a is zero and b zero return signed-a/b
291 with m
.Elif(self
.in_a
.is_zero
& self
.in_b
.is_zero
):
292 m
.d
.comb
+= self
.out_do_z
.eq(1)
293 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
& self
.in_b
.s
,
297 # if a is zero return b
298 with m
.Elif(self
.in_a
.is_zero
):
299 m
.d
.comb
+= self
.out_do_z
.eq(1)
300 m
.d
.comb
+= self
.out_z
.create(self
.in_b
.s
, self
.in_b
.e
,
303 # if b is zero return a
304 with m
.Elif(self
.in_b
.is_zero
):
305 m
.d
.comb
+= self
.out_do_z
.eq(1)
306 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
, self
.in_a
.e
,
309 # if a equal to -b return zero (+ve zero)
310 with m
.Elif(s_nomatch
& m_match
& (self
.in_a
.e
== self
.in_b
.e
)):
311 m
.d
.comb
+= self
.out_do_z
.eq(1)
312 m
.d
.comb
+= self
.out_z
.zero(0)
314 # Denormalised Number checks
316 m
.d
.comb
+= self
.out_do_z
.eq(0)
322 def __init__(self
, id_wid
):
325 self
.in_mid
= Signal(id_wid
, reset_less
=True)
326 self
.out_mid
= Signal(id_wid
, reset_less
=True)
332 if self
.id_wid
is not None:
333 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
336 class FPAddSpecialCases(FPState
, FPID
):
337 """ special cases: NaNs, infs, zeros, denormalised
338 NOTE: some of these are unique to add. see "Special Operations"
339 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ sets up the "special_cases" state

        width:  passed to FPAddSpecialCasesMod and to the FPNumOut that
                holds this state's result
        id_wid: forwarded unchanged to FPID.__init__
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)

    # module that this state wraps
    self.mod = FPAddSpecialCasesMod(width)

    # state-level outputs
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
349 def setup(self
, m
, in_a
, in_b
, in_mid
):
350 """ links module to inputs and outputs
352 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
353 if self
.in_mid
is not None:
354 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
358 with m
.If(self
.out_do_z
):
359 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
362 m
.next
= "denormalise"
365 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
366 """ special cases: NaNs, infs, zeros, denormalised
367 NOTE: some of these are unique to add. see "Special Operations"
368 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
371 def __init__(self
, width
, id_wid
):
372 FPState
.__init
__(self
, "special_cases")
373 FPID
.__init
__(self
, id_wid
)
374 self
.smod
= FPAddSpecialCasesMod(width
)
375 self
.out_z
= FPNumOut(width
, False)
376 self
.out_do_z
= Signal(reset_less
=True)
378 self
.dmod
= FPAddDeNormMod(width
)
379 self
.out_a
= FPNumBase(width
)
380 self
.out_b
= FPNumBase(width
)
382 def setup(self
, m
, in_a
, in_b
, in_mid
):
383 """ links module to inputs and outputs
385 self
.smod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
386 self
.dmod
.setup(m
, in_a
, in_b
)
387 if self
.in_mid
is not None:
388 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
392 with m
.If(self
.out_do_z
):
393 m
.d
.sync
+= self
.out_z
.v
.eq(self
.smod
.out_z
.v
) # only take output
397 m
.d
.sync
+= self
.out_a
.copy(self
.dmod
.out_a
)
398 m
.d
.sync
+= self
.out_b
.copy(self
.dmod
.out_b
)
401 class FPAddDeNormMod(FPState
):
def __init__(self, width):
    """ creates the ports of the denormalisation module

        width: passed to each of the four FPNumBase instances
    """
    # operands in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # operands out
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
409 def setup(self
, m
, in_a
, in_b
):
410 """ links module to inputs and outputs
412 m
.submodules
.denormalise
= self
413 m
.d
.comb
+= self
.in_a
.copy(in_a
)
414 m
.d
.comb
+= self
.in_b
.copy(in_b
)
416 def elaborate(self
, platform
):
418 m
.submodules
.denorm_in_a
= self
.in_a
419 m
.submodules
.denorm_in_b
= self
.in_b
420 m
.submodules
.denorm_out_a
= self
.out_a
421 m
.submodules
.denorm_out_b
= self
.out_b
422 # hmmm, don't like repeating identical code
423 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
424 with m
.If(self
.in_a
.exp_n127
):
425 m
.d
.comb
+= self
.out_a
.e
.eq(self
.in_a
.N126
) # limit a exponent
427 m
.d
.comb
+= self
.out_a
.m
[-1].eq(1) # set top mantissa bit
429 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
430 with m
.If(self
.in_b
.exp_n127
):
431 m
.d
.comb
+= self
.out_b
.e
.eq(self
.in_b
.N126
) # limit b exponent
433 m
.d
.comb
+= self
.out_b
.m
[-1].eq(1) # set top mantissa bit
438 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the "denormalise" state

        width:  passed to FPAddDeNormMod and to the output FPNumBases
        id_wid: forwarded unchanged to FPID.__init__
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)

    # module that this state wraps
    self.mod = FPAddDeNormMod(width)

    # state-level copies of the module's two outputs
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
447 def setup(self
, m
, in_a
, in_b
, in_mid
):
448 """ links module to inputs and outputs
450 self
.mod
.setup(m
, in_a
, in_b
)
451 if self
.in_mid
is not None:
452 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
456 # Denormalised Number checks
458 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
459 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
462 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the ports of the multi-cycle alignment module

        width: passed to the FPNumBase inputs and the FPNumIn outputs
    """
    # operands in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # operands out
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # flag driven by elaborate() with the result of the exponent compare
    self.exp_eq = Signal(reset_less=True)
471 def elaborate(self
, platform
):
472 # This one however (single-cycle) will do the shift
477 m
.submodules
.align_in_a
= self
.in_a
478 m
.submodules
.align_in_b
= self
.in_b
479 m
.submodules
.align_out_a
= self
.out_a
480 m
.submodules
.align_out_b
= self
.out_b
482 # NOTE: this does *not* do single-cycle multi-shifting,
483 # it *STAYS* in the align state until exponents match
485 # exponent of a greater than b: shift b down
486 m
.d
.comb
+= self
.exp_eq
.eq(0)
487 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
488 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
489 agtb
= Signal(reset_less
=True)
490 altb
= Signal(reset_less
=True)
491 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
492 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
494 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
495 # exponent of b greater than a: shift a down
497 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
498 # exponents equal: move to next stage.
500 m
.d
.comb
+= self
.exp_eq
.eq(1)
504 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the (multi-cycle) "align" state

        width:  passed to FPAddAlignMultiMod and to the FPNumIn outputs
        id_wid: forwarded unchanged to FPID.__init__
    """
    # NOTE: FPID is initialised before FPState here; order kept as-is
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")

    # module that this state wraps
    self.mod = FPAddAlignMultiMod(width)

    # state-level outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
514 def setup(self
, m
, in_a
, in_b
, in_mid
):
515 """ links module to inputs and outputs
517 m
.submodules
.align
= self
.mod
518 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
519 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
520 #m.d.comb += self.out_a.copy(self.mod.out_a)
521 #m.d.comb += self.out_b.copy(self.mod.out_b)
522 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
523 if self
.in_mid
is not None:
524 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
528 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
529 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
530 with m
.If(self
.exp_eq
):
534 class FPAddAlignSingleMod
:
536 def __init__(self
, width
):
538 self
.in_a
= FPNumBase(width
)
539 self
.in_b
= FPNumBase(width
)
540 self
.out_a
= FPNumIn(None, width
)
541 self
.out_b
= FPNumIn(None, width
)
543 def setup(self
, m
, in_a
, in_b
):
544 """ links module to inputs and outputs
546 m
.submodules
.align
= self
547 m
.d
.comb
+= self
.in_a
.copy(in_a
)
548 m
.d
.comb
+= self
.in_b
.copy(in_b
)
550 def elaborate(self
, platform
):
551 """ Aligns A against B or B against A, depending on which has the
552 greater exponent. This is done in a *single* cycle using
553 variable-width bit-shift
555 the shifter used here is quite expensive in terms of gates.
556 Mux A or B in (and out) into temporaries, as only one of them
557 needs to be aligned against the other
561 m
.submodules
.align_in_a
= self
.in_a
562 m
.submodules
.align_in_b
= self
.in_b
563 m
.submodules
.align_out_a
= self
.out_a
564 m
.submodules
.align_out_b
= self
.out_b
566 # temporary (muxed) input and output to be shifted
567 t_inp
= FPNumBase(self
.width
)
568 t_out
= FPNumIn(None, self
.width
)
569 espec
= (len(self
.in_a
.e
), True)
570 msr
= MultiShiftRMerge(self
.in_a
.m_width
, espec
)
571 m
.submodules
.align_t_in
= t_inp
572 m
.submodules
.align_t_out
= t_out
573 m
.submodules
.multishift_r
= msr
575 ediff
= Signal(espec
, reset_less
=True)
576 ediffr
= Signal(espec
, reset_less
=True)
577 tdiff
= Signal(espec
, reset_less
=True)
578 elz
= Signal(reset_less
=True)
579 egz
= Signal(reset_less
=True)
581 # connect multi-shifter to t_inp/out mantissa (and tdiff)
582 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
583 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
584 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
585 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
586 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
588 m
.d
.comb
+= ediff
.eq(self
.in_a
.e
- self
.in_b
.e
)
589 m
.d
.comb
+= ediffr
.eq(self
.in_b
.e
- self
.in_a
.e
)
590 m
.d
.comb
+= elz
.eq(self
.in_a
.e
< self
.in_b
.e
)
591 m
.d
.comb
+= egz
.eq(self
.in_a
.e
> self
.in_b
.e
)
593 # default: A-exp == B-exp, A and B untouched (fall through)
594 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
595 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
596 # only one shifter (muxed)
597 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
598 # exponent of a greater than b: shift b down
600 m
.d
.comb
+= [t_inp
.copy(self
.in_b
),
602 self
.out_b
.copy(t_out
),
603 self
.out_b
.s
.eq(self
.in_b
.s
), # whoops forgot sign
605 # exponent of b greater than a: shift a down
607 m
.d
.comb
+= [t_inp
.copy(self
.in_a
),
609 self
.out_a
.copy(t_out
),
610 self
.out_a
.s
.eq(self
.in_a
.s
), # whoops forgot sign
615 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the (single-cycle) "align" state

        width:  passed to FPAddAlignSingleMod and to the FPNumIn outputs
        id_wid: forwarded unchanged to FPID.__init__
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    # module that this state wraps
    self.mod = FPAddAlignSingleMod(width)

    # state-level outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
624 def setup(self
, m
, in_a
, in_b
, in_mid
):
625 """ links module to inputs and outputs
627 self
.mod
.setup(m
, in_a
, in_b
)
628 if self
.in_mid
is not None:
629 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
633 # NOTE: could be done as comb
634 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
635 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
639 class FPAddAlignSingleAdd(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the combined align + add0 + add1 state (named "align")

        width:  passed to the three wrapped modules and to every
                FPNumIn / FPNumBase created here
        id_wid: forwarded unchanged to FPID.__init__

        Attributes created, grouped by the module they belong to:
        * mod / out_a / out_b     - FPAddAlignSingleMod and its outputs
        * a0mod / a0_out_z / out_tot - FPAddStage0Mod and its outputs
        * a1mod / out_z / out_of  - FPAddStage1Mod and its outputs
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    # alignment
    self.mod = FPAddAlignSingleMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # add stage 0.  a0_out_z is built exactly once: it used to be
    # re-created with identical arguments after out_tot had read its
    # m_width, leaving the first instance orphaned.
    self.a0mod = FPAddStage0Mod(width)
    self.a0_out_z = FPNumBase(width, False)
    self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)

    # add stage 1
    self.a1mod = FPAddStage1Mod(width)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
657 def setup(self
, m
, in_a
, in_b
, in_mid
):
658 """ links module to inputs and outputs
660 self
.mod
.setup(m
, in_a
, in_b
)
661 m
.d
.comb
+= self
.out_a
.copy(self
.mod
.out_a
)
662 m
.d
.comb
+= self
.out_b
.copy(self
.mod
.out_b
)
664 self
.a0mod
.setup(m
, self
.out_a
, self
.out_b
)
665 m
.d
.comb
+= self
.a0_out_z
.copy(self
.a0mod
.out_z
)
666 m
.d
.comb
+= self
.out_tot
.eq(self
.a0mod
.out_tot
)
668 self
.a1mod
.setup(m
, self
.out_tot
, self
.a0_out_z
)
670 if self
.in_mid
is not None:
671 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
675 m
.d
.sync
+= self
.out_of
.copy(self
.a1mod
.out_of
)
676 m
.d
.sync
+= self
.out_z
.copy(self
.a1mod
.out_z
)
677 m
.next
= "normalise_1"
680 class FPAddStage0Mod
:
def __init__(self, width):
    """ creates the ports of the first add stage

        width: passed to every FPNumBase created here

        out_tot is sized from out_z: its width is out_z.m_width + 4
    """
    # operands in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.in_z = FPNumBase(width, False)

    # result out
    self.out_z = FPNumBase(width, False)
    tot_width = self.out_z.m_width + 4
    self.out_tot = Signal(tot_width, reset_less=True)
689 def setup(self
, m
, in_a
, in_b
):
690 """ links module to inputs and outputs
692 m
.submodules
.add0
= self
693 m
.d
.comb
+= self
.in_a
.copy(in_a
)
694 m
.d
.comb
+= self
.in_b
.copy(in_b
)
696 def elaborate(self
, platform
):
698 m
.submodules
.add0_in_a
= self
.in_a
699 m
.submodules
.add0_in_b
= self
.in_b
700 m
.submodules
.add0_out_z
= self
.out_z
702 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_a
.e
)
704 # store intermediate tests (and zero-extended mantissas)
705 seq
= Signal(reset_less
=True)
706 mge
= Signal(reset_less
=True)
707 am0
= Signal(len(self
.in_a
.m
)+1, reset_less
=True)
708 bm0
= Signal(len(self
.in_b
.m
)+1, reset_less
=True)
709 m
.d
.comb
+= [seq
.eq(self
.in_a
.s
== self
.in_b
.s
),
710 mge
.eq(self
.in_a
.m
>= self
.in_b
.m
),
711 am0
.eq(Cat(self
.in_a
.m
, 0)),
712 bm0
.eq(Cat(self
.in_b
.m
, 0))
714 # same-sign (both negative or both positive) add mantissas
717 self
.out_tot
.eq(am0
+ bm0
),
718 self
.out_z
.s
.eq(self
.in_a
.s
)
720 # a mantissa greater than b, use a
723 self
.out_tot
.eq(am0
- bm0
),
724 self
.out_z
.s
.eq(self
.in_a
.s
)
726 # b mantissa greater than a, use b
729 self
.out_tot
.eq(bm0
- am0
),
730 self
.out_z
.s
.eq(self
.in_b
.s
)
735 class FPAddStage0(FPState
, FPID
):
736 """ First stage of add. covers same-sign (add) and subtract
737 special-casing when mantissas are greater or equal, to
738 give greatest accuracy.
def __init__(self, width, id_wid):
    """ sets up the "add_0" state

        width:  passed to FPAddStage0Mod and to the output FPNumBase
        id_wid: forwarded unchanged to FPID.__init__

        out_tot is sized from out_z: its width is out_z.m_width + 4
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)

    # module that this state wraps
    self.mod = FPAddStage0Mod(width)

    # state-level outputs
    self.out_z = FPNumBase(width, False)
    tot_width = self.out_z.m_width + 4
    self.out_tot = Signal(tot_width, reset_less=True)
748 def setup(self
, m
, in_a
, in_b
, in_mid
):
749 """ links module to inputs and outputs
751 self
.mod
.setup(m
, in_a
, in_b
)
752 if self
.in_mid
is not None:
753 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
757 # NOTE: these could be done as combinatorial (merge add0+add1)
758 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
759 m
.d
.sync
+= self
.out_tot
.eq(self
.mod
.out_tot
)
763 class FPAddStage1Mod(FPState
):
764 """ Second stage of add: preparation for normalisation.
765 detects when tot sum is too big (tot[27] is kinda a carry bit)
def __init__(self, width):
    """ creates the ports of the second add stage

        width: passed to the in_z / out_z FPNumBase instances

        in_tot is sized from in_z: its width is in_z.m_width + 4
    """
    self.out_norm = Signal(reset_less=True)

    # inputs: the number and the total
    self.in_z = FPNumBase(width, False)
    tot_width = self.in_z.m_width + 4
    self.in_tot = Signal(tot_width, reset_less=True)

    # outputs: the number and its Overflow record
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
775 def setup(self
, m
, in_tot
, in_z
):
776 """ links module to inputs and outputs
778 m
.submodules
.add1
= self
779 m
.submodules
.add1_out_overflow
= self
.out_of
781 m
.d
.comb
+= self
.in_z
.copy(in_z
)
782 m
.d
.comb
+= self
.in_tot
.eq(in_tot
)
784 def elaborate(self
, platform
):
786 #m.submodules.norm1_in_overflow = self.in_of
787 #m.submodules.norm1_out_overflow = self.out_of
788 #m.submodules.norm1_in_z = self.in_z
789 #m.submodules.norm1_out_z = self.out_z
790 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
791 # tot[27] gets set when the sum overflows. shift result down
792 with m
.If(self
.in_tot
[-1]):
794 self
.out_z
.m
.eq(self
.in_tot
[4:]),
795 self
.out_of
.m0
.eq(self
.in_tot
[4]),
796 self
.out_of
.guard
.eq(self
.in_tot
[3]),
797 self
.out_of
.round_bit
.eq(self
.in_tot
[2]),
798 self
.out_of
.sticky
.eq(self
.in_tot
[1] | self
.in_tot
[0]),
799 self
.out_z
.e
.eq(self
.in_z
.e
+ 1)
804 self
.out_z
.m
.eq(self
.in_tot
[3:]),
805 self
.out_of
.m0
.eq(self
.in_tot
[3]),
806 self
.out_of
.guard
.eq(self
.in_tot
[2]),
807 self
.out_of
.round_bit
.eq(self
.in_tot
[1]),
808 self
.out_of
.sticky
.eq(self
.in_tot
[0])
813 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the "add_1" state

        width:  passed to FPAddStage1Mod and to the output FPNumBase
        id_wid: forwarded unchanged to FPID.__init__
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)

    # module that this state wraps
    self.mod = FPAddStage1Mod(width)

    # state-level outputs
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()

    # strobe; unlike the other flags in this file it is created
    # without reset_less
    self.norm_stb = Signal()
823 def setup(self
, m
, in_tot
, in_z
, in_mid
):
824 """ links module to inputs and outputs
826 self
.mod
.setup(m
, in_tot
, in_z
)
828 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
830 if self
.in_mid
is not None:
831 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
835 m
.d
.sync
+= self
.out_of
.copy(self
.mod
.out_of
)
836 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
837 m
.d
.sync
+= self
.norm_stb
.eq(1)
838 m
.next
= "normalise_1"
841 class FPNorm1ModSingle
:
843 def __init__(self
, width
):
845 self
.out_norm
= Signal(reset_less
=True)
846 self
.in_z
= FPNumBase(width
, False)
847 self
.in_of
= Overflow()
848 self
.out_z
= FPNumBase(width
, False)
849 self
.out_of
= Overflow()
851 def setup(self
, m
, in_z
, in_of
, out_z
):
852 """ links module to inputs and outputs
854 m
.submodules
.normalise_1
= self
856 m
.d
.comb
+= self
.in_z
.copy(in_z
)
857 m
.d
.comb
+= self
.in_of
.copy(in_of
)
859 m
.d
.comb
+= out_z
.copy(self
.out_z
)
861 def elaborate(self
, platform
):
864 mwid
= self
.out_z
.m_width
+2
865 pe
= PriorityEncoder(mwid
)
866 m
.submodules
.norm_pe
= pe
868 m
.submodules
.norm1_out_z
= self
.out_z
869 m
.submodules
.norm1_out_overflow
= self
.out_of
870 m
.submodules
.norm1_in_z
= self
.in_z
871 m
.submodules
.norm1_in_overflow
= self
.in_of
873 in_z
= FPNumBase(self
.width
, False)
875 m
.submodules
.norm1_insel_z
= in_z
876 m
.submodules
.norm1_insel_overflow
= in_of
878 espec
= (len(in_z
.e
), True)
879 ediff_n126
= Signal(espec
, reset_less
=True)
880 msr
= MultiShiftRMerge(mwid
, espec
)
881 m
.submodules
.multishift_r
= msr
883 m
.d
.comb
+= in_z
.copy(self
.in_z
)
884 m
.d
.comb
+= in_of
.copy(self
.in_of
)
885 # initialise out from in (overridden below)
886 m
.d
.comb
+= self
.out_z
.copy(in_z
)
887 m
.d
.comb
+= self
.out_of
.copy(in_of
)
888 # normalisation increase/decrease conditions
889 decrease
= Signal(reset_less
=True)
890 increase
= Signal(reset_less
=True)
891 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
892 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
895 # *sigh* not entirely obvious: count leading zeros (clz)
896 # with a PriorityEncoder: to find from the MSB
897 # we reverse the order of the bits.
898 temp_m
= Signal(mwid
, reset_less
=True)
899 temp_s
= Signal(mwid
+1, reset_less
=True)
900 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
901 # make sure that the amount to decrease by does NOT
902 # go below the minimum non-INF/NaN exponent
903 limclz
= Mux(in_z
.exp_sub_n126
> pe
.o
, pe
.o
,
906 # cat round and guard bits back into the mantissa
907 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
908 pe
.i
.eq(temp_m
[::-1]), # inverted
909 clz
.eq(limclz
), # count zeros from MSB down
910 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
911 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
912 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
913 self
.out_of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
914 # overflow in bits 0..1: got shifted too (leave sticky)
915 self
.out_of
.guard
.eq(temp_s
[1]), # guard
916 self
.out_of
.round_bit
.eq(temp_s
[0]), # round
919 with m
.Elif(increase
):
920 temp_m
= Signal(mwid
+1, reset_less
=True)
922 temp_m
.eq(Cat(in_of
.sticky
, in_of
.round_bit
, in_of
.guard
,
924 ediff_n126
.eq(in_z
.N126
- in_z
.e
),
925 # connect multi-shifter to inp/out mantissa (and ediff)
927 msr
.diff
.eq(ediff_n126
),
928 self
.out_z
.m
.eq(msr
.m
[3:]),
929 self
.out_of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
930 # overflow in bits 0..1: got shifted too (leave sticky)
931 self
.out_of
.guard
.eq(temp_s
[2]), # guard
932 self
.out_of
.round_bit
.eq(temp_s
[1]), # round
933 self
.out_of
.sticky
.eq(temp_s
[0]), # sticky
934 self
.out_z
.e
.eq(in_z
.e
+ ediff_n126
),
940 class FPNorm1ModMulti
:
942 def __init__(self
, width
, single_cycle
=True):
944 self
.in_select
= Signal(reset_less
=True)
945 self
.out_norm
= Signal(reset_less
=True)
946 self
.in_z
= FPNumBase(width
, False)
947 self
.in_of
= Overflow()
948 self
.temp_z
= FPNumBase(width
, False)
949 self
.temp_of
= Overflow()
950 self
.out_z
= FPNumBase(width
, False)
951 self
.out_of
= Overflow()
953 def elaborate(self
, platform
):
956 m
.submodules
.norm1_out_z
= self
.out_z
957 m
.submodules
.norm1_out_overflow
= self
.out_of
958 m
.submodules
.norm1_temp_z
= self
.temp_z
959 m
.submodules
.norm1_temp_of
= self
.temp_of
960 m
.submodules
.norm1_in_z
= self
.in_z
961 m
.submodules
.norm1_in_overflow
= self
.in_of
963 in_z
= FPNumBase(self
.width
, False)
965 m
.submodules
.norm1_insel_z
= in_z
966 m
.submodules
.norm1_insel_overflow
= in_of
968 # select which of temp or in z/of to use
969 with m
.If(self
.in_select
):
970 m
.d
.comb
+= in_z
.copy(self
.in_z
)
971 m
.d
.comb
+= in_of
.copy(self
.in_of
)
973 m
.d
.comb
+= in_z
.copy(self
.temp_z
)
974 m
.d
.comb
+= in_of
.copy(self
.temp_of
)
975 # initialise out from in (overridden below)
976 m
.d
.comb
+= self
.out_z
.copy(in_z
)
977 m
.d
.comb
+= self
.out_of
.copy(in_of
)
978 # normalisation increase/decrease conditions
979 decrease
= Signal(reset_less
=True)
980 increase
= Signal(reset_less
=True)
981 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
982 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
983 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
987 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
988 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
989 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
990 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
991 self
.out_of
.round_bit
.eq(0), # reset round bit
992 self
.out_of
.m0
.eq(in_of
.guard
),
995 with m
.Elif(increase
):
997 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
998 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
999 self
.out_of
.guard
.eq(in_z
.m
[0]),
1000 self
.out_of
.m0
.eq(in_z
.m
[1]),
1001 self
.out_of
.round_bit
.eq(in_of
.guard
),
1002 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1008 class FPNorm1Single(FPState
, FPID
):
def __init__(self, width, id_wid, single_cycle=True):
    """ sets up the (single-cycle) "normalise_1" state

        width:        passed to FPNorm1ModSingle and to the output
                      FPNumBase
        id_wid:       forwarded unchanged to FPID.__init__
        single_cycle: accepted but not read anywhere in this constructor
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")

    # module that this state wraps
    self.mod = FPNorm1ModSingle(width)

    # state-level outputs
    self.out_norm = Signal(reset_less=True)
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1018 def setup(self
, m
, in_z
, in_of
, in_mid
):
1019 """ links module to inputs and outputs
1021 self
.mod
.setup(m
, in_z
, in_of
, self
.out_z
)
1023 if self
.in_mid
is not None:
1024 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1026 def action(self
, m
):
1028 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1032 class FPNorm1Multi(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ sets up the (multi-cycle) "normalise_1" state

        width:  passed to FPNorm1ModMulti and to the temp/output
                FPNumBase instances
        id_wid: forwarded unchanged to FPID.__init__
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")

    # module that this state wraps
    self.mod = FPNorm1ModMulti(width)

    # handshake and control flags
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # temporaries
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # state-level outputs
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
1047 def setup(self
, m
, in_z
, in_of
, norm_stb
, in_mid
):
1048 """ links module to inputs and outputs
1050 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1051 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1052 self
.out_z
, self
.out_norm
)
1054 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1055 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1057 if self
.in_mid
is not None:
1058 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
1060 def action(self
, m
):
1062 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1063 m
.d
.sync
+= self
.temp_of
.copy(self
.mod
.out_of
)
1064 m
.d
.sync
+= self
.temp_z
.copy(self
.out_z
)
1065 with m
.If(self
.out_norm
):
1066 with m
.If(self
.in_accept
):
1071 m
.d
.sync
+= self
.ack
.eq(0)
1073 # normalisation not required (or done).
1075 m
.d
.sync
+= self
.ack
.eq(1)
1076 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState, FPID):
    """ one FSM state ("normalise_1") covering normalise, round,
        corrections and pack.

        The four modules are chained combinatorially in setup(); action()
        registers only the packed value and goes to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ builds the module chain and connects the id input

            * in_z, in_of: value and overflow entering the normaliser
            * in_mid: id source, only wired when this stage has an in_mid
        """
        wid = self.width

        # step 1 - normalise the incoming in_z / in_of
        normaliser = FPNorm1ModSingle(wid)
        n_out_z = FPNumBase(wid)
        n_out_roundz = Signal(reset_less=True)
        normaliser.setup(m, in_z, in_of, n_out_z)

        # step 2 - round the normalised value
        rounder = FPRoundMod(wid)
        r_out_z = FPNumBase(wid)
        rounder.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(normaliser.out_of.roundz)
        m.d.comb += r_out_z.copy(rounder.out_z)

        # step 3 - apply corrections to the rounded value
        corrector = FPCorrectionsMod(wid)
        c_out_z = FPNumBase(wid)
        corrector.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(corrector.out_z)

        # step 4 - pack; module and output are kept for action()
        self.pmod = FPPackMod(wid)
        self.out_z = FPNumBase(wid)
        self.pmod.setup(m, c_out_z)

        # ID
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: forwards the id and registers the packed result
        """
        self.idsync(m)  # copies incoming ID to outgoing
        packed = self.pmod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)  # outputs packed result
        m.next = "pack_put_z"
1126 def __init__(self
, width
):
1127 self
.in_roundz
= Signal(reset_less
=True)
1128 self
.in_z
= FPNumBase(width
, False)
1129 self
.out_z
= FPNumBase(width
, False)
1131 def setup(self
, m
, in_z
, roundz
):
1132 m
.submodules
.roundz
= self
1134 m
.d
.comb
+= self
.in_z
.copy(in_z
)
1135 m
.d
.comb
+= self
.in_roundz
.eq(roundz
)
1137 def elaborate(self
, platform
):
1139 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
1140 with m
.If(self
.in_roundz
):
1141 m
.d
.comb
+= self
.out_z
.m
.eq(self
.in_z
.m
+ 1) # mantissa rounds up
1142 with m
.If(self
.in_z
.m
== self
.in_z
.m1s
): # all 1s
1143 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_z
.e
+ 1) # exponent up
class FPRound(FPState, FPID):
    """ "round" FSM state: registers the output of an FPRoundMod and
        then moves to "corrections"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ connects the rounding module and the id input

            * in_z, roundz: value and round flag passed on to self.mod
            * in_mid: id source, only wired when this stage has an in_mid
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: forwards the id and latches the rounded value
        """
        self.idsync(m)
        rounded = self.mod.out_z
        m.d.sync += self.out_z.copy(rounded)
        m.next = "corrections"
class FPCorrectionsMod:
    """ combinatorial module: out_z is in_z, except that the exponent is
        replaced by in_z.N127 when in_z.is_denormalised is set
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers this module as m.submodules.corrections and copies
            in_z onto its input
        """
        setattr(m.submodules, "corrections", self)
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.in_z, self.out_z

        # both numbers are attached as submodules of this module
        m.submodules.corr_in_z = src
        m.submodules.corr_out_z = dst

        # pass-through by default, exponent overridden when flagged
        m.d.comb += dst.copy(src)
        with m.If(src.is_denormalised):
            m.d.comb += dst.e.eq(src.N127)
        return m
class FPCorrections(FPState, FPID):
    """ "corrections" FSM state: registers the output of an
        FPCorrectionsMod and then moves to "pack"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ connects the corrections module and the id input

            * in_z: value passed on to self.mod
            * in_mid: id source, only wired when this stage has an in_mid
        """
        self.mod.setup(m, in_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: forwards the id and latches the corrected value
        """
        self.idsync(m)
        corrected = self.mod.out_z
        m.d.sync += self.out_z.copy(corrected)
        m.next = "pack"
1214 def __init__(self
, width
):
1215 self
.in_z
= FPNumOut(width
, False)
1216 self
.out_z
= FPNumOut(width
, False)
1218 def setup(self
, m
, in_z
):
1219 """ links module to inputs and outputs
1221 m
.submodules
.pack
= self
1222 m
.d
.comb
+= self
.in_z
.copy(in_z
)
1224 def elaborate(self
, platform
):
1226 m
.submodules
.pack_in_z
= self
.in_z
1227 with m
.If(self
.in_z
.is_overflowed
):
1228 m
.d
.comb
+= self
.out_z
.inf(self
.in_z
.s
)
1230 m
.d
.comb
+= self
.out_z
.create(self
.in_z
.s
, self
.in_z
.e
, self
.in_z
.m
)
class FPPack(FPState, FPID):
    """ "pack" FSM state: registers the packed value produced by an
        FPPackMod and then moves to "pack_put_z"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ connects the pack module and the id input

            * in_z: value passed on to self.mod
            * in_mid: id source, only wired when this stage has an in_mid
        """
        self.mod.setup(m, in_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: forwards the id and latches the packed value
        """
        self.idsync(m)
        packed = self.mod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ output FSM state: copies in_z.v to out_z.v and holds out_z.stb
        high until stb and ack are both seen, then goes to to_state
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z, out_z: source and destination of the value
            * in_mid, out_mid: source and destination of the id (in_mid
              may be None, in which case the id is not copied)
            * to_state: state entered after the handshake, "get_ops"
              when not given
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action for the output state
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += self.out_z.v.eq(self.in_z.v)

        handshake = self.out_z.stb & self.out_z.ack
        with m.If(handshake):
            # value taken: drop the strobe and leave this state
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
class FPPutZIdx(FPState):
    """ output FSM state that writes to one entry of out_zs, selected
        by in_mid, using the same stb/ack handshake as FPPutZ
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source of the value
            * out_zs: indexable collection of destinations
            * in_mid: index into out_zs
            * to_state: state entered after the handshake, "get_ops"
              when not given
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action for the indexed output state
        """
        # the selected destination; every access below goes through it
        target = self.out_zs[self.in_mid]

        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += outz_stb.eq(target.stb)
        m.d.comb += outz_ack.eq(target.ack)

        m.d.sync += target.v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            # value taken: drop the strobe and leave this state
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += target.stb.eq(1)
1307 class FPADDBaseMod(FPID
):
1309 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1312 * width: bit-width of IEEE754. supported: 16, 32, 64
1313 * id_wid: an identifier that is sync-connected to the input
1314 * single_cycle: True indicates each stage to complete in 1 clock
1315 * compact: True indicates a reduced number of stages
1317 FPID
.__init
__(self
, id_wid
)
1319 self
.single_cycle
= single_cycle
1320 self
.compact
= compact
1322 self
.in_t
= Trigger()
1323 self
.in_a
= Signal(width
)
1324 self
.in_b
= Signal(width
)
1325 self
.out_z
= FPOp(width
)
1329 def add_state(self
, state
):
1330 self
.states
.append(state
)
1333 def get_fragment(self
, platform
=None):
1334 """ creates the HDL code-fragment for FPAdd
1337 m
.submodules
.out_z
= self
.out_z
1338 m
.submodules
.in_t
= self
.in_t
1340 self
.get_compact_fragment(m
, platform
)
1342 self
.get_longer_fragment(m
, platform
)
1344 with m
.FSM() as fsm
:
1346 for state
in self
.states
:
1347 with m
.State(state
.state_from
):
1352 def get_longer_fragment(self
, m
, platform
=None):
1354 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1355 self
.in_a
, self
.in_b
, self
.width
))
1356 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1360 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1361 sc
.setup(m
, a
, b
, self
.in_mid
)
1363 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1364 dn
.setup(m
, a
, b
, sc
.in_mid
)
1366 if self
.single_cycle
:
1367 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1368 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1370 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1371 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1373 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1374 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1376 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1377 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1379 if self
.single_cycle
:
1380 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1381 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1383 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1384 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1386 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1387 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1389 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1390 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1392 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1393 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1395 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1396 pa
.in_mid
, self
.out_mid
))
1398 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1399 pa
.in_mid
, self
.out_mid
))
1401 def get_compact_fragment(self
, m
, platform
=None):
1403 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1404 self
.in_a
, self
.in_b
, self
.width
))
1405 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1409 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1410 sc
.setup(m
, a
, b
, self
.in_mid
)
1412 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1413 alm
.setup(m
, sc
.out_a
, sc
.out_b
, sc
.in_mid
)
1415 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1416 n1
.setup(m
, alm
.out_z
, alm
.out_of
, alm
.in_mid
)
1418 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
, self
.out_z
,
1419 n1
.in_mid
, self
.out_mid
))
1421 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1422 sc
.in_mid
, self
.out_mid
))
1425 class FPADDBase(FPState
, FPID
):
1427 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1430 * width: bit-width of IEEE754. supported: 16, 32, 64
1431 * id_wid: an identifier that is sync-connected to the input
1432 * single_cycle: True indicates each stage to complete in 1 clock
1434 FPID
.__init
__(self
, id_wid
)
1435 FPState
.__init
__(self
, "fpadd")
1437 self
.single_cycle
= single_cycle
1438 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1440 self
.in_t
= Trigger()
1441 self
.in_a
= Signal(width
)
1442 self
.in_b
= Signal(width
)
1443 #self.out_z = FPOp(width)
1445 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1446 self
.in_accept
= Signal(reset_less
=True)
1447 self
.add_stb
= Signal(reset_less
=True)
1448 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1450 def setup(self
, m
, a
, b
, add_stb
, in_mid
, out_z
, out_mid
):
1452 self
.out_mid
= out_mid
1453 m
.d
.comb
+= [self
.in_a
.eq(a
),
1455 self
.mod
.in_a
.eq(self
.in_a
),
1456 self
.mod
.in_b
.eq(self
.in_b
),
1457 self
.in_mid
.eq(in_mid
),
1458 self
.mod
.in_mid
.eq(self
.in_mid
),
1459 self
.z_done
.eq(self
.mod
.out_z
.trigger
),
1460 #self.add_stb.eq(add_stb),
1461 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1462 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1463 self
.out_mid
.eq(self
.mod
.out_mid
),
1464 self
.out_z
.v
.eq(self
.mod
.out_z
.v
),
1465 self
.out_z
.stb
.eq(self
.mod
.out_z
.stb
),
1466 self
.mod
.out_z
.ack
.eq(self
.out_z
.ack
),
1469 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1470 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1471 m
.d
.sync
+= self
.out_z
.ack
.eq(0) # likewise
1472 #m.d.sync += self.in_t.stb.eq(0)
1474 m
.submodules
.fpadd
= self
.mod
1476 def action(self
, m
):
1478 # in_accept is set on incoming strobe HIGH and ack LOW.
1479 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1481 #with m.If(self.in_t.ack):
1482 # m.d.sync += self.in_t.stb.eq(0)
1483 with m
.If(~self
.z_done
):
1484 # not done: test for accepting an incoming operand pair
1485 with m
.If(self
.in_accept
):
1487 self
.add_ack
.eq(1), # acknowledge receipt...
1488 self
.in_t
.stb
.eq(1), # initiate add
1491 m
.d
.sync
+= [self
.add_ack
.eq(0),
1492 self
.in_t
.stb
.eq(0),
1493 self
.out_z
.ack
.eq(1),
1496 # done: acknowledge, and write out id and value
1497 m
.d
.sync
+= [self
.add_ack
.eq(1),
1504 if self
.in_mid
is not None:
1505 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1508 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1510 # move to output state on detecting z ack
1511 with m
.If(self
.out_z
.trigger
):
1512 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1515 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1518 def __init__(self
, width
, id_wid
):
1520 self
.id_wid
= id_wid
1522 for i
in range(rs_sz
):
1524 out_z
.name
= "out_z_%d" % i
1526 self
.res
= Array(res
)
1527 self
.in_z
= FPOp(width
)
1528 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1530 def setup(self
, m
, in_z
, in_mid
):
1531 m
.d
.comb
+= [self
.in_z
.copy(in_z
),
1532 self
.in_mid
.eq(in_mid
)]
1534 def get_fragment(self
, platform
=None):
1535 """ creates the HDL code-fragment for FPAdd
1538 m
.submodules
.res_in_z
= self
.in_z
1539 m
.submodules
+= self
.res
1551 """ FPADD: stages as follows:
1557 FPAddBase---> FPAddBaseMod
1559 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1561 FPAddBase is tricky: it is both a stage and *has* stages.
1562 Connection to FPAddBaseMod therefore requires an in stb/ack
1563 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1564 needs to be the thing that raises the incoming stb.
1567 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1570 * width: bit-width of IEEE754. supported: 16, 32, 64
1571 * id_wid: an identifier that is sync-connected to the input
1572 * single_cycle: True indicates each stage to complete in 1 clock
1575 self
.id_wid
= id_wid
1576 self
.single_cycle
= single_cycle
1578 #self.out_z = FPOp(width)
1579 self
.ids
= FPID(id_wid
)
1582 for i
in range(rs_sz
):
1585 in_a
.name
= "in_a_%d" % i
1586 in_b
.name
= "in_b_%d" % i
1587 rs
.append((in_a
, in_b
))
1591 for i
in range(rs_sz
):
1593 out_z
.name
= "out_z_%d" % i
1595 self
.res
= Array(res
)
1599 def add_state(self
, state
):
1600 self
.states
.append(state
)
1603 def get_fragment(self
, platform
=None):
1604 """ creates the HDL code-fragment for FPAdd
1607 m
.submodules
+= self
.rs
1609 in_a
= self
.rs
[0][0]
1610 in_b
= self
.rs
[0][1]
1612 out_z
= FPOp(self
.width
)
1613 out_mid
= Signal(self
.id_wid
, reset_less
=True)
1614 m
.submodules
.out_z
= out_z
1616 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1621 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1626 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1627 ab
= self
.add_state(ab
)
1628 ab
.setup(m
, a
, b
, getb
.out_decode
, self
.ids
.in_mid
,
1631 pz
= self
.add_state(FPPutZIdx("put_z", ab
.out_z
, self
.res
,
1634 with m
.FSM() as fsm
:
1636 for state
in self
.states
:
1637 with m
.State(state
.state_from
):
if __name__ == "__main__":
    # hard-wired switch between the two top-levels that can be converted
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = alu.rs[0][0].ports()
        port_list = port_list + alu.rs[0][1].ports()
        port_list = port_list + alu.res[0].ports()
        port_list = port_list + [alu.ids.in_mid, alu.ids.out_mid]
        main(alu, ports=port_list)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = [alu.in_a, alu.in_b]
        port_list = port_list + alu.in_t.ports()
        port_list = port_list + alu.out_z.ports()
        port_list = port_list + [alu.in_mid, alu.out_mid]
        main(alu, ports=port_list)
1658 # works... but don't use, just do "python fname.py convert -t v"
1659 #print (verilog.convert(alu, ports=[
1660 # ports=alu.in_a.ports() + \
1661 # alu.in_b.ports() + \
1662 # alu.out_z.ports())