1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
16 def __init__(self
, state_from
):
17 self
.state_from
= state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
139 def __init__(self
, width
):
140 self
.in_op
= FPOp(width
)
141 self
.out_op
= Signal(width
)
142 self
.out_decode
= Signal(reset_less
=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.copy(in_op
)
173 #m.d.comb += self.out_op.eq(self.mod.out_op)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
):
189 Trigger
.__init
__(self
)
190 self
.in_op1
= Signal(width
, reset_less
=True)
191 self
.in_op2
= Signal(width
, reset_less
=True)
192 self
.out_op1
= FPNumIn(None, width
)
193 self
.out_op2
= FPNumIn(None, width
)
195 def elaborate(self
, platform
):
196 m
= Trigger
.elaborate(self
, platform
)
197 #m.submodules.get_op_in = self.in_op
198 m
.submodules
.get_op1_out
= self
.out_op1
199 m
.submodules
.get_op2_out
= self
.out_op2
200 with m
.If(self
.trigger
):
202 self
.out_op1
.decode(self
.in_op1
),
203 self
.out_op2
.decode(self
.in_op2
),
208 class FPGet2Op(FPState
):
212 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
):
213 FPState
.__init
__(self
, in_state
)
214 self
.out_state
= out_state
215 self
.mod
= FPGet2OpMod(width
)
218 self
.out_op1
= FPNumIn(None, width
)
219 self
.out_op2
= FPNumIn(None, width
)
220 self
.in_stb
= Signal(reset_less
=True)
221 self
.out_ack
= Signal(reset_less
=True)
222 self
.out_decode
= Signal(reset_less
=True)
224 def setup(self
, m
, in_op1
, in_op2
, in_stb
, in_ack
):
225 """ links module to inputs and outputs
227 m
.submodules
.get_ops
= self
.mod
228 m
.d
.comb
+= self
.mod
.in_op1
.eq(in_op1
)
229 m
.d
.comb
+= self
.mod
.in_op2
.eq(in_op2
)
230 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
231 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
232 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
233 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
236 with m
.If(self
.out_decode
):
237 m
.next
= self
.out_state
240 #self.out_op1.v.eq(self.mod.out_op1.v),
241 #self.out_op2.v.eq(self.mod.out_op2.v),
242 self
.out_op1
.copy(self
.mod
.out_op1
),
243 self
.out_op2
.copy(self
.mod
.out_op2
)
246 m
.d
.sync
+= self
.mod
.ack
.eq(1)
249 class FPAddSpecialCasesMod
:
250 """ special cases: NaNs, infs, zeros, denormalised
251 NOTE: some of these are unique to add. see "Special Operations"
252 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
255 def __init__(self
, width
):
256 self
.in_a
= FPNumBase(width
)
257 self
.in_b
= FPNumBase(width
)
258 self
.out_z
= FPNumOut(width
, False)
259 self
.out_do_z
= Signal(reset_less
=True)
261 def setup(self
, m
, in_a
, in_b
, out_do_z
):
262 """ links module to inputs and outputs
264 m
.submodules
.specialcases
= self
265 m
.d
.comb
+= self
.in_a
.copy(in_a
)
266 m
.d
.comb
+= self
.in_b
.copy(in_b
)
267 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
269 def elaborate(self
, platform
):
272 m
.submodules
.sc_in_a
= self
.in_a
273 m
.submodules
.sc_in_b
= self
.in_b
274 m
.submodules
.sc_out_z
= self
.out_z
277 m
.d
.comb
+= s_nomatch
.eq(self
.in_a
.s
!= self
.in_b
.s
)
280 m
.d
.comb
+= m_match
.eq(self
.in_a
.m
== self
.in_b
.m
)
282 # if a is NaN or b is NaN return NaN
283 with m
.If(self
.in_a
.is_nan | self
.in_b
.is_nan
):
284 m
.d
.comb
+= self
.out_do_z
.eq(1)
285 m
.d
.comb
+= self
.out_z
.nan(0)
287 # XXX WEIRDNESS for FP16 non-canonical NaN handling
290 ## if a is zero and b is NaN return -b
291 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
292 # m.d.comb += self.out_do_z.eq(1)
293 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
295 ## if b is zero and a is NaN return -a
296 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
297 # m.d.comb += self.out_do_z.eq(1)
298 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
300 ## if a is -zero and b is NaN return -b
301 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
302 # m.d.comb += self.out_do_z.eq(1)
303 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
305 ## if b is -zero and a is NaN return -a
306 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
307 # m.d.comb += self.out_do_z.eq(1)
308 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
310 # if a is inf return inf (or NaN)
311 with m
.Elif(self
.in_a
.is_inf
):
312 m
.d
.comb
+= self
.out_do_z
.eq(1)
313 m
.d
.comb
+= self
.out_z
.inf(self
.in_a
.s
)
314 # if a is inf and signs don't match return NaN
315 with m
.If(self
.in_b
.exp_128
& s_nomatch
):
316 m
.d
.comb
+= self
.out_z
.nan(0)
318 # if b is inf return inf
319 with m
.Elif(self
.in_b
.is_inf
):
320 m
.d
.comb
+= self
.out_do_z
.eq(1)
321 m
.d
.comb
+= self
.out_z
.inf(self
.in_b
.s
)
323 # if a is zero and b zero return signed-a/b
324 with m
.Elif(self
.in_a
.is_zero
& self
.in_b
.is_zero
):
325 m
.d
.comb
+= self
.out_do_z
.eq(1)
326 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
& self
.in_b
.s
,
330 # if a is zero return b
331 with m
.Elif(self
.in_a
.is_zero
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.out_z
.create(self
.in_b
.s
, self
.in_b
.e
,
336 # if b is zero return a
337 with m
.Elif(self
.in_b
.is_zero
):
338 m
.d
.comb
+= self
.out_do_z
.eq(1)
339 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
, self
.in_a
.e
,
342 # if a equal to -b return zero (+ve zero)
343 with m
.Elif(s_nomatch
& m_match
& (self
.in_a
.e
== self
.in_b
.e
)):
344 m
.d
.comb
+= self
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.out_z
.zero(0)
347 # Denormalised Number checks
349 m
.d
.comb
+= self
.out_do_z
.eq(0)
355 def __init__(self
, id_wid
):
358 self
.in_mid
= Signal(id_wid
, reset_less
=True)
359 self
.out_mid
= Signal(id_wid
, reset_less
=True)
365 if self
.id_wid
is not None:
366 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
369 class FPAddSpecialCases(FPState
, FPID
):
370 """ special cases: NaNs, infs, zeros, denormalised
371 NOTE: some of these are unique to add. see "Special Operations"
372 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
375 def __init__(self
, width
, id_wid
):
376 FPState
.__init
__(self
, "special_cases")
377 FPID
.__init
__(self
, id_wid
)
378 self
.mod
= FPAddSpecialCasesMod(width
)
379 self
.out_z
= FPNumOut(width
, False)
380 self
.out_do_z
= Signal(reset_less
=True)
382 def setup(self
, m
, in_a
, in_b
, in_mid
):
383 """ links module to inputs and outputs
385 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
386 if self
.in_mid
is not None:
387 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
391 with m
.If(self
.out_do_z
):
392 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
395 m
.next
= "denormalise"
398 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
399 """ special cases: NaNs, infs, zeros, denormalised
400 NOTE: some of these are unique to add. see "Special Operations"
401 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
404 def __init__(self
, width
, id_wid
):
405 FPState
.__init
__(self
, "special_cases")
406 FPID
.__init
__(self
, id_wid
)
407 self
.smod
= FPAddSpecialCasesMod(width
)
408 self
.out_z
= FPNumOut(width
, False)
409 self
.out_do_z
= Signal(reset_less
=True)
411 self
.dmod
= FPAddDeNormMod(width
)
412 self
.out_a
= FPNumBase(width
)
413 self
.out_b
= FPNumBase(width
)
415 def setup(self
, m
, in_a
, in_b
, in_mid
):
416 """ links module to inputs and outputs
418 self
.smod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
419 self
.dmod
.setup(m
, in_a
, in_b
)
420 if self
.in_mid
is not None:
421 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
425 with m
.If(self
.out_do_z
):
426 m
.d
.sync
+= self
.out_z
.v
.eq(self
.smod
.out_z
.v
) # only take output
430 m
.d
.sync
+= self
.out_a
.copy(self
.dmod
.out_a
)
431 m
.d
.sync
+= self
.out_b
.copy(self
.dmod
.out_b
)
434 class FPAddDeNormMod(FPState
):
436 def __init__(self
, width
):
437 self
.in_a
= FPNumBase(width
)
438 self
.in_b
= FPNumBase(width
)
439 self
.out_a
= FPNumBase(width
)
440 self
.out_b
= FPNumBase(width
)
442 def setup(self
, m
, in_a
, in_b
):
443 """ links module to inputs and outputs
445 m
.submodules
.denormalise
= self
446 m
.d
.comb
+= self
.in_a
.copy(in_a
)
447 m
.d
.comb
+= self
.in_b
.copy(in_b
)
449 def elaborate(self
, platform
):
451 m
.submodules
.denorm_in_a
= self
.in_a
452 m
.submodules
.denorm_in_b
= self
.in_b
453 m
.submodules
.denorm_out_a
= self
.out_a
454 m
.submodules
.denorm_out_b
= self
.out_b
455 # hmmm, don't like repeating identical code
456 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
457 with m
.If(self
.in_a
.exp_n127
):
458 m
.d
.comb
+= self
.out_a
.e
.eq(self
.in_a
.N126
) # limit a exponent
460 m
.d
.comb
+= self
.out_a
.m
[-1].eq(1) # set top mantissa bit
462 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
463 with m
.If(self
.in_b
.exp_n127
):
464 m
.d
.comb
+= self
.out_b
.e
.eq(self
.in_b
.N126
) # limit a exponent
466 m
.d
.comb
+= self
.out_b
.m
[-1].eq(1) # set top mantissa bit
471 class FPAddDeNorm(FPState
, FPID
):
473 def __init__(self
, width
, id_wid
):
474 FPState
.__init
__(self
, "denormalise")
475 FPID
.__init
__(self
, id_wid
)
476 self
.mod
= FPAddDeNormMod(width
)
477 self
.out_a
= FPNumBase(width
)
478 self
.out_b
= FPNumBase(width
)
480 def setup(self
, m
, in_a
, in_b
, in_mid
):
481 """ links module to inputs and outputs
483 self
.mod
.setup(m
, in_a
, in_b
)
484 if self
.in_mid
is not None:
485 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
489 # Denormalised Number checks
491 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
492 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
495 class FPAddAlignMultiMod(FPState
):
497 def __init__(self
, width
):
498 self
.in_a
= FPNumBase(width
)
499 self
.in_b
= FPNumBase(width
)
500 self
.out_a
= FPNumIn(None, width
)
501 self
.out_b
= FPNumIn(None, width
)
502 self
.exp_eq
= Signal(reset_less
=True)
504 def elaborate(self
, platform
):
505 # This one however (single-cycle) will do the shift
510 m
.submodules
.align_in_a
= self
.in_a
511 m
.submodules
.align_in_b
= self
.in_b
512 m
.submodules
.align_out_a
= self
.out_a
513 m
.submodules
.align_out_b
= self
.out_b
515 # NOTE: this does *not* do single-cycle multi-shifting,
516 # it *STAYS* in the align state until exponents match
518 # exponent of a greater than b: shift b down
519 m
.d
.comb
+= self
.exp_eq
.eq(0)
520 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
521 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
522 agtb
= Signal(reset_less
=True)
523 altb
= Signal(reset_less
=True)
524 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
525 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
527 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
528 # exponent of b greater than a: shift a down
530 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
531 # exponents equal: move to next stage.
533 m
.d
.comb
+= self
.exp_eq
.eq(1)
537 class FPAddAlignMulti(FPState
, FPID
):
539 def __init__(self
, width
, id_wid
):
540 FPID
.__init
__(self
, id_wid
)
541 FPState
.__init
__(self
, "align")
542 self
.mod
= FPAddAlignMultiMod(width
)
543 self
.out_a
= FPNumIn(None, width
)
544 self
.out_b
= FPNumIn(None, width
)
545 self
.exp_eq
= Signal(reset_less
=True)
547 def setup(self
, m
, in_a
, in_b
, in_mid
):
548 """ links module to inputs and outputs
550 m
.submodules
.align
= self
.mod
551 m
.d
.comb
+= self
.mod
.in_a
.copy(in_a
)
552 m
.d
.comb
+= self
.mod
.in_b
.copy(in_b
)
553 #m.d.comb += self.out_a.copy(self.mod.out_a)
554 #m.d.comb += self.out_b.copy(self.mod.out_b)
555 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
556 if self
.in_mid
is not None:
557 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
561 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
562 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
563 with m
.If(self
.exp_eq
):
567 class FPAddAlignSingleMod
:
569 def __init__(self
, width
):
571 self
.in_a
= FPNumBase(width
)
572 self
.in_b
= FPNumBase(width
)
573 self
.out_a
= FPNumIn(None, width
)
574 self
.out_b
= FPNumIn(None, width
)
576 def setup(self
, m
, in_a
, in_b
):
577 """ links module to inputs and outputs
579 m
.submodules
.align
= self
580 m
.d
.comb
+= self
.in_a
.copy(in_a
)
581 m
.d
.comb
+= self
.in_b
.copy(in_b
)
583 def elaborate(self
, platform
):
584 """ Aligns A against B or B against A, depending on which has the
585 greater exponent. This is done in a *single* cycle using
586 variable-width bit-shift
588 the shifter used here is quite expensive in terms of gates.
589 Mux A or B in (and out) into temporaries, as only one of them
590 needs to be aligned against the other
594 m
.submodules
.align_in_a
= self
.in_a
595 m
.submodules
.align_in_b
= self
.in_b
596 m
.submodules
.align_out_a
= self
.out_a
597 m
.submodules
.align_out_b
= self
.out_b
599 # temporary (muxed) input and output to be shifted
600 t_inp
= FPNumBase(self
.width
)
601 t_out
= FPNumIn(None, self
.width
)
602 espec
= (len(self
.in_a
.e
), True)
603 msr
= MultiShiftRMerge(self
.in_a
.m_width
, espec
)
604 m
.submodules
.align_t_in
= t_inp
605 m
.submodules
.align_t_out
= t_out
606 m
.submodules
.multishift_r
= msr
608 ediff
= Signal(espec
, reset_less
=True)
609 ediffr
= Signal(espec
, reset_less
=True)
610 tdiff
= Signal(espec
, reset_less
=True)
611 elz
= Signal(reset_less
=True)
612 egz
= Signal(reset_less
=True)
614 # connect multi-shifter to t_inp/out mantissa (and tdiff)
615 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
616 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
617 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
618 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
619 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
621 m
.d
.comb
+= ediff
.eq(self
.in_a
.e
- self
.in_b
.e
)
622 m
.d
.comb
+= ediffr
.eq(self
.in_b
.e
- self
.in_a
.e
)
623 m
.d
.comb
+= elz
.eq(self
.in_a
.e
< self
.in_b
.e
)
624 m
.d
.comb
+= egz
.eq(self
.in_a
.e
> self
.in_b
.e
)
626 # default: A-exp == B-exp, A and B untouched (fall through)
627 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
628 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
629 # only one shifter (muxed)
630 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
631 # exponent of a greater than b: shift b down
633 m
.d
.comb
+= [t_inp
.copy(self
.in_b
),
635 self
.out_b
.copy(t_out
),
636 self
.out_b
.s
.eq(self
.in_b
.s
), # whoops forgot sign
638 # exponent of b greater than a: shift a down
640 m
.d
.comb
+= [t_inp
.copy(self
.in_a
),
642 self
.out_a
.copy(t_out
),
643 self
.out_a
.s
.eq(self
.in_a
.s
), # whoops forgot sign
648 class FPAddAlignSingle(FPState
, FPID
):
650 def __init__(self
, width
, id_wid
):
651 FPState
.__init
__(self
, "align")
652 FPID
.__init
__(self
, id_wid
)
653 self
.mod
= FPAddAlignSingleMod(width
)
654 self
.out_a
= FPNumIn(None, width
)
655 self
.out_b
= FPNumIn(None, width
)
657 def setup(self
, m
, in_a
, in_b
, in_mid
):
658 """ links module to inputs and outputs
660 self
.mod
.setup(m
, in_a
, in_b
)
661 if self
.in_mid
is not None:
662 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
666 # NOTE: could be done as comb
667 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
668 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
672 class FPAddAlignSingleAdd(FPState
, FPID
):
674 def __init__(self
, width
, id_wid
):
675 FPState
.__init
__(self
, "align")
676 FPID
.__init
__(self
, id_wid
)
677 self
.mod
= FPAddAlignSingleMod(width
)
678 self
.out_a
= FPNumIn(None, width
)
679 self
.out_b
= FPNumIn(None, width
)
681 self
.a0mod
= FPAddStage0Mod(width
)
682 self
.a0_out_z
= FPNumBase(width
, False)
683 self
.out_tot
= Signal(self
.a0_out_z
.m_width
+ 4, reset_less
=True)
684 self
.a0_out_z
= FPNumBase(width
, False)
686 self
.a1mod
= FPAddStage1Mod(width
)
687 self
.out_z
= FPNumBase(width
, False)
688 self
.out_of
= Overflow()
690 def setup(self
, m
, in_a
, in_b
, in_mid
):
691 """ links module to inputs and outputs
693 self
.mod
.setup(m
, in_a
, in_b
)
694 m
.d
.comb
+= self
.out_a
.copy(self
.mod
.out_a
)
695 m
.d
.comb
+= self
.out_b
.copy(self
.mod
.out_b
)
697 self
.a0mod
.setup(m
, self
.out_a
, self
.out_b
)
698 m
.d
.comb
+= self
.a0_out_z
.copy(self
.a0mod
.out_z
)
699 m
.d
.comb
+= self
.out_tot
.eq(self
.a0mod
.out_tot
)
701 self
.a1mod
.setup(m
, self
.out_tot
, self
.a0_out_z
)
703 if self
.in_mid
is not None:
704 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
708 m
.d
.sync
+= self
.out_of
.copy(self
.a1mod
.out_of
)
709 m
.d
.sync
+= self
.out_z
.copy(self
.a1mod
.out_z
)
710 m
.next
= "normalise_1"
713 class FPAddStage0Mod
:
715 def __init__(self
, width
):
716 self
.in_a
= FPNumBase(width
)
717 self
.in_b
= FPNumBase(width
)
718 self
.in_z
= FPNumBase(width
, False)
719 self
.out_z
= FPNumBase(width
, False)
720 self
.out_tot
= Signal(self
.out_z
.m_width
+ 4, reset_less
=True)
722 def setup(self
, m
, in_a
, in_b
):
723 """ links module to inputs and outputs
725 m
.submodules
.add0
= self
726 m
.d
.comb
+= self
.in_a
.copy(in_a
)
727 m
.d
.comb
+= self
.in_b
.copy(in_b
)
729 def elaborate(self
, platform
):
731 m
.submodules
.add0_in_a
= self
.in_a
732 m
.submodules
.add0_in_b
= self
.in_b
733 m
.submodules
.add0_out_z
= self
.out_z
735 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_a
.e
)
737 # store intermediate tests (and zero-extended mantissas)
738 seq
= Signal(reset_less
=True)
739 mge
= Signal(reset_less
=True)
740 am0
= Signal(len(self
.in_a
.m
)+1, reset_less
=True)
741 bm0
= Signal(len(self
.in_b
.m
)+1, reset_less
=True)
742 m
.d
.comb
+= [seq
.eq(self
.in_a
.s
== self
.in_b
.s
),
743 mge
.eq(self
.in_a
.m
>= self
.in_b
.m
),
744 am0
.eq(Cat(self
.in_a
.m
, 0)),
745 bm0
.eq(Cat(self
.in_b
.m
, 0))
747 # same-sign (both negative or both positive) add mantissas
750 self
.out_tot
.eq(am0
+ bm0
),
751 self
.out_z
.s
.eq(self
.in_a
.s
)
753 # a mantissa greater than b, use a
756 self
.out_tot
.eq(am0
- bm0
),
757 self
.out_z
.s
.eq(self
.in_a
.s
)
759 # b mantissa greater than a, use b
762 self
.out_tot
.eq(bm0
- am0
),
763 self
.out_z
.s
.eq(self
.in_b
.s
)
768 class FPAddStage0(FPState
, FPID
):
769 """ First stage of add. covers same-sign (add) and subtract
770 special-casing when mantissas are greater or equal, to
771 give greatest accuracy.
774 def __init__(self
, width
, id_wid
):
775 FPState
.__init
__(self
, "add_0")
776 FPID
.__init
__(self
, id_wid
)
777 self
.mod
= FPAddStage0Mod(width
)
778 self
.out_z
= FPNumBase(width
, False)
779 self
.out_tot
= Signal(self
.out_z
.m_width
+ 4, reset_less
=True)
781 def setup(self
, m
, in_a
, in_b
, in_mid
):
782 """ links module to inputs and outputs
784 self
.mod
.setup(m
, in_a
, in_b
)
785 if self
.in_mid
is not None:
786 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
790 # NOTE: these could be done as combinatorial (merge add0+add1)
791 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
792 m
.d
.sync
+= self
.out_tot
.eq(self
.mod
.out_tot
)
796 class FPAddStage1Mod(FPState
):
797 """ Second stage of add: preparation for normalisation.
798 detects when tot sum is too big (tot[27] is kinda a carry bit)
801 def __init__(self
, width
):
802 self
.out_norm
= Signal(reset_less
=True)
803 self
.in_z
= FPNumBase(width
, False)
804 self
.in_tot
= Signal(self
.in_z
.m_width
+ 4, reset_less
=True)
805 self
.out_z
= FPNumBase(width
, False)
806 self
.out_of
= Overflow()
808 def setup(self
, m
, in_tot
, in_z
):
809 """ links module to inputs and outputs
811 m
.submodules
.add1
= self
812 m
.submodules
.add1_out_overflow
= self
.out_of
814 m
.d
.comb
+= self
.in_z
.copy(in_z
)
815 m
.d
.comb
+= self
.in_tot
.eq(in_tot
)
817 def elaborate(self
, platform
):
819 #m.submodules.norm1_in_overflow = self.in_of
820 #m.submodules.norm1_out_overflow = self.out_of
821 #m.submodules.norm1_in_z = self.in_z
822 #m.submodules.norm1_out_z = self.out_z
823 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
824 # tot[-1] (MSB) gets set when the sum overflows. shift result down
825 with m
.If(self
.in_tot
[-1]):
827 self
.out_z
.m
.eq(self
.in_tot
[4:]),
828 self
.out_of
.m0
.eq(self
.in_tot
[4]),
829 self
.out_of
.guard
.eq(self
.in_tot
[3]),
830 self
.out_of
.round_bit
.eq(self
.in_tot
[2]),
831 self
.out_of
.sticky
.eq(self
.in_tot
[1] | self
.in_tot
[0]),
832 self
.out_z
.e
.eq(self
.in_z
.e
+ 1)
834 # tot[-1] (MSB) zero case
837 self
.out_z
.m
.eq(self
.in_tot
[3:]),
838 self
.out_of
.m0
.eq(self
.in_tot
[3]),
839 self
.out_of
.guard
.eq(self
.in_tot
[2]),
840 self
.out_of
.round_bit
.eq(self
.in_tot
[1]),
841 self
.out_of
.sticky
.eq(self
.in_tot
[0])
846 class FPAddStage1(FPState
, FPID
):
848 def __init__(self
, width
, id_wid
):
849 FPState
.__init
__(self
, "add_1")
850 FPID
.__init
__(self
, id_wid
)
851 self
.mod
= FPAddStage1Mod(width
)
852 self
.out_z
= FPNumBase(width
, False)
853 self
.out_of
= Overflow()
854 self
.norm_stb
= Signal()
856 def setup(self
, m
, in_tot
, in_z
, in_mid
):
857 """ links module to inputs and outputs
859 self
.mod
.setup(m
, in_tot
, in_z
)
861 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
863 if self
.in_mid
is not None:
864 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
868 m
.d
.sync
+= self
.out_of
.copy(self
.mod
.out_of
)
869 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
870 m
.d
.sync
+= self
.norm_stb
.eq(1)
871 m
.next
= "normalise_1"
874 class FPNormaliseModSingle
:
876 def __init__(self
, width
):
878 self
.in_z
= FPNumBase(width
, False)
879 self
.out_z
= FPNumBase(width
, False)
881 def setup(self
, m
, in_z
, out_z
, modname
):
882 """ links module to inputs and outputs
884 m
.submodules
.normalise
= self
885 m
.d
.comb
+= self
.in_z
.copy(in_z
)
886 m
.d
.comb
+= out_z
.copy(self
.out_z
)
888 def elaborate(self
, platform
):
891 mwid
= self
.out_z
.m_width
+2
892 pe
= PriorityEncoder(mwid
)
893 m
.submodules
.norm_pe
= pe
895 m
.submodules
.norm1_out_z
= self
.out_z
896 m
.submodules
.norm1_in_z
= self
.in_z
898 in_z
= FPNumBase(self
.width
, False)
900 m
.submodules
.norm1_insel_z
= in_z
901 m
.submodules
.norm1_insel_overflow
= in_of
903 espec
= (len(in_z
.e
), True)
904 ediff_n126
= Signal(espec
, reset_less
=True)
905 msr
= MultiShiftRMerge(mwid
, espec
)
906 m
.submodules
.multishift_r
= msr
908 m
.d
.comb
+= in_z
.copy(self
.in_z
)
909 m
.d
.comb
+= in_of
.copy(self
.in_of
)
910 # initialise out from in (overridden below)
911 m
.d
.comb
+= self
.out_z
.copy(in_z
)
912 m
.d
.comb
+= self
.out_of
.copy(in_of
)
913 # normalisation increase/decrease conditions
914 decrease
= Signal(reset_less
=True)
915 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
918 # *sigh* not entirely obvious: count leading zeros (clz)
919 # with a PriorityEncoder: to find from the MSB
920 # we reverse the order of the bits.
921 temp_m
= Signal(mwid
, reset_less
=True)
922 temp_s
= Signal(mwid
+1, reset_less
=True)
923 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
925 # cat round and guard bits back into the mantissa
926 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
927 pe
.i
.eq(temp_m
[::-1]), # inverted
928 clz
.eq(pe
.o
), # count zeros from MSB down
929 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
930 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
931 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
937 class FPNorm1ModSingle
:
939 def __init__(self
, width
):
941 self
.out_norm
= Signal(reset_less
=True)
942 self
.in_z
= FPNumBase(width
, False)
943 self
.in_of
= Overflow()
944 self
.out_z
= FPNumBase(width
, False)
945 self
.out_of
= Overflow()
947 def setup(self
, m
, in_z
, in_of
, out_z
):
948 """ links module to inputs and outputs
950 m
.submodules
.normalise_1
= self
952 m
.d
.comb
+= self
.in_z
.copy(in_z
)
953 m
.d
.comb
+= self
.in_of
.copy(in_of
)
955 m
.d
.comb
+= out_z
.copy(self
.out_z
)
957 def elaborate(self
, platform
):
960 mwid
= self
.out_z
.m_width
+2
961 pe
= PriorityEncoder(mwid
)
962 m
.submodules
.norm_pe
= pe
964 m
.submodules
.norm1_out_z
= self
.out_z
965 m
.submodules
.norm1_out_overflow
= self
.out_of
966 m
.submodules
.norm1_in_z
= self
.in_z
967 m
.submodules
.norm1_in_overflow
= self
.in_of
969 in_z
= FPNumBase(self
.width
, False)
971 m
.submodules
.norm1_insel_z
= in_z
972 m
.submodules
.norm1_insel_overflow
= in_of
974 espec
= (len(in_z
.e
), True)
975 ediff_n126
= Signal(espec
, reset_less
=True)
976 msr
= MultiShiftRMerge(mwid
, espec
)
977 m
.submodules
.multishift_r
= msr
979 m
.d
.comb
+= in_z
.copy(self
.in_z
)
980 m
.d
.comb
+= in_of
.copy(self
.in_of
)
981 # initialise out from in (overridden below)
982 m
.d
.comb
+= self
.out_z
.copy(in_z
)
983 m
.d
.comb
+= self
.out_of
.copy(in_of
)
984 # normalisation increase/decrease conditions
985 decrease
= Signal(reset_less
=True)
986 increase
= Signal(reset_less
=True)
987 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
988 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
991 # *sigh* not entirely obvious: count leading zeros (clz)
992 # with a PriorityEncoder: to find from the MSB
993 # we reverse the order of the bits.
994 temp_m
= Signal(mwid
, reset_less
=True)
995 temp_s
= Signal(mwid
+1, reset_less
=True)
996 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
997 # make sure that the amount to decrease by does NOT
998 # go below the minimum non-INF/NaN exponent
999 limclz
= Mux(in_z
.exp_sub_n126
> pe
.o
, pe
.o
,
1002 # cat round and guard bits back into the mantissa
1003 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1004 pe
.i
.eq(temp_m
[::-1]), # inverted
1005 clz
.eq(limclz
), # count zeros from MSB down
1006 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1007 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1008 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1009 self
.out_of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1010 # overflow in bits 0..1: got shifted too (leave sticky)
1011 self
.out_of
.guard
.eq(temp_s
[1]), # guard
1012 self
.out_of
.round_bit
.eq(temp_s
[0]), # round
1015 with m
.Elif(increase
):
1016 temp_m
= Signal(mwid
+1, reset_less
=True)
1018 temp_m
.eq(Cat(in_of
.sticky
, in_of
.round_bit
, in_of
.guard
,
1020 ediff_n126
.eq(in_z
.N126
- in_z
.e
),
1021 # connect multi-shifter to inp/out mantissa (and ediff)
1023 msr
.diff
.eq(ediff_n126
),
1024 self
.out_z
.m
.eq(msr
.m
[3:]),
1025 self
.out_of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1026 # overflow in bits 0..1: got shifted too (leave sticky)
1027 self
.out_of
.guard
.eq(temp_s
[2]), # guard
1028 self
.out_of
.round_bit
.eq(temp_s
[1]), # round
1029 self
.out_of
.sticky
.eq(temp_s
[0]), # sticky
1030 self
.out_z
.e
.eq(in_z
.e
+ ediff_n126
),
class FPNorm1ModMulti:
    """ One normalisation step, selectable between a fresh and a saved input.

        When in_select is set the step operates on in_z/in_of, otherwise
        on temp_z/temp_of.  out_z/out_of default to a copy of the selected
        input and are overridden when the exponent has to be decreased or
        increased.  out_norm is high whenever either adjustment applies.
    """

    def __init__(self, width, single_cycle=True):
        """
            * width: bit-width handed to every FPNumBase created here
            * single_cycle: accepted for signature compatibility; unused
        """
        # elaborate() reads self.width to build its internal FPNumBase
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ builds and returns the combinatorial normalisation module
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # internal copies holding whichever input pair was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)

        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)

        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end

        # the overrides must live in the comb domain, after the default
        # copies above, so that they take priority over them
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1" wrapping an FPNorm1ModSingle instance.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """
            * width: bit-width handed to the module and to out_z
            * id_wid: identifier width, forwarded to FPID
            * single_cycle: accepted for signature compatibility; unused
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the round-up decision and moves on to rounding
        """
        # NOTE(review): idsync() and the transition below are restored by
        # analogy with FPNormToPack.action / FPRound.action -- confirm.
        self.idsync(m)
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
        m.next = "round"  # FPRound registers itself under this state name
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1" driving FPNorm1ModMulti.

        Each visit registers the module outputs into temp_z/temp_of; the
        state is left for "round" once out_norm is low.
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width handed to the module and the FPNumBases
            * id_wid: identifier width, forwarded to FPID
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        # NOTE(review): the FPNorm1ModMulti definition in this file shows
        # no setup() method -- confirm this call resolves before use.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0)  # sets to zero when not in normalise_1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ one visit to the normalise_1 state
        """
        # NOTE(review): idsync(), the ack.eq(1) accept branch, the Else
        # branches and the "round" transition were reconstructed from the
        # surrounding structure and the sibling states -- confirm.
        self.idsync(m)
        # accept a new operand on strobe HIGH while ack is LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.copy(self.mod.out_of)
        m.d.sync += self.temp_z.copy(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" for the compact pipeline.

        Chains normalisation, rounding, corrections and packing
        combinatorially, then registers the packed value in action().
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width used for every module built in setup()
            * id_wid: identifier width, forwarded to FPID
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # setup() reads self.width for each chained module
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width)
        n_out_z = FPNumBase(self.width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width)
        r_out_z = FPNumBase(self.width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(nmod.out_of.roundz)
        m.d.comb += r_out_z.copy(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width)
        c_out_z = FPNumBase(self.width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width)
        self.out_z = FPNumBase(self.width)
        self.pmod.setup(m, c_out_z)

        # Multiplex ID
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed result and moves on to "pack_put_z"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v)  # outputs packed result
        m.next = "pack_put_z"
class FPRoundMod:
    """ Combinatorial rounding: optionally increments the mantissa of in_z.
    """

    def __init__(self, width):
        """
            * width: bit-width handed to the in_z/out_z FPNumBases
        """
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module under m and links its inputs
        """
        m.submodules.roundz = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_roundz.eq(roundz)

    def elaborate(self, platform):
        """ builds and returns the rounding module
        """
        m = Module()
        # default: pass the input straight through
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_roundz):
            m.d.comb += self.out_z.m.eq(self.in_z.m + 1)  # mantissa rounds up
            # the exponent only moves when the increment wraps the mantissa
            with m.If(self.in_z.m == self.in_z.m1s):  # all 1s
                m.d.comb += self.out_z.e.eq(self.in_z.e + 1)  # exponent up
        return m
class FPRound(FPState, FPID):
    """ FSM state "round" wrapping an FPRoundMod instance.
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width handed to the module and to out_z
            * id_wid: identifier width, forwarded to FPID
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the rounded value and moves on to "corrections"
        """
        # NOTE(review): idsync() restored by analogy with
        # FPNormToPack.action -- confirm.
        self.idsync(m)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial exponent correction applied when in_z is denormalised.
    """

    def __init__(self, width):
        """
            * width: bit-width handed to the in_z/out_z FPNumOuts
        """
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ builds and returns the corrections module
        """
        m = Module()
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        # default: pass the input straight through
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections" wrapping an FPCorrectionsMod instance.
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width handed to the module and to out_z
            * id_wid: identifier width, forwarded to FPID
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the corrected value and moves on to "pack"
        """
        # NOTE(review): idsync() and the transition were restored by
        # analogy with the sibling states -- confirm.
        self.idsync(m)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "pack"  # FPPack registers itself under this state name
class FPPackMod:
    """ Combinatorial packing of sign/exponent/mantissa into out_z.
    """

    def __init__(self, width):
        """
            * width: bit-width handed to the in_z/out_z FPNumOuts
        """
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ builds and returns the pack module
        """
        m = Module()
        m.submodules.pack_in_z = self.in_z
        # the two branches are mutually exclusive: without the Else the
        # create() below would always override the infinity result
        with m.If(self.in_z.is_overflowed):
            m.d.comb += self.out_z.inf(self.in_z.s)
        with m.Else():
            m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e,
                                          self.in_z.m)
        return m
class FPPack(FPState, FPID):
    """ FSM state "pack" wrapping an FPPackMod instance.
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width handed to the module and to out_z
            * id_wid: identifier width, forwarded to FPID
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed value and moves on to "pack_put_z"
        """
        # NOTE(review): idsync() restored by analogy with
        # FPNormToPack.action -- confirm.
        self.idsync(m)
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ Output state: presents in_z on out_z and waits for the stb/ack
        handshake before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """
            * state: name of this FSM state
            * in_z: value source (its .v is read)
            * out_z: output operand (its .v, .stb and .ack are used)
            * in_mid / out_mid: identifier source and destination;
              in_mid may be None, in which case no id is copied
            * to_state: state entered after the handshake
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ drives the output and performs the handshake
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): sync domain chosen to match the neighbouring
        # out_mid / stb assignments -- confirm.
        m.d.sync += [
            self.out_z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.stb & self.out_z.ack):
            # handshake complete: drop the strobe and leave the state
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
class FPPutZIdx(FPState):
    """ Output state: presents in_z on the out_zs entry selected by in_mid
        and waits for that entry's stb/ack handshake.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """
            * state: name of this FSM state
            * in_z: value source (its .v is read)
            * out_zs: indexable collection of output operands
            * in_mid: index into out_zs
            * to_state: state entered after the handshake
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z  # read by action()
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ drives the selected output and performs the handshake
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # NOTE(review): sync domain chosen to match the stb assignments
        # below -- confirm.
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and leave the state
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseMod(FPID):
    """ Floating-point adder built as an FSM of FPState objects.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """
            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        self.states = []  # filled by add_state()

    def add_state(self, state):
        """ records state for the FSM and returns it for chaining
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ wires up the one-state-per-stage pipeline
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): attribute names reconstructed; verify against
        # FPGet2Op, which is defined outside this section.
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # normal result path
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # result path taken from the special-cases stage
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ wires up the reduced-stage pipeline
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): attribute names reconstructed; verify against
        # FPGet2Op, which is defined outside this section.
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        # normal result path
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # result path taken from the special-cases stage
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd": a stage that itself contains an FPADDBaseMod.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """
            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        # self.out_z and self.out_mid are bound by setup()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the inner adder to the given operands and outputs
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0)  # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ handshakes operands into the inner adder and waits for z_done
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge and move to the output state.
            # setup() already drives out_mid, out_z.v and out_z.stb in the
            # comb domain, so they must not also be assigned from sync
            # here: a signal may only be driven from one domain.
            # NOTE(review): in_t.stb.eq(0) and the "put_z" target (the
            # state FPADD registers after this one) are reconstructed.
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0),
                        ]
            m.next = "put_z"
def __init__(self, width, id_wid, rs_sz=2):
    """ builds an Array of result operands plus an input operand and id

        * width: bit-width handed to each operand
        * id_wid: width of the in_mid identifier signal
        * rs_sz: number of result entries.  The loop bound was previously
          an undefined name; it is now a keyword argument whose default
          matches FPADD's rs_sz default.
    """
    self.width = width
    self.id_wid = id_wid
    res = []
    for i in range(rs_sz):
        # NOTE(review): element type assumed to be FPOp, matching
        # self.in_z below -- confirm.
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)
    self.in_z = FPOp(width)
    self.in_mid = Signal(self.id_wid, reset_less=True)
1626 def setup(self
, m
, in_z
, in_mid
):
1627 m
.d
.comb
+= [self
.in_z
.copy(in_z
),
1628 self
.in_mid
.eq(in_mid
)]
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    m = Module()
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res

    return m
# NOTE(review): the class statement was not visible in this section; any
# base class it originally declared must be re-checked.
class FPADD:
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """
            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand pairs and of result entries
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # NOTE(review): operand/result element type assumed to be FPOp
        # (the __main__ block calls .ports() on them) -- confirm.
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        self.states = []  # filled by add_state()

    def add_state(self, state):
        """ records state for the FSM and returns it for chaining
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        out_z = FPOp(self.width)
        out_mid = Signal(self.id_wid, reset_less=True)
        m.submodules.out_z = out_z

        # NOTE(review): FPGetOp is defined outside this section; its
        # constructor arguments, setup() signature and out_op attribute
        # are reconstructed by analogy with the FPGet2Op calls -- confirm.
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        ab.setup(m, a, b, getb.out_decode, self.ids.in_mid,
                 out_z, out_mid)

        # after output, loop back to the first state of this FSM
        # NOTE(review): "get_a" assumed as the return state (the default,
        # "get_ops", is not a state registered here) -- confirm.
        self.add_state(FPPutZIdx("put_z", ab.out_z, self.res,
                                 out_mid, "get_a"))

        with m.FSM() as fsm:
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
if __name__ == "__main__":
    # Only one design can be handed to main().  The FPADDBase variant is
    # kept for reference but disabled: its out_z/out_mid attributes are
    # only bound by setup(), so it cannot be built stand-alone as-is.
    build_full_adder = True
    if build_full_adder:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() + \
                        alu.rs[0][1].ports() + \
                        alu.res[0].ports() + \
                        [alu.ids.in_mid, alu.ids.out_mid])
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                        alu.in_t.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])
1754 # works... but don't use, just do "python fname.py convert -t v"
1755 #print (verilog.convert(alu, ports=[
1756 # ports=alu.in_a.ports() + \
1757 # alu.in_b.ports() + \
1758 # alu.out_z.ports())