1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
16 def __init__(self
, state_from
):
17 self
.state_from
= state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
139 def __init__(self
, width
):
140 self
.in_op
= FPOp(width
)
141 self
.out_op
= Signal(width
)
142 self
.out_decode
= Signal(reset_less
=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 #m.d.comb += self.out_op.eq(self.mod.out_op)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
def __init__(self, width):
    """ creates the raw operand inputs and decoded operand outputs

        width: bit-width of each raw input operand, also passed on to
               the two FPNumIn outputs.
    """
    # Trigger base initialisation (defined in fpbase)
    Trigger.__init__(self)
    # raw operands, as plain bit-vectors
    self.in_op1 = Signal(width, reset_less=True)
    self.in_op2 = Signal(width, reset_less=True)
    # decoded operands: elaborate() drives these via decode() of the
    # corresponding raw input
    self.out_op1 = FPNumIn(None, width)
    self.out_op2 = FPNumIn(None, width)
195 def elaborate(self
, platform
):
196 m
= Trigger
.elaborate(self
, platform
)
197 #m.submodules.get_op_in = self.in_op
198 m
.submodules
.get_op1_out
= self
.out_op1
199 m
.submodules
.get_op2_out
= self
.out_op2
200 with m
.If(self
.trigger
):
202 self
.out_op1
.decode(self
.in_op1
),
203 self
.out_op2
.decode(self
.in_op2
),
208 class FPGet2Op(FPState
):
212 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
):
213 FPState
.__init
__(self
, in_state
)
214 self
.out_state
= out_state
215 self
.mod
= FPGet2OpMod(width
)
218 self
.out_op1
= FPNumIn(None, width
)
219 self
.out_op2
= FPNumIn(None, width
)
220 self
.in_stb
= Signal(reset_less
=True)
221 self
.out_ack
= Signal(reset_less
=True)
222 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op1, in_op2, in_stb, in_ack):
    """ links module to inputs and outputs

        m:       parent Module; self.mod is registered on it as the
                 "get_ops" submodule.
        in_op1:  first raw operand, wired to self.mod.in_op1
        in_op2:  second raw operand, wired to self.mod.in_op2
        in_stb:  incoming strobe, wired to self.mod.stb
        in_ack:  caller's ack signal: *driven from* self.mod.ack
                 (despite the "in_" prefix it is assigned to here)
    """
    m.submodules.get_ops = self.mod
    # inputs into the operand-fetch module
    m.d.comb += self.mod.in_op1.eq(in_op1)
    m.d.comb += self.mod.in_op2.eq(in_op2)
    m.d.comb += self.mod.stb.eq(in_stb)
    # local copies of the module's ack and trigger
    m.d.comb += self.out_ack.eq(self.mod.ack)
    m.d.comb += self.out_decode.eq(self.mod.trigger)
    # reflect the module's ack back out to the caller
    m.d.comb += in_ack.eq(self.mod.ack)
236 with m
.If(self
.out_decode
):
237 m
.next
= self
.out_state
240 #self.out_op1.v.eq(self.mod.out_op1.v),
241 #self.out_op2.v.eq(self.mod.out_op2.v),
242 self
.out_op1
.eq(self
.mod
.out_op1
),
243 self
.out_op2
.eq(self
.mod
.out_op2
)
246 m
.d
.sync
+= self
.mod
.ack
.eq(1)
250 def __init__(self
, width
, id_wid
, m_extra
=True):
251 self
.a
= FPNumBase(width
, m_extra
)
252 self
.b
= FPNumBase(width
, m_extra
)
253 self
.mid
= Signal(id_wid
, reset_less
=True)
256 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
259 class FPAddSpecialCasesMod
:
260 """ special cases: NaNs, infs, zeros, denormalised
261 NOTE: some of these are unique to add. see "Special Operations"
262 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
265 def __init__(self
, width
, id_wid
):
268 self
.i
= self
.ispec()
269 self
.out_z
= self
.ospec()
270 self
.out_do_z
= Signal(reset_less
=True)
273 return FPNumBase2Ops(self
.width
, self
.id_wid
)
276 return FPNumOut(self
.width
, False)
def setup(self, m, in_a, in_b, out_do_z):
    """ links module to inputs and outputs

        m:        parent Module; this object is registered on it as
                  the "specialcases" submodule.
        in_a:     first operand, wired to self.i.a
        in_b:     second operand, wired to self.i.b
        out_do_z: caller-supplied signal, driven from self.out_do_z
    """
    m.submodules.specialcases = self
    m.d.comb += self.i.a.eq(in_a)
    m.d.comb += self.i.b.eq(in_b)
    # export the "special case matched" flag to the caller
    m.d.comb += out_do_z.eq(self.out_do_z)
286 def elaborate(self
, platform
):
289 m
.submodules
.sc_in_a
= self
.i
.a
290 m
.submodules
.sc_in_b
= self
.i
.b
291 m
.submodules
.sc_out_z
= self
.out_z
294 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
297 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
299 # if a is NaN or b is NaN return NaN
300 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
301 m
.d
.comb
+= self
.out_do_z
.eq(1)
302 m
.d
.comb
+= self
.out_z
.nan(0)
304 # XXX WEIRDNESS for FP16 non-canonical NaN handling
307 ## if a is zero and b is NaN return -b
308 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
309 # m.d.comb += self.out_do_z.eq(1)
310 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
312 ## if b is zero and a is NaN return -a
313 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
314 # m.d.comb += self.out_do_z.eq(1)
315 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
317 ## if a is -zero and b is NaN return -b
318 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
319 # m.d.comb += self.out_do_z.eq(1)
320 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
322 ## if b is -zero and a is NaN return -a
323 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
324 # m.d.comb += self.out_do_z.eq(1)
325 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
327 # if a is inf return inf (or NaN)
328 with m
.Elif(self
.i
.a
.is_inf
):
329 m
.d
.comb
+= self
.out_do_z
.eq(1)
330 m
.d
.comb
+= self
.out_z
.inf(self
.i
.a
.s
)
331 # if a is inf and signs don't match return NaN
332 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
333 m
.d
.comb
+= self
.out_z
.nan(0)
335 # if b is inf return inf
336 with m
.Elif(self
.i
.b
.is_inf
):
337 m
.d
.comb
+= self
.out_do_z
.eq(1)
338 m
.d
.comb
+= self
.out_z
.inf(self
.i
.b
.s
)
340 # if a is zero and b zero return signed-a/b
341 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
342 m
.d
.comb
+= self
.out_do_z
.eq(1)
343 m
.d
.comb
+= self
.out_z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
347 # if a is zero return b
348 with m
.Elif(self
.i
.a
.is_zero
):
349 m
.d
.comb
+= self
.out_do_z
.eq(1)
350 m
.d
.comb
+= self
.out_z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
353 # if b is zero return a
354 with m
.Elif(self
.i
.b
.is_zero
):
355 m
.d
.comb
+= self
.out_do_z
.eq(1)
356 m
.d
.comb
+= self
.out_z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
359 # if a equal to -b return zero (+ve zero)
360 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
361 m
.d
.comb
+= self
.out_do_z
.eq(1)
362 m
.d
.comb
+= self
.out_z
.zero(0)
364 # Denormalised Number checks
366 m
.d
.comb
+= self
.out_do_z
.eq(0)
372 def __init__(self
, id_wid
):
375 self
.in_mid
= Signal(id_wid
, reset_less
=True)
376 self
.out_mid
= Signal(id_wid
, reset_less
=True)
382 if self
.id_wid
is not None:
383 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
386 class FPAddSpecialCases(FPState
, FPID
):
387 """ special cases: NaNs, infs, zeros, denormalised
388 NOTE: some of these are unique to add. see "Special Operations"
389 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ creates the "special_cases" state and its combinatorial module

        width:  operand bit-width, passed to FPAddSpecialCasesMod
        id_wid: width of the multiplex-id, passed to FPID and to
                FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # FPAddSpecialCasesMod takes (width, id_wid): omitting id_wid
    # raises TypeError as soon as this state is constructed.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # registered copy of the module's result, same spec as its output
    self.out_z = self.mod.ospec()
    # driven from the module's out_do_z by self.mod.setup()
    self.out_do_z = Signal(reset_less=True)
399 def setup(self
, m
, in_a
, in_b
, in_mid
):
400 """ links module to inputs and outputs
402 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
403 if self
.in_mid
is not None:
404 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
408 with m
.If(self
.out_do_z
):
409 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
412 m
.next
= "denormalise"
415 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
416 """ special cases: NaNs, infs, zeros, denormalised
417 NOTE: some of these are unique to add. see "Special Operations"
418 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
421 def __init__(self
, width
, id_wid
):
422 FPState
.__init
__(self
, "special_cases")
423 FPID
.__init
__(self
, id_wid
)
424 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
425 self
.out_z
= self
.smod
.ospec()
426 self
.out_do_z
= Signal(reset_less
=True)
428 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
429 self
.o
= self
.dmod
.ospec()
431 def setup(self
, m
, in_a
, in_b
, in_mid
):
432 """ links module to inputs and outputs
434 self
.smod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
435 self
.dmod
.setup(m
, in_a
, in_b
)
436 if self
.in_mid
is not None:
437 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
441 with m
.If(self
.out_do_z
):
442 m
.d
.sync
+= self
.out_z
.v
.eq(self
.smod
.out_z
.v
) # only take output
446 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
447 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
450 class FPAddDeNormMod(FPState
):
452 def __init__(self
, width
, id_wid
):
455 self
.i
= self
.ispec()
456 self
.o
= self
.ospec()
459 return FPNumBase2Ops(self
.width
, self
.id_wid
)
462 return FPNumBase2Ops(self
.width
, self
.id_wid
)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        m:    parent Module; this object is registered on it as the
              "denormalise" submodule.
        in_a: first operand, wired to self.i.a
        in_b: second operand, wired to self.i.b
    """
    m.submodules.denormalise = self
    m.d.comb += self.i.a.eq(in_a)
    m.d.comb += self.i.b.eq(in_b)
471 def elaborate(self
, platform
):
473 m
.submodules
.denorm_in_a
= self
.i
.a
474 m
.submodules
.denorm_in_b
= self
.i
.b
475 m
.submodules
.denorm_out_a
= self
.o
.a
476 m
.submodules
.denorm_out_b
= self
.o
.b
477 # hmmm, don't like repeating identical code
478 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
479 with m
.If(self
.i
.a
.exp_n127
):
480 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
482 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
484 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
485 with m
.If(self
.i
.b
.exp_n127
):
486 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
488 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
493 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "denormalise" state and its combinatorial module

        width:  operand bit-width, passed to FPAddDeNormMod and used
                for the two registered outputs
        id_wid: width of the multiplex-id, passed to FPID and to
                FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    # FPAddDeNormMod takes (width, id_wid): omitting id_wid raises
    # TypeError as soon as this state is constructed.
    self.mod = FPAddDeNormMod(width, id_wid)
    # outputs of this state
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
502 def setup(self
, m
, in_a
, in_b
, in_mid
):
503 """ links module to inputs and outputs
505 self
.mod
.setup(m
, in_a
, in_b
)
506 if self
.in_mid
is not None:
507 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
511 # Denormalised Number checks
513 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
514 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
517 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the operand inputs/outputs for multi-cycle alignment

        width: operand bit-width, used for all four operand records
    """
    # operands to be aligned
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # operands after this alignment step
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # elaborate() defaults this to 0 and sets it to 1 when the
    # exponents are equal
    self.exp_eq = Signal(reset_less=True)
526 def elaborate(self
, platform
):
527 # This one however (single-cycle) will do the shift
532 m
.submodules
.align_in_a
= self
.in_a
533 m
.submodules
.align_in_b
= self
.in_b
534 m
.submodules
.align_out_a
= self
.out_a
535 m
.submodules
.align_out_b
= self
.out_b
537 # NOTE: this does *not* do single-cycle multi-shifting,
538 # it *STAYS* in the align state until exponents match
540 # exponent of a greater than b: shift b down
541 m
.d
.comb
+= self
.exp_eq
.eq(0)
542 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
543 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
544 agtb
= Signal(reset_less
=True)
545 altb
= Signal(reset_less
=True)
546 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
547 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
549 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
550 # exponent of b greater than a: shift a down
552 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
553 # exponents equal: move to next stage.
555 m
.d
.comb
+= self
.exp_eq
.eq(1)
559 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "align" state around FPAddAlignMultiMod

        width:  operand bit-width, passed to the module and used for
                the two outputs
        id_wid: width of the multiplex-id, passed to FPID
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # outputs of this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # local copy of the module's exp_eq flag (wired up in setup())
    self.exp_eq = Signal(reset_less=True)
569 def setup(self
, m
, in_a
, in_b
, in_mid
):
570 """ links module to inputs and outputs
572 m
.submodules
.align
= self
.mod
573 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
574 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
575 #m.d.comb += self.out_a.eq(self.mod.out_a)
576 #m.d.comb += self.out_b.eq(self.mod.out_b)
577 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
578 if self
.in_mid
is not None:
579 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
583 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
584 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
585 with m
.If(self
.exp_eq
):
591 def __init__(self
, width
):
592 self
.a
= FPNumIn(None, width
)
593 self
.b
= FPNumIn(None, width
)
596 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
)]
599 class FPAddAlignSingleMod
:
601 def __init__(self
, width
, id_wid
):
604 self
.i
= self
.ispec()
605 self
.o
= self
.ospec()
608 return FPNumBase2Ops(self
.width
, self
.id_wid
)
611 return FPNumIn2Ops(self
.width
)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        m:    parent Module; this object is registered on it as the
              "align" submodule.
        in_a: first operand, wired to self.i.a
        in_b: second operand, wired to self.i.b
    """
    m.submodules.align = self
    m.d.comb += self.i.a.eq(in_a)
    m.d.comb += self.i.b.eq(in_b)
620 def elaborate(self
, platform
):
621 """ Aligns A against B or B against A, depending on which has the
622 greater exponent. This is done in a *single* cycle using
623 variable-width bit-shift
625 the shifter used here is quite expensive in terms of gates.
626 Mux A or B in (and out) into temporaries, as only one of them
627 needs to be aligned against the other
631 m
.submodules
.align_in_a
= self
.i
.a
632 m
.submodules
.align_in_b
= self
.i
.b
633 m
.submodules
.align_out_a
= self
.o
.a
634 m
.submodules
.align_out_b
= self
.o
.b
636 # temporary (muxed) input and output to be shifted
637 t_inp
= FPNumBase(self
.width
)
638 t_out
= FPNumIn(None, self
.width
)
639 espec
= (len(self
.i
.a
.e
), True)
640 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
641 m
.submodules
.align_t_in
= t_inp
642 m
.submodules
.align_t_out
= t_out
643 m
.submodules
.multishift_r
= msr
645 ediff
= Signal(espec
, reset_less
=True)
646 ediffr
= Signal(espec
, reset_less
=True)
647 tdiff
= Signal(espec
, reset_less
=True)
648 elz
= Signal(reset_less
=True)
649 egz
= Signal(reset_less
=True)
651 # connect multi-shifter to t_inp/out mantissa (and tdiff)
652 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
653 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
654 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
655 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
656 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
658 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
659 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
660 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
661 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
663 # default: A-exp == B-exp, A and B untouched (fall through)
664 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
665 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
666 # only one shifter (muxed)
667 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
668 # exponent of a greater than b: shift b down
670 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
673 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
675 # exponent of b greater than a: shift a down
677 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
680 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
685 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the single-cycle "align" state and its module

        width:  operand bit-width, passed to FPAddAlignSingleMod and
                used for the two outputs
        id_wid: width of the multiplex-id, passed to FPID and to
                FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)
    # FPAddAlignSingleMod takes (width, id_wid): omitting id_wid
    # raises TypeError as soon as this state is constructed.
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # outputs of this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
694 def setup(self
, m
, in_a
, in_b
, in_mid
):
695 """ links module to inputs and outputs
697 self
.mod
.setup(m
, in_a
, in_b
)
698 if self
.in_mid
is not None:
699 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
703 # NOTE: could be done as comb
704 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
705 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
709 class FPAddAlignSingleAdd(FPState
, FPID
):
711 def __init__(self
, width
, id_wid
):
712 FPState
.__init
__(self
, "align")
713 FPID
.__init
__(self
, id_wid
)
714 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
715 self
.o
= self
.mod
.ospec()
717 self
.a0mod
= FPAddStage0Mod(width
, id_wid
)
718 self
.a0o
= self
.a0mod
.ospec()
720 self
.a1mod
= FPAddStage1Mod(width
)
721 self
.a1o
= self
.a1mod
.ospec()
723 def setup(self
, m
, in_a
, in_b
, in_mid
):
724 """ links module to inputs and outputs
726 self
.mod
.setup(m
, in_a
, in_b
)
727 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
729 self
.a0mod
.setup(m
, self
.o
.a
, self
.o
.b
)
730 m
.d
.comb
+= self
.a0o
.eq(self
.a0mod
.o
)
732 self
.a1mod
.setup(m
, self
.a0o
.tot
, self
.a0o
.z
)
734 if self
.in_mid
is not None:
735 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
739 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
740 m
.next
= "normalise_1"
743 class FPAddStage0Data
:
745 def __init__(self
, width
):
746 self
.z
= FPNumBase(width
, False)
747 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
750 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
)]
753 class FPAddStage0Mod
:
755 def __init__(self
, width
, id_wid
):
758 self
.i
= self
.ispec()
759 self
.o
= self
.ospec()
762 return FPNumBase2Ops(self
.width
, self
.id_wid
)
765 return FPAddStage0Data(self
.width
)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        m:    parent Module; this object is registered on it as the
              "add0" submodule.
        in_a: first (aligned) operand, wired to self.i.a
        in_b: second (aligned) operand, wired to self.i.b
    """
    m.submodules.add0 = self
    m.d.comb += self.i.a.eq(in_a)
    m.d.comb += self.i.b.eq(in_b)
774 def elaborate(self
, platform
):
776 m
.submodules
.add0_in_a
= self
.i
.a
777 m
.submodules
.add0_in_b
= self
.i
.b
778 m
.submodules
.add0_out_z
= self
.o
.z
780 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
782 # store intermediate tests (and zero-extended mantissas)
783 seq
= Signal(reset_less
=True)
784 mge
= Signal(reset_less
=True)
785 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
786 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
787 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
788 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
789 am0
.eq(Cat(self
.i
.a
.m
, 0)),
790 bm0
.eq(Cat(self
.i
.b
.m
, 0))
792 # same-sign (both negative or both positive) add mantissas
795 self
.o
.tot
.eq(am0
+ bm0
),
796 self
.o
.z
.s
.eq(self
.i
.a
.s
)
798 # a mantissa greater than b, use a
801 self
.o
.tot
.eq(am0
- bm0
),
802 self
.o
.z
.s
.eq(self
.i
.a
.s
)
804 # b mantissa greater than a, use b
807 self
.o
.tot
.eq(bm0
- am0
),
808 self
.o
.z
.s
.eq(self
.i
.b
.s
)
813 class FPAddStage0(FPState
, FPID
):
814 """ First stage of add. covers same-sign (add) and subtract
815 special-casing when mantissas are greater or equal, to
816 give greatest accuracy.
def __init__(self, width, id_wid):
    """ creates the "add_0" state and its combinatorial module

        width:  operand bit-width, passed to FPAddStage0Mod
        id_wid: width of the multiplex-id, passed to FPID and to
                FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    # FPAddStage0Mod takes (width, id_wid): omitting id_wid raises
    # TypeError as soon as this state is constructed.
    self.mod = FPAddStage0Mod(width, id_wid)
    # output of this state, same spec as the module's output
    self.o = self.mod.ospec()
825 def setup(self
, m
, in_a
, in_b
, in_mid
):
826 """ links module to inputs and outputs
828 self
.mod
.setup(m
, in_a
, in_b
)
829 if self
.in_mid
is not None:
830 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
834 # NOTE: these could be done as combinatorial (merge add0+add1)
835 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
839 class FPAddStage1Data
:
841 def __init__(self
, width
):
842 self
.z
= FPNumBase(width
, False)
846 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
)]
850 class FPAddStage1Mod(FPState
):
851 """ Second stage of add: preparation for normalisation.
852 detects when tot sum is too big (tot[27] is kinda a carry bit)
855 def __init__(self
, width
):
857 self
.i
= self
.ispec()
858 self
.o
= self
.ospec()
861 return FPAddStage0Data(self
.width
)
864 return FPAddStage1Data(self
.width
)
866 def setup(self
, m
, in_tot
, in_z
):
867 """ links module to inputs and outputs
869 m
.submodules
.add1
= self
870 m
.submodules
.add1_out_overflow
= self
.o
.of
872 m
.d
.comb
+= self
.i
.z
.eq(in_z
)
873 m
.d
.comb
+= self
.i
.tot
.eq(in_tot
)
875 def elaborate(self
, platform
):
877 #m.submodules.norm1_in_overflow = self.in_of
878 #m.submodules.norm1_out_overflow = self.out_of
879 #m.submodules.norm1_in_z = self.in_z
880 #m.submodules.norm1_out_z = self.out_z
881 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
882 # tot[-1] (MSB) gets set when the sum overflows. shift result down
883 with m
.If(self
.i
.tot
[-1]):
885 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
886 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
887 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
888 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
889 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
890 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
892 # tot[-1] (MSB) zero case
895 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
896 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
897 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
898 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
899 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
904 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ creates the "add_1" state around FPAddStage1Mod

        width:  operand bit-width, passed to the module and used for
                the out_z record
        id_wid: width of the multiplex-id, passed to FPID
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    self.mod = FPAddStage1Mod(width)
    # outputs of this state: result number and its overflow bits
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe for the following normalise stage: setup() clears it by
    # default.  NOTE: unlike the other signals here it is not
    # declared reset_less.
    self.norm_stb = Signal()
914 def setup(self
, m
, in_tot
, in_z
, in_mid
):
915 """ links module to inputs and outputs
917 self
.mod
.setup(m
, in_tot
, in_z
)
919 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
921 if self
.in_mid
is not None:
922 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
926 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
927 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
928 m
.d
.sync
+= self
.norm_stb
.eq(1)
929 m
.next
= "normalise_1"
932 class FPNormaliseModSingle
:
934 def __init__(self
, width
):
936 self
.in_z
= self
.ispec()
937 self
.out_z
= self
.ospec()
940 return FPNumBase(self
.width
, False)
943 return FPNumBase(self
.width
, False)
def setup(self, m, in_z, out_z):
    """ links module to inputs and outputs

        m:     parent Module; this object is registered on it as the
               "normalise" submodule.
        in_z:  number to be normalised, wired to self.in_z
        out_z: caller-supplied record, driven from self.out_z
    """
    m.submodules.normalise = self
    m.d.comb += self.in_z.eq(in_z)
    # export the normalised result to the caller
    m.d.comb += out_z.eq(self.out_z)
952 def elaborate(self
, platform
):
955 mwid
= self
.out_z
.m_width
+2
956 pe
= PriorityEncoder(mwid
)
957 m
.submodules
.norm_pe
= pe
959 m
.submodules
.norm1_out_z
= self
.out_z
960 m
.submodules
.norm1_in_z
= self
.in_z
962 in_z
= FPNumBase(self
.width
, False)
964 m
.submodules
.norm1_insel_z
= in_z
965 m
.submodules
.norm1_insel_overflow
= in_of
967 espec
= (len(in_z
.e
), True)
968 ediff_n126
= Signal(espec
, reset_less
=True)
969 msr
= MultiShiftRMerge(mwid
, espec
)
970 m
.submodules
.multishift_r
= msr
972 m
.d
.comb
+= in_z
.eq(self
.in_z
)
973 m
.d
.comb
+= in_of
.eq(self
.in_of
)
974 # initialise out from in (overridden below)
975 m
.d
.comb
+= self
.out_z
.eq(in_z
)
976 m
.d
.comb
+= self
.out_of
.eq(in_of
)
977 # normalisation decrease condition
978 decrease
= Signal(reset_less
=True)
979 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
982 # *sigh* not entirely obvious: count leading zeros (clz)
983 # with a PriorityEncoder: to find from the MSB
984 # we reverse the order of the bits.
985 temp_m
= Signal(mwid
, reset_less
=True)
986 temp_s
= Signal(mwid
+1, reset_less
=True)
987 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
989 # cat round and guard bits back into the mantissa
990 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
991 pe
.i
.eq(temp_m
[::-1]), # inverted
992 clz
.eq(pe
.o
), # count zeros from MSB down
993 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
994 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
995 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1002 def __init__(self
, width
):
1004 self
.roundz
= Signal(reset_less
=True)
1005 self
.z
= FPNumBase(width
, False)
1008 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
)]
1011 class FPNorm1ModSingle
:
1013 def __init__(self
, width
):
1015 self
.i
= self
.ispec()
1016 self
.o
= self
.ospec()
1019 return FPAddStage1Data(self
.width
)
1022 return FPNorm1Data(self
.width
)
1024 def setup(self
, m
, in_z
, in_of
, out_z
):
1025 """ links module to inputs and outputs
1027 m
.submodules
.normalise_1
= self
1029 m
.d
.comb
+= self
.i
.z
.eq(in_z
)
1030 m
.d
.comb
+= self
.i
.of
.eq(in_of
)
1032 m
.d
.comb
+= out_z
.eq(self
.o
.z
)
1034 def elaborate(self
, platform
):
1037 mwid
= self
.o
.z
.m_width
+2
1038 pe
= PriorityEncoder(mwid
)
1039 m
.submodules
.norm_pe
= pe
1042 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1044 m
.submodules
.norm1_out_z
= self
.o
.z
1045 m
.submodules
.norm1_out_overflow
= of
1046 m
.submodules
.norm1_in_z
= self
.i
.z
1047 m
.submodules
.norm1_in_overflow
= self
.i
.of
1050 m
.submodules
.norm1_insel_z
= i
.z
1051 m
.submodules
.norm1_insel_overflow
= i
.of
1053 espec
= (len(i
.z
.e
), True)
1054 ediff_n126
= Signal(espec
, reset_less
=True)
1055 msr
= MultiShiftRMerge(mwid
, espec
)
1056 m
.submodules
.multishift_r
= msr
1058 m
.d
.comb
+= i
.eq(self
.i
)
1059 # initialise out from in (overridden below)
1060 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1061 m
.d
.comb
+= of
.eq(i
.of
)
1062 # normalisation increase/decrease conditions
1063 decrease
= Signal(reset_less
=True)
1064 increase
= Signal(reset_less
=True)
1065 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1066 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1068 with m
.If(decrease
):
1069 # *sigh* not entirely obvious: count leading zeros (clz)
1070 # with a PriorityEncoder: to find from the MSB
1071 # we reverse the order of the bits.
1072 temp_m
= Signal(mwid
, reset_less
=True)
1073 temp_s
= Signal(mwid
+1, reset_less
=True)
1074 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1075 # make sure that the amount to decrease by does NOT
1076 # go below the minimum non-INF/NaN exponent
1077 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1080 # cat round and guard bits back into the mantissa
1081 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1082 pe
.i
.eq(temp_m
[::-1]), # inverted
1083 clz
.eq(limclz
), # count zeros from MSB down
1084 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1085 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1086 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1087 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1088 # overflow in bits 0..1: got shifted too (leave sticky)
1089 of
.guard
.eq(temp_s
[1]), # guard
1090 of
.round_bit
.eq(temp_s
[0]), # round
1093 with m
.Elif(increase
):
1094 temp_m
= Signal(mwid
+1, reset_less
=True)
1096 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1098 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1099 # connect multi-shifter to inp/out mantissa (and ediff)
1101 msr
.diff
.eq(ediff_n126
),
1102 self
.o
.z
.m
.eq(msr
.m
[3:]),
1103 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1104 # overflow in bits 0..1: got shifted too (leave sticky)
1105 of
.guard
.eq(temp_s
[2]), # guard
1106 of
.round_bit
.eq(temp_s
[1]), # round
1107 of
.sticky
.eq(temp_s
[0]), # sticky
1108 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
class FPNorm1ModMulti:
    """ One-bit-per-evaluation normalisation step (combinatorial).

        Inputs are either (in_z, in_of) or (temp_z, temp_of), chosen by
        in_select.  The selected value is copied to (out_z, out_of) and
        then adjusted by at most one bit position:

        * "decrease": in_z.m_msbzero & in_z.exp_gt_n126 - exponent minus
          one, mantissa shifted up, guard bit pulled into mantissa bit 0.
        * "increase": in_z.exp_lt_n126 - exponent plus one, mantissa
          shifted down, the bits shifted out folded into guard/round/sticky.

        out_norm is high whenever either adjustment was applied.

        NOTE(review): FPNorm1Multi calls self.mod.setup(...) on this class,
        but no setup() method is defined here - confirm whether this class
        is still in use.
    """

    def __init__(self, width, single_cycle=True):
        # single_cycle is accepted for signature compatibility; nothing in
        # this class reads it.
        self.width = width          # read by elaborate()
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)  # driven by elaborate()
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies holding whichever input pair is selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)

        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)

        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end

        with m.If(decrease):
            # order matters: bit 0 is written after the whole-mantissa shift
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit),
            ]

        return m
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1", single-cycle flavour.

        Wraps an FPNorm1ModSingle and registers its round-up flag into
        out_roundz on each clock in which the state is active.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # single_cycle is part of the signature but is not read here.
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        normaliser = FPNorm1ModSingle(width)
        self.mod = normaliser
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        # ids are optional: only connect when this state carries one
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        # NOTE(review): FPNormToPack reads the same module's flag through
        # `.o.roundz`; confirm `out_of` still exists on FPNorm1ModSingle.
        # NOTE(review): no m.next transition is visible for this state.
        round_up = self.mod.out_of.roundz
        m.d.sync += self.out_roundz.eq(round_up)
# FPNorm1Multi: FSM state "normalise_1", multi-cycle flavour.  Wraps an
# FPNorm1ModMulti and owns the stb/ack handshake signals plus the temp_z /
# temp_of values that action() registers from the module outputs.
1204 class FPNorm1Multi(FPState
, FPID
):
# Creates the wrapped module and every signal/number this state owns.
1206 def __init__(self
, width
, id_wid
):
1207 FPID
.__init
__(self
, id_wid
)
1208 FPState
.__init
__(self
, "normalise_1")
1209 self
.mod
= FPNorm1ModMulti(width
)
1210 self
.stb
= Signal(reset_less
=True)
1211 self
.ack
= Signal(reset
=0, reset_less
=True)
1212 self
.out_norm
= Signal(reset_less
=True)
1213 self
.in_accept
= Signal(reset_less
=True)
1214 self
.temp_z
= FPNumBase(width
)
1215 self
.temp_of
= Overflow()
1216 self
.out_z
= FPNumBase(width
)
1217 self
.out_roundz
= Signal(reset_less
=True)
# Hands all connections to self.mod.setup(), mirrors norm_stb onto self.stb
# (comb) and schedules ack to be cleared on the sync domain.
# NOTE(review): FPNorm1ModMulti, as visible in this file, defines no setup()
# method - confirm this call can succeed.
1219 def setup(self
, m
, in_z
, in_of
, norm_stb
, in_mid
):
1220 """ links module to inputs and outputs
1222 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1223 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1224 self
.out_z
, self
.out_norm
)
1226 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1227 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1229 if self
.in_mid
is not None:
1230 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
# Per-clock behaviour while this state is active: in_accept = stb & ~ack;
# temp_of / temp_z are registered from mod.out_of / out_z; ack and
# out_roundz are updated depending on out_norm and in_accept.
# NOTE(review): the bodies / else-branches of the two m.If blocks below are
# not fully visible in this text - confirm the branch structure and the
# state transition before changing anything here.
1232 def action(self
, m
):
1234 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1235 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1236 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1237 with m
.If(self
.out_norm
):
1238 with m
.If(self
.in_accept
):
1243 m
.d
.sync
+= self
.ack
.eq(0)
1245 # normalisation not required (or done).
1247 m
.d
.sync
+= self
.ack
.eq(1)
1248 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" for the compact pipeline.

        Chains four combinatorial modules - normalise, round, corrections
        and pack - inside one state.  Only the final packed value (out_z.v)
        is registered, in action(), before moving to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width  # setup() sizes every chained module from this

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs

            Creates self.pmod and self.out_z as a side effect, so it must
            run before action().
        """
        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width)
        n_out = nmod.ospec()
        nmod.setup(m, in_z, in_of, n_out.z)
        m.d.comb += n_out.roundz.eq(nmod.o.roundz)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width)
        r_out_z = rmod.ospec()
        rmod.setup(m, n_out.z, n_out.roundz)
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width)
        c_out_z = cmod.ospec()
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width)
        self.out_z = self.pmod.ospec()
        self.pmod.setup(m, c_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)  # copies incoming ID to outgoing
        # outputs packed result
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v)
        m.next = "pack_put_z"
# NOTE(review): the class header and the ispec/ospec `def` lines were not
# present in this text; they are restored from the call sites
# FPRoundMod(width), .ispec(), .ospec(), .setup(m, z, roundz) and .out_z.
# Confirm the class has no base classes.
class FPRoundMod:
    """ Combinatorial rounding stage.

        Copies i.z to out_z; when i.roundz is set the mantissa is
        incremented, and if the mantissa was all ones (i.z.m == i.z.m1s)
        the exponent is incremented as well.
    """

    def __init__(self, width):
        self.width = width  # ispec()/ospec() size their results from this
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record: exposes .z and .roundz (see setup) """
        return FPNorm1Data(self.width)

    def ospec(self):
        """ output: a fresh number of this module's width """
        return FPNumBase(self.width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module under m.submodules.roundz and drives its
            inputs from in_z / roundz
        """
        m.submodules.roundz = self

        m.d.comb += self.i.z.eq(in_z)
        m.d.comb += self.i.roundz.eq(roundz)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.out_z.eq(self.i.z)
        with m.If(self.i.roundz):
            # mantissa rounds up
            m.d.comb += self.out_z.m.eq(self.i.z.m + 1)
            with m.If(self.i.z.m == self.i.z.m1s):  # all 1s
                # exponent up
                m.d.comb += self.out_z.e.eq(self.i.z.e + 1)
        return m
class FPRound(FPState, FPID):
    """ FSM state "round": registers the output of an FPRoundMod and then
        moves on to the "corrections" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        rounder = FPRoundMod(width)
        self.mod = rounder
        self.out_z = rounder.ospec()

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        # ids are optional: only connect when this state carries one
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        rounded = self.mod.out_z
        m.d.sync += self.out_z.eq(rounded)
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial post-rounding fix-up.

        Passes in_z through to out_z, except that when
        in_z.is_denormalised is set the output exponent is replaced by
        in_z.N127.
    """

    def __init__(self, width):
        self.width = width  # ispec()/ospec() size their results from this
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input number format """
        return FPNumOut(self.width, False)

    def ospec(self):
        """ output number format (same shape as the input) """
        return FPNumOut(self.width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        # default: pass-through; the exponent is overridden below
        m.d.comb += self.out_z.eq(self.in_z)
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": registers the output of an
        FPCorrectionsMod into out_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        corrector = FPCorrectionsMod(width)
        self.mod = corrector
        self.out_z = corrector.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # ids are optional: only connect when this state carries one
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        # NOTE(review): no m.next transition is visible for this state.
        corrected = self.mod.out_z
        m.d.sync += self.out_z.eq(corrected)
# NOTE(review): the class header and the ispec/ospec `def` lines were not
# present in this text; they are restored from the call sites
# FPPackMod(width), .ispec(), .ospec(), .setup(m, z) and .out_z.
# Confirm the class has no base classes.
class FPPackMod:
    """ Combinatorial packing stage.

        When in_z.is_overflowed is set, out_z is built with
        out_z.inf(sign); otherwise it is built from the input sign,
        exponent and mantissa with out_z.create(...).
    """

    def __init__(self, width):
        self.width = width  # ispec()/ospec() size their results from this
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input number format """
        return FPNumOut(self.width, False)

    def ospec(self):
        """ output number format (same shape as the input) """
        return FPNumOut(self.width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        m.submodules.pack_in_z = self.in_z
        with m.If(self.in_z.is_overflowed):
            m.d.comb += self.out_z.inf(self.in_z.s)
        with m.Else():
            # the two encodings are mutually exclusive: without the Else
            # the create() assignment would always win
            m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e,
                                          self.in_z.m)
        return m
1426 def __init__(self
, width
, id_wid
):
1427 self
.z
= FPNumOut(width
, False)
1428 self
.mid
= Signal(id_wid
, reset_less
=True)
1431 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState, FPID):
    """ FSM state "pack": registers the packed value produced by an
        FPPackMod and then moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: whatever the wrapped module expects """
        return self.mod.ispec()

    def ospec(self):
        """ output format: whatever the wrapped module produces """
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        # only the packed bit-vector (.v) is registered
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ Output state: presents in_z on out_z with a stb/ack handshake.

        * state:    name of this FSM state
        * in_z:     value to emit (its .v is copied)
        * out_z:    destination; must expose .v, .stb and .ack
        * in_mid / out_mid: optional id to forward (skipped when in_mid
          is None)
        * to_state: state entered once the output has been acknowledged;
          defaults to "get_ops"
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): sync domain chosen to match every other register
        # update in this state - confirm.
        m.d.sync += [
            self.out_z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.stb & self.out_z.ack):
            # receiver took the value: drop the strobe and move on
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep (or start) offering the value
            m.d.sync += self.out_z.stb.eq(1)
class FPPutZIdx(FPState):
    """ Output state for multiple destinations: presents in_z on
        out_zs[in_mid] with a stb/ack handshake.

        * state:    name of this FSM state
        * in_z:     value to emit (its .v is copied)
        * out_zs:   indexable collection of destinations, each exposing
          .v, .stb and .ack
        * in_mid:   index selecting the destination
        * to_state: state entered once the output has been acknowledged;
          defaults to "get_ops"
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z  # action() reads this
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # local copies of the selected destination's handshake lines
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                     ]
        # NOTE(review): sync domain chosen to match the strobe updates
        # below - confirm.
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # receiver took the value: drop the strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep (or start) offering the value
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
# FPADDBaseMod: builds the floating-point add pipeline as an FSM.  Pipeline
# stages are collected through add_state() and emitted by get_fragment(),
# one m.State per collected stage, keyed by the stage's state_from.
1513 class FPADDBaseMod(FPID
):
# Stores the single_cycle / compact flags and creates the input trigger,
# the two operand signals and the output FPOp.
1515 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1518 * width: bit-width of IEEE754. supported: 16, 32, 64
1519 * id_wid: an identifier that is sync-connected to the input
1520 * single_cycle: True indicates each stage to complete in 1 clock
1521 * compact: True indicates a reduced number of stages
1523 FPID
.__init
__(self
, id_wid
)
1525 self
.single_cycle
= single_cycle
1526 self
.compact
= compact
1528 self
.in_t
= Trigger()
1529 self
.in_a
= Signal(width
)
1530 self
.in_b
= Signal(width
)
1531 self
.out_z
= FPOp(width
)
# Appends *state* to self.states.
# NOTE(review): callers assign the result of add_state(); the return
# statement is not visible in this text.
1535 def add_state(self
, state
):
1536 self
.states
.append(state
)
# Registers out_z and in_t as submodules, calls the compact and/or longer
# fragment builder, then opens m.FSM() and one m.State per collected state.
# NOTE(review): the condition selecting between the two builders and the
# body of each m.State are not visible in this text.
1539 def get_fragment(self
, platform
=None):
1540 """ creates the HDL code-fragment for FPAdd
1543 m
.submodules
.out_z
= self
.out_z
1544 m
.submodules
.in_t
= self
.in_t
1546 self
.get_compact_fragment(m
, platform
)
1548 self
.get_longer_fragment(m
, platform
)
1550 with m
.FSM() as fsm
:
1552 for state
in self
.states
:
1553 with m
.State(state
.state_from
):
# Long pipeline, one state per step: get_ops -> special cases -> denorm ->
# align -> add stage 0 -> add stage 1 -> normalise -> round -> corrections
# -> pack -> "pack_put_z", plus a "put_z" state fed from the special-cases
# output.
# NOTE(review): `a` and `b` passed to sc.setup / dn.setup are not assigned
# in the visible lines.
# NOTE(review): the id is not chained stage-to-stage everywhere: both
# n1.setup calls take add0.in_mid (not add1's), pa.setup takes rn.in_mid
# (not cor's), and the "put_z" state takes pa.in_mid although its value
# comes from sc (get_compact_fragment uses sc.in_mid there) - confirm.
1558 def get_longer_fragment(self
, m
, platform
=None):
1560 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1561 self
.in_a
, self
.in_b
, self
.width
))
1562 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1566 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1567 sc
.setup(m
, a
, b
, self
.in_mid
)
1569 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1570 dn
.setup(m
, a
, b
, sc
.in_mid
)
1572 if self
.single_cycle
:
1573 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1574 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1576 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1577 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1579 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1580 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1582 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1583 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1585 if self
.single_cycle
:
1586 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1587 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1589 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1590 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1592 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1593 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1595 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1596 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1598 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1599 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1601 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1602 pa
.in_mid
, self
.out_mid
))
1604 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1605 pa
.in_mid
, self
.out_mid
))
# Compact pipeline: get_ops -> special cases + denorm -> align + add ->
# normalise-to-pack -> "pack_put_z", plus a "put_z" state fed from the
# special-cases output.  Each stage takes its id from the preceding stage.
# NOTE(review): `a` and `b` passed to sc.setup are not assigned in the
# visible lines.
1607 def get_compact_fragment(self
, m
, platform
=None):
1609 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1610 self
.in_a
, self
.in_b
, self
.width
))
1611 get
.setup(m
, self
.in_a
, self
.in_b
, self
.in_t
.stb
, self
.in_t
.ack
)
1615 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1616 sc
.setup(m
, a
, b
, self
.in_mid
)
1618 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1619 alm
.setup(m
, sc
.o
.a
, sc
.o
.b
, sc
.in_mid
)
1621 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1622 n1
.setup(m
, alm
.a1o
.z
, alm
.a1o
.of
, alm
.in_mid
)
1624 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
, self
.out_z
,
1625 n1
.in_mid
, self
.out_mid
))
1627 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1628 sc
.in_mid
, self
.out_mid
))
# FPADDBase: FSM state "fpadd" that itself wraps a complete FPADDBaseMod
# pipeline (self.mod) and bridges the outer stb/ack handshake to the inner
# module's in_t trigger and out_z output.
1631 class FPADDBase(FPState
, FPID
):
# Creates the inner FPADDBaseMod plus the operand, trigger and handshake
# signals owned by this state.
1633 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1636 * width: bit-width of IEEE754. supported: 16, 32, 64
1637 * id_wid: an identifier that is sync-connected to the input
1638 * single_cycle: True indicates each stage to complete in 1 clock
1640 FPID
.__init
__(self
, id_wid
)
1641 FPState
.__init
__(self
, "fpadd")
1643 self
.single_cycle
= single_cycle
1644 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1646 self
.in_t
= Trigger()
1647 self
.in_a
= Signal(width
)
1648 self
.in_b
= Signal(width
)
1649 #self.out_z = FPOp(width)
1651 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1652 self
.in_accept
= Signal(reset_less
=True)
1653 self
.add_stb
= Signal(reset_less
=True)
1654 self
.add_ack
= Signal(reset
=0, reset_less
=True)
# Combinatorially connects operands, id, trigger and output between this
# state and self.mod, registers add_stb, schedules add_ack / out_z.ack to
# be cleared on the sync domain, and adds self.mod as submodule "fpadd".
# NOTE(review): no assignment of `b` to self.in_b, and no binding of the
# out_z parameter to self.out_z, is visible in this text.
# NOTE(review): self.out_z.v / self.out_z.stb are assigned on the comb
# domain here, while action() assigns the same signals on sync - confirm.
1656 def setup(self
, m
, a
, b
, add_stb
, in_mid
, out_z
, out_mid
):
1658 self
.out_mid
= out_mid
1659 m
.d
.comb
+= [self
.in_a
.eq(a
),
1661 self
.mod
.in_a
.eq(self
.in_a
),
1662 self
.mod
.in_b
.eq(self
.in_b
),
1663 self
.in_mid
.eq(in_mid
),
1664 self
.mod
.in_mid
.eq(self
.in_mid
),
1665 self
.z_done
.eq(self
.mod
.out_z
.trigger
),
1666 #self.add_stb.eq(add_stb),
1667 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1668 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1669 self
.out_mid
.eq(self
.mod
.out_mid
),
1670 self
.out_z
.v
.eq(self
.mod
.out_z
.v
),
1671 self
.out_z
.stb
.eq(self
.mod
.out_z
.stb
),
1672 self
.mod
.out_z
.ack
.eq(self
.out_z
.ack
),
1675 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1676 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1677 m
.d
.sync
+= self
.out_z
.ack
.eq(0) # likewise
1678 #m.d.sync += self.in_t.stb.eq(0)
1680 m
.submodules
.fpadd
= self
.mod
# Per-clock behaviour while this state is active: in_accept = add_stb &
# ~add_ack; while z_done is low an accepted operand pair is acknowledged
# and the inner add is started via in_t.stb; further updates to add_ack,
# in_t.stb, out_z.ack, out_mid, out_z.v and out_z.stb follow.
# NOTE(review): several statements and branch headers of this method are
# not visible in this text - confirm the full branch structure before
# changing anything here.
1682 def action(self
, m
):
1684 # in_accept is set on incoming strobe HIGH and ack LOW.
1685 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1687 #with m.If(self.in_t.ack):
1688 # m.d.sync += self.in_t.stb.eq(0)
1689 with m
.If(~self
.z_done
):
1690 # not done: test for accepting an incoming operand pair
1691 with m
.If(self
.in_accept
):
1693 self
.add_ack
.eq(1), # acknowledge receipt...
1694 self
.in_t
.stb
.eq(1), # initiate add
1697 m
.d
.sync
+= [self
.add_ack
.eq(0),
1698 self
.in_t
.stb
.eq(0),
1699 self
.out_z
.ack
.eq(1),
1702 # done: acknowledge, and write out id and value
1703 m
.d
.sync
+= [self
.add_ack
.eq(1),
1710 if self
.in_mid
is not None:
1711 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1714 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1716 # move to output state on detecting z ack
1717 with m
.If(self
.out_z
.trigger
):
1718 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1721 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1724 def __init__(self
, width
, id_wid
):
1726 self
.id_wid
= id_wid
1728 for i
in range(rs_sz
):
1730 out_z
.name
= "out_z_%d" % i
1732 self
.res
= Array(res
)
1733 self
.in_z
= FPOp(width
)
1734 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1736 def setup(self
, m
, in_z
, in_mid
):
1737 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1738 self
.in_mid
.eq(in_mid
)]
1740 def get_fragment(self
, platform
=None):
1741 """ creates the HDL code-fragment for FPAdd
1744 m
.submodules
.res_in_z
= self
.in_z
1745 m
.submodules
+= self
.res
1757 """ FPADD: stages as follows:
1763 FPAddBase---> FPAddBaseMod
1765 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1767 FPAddBase is tricky: it is both a stage and *has* stages.
1768 Connection to FPAddBaseMod therefore requires an in stb/ack
1769 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1770 needs to be the thing that raises the incoming stb.
1773 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1776 * width: bit-width of IEEE754. supported: 16, 32, 64
1777 * id_wid: an identifier that is sync-connected to the input
1778 * single_cycle: True indicates each stage to complete in 1 clock
1781 self
.id_wid
= id_wid
1782 self
.single_cycle
= single_cycle
1784 #self.out_z = FPOp(width)
1785 self
.ids
= FPID(id_wid
)
1788 for i
in range(rs_sz
):
1791 in_a
.name
= "in_a_%d" % i
1792 in_b
.name
= "in_b_%d" % i
1793 rs
.append((in_a
, in_b
))
1797 for i
in range(rs_sz
):
1799 out_z
.name
= "out_z_%d" % i
1801 self
.res
= Array(res
)
1805 def add_state(self
, state
):
1806 self
.states
.append(state
)
1809 def get_fragment(self
, platform
=None):
1810 """ creates the HDL code-fragment for FPAdd
1813 m
.submodules
+= self
.rs
1815 in_a
= self
.rs
[0][0]
1816 in_b
= self
.rs
[0][1]
1818 out_z
= FPOp(self
.width
)
1819 out_mid
= Signal(self
.id_wid
, reset_less
=True)
1820 m
.submodules
.out_z
= out_z
1822 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1827 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1832 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1833 ab
= self
.add_state(ab
)
1834 ab
.setup(m
, a
, b
, getb
.out_decode
, self
.ids
.in_mid
,
1837 pz
= self
.add_state(FPPutZIdx("put_z", ab
.out_z
, self
.res
,
1840 with m
.FSM() as fsm
:
1842 for state
in self
.states
:
1843 with m
.State(state
.state_from
):
# Script entry point: builds an FPADD (32-bit, 5-bit id, single-cycle) and
# hands it to nmigen's cli main() together with the ports of its first
# operand pair, its first result and the id signals.
1849 if __name__
== "__main__":
1851 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1852 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1853 alu
.rs
[0][1].ports() + \
1854 alu
.res
[0].ports() + \
1855 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
# Alternative top level: the bare FPADDBase stage with its own port list.
# NOTE(review): the lines that select between this and the FPADD variant
# above are not visible in this text.
1857 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1858 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1859 alu
.in_t
.ports() + \
1860 alu
.out_z
.ports() + \
1861 [alu
.in_mid
, alu
.out_mid
])
1864 # works... but don't use, just do "python fname.py convert -t v"
1865 #print (verilog.convert(alu, ports=[
1866 # ports=alu.in_a.ports() + \
1867 # alu.in_b.ports() + \
1868 # alu.out_z.ports())