6f3cfae7779f7180a803868cc806d64fdbcd4217
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """Record the identifier of the state this object stands for.

    state_from: stored unchanged as ``self.state_from``.  Subclasses in
    this file pass short string literals such as "align" or "add_0".
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
139 def __init__(self
, width
):
140 self
.in_op
= FPOp(width
)
141 self
.out_op
= Signal(width
)
142 self
.out_decode
= Signal(reset_less
=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 #m.d.comb += self.out_op.eq(self.mod.out_op)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
def __init__(self, width):
    """Set up two raw operand inputs and two FPNumIn outputs.

    width: bit-width given to both input Signals and forwarded to both
    FPNumIn instances.
    """
    # Trigger's own constructor runs first.
    Trigger.__init__(self)

    # Raw operands.  Each Signal is assigned directly to its attribute;
    # nmigen picks up the Signal name from that assignment.
    self.in_op1 = Signal(width, reset_less=True)
    self.in_op2 = Signal(width, reset_less=True)

    # Targets that elaborate() fills via decode() of the raw operands.
    self.out_op1 = FPNumIn(None, width)
    self.out_op2 = FPNumIn(None, width)
195 def elaborate(self
, platform
):
196 m
= Trigger
.elaborate(self
, platform
)
197 #m.submodules.get_op_in = self.in_op
198 m
.submodules
.get_op1_out
= self
.out_op1
199 m
.submodules
.get_op2_out
= self
.out_op2
200 with m
.If(self
.trigger
):
202 self
.out_op1
.decode(self
.in_op1
),
203 self
.out_op2
.decode(self
.in_op2
),
208 class FPGet2Op(FPState
):
212 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
):
213 FPState
.__init
__(self
, in_state
)
214 self
.out_state
= out_state
215 self
.mod
= FPGet2OpMod(width
)
218 self
.out_op1
= FPNumIn(None, width
)
219 self
.out_op2
= FPNumIn(None, width
)
220 self
.in_stb
= Signal(reset_less
=True)
221 self
.out_ack
= Signal(reset_less
=True)
222 self
.out_decode
= Signal(reset_less
=True)
224 def setup(self
, m
, in_op1
, in_op2
, in_stb
, in_ack
):
225 """ links module to inputs and outputs
227 m
.submodules
.get_ops
= self
.mod
228 m
.d
.comb
+= self
.mod
.in_op1
.eq(in_op1
)
229 m
.d
.comb
+= self
.mod
.in_op2
.eq(in_op2
)
230 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
231 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
232 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
233 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
236 with m
.If(self
.out_decode
):
237 m
.next
= self
.out_state
240 #self.out_op1.v.eq(self.mod.out_op1.v),
241 #self.out_op2.v.eq(self.mod.out_op2.v),
242 self
.out_op1
.eq(self
.mod
.out_op1
),
243 self
.out_op2
.eq(self
.mod
.out_op2
)
246 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, m_extra=True):
    """Hold a pair of FPNumBase operands, ``a`` and ``b``.

    width:   forwarded as the first argument of each FPNumBase.
    m_extra: forwarded as the second argument of each FPNumBase
             (defaults to True).
    """
    # Both operands are built with identical arguments, a before b.
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
255 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
)]
258 class FPAddSpecialCasesMod
:
259 """ special cases: NaNs, infs, zeros, denormalised
260 NOTE: some of these are unique to add. see "Special Operations"
261 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
264 def __init__(self
, width
):
266 self
.i
= self
.ispec()
267 self
.out_z
= self
.ospec()
268 self
.out_do_z
= Signal(reset_less
=True)
271 return FPNumBase2Ops(self
.width
)
274 return FPNumOut(self
.width
, False)
276 def setup(self
, m
, in_a
, in_b
, out_do_z
):
277 """ links module to inputs and outputs
279 m
.submodules
.specialcases
= self
280 m
.d
.comb
+= self
.i
.a
.eq(in_a
)
281 m
.d
.comb
+= self
.i
.b
.eq(in_b
)
282 m
.d
.comb
+= out_do_z
.eq(self
.out_do_z
)
284 def elaborate(self
, platform
):
287 m
.submodules
.sc_in_a
= self
.i
.a
288 m
.submodules
.sc_in_b
= self
.i
.b
289 m
.submodules
.sc_out_z
= self
.out_z
292 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
295 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
297 # if a is NaN or b is NaN return NaN
298 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
299 m
.d
.comb
+= self
.out_do_z
.eq(1)
300 m
.d
.comb
+= self
.out_z
.nan(0)
302 # XXX WEIRDNESS for FP16 non-canonical NaN handling
305 ## if a is zero and b is NaN return -b
306 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
307 # m.d.comb += self.out_do_z.eq(1)
308 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
310 ## if b is zero and a is NaN return -a
311 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
315 ## if a is -zero and b is NaN return -b
316 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
320 ## if b is -zero and a is NaN return -a
321 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
325 # if a is inf return inf (or NaN)
326 with m
.Elif(self
.i
.a
.is_inf
):
327 m
.d
.comb
+= self
.out_do_z
.eq(1)
328 m
.d
.comb
+= self
.out_z
.inf(self
.i
.a
.s
)
329 # if a is inf and signs don't match return NaN
330 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
331 m
.d
.comb
+= self
.out_z
.nan(0)
333 # if b is inf return inf
334 with m
.Elif(self
.i
.b
.is_inf
):
335 m
.d
.comb
+= self
.out_do_z
.eq(1)
336 m
.d
.comb
+= self
.out_z
.inf(self
.i
.b
.s
)
338 # if a is zero and b zero return signed-a/b
339 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.out_z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
345 # if a is zero return b
346 with m
.Elif(self
.i
.a
.is_zero
):
347 m
.d
.comb
+= self
.out_do_z
.eq(1)
348 m
.d
.comb
+= self
.out_z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
351 # if b is zero return a
352 with m
.Elif(self
.i
.b
.is_zero
):
353 m
.d
.comb
+= self
.out_do_z
.eq(1)
354 m
.d
.comb
+= self
.out_z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
357 # if a equal to -b return zero (+ve zero)
358 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
359 m
.d
.comb
+= self
.out_do_z
.eq(1)
360 m
.d
.comb
+= self
.out_z
.zero(0)
362 # Denormalised Number checks
364 m
.d
.comb
+= self
.out_do_z
.eq(0)
370 def __init__(self
, id_wid
):
373 self
.in_mid
= Signal(id_wid
, reset_less
=True)
374 self
.out_mid
= Signal(id_wid
, reset_less
=True)
380 if self
.id_wid
is not None:
381 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
384 class FPAddSpecialCases(FPState
, FPID
):
385 """ special cases: NaNs, infs, zeros, denormalised
386 NOTE: some of these are unique to add. see "Special Operations"
387 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the "special_cases" state around an FPAddSpecialCasesMod.

    width:  forwarded to FPAddSpecialCasesMod.
    id_wid: forwarded to FPID.__init__.
    """
    # Both bases are initialised explicitly, FPState before FPID.
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)

    special = FPAddSpecialCasesMod(width)
    self.mod = special
    # Output object created by the module's own ospec() factory.
    self.out_z = special.ospec()
    # Single-bit flag; setup() hands it to the module as out_do_z.
    self.out_do_z = Signal(reset_less=True)
397 def setup(self
, m
, in_a
, in_b
, in_mid
):
398 """ links module to inputs and outputs
400 self
.mod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
401 if self
.in_mid
is not None:
402 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
406 with m
.If(self
.out_do_z
):
407 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
410 m
.next
= "denormalise"
413 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
414 """ special cases: NaNs, infs, zeros, denormalised
415 NOTE: some of these are unique to add. see "Special Operations"
416 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
419 def __init__(self
, width
, id_wid
):
420 FPState
.__init
__(self
, "special_cases")
421 FPID
.__init
__(self
, id_wid
)
422 self
.smod
= FPAddSpecialCasesMod(width
)
423 self
.out_z
= self
.smod
.ospec()
424 self
.out_do_z
= Signal(reset_less
=True)
426 self
.dmod
= FPAddDeNormMod(width
)
427 self
.o
= self
.dmod
.ospec()
429 def setup(self
, m
, in_a
, in_b
, in_mid
):
430 """ links module to inputs and outputs
432 self
.smod
.setup(m
, in_a
, in_b
, self
.out_do_z
)
433 self
.dmod
.setup(m
, in_a
, in_b
)
434 if self
.in_mid
is not None:
435 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
439 with m
.If(self
.out_do_z
):
440 m
.d
.sync
+= self
.out_z
.v
.eq(self
.smod
.out_z
.v
) # only take output
444 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
445 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
448 class FPAddDeNormMod(FPState
):
450 def __init__(self
, width
):
452 self
.i
= self
.ispec()
453 self
.o
= self
.ospec()
456 return FPNumBase2Ops(self
.width
)
459 return FPNumBase2Ops(self
.width
)
461 def setup(self
, m
, in_a
, in_b
):
462 """ links module to inputs and outputs
464 m
.submodules
.denormalise
= self
465 m
.d
.comb
+= self
.i
.a
.eq(in_a
)
466 m
.d
.comb
+= self
.i
.b
.eq(in_b
)
468 def elaborate(self
, platform
):
470 m
.submodules
.denorm_in_a
= self
.i
.a
471 m
.submodules
.denorm_in_b
= self
.i
.b
472 m
.submodules
.denorm_out_a
= self
.o
.a
473 m
.submodules
.denorm_out_b
= self
.o
.b
474 # hmmm, don't like repeating identical code
475 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
476 with m
.If(self
.i
.a
.exp_n127
):
477 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
479 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
481 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
482 with m
.If(self
.i
.b
.exp_n127
):
483 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
485 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
490 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the "denormalise" state around an FPAddDeNormMod.

    width:  forwarded to FPAddDeNormMod and to both FPNumBase outputs.
    id_wid: forwarded to FPID.__init__.
    """
    # Both bases are initialised explicitly, FPState before FPID.
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)

    self.mod = FPAddDeNormMod(width)

    # Per-operand outputs owned by this state.
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
499 def setup(self
, m
, in_a
, in_b
, in_mid
):
500 """ links module to inputs and outputs
502 self
.mod
.setup(m
, in_a
, in_b
)
503 if self
.in_mid
is not None:
504 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
508 # Denormalised Number checks
510 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
511 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
514 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the operand inputs, outputs and the exp_eq flag.

    width: forwarded to every FPNumBase / FPNumIn created here.
    """
    # Inputs are FPNumBase ...
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # ... outputs are FPNumIn; elaborate() calls shift_down() on them.
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # Single-bit Signal driven to 0 or 1 by elaborate().
    self.exp_eq = Signal(reset_less=True)
523 def elaborate(self
, platform
):
524 # This one however (single-cycle) will do the shift
529 m
.submodules
.align_in_a
= self
.in_a
530 m
.submodules
.align_in_b
= self
.in_b
531 m
.submodules
.align_out_a
= self
.out_a
532 m
.submodules
.align_out_b
= self
.out_b
534 # NOTE: this does *not* do single-cycle multi-shifting,
535 # it *STAYS* in the align state until exponents match
537 # exponent of a greater than b: shift b down
538 m
.d
.comb
+= self
.exp_eq
.eq(0)
539 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
540 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
541 agtb
= Signal(reset_less
=True)
542 altb
= Signal(reset_less
=True)
543 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
544 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
546 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
547 # exponent of b greater than a: shift a down
549 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
550 # exponents equal: move to next stage.
552 m
.d
.comb
+= self
.exp_eq
.eq(1)
556 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the "align" state around an FPAddAlignMultiMod.

    width:  forwarded to FPAddAlignMultiMod and to both FPNumIn outputs.
    id_wid: forwarded to FPID.__init__.
    """
    # Base-class order here is FPID first, then FPState.
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")

    self.mod = FPAddAlignMultiMod(width)

    # Per-operand outputs owned by this state.
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # setup() drives this from the module's exp_eq.
    self.exp_eq = Signal(reset_less=True)
566 def setup(self
, m
, in_a
, in_b
, in_mid
):
567 """ links module to inputs and outputs
569 m
.submodules
.align
= self
.mod
570 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
571 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
572 #m.d.comb += self.out_a.eq(self.mod.out_a)
573 #m.d.comb += self.out_b.eq(self.mod.out_b)
574 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
575 if self
.in_mid
is not None:
576 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
580 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
581 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
582 with m
.If(self
.exp_eq
):
def __init__(self, width):
    """Hold a pair of FPNumIn operands, ``a`` and ``b``.

    width: forwarded as the second argument of each FPNumIn; the first
    argument is None for both.
    """
    # Both operands are built with identical arguments, a before b.
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
593 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
)]
596 class FPAddAlignSingleMod
:
598 def __init__(self
, width
):
600 self
.i
= self
.ispec()
601 self
.o
= self
.ospec()
604 return FPNumBase2Ops(self
.width
)
607 return FPNumIn2Ops(self
.width
)
609 def setup(self
, m
, in_a
, in_b
):
610 """ links module to inputs and outputs
612 m
.submodules
.align
= self
613 m
.d
.comb
+= self
.i
.a
.eq(in_a
)
614 m
.d
.comb
+= self
.i
.b
.eq(in_b
)
616 def elaborate(self
, platform
):
617 """ Aligns A against B or B against A, depending on which has the
618 greater exponent. This is done in a *single* cycle using
619 variable-width bit-shift
621 the shifter used here is quite expensive in terms of gates.
622 Mux A or B in (and out) into temporaries, as only one of them
623 needs to be aligned against the other
627 m
.submodules
.align_in_a
= self
.i
.a
628 m
.submodules
.align_in_b
= self
.i
.b
629 m
.submodules
.align_out_a
= self
.o
.a
630 m
.submodules
.align_out_b
= self
.o
.b
632 # temporary (muxed) input and output to be shifted
633 t_inp
= FPNumBase(self
.width
)
634 t_out
= FPNumIn(None, self
.width
)
635 espec
= (len(self
.i
.a
.e
), True)
636 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
637 m
.submodules
.align_t_in
= t_inp
638 m
.submodules
.align_t_out
= t_out
639 m
.submodules
.multishift_r
= msr
641 ediff
= Signal(espec
, reset_less
=True)
642 ediffr
= Signal(espec
, reset_less
=True)
643 tdiff
= Signal(espec
, reset_less
=True)
644 elz
= Signal(reset_less
=True)
645 egz
= Signal(reset_less
=True)
647 # connect multi-shifter to t_inp/out mantissa (and tdiff)
648 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
649 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
650 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
651 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
652 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
654 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
655 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
656 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
657 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
659 # default: A-exp == B-exp, A and B untouched (fall through)
660 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
661 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
662 # only one shifter (muxed)
663 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
664 # exponent of a greater than b: shift b down
666 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
669 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
671 # exponent of b greater than a: shift a down
673 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
676 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
681 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the "align" state around an FPAddAlignSingleMod.

    width:  forwarded to FPAddAlignSingleMod and to both FPNumIn outputs.
    id_wid: forwarded to FPID.__init__.
    """
    # Both bases are initialised explicitly, FPState before FPID.
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    self.mod = FPAddAlignSingleMod(width)

    # Per-operand outputs owned by this state.
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
690 def setup(self
, m
, in_a
, in_b
, in_mid
):
691 """ links module to inputs and outputs
693 self
.mod
.setup(m
, in_a
, in_b
)
694 if self
.in_mid
is not None:
695 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
699 # NOTE: could be done as comb
700 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
701 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
705 class FPAddAlignSingleAdd(FPState
, FPID
):
707 def __init__(self
, width
, id_wid
):
708 FPState
.__init
__(self
, "align")
709 FPID
.__init
__(self
, id_wid
)
710 self
.mod
= FPAddAlignSingleMod(width
)
711 self
.o
= self
.mod
.ospec()
713 self
.a0mod
= FPAddStage0Mod(width
)
714 self
.a0o
= self
.a0mod
.ospec()
716 self
.a1mod
= FPAddStage1Mod(width
)
717 self
.a1o
= self
.a1mod
.ospec()
719 def setup(self
, m
, in_a
, in_b
, in_mid
):
720 """ links module to inputs and outputs
722 self
.mod
.setup(m
, in_a
, in_b
)
723 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
725 self
.a0mod
.setup(m
, self
.o
.a
, self
.o
.b
)
726 m
.d
.comb
+= self
.a0o
.eq(self
.a0mod
.o
)
728 self
.a1mod
.setup(m
, self
.a0o
.tot
, self
.a0o
.z
)
730 if self
.in_mid
is not None:
731 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
735 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
736 m
.next
= "normalise_1"
739 class FPAddStage0Data
:
def __init__(self, width):
    """Create the add-stage-0 payload: result number ``z`` and ``tot``.

    width: forwarded to FPNumBase (second argument False).
    """
    self.z = FPNumBase(width, False)

    # tot is four bits wider than z's mantissa width.
    tot_width = self.z.m_width + 4
    self.tot = Signal(tot_width, reset_less=True)
746 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
)]
749 class FPAddStage0Mod
:
751 def __init__(self
, width
):
753 self
.i
= self
.ispec()
754 self
.o
= self
.ospec()
757 return FPNumBase2Ops(self
.width
)
760 return FPAddStage0Data(self
.width
)
762 def setup(self
, m
, in_a
, in_b
):
763 """ links module to inputs and outputs
765 m
.submodules
.add0
= self
766 m
.d
.comb
+= self
.i
.a
.eq(in_a
)
767 m
.d
.comb
+= self
.i
.b
.eq(in_b
)
769 def elaborate(self
, platform
):
771 m
.submodules
.add0_in_a
= self
.i
.a
772 m
.submodules
.add0_in_b
= self
.i
.b
773 m
.submodules
.add0_out_z
= self
.o
.z
775 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
777 # store intermediate tests (and zero-extended mantissas)
778 seq
= Signal(reset_less
=True)
779 mge
= Signal(reset_less
=True)
780 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
781 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
782 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
783 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
784 am0
.eq(Cat(self
.i
.a
.m
, 0)),
785 bm0
.eq(Cat(self
.i
.b
.m
, 0))
787 # same-sign (both negative or both positive) add mantissas
790 self
.o
.tot
.eq(am0
+ bm0
),
791 self
.o
.z
.s
.eq(self
.i
.a
.s
)
793 # a mantissa greater than b, use a
796 self
.o
.tot
.eq(am0
- bm0
),
797 self
.o
.z
.s
.eq(self
.i
.a
.s
)
799 # b mantissa greater than a, use b
802 self
.o
.tot
.eq(bm0
- am0
),
803 self
.o
.z
.s
.eq(self
.i
.b
.s
)
808 class FPAddStage0(FPState
, FPID
):
809 """ First stage of add. covers same-sign (add) and subtract
810 special-casing when mantissas are greater or equal, to
811 give greatest accuracy.
def __init__(self, width, id_wid):
    """Build the "add_0" state around an FPAddStage0Mod.

    width:  forwarded to FPAddStage0Mod.
    id_wid: forwarded to FPID.__init__.
    """
    # Both bases are initialised explicitly, FPState before FPID.
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)

    adder = FPAddStage0Mod(width)
    self.mod = adder
    # Output object created by the module's own ospec() factory.
    self.o = adder.ospec()
820 def setup(self
, m
, in_a
, in_b
, in_mid
):
821 """ links module to inputs and outputs
823 self
.mod
.setup(m
, in_a
, in_b
)
824 if self
.in_mid
is not None:
825 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
829 # NOTE: these could be done as combinatorial (merge add0+add1)
830 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
834 class FPAddStage1Data
:
836 def __init__(self
, width
):
837 self
.z
= FPNumBase(width
, False)
841 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
)]
845 class FPAddStage1Mod(FPState
):
846 """ Second stage of add: preparation for normalisation.
847 detects when tot sum is too big (tot[27] is kinda a carry bit)
850 def __init__(self
, width
):
852 self
.i
= self
.ispec()
853 self
.o
= self
.ospec()
856 return FPAddStage0Data(self
.width
)
859 return FPAddStage1Data(self
.width
)
861 def setup(self
, m
, in_tot
, in_z
):
862 """ links module to inputs and outputs
864 m
.submodules
.add1
= self
865 m
.submodules
.add1_out_overflow
= self
.o
.of
867 m
.d
.comb
+= self
.i
.z
.eq(in_z
)
868 m
.d
.comb
+= self
.i
.tot
.eq(in_tot
)
870 def elaborate(self
, platform
):
872 #m.submodules.norm1_in_overflow = self.in_of
873 #m.submodules.norm1_out_overflow = self.out_of
874 #m.submodules.norm1_in_z = self.in_z
875 #m.submodules.norm1_out_z = self.out_z
876 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
877 # tot[-1] (MSB) gets set when the sum overflows. shift result down
878 with m
.If(self
.i
.tot
[-1]):
880 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
881 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
882 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
883 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
884 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
885 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
887 # tot[-1] (MSB) zero case
890 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
891 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
892 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
893 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
894 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
899 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Build the "add_1" state around an FPAddStage1Mod.

    width:  forwarded to FPAddStage1Mod and to the FPNumBase output.
    id_wid: forwarded to FPID.__init__.
    """
    # Both bases are initialised explicitly, FPState before FPID.
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)

    self.mod = FPAddStage1Mod(width)

    # Outputs owned by this state.
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()

    # Unlike the other flags in this file, this Signal is created with
    # default arguments (no reset_less=True).
    self.norm_stb = Signal()
909 def setup(self
, m
, in_tot
, in_z
, in_mid
):
910 """ links module to inputs and outputs
912 self
.mod
.setup(m
, in_tot
, in_z
)
914 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
916 if self
.in_mid
is not None:
917 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
921 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
922 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
923 m
.d
.sync
+= self
.norm_stb
.eq(1)
924 m
.next
= "normalise_1"
927 class FPNormaliseModSingle
:
929 def __init__(self
, width
):
931 self
.in_z
= self
.ispec()
932 self
.out_z
= self
.ospec()
935 return FPNumBase(self
.width
, False)
938 return FPNumBase(self
.width
, False)
940 def setup(self
, m
, in_z
, out_z
, modname
):
941 """ links module to inputs and outputs
943 m
.submodules
.normalise
= self
944 m
.d
.comb
+= self
.in_z
.eq(in_z
)
945 m
.d
.comb
+= out_z
.eq(self
.out_z
)
947 def elaborate(self
, platform
):
950 mwid
= self
.out_z
.m_width
+2
951 pe
= PriorityEncoder(mwid
)
952 m
.submodules
.norm_pe
= pe
954 m
.submodules
.norm1_out_z
= self
.out_z
955 m
.submodules
.norm1_in_z
= self
.in_z
957 in_z
= FPNumBase(self
.width
, False)
959 m
.submodules
.norm1_insel_z
= in_z
960 m
.submodules
.norm1_insel_overflow
= in_of
962 espec
= (len(in_z
.e
), True)
963 ediff_n126
= Signal(espec
, reset_less
=True)
964 msr
= MultiShiftRMerge(mwid
, espec
)
965 m
.submodules
.multishift_r
= msr
967 m
.d
.comb
+= in_z
.eq(self
.in_z
)
968 m
.d
.comb
+= in_of
.eq(self
.in_of
)
969 # initialise out from in (overridden below)
970 m
.d
.comb
+= self
.out_z
.eq(in_z
)
971 m
.d
.comb
+= self
.out_of
.eq(in_of
)
972 # normalisation decrease condition
973 decrease
= Signal(reset_less
=True)
974 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
977 # *sigh* not entirely obvious: count leading zeros (clz)
978 # with a PriorityEncoder: to find from the MSB
979 # we reverse the order of the bits.
980 temp_m
= Signal(mwid
, reset_less
=True)
981 temp_s
= Signal(mwid
+1, reset_less
=True)
982 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
984 # cat round and guard bits back into the mantissa
985 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
986 pe
.i
.eq(temp_m
[::-1]), # inverted
987 clz
.eq(pe
.o
), # count zeros from MSB down
988 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
989 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
990 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
996 class FPNorm1ModSingle
:
998 def __init__(self
, width
):
1000 self
.in_z
= FPNumBase(width
, False)
1001 self
.in_of
= Overflow()
1002 self
.out_z
= FPNumBase(width
, False)
1003 self
.out_of
= Overflow()
1005 def setup(self
, m
, in_z
, in_of
, out_z
):
1006 """ links module to inputs and outputs
1008 m
.submodules
.normalise_1
= self
1010 m
.d
.comb
+= self
.in_z
.eq(in_z
)
1011 m
.d
.comb
+= self
.in_of
.eq(in_of
)
1013 m
.d
.comb
+= out_z
.eq(self
.out_z
)
1015 def elaborate(self
, platform
):
1018 mwid
= self
.out_z
.m_width
+2
1019 pe
= PriorityEncoder(mwid
)
1020 m
.submodules
.norm_pe
= pe
1022 m
.submodules
.norm1_out_z
= self
.out_z
1023 m
.submodules
.norm1_out_overflow
= self
.out_of
1024 m
.submodules
.norm1_in_z
= self
.in_z
1025 m
.submodules
.norm1_in_overflow
= self
.in_of
1027 in_z
= FPNumBase(self
.width
, False)
1029 m
.submodules
.norm1_insel_z
= in_z
1030 m
.submodules
.norm1_insel_overflow
= in_of
1032 espec
= (len(in_z
.e
), True)
1033 ediff_n126
= Signal(espec
, reset_less
=True)
1034 msr
= MultiShiftRMerge(mwid
, espec
)
1035 m
.submodules
.multishift_r
= msr
1037 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1038 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1039 # initialise out from in (overridden below)
1040 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1041 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1042 # normalisation increase/decrease conditions
1043 decrease
= Signal(reset_less
=True)
1044 increase
= Signal(reset_less
=True)
1045 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1046 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1048 with m
.If(decrease
):
1049 # *sigh* not entirely obvious: count leading zeros (clz)
1050 # with a PriorityEncoder: to find from the MSB
1051 # we reverse the order of the bits.
1052 temp_m
= Signal(mwid
, reset_less
=True)
1053 temp_s
= Signal(mwid
+1, reset_less
=True)
1054 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1055 # make sure that the amount to decrease by does NOT
1056 # go below the minimum non-INF/NaN exponent
1057 limclz
= Mux(in_z
.exp_sub_n126
> pe
.o
, pe
.o
,
1060 # cat round and guard bits back into the mantissa
1061 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1062 pe
.i
.eq(temp_m
[::-1]), # inverted
1063 clz
.eq(limclz
), # count zeros from MSB down
1064 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1065 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1066 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1067 self
.out_of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1068 # overflow in bits 0..1: got shifted too (leave sticky)
1069 self
.out_of
.guard
.eq(temp_s
[1]), # guard
1070 self
.out_of
.round_bit
.eq(temp_s
[0]), # round
1073 with m
.Elif(increase
):
1074 temp_m
= Signal(mwid
+1, reset_less
=True)
1076 temp_m
.eq(Cat(in_of
.sticky
, in_of
.round_bit
, in_of
.guard
,
1078 ediff_n126
.eq(in_z
.N126
- in_z
.e
),
1079 # connect multi-shifter to inp/out mantissa (and ediff)
1081 msr
.diff
.eq(ediff_n126
),
1082 self
.out_z
.m
.eq(msr
.m
[3:]),
1083 self
.out_of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1084 # overflow in bits 0..1: got shifted too (leave sticky)
1085 self
.out_of
.guard
.eq(temp_s
[2]), # guard
1086 self
.out_of
.round_bit
.eq(temp_s
[1]), # round
1087 self
.out_of
.sticky
.eq(temp_s
[0]), # sticky
1088 self
.out_z
.e
.eq(in_z
.e
+ ediff_n126
),
1094 class FPNorm1ModMulti
:
1096 def __init__(self
, width
, single_cycle
=True):
1098 self
.in_select
= Signal(reset_less
=True)
1099 self
.in_z
= FPNumBase(width
, False)
1100 self
.in_of
= Overflow()
1101 self
.temp_z
= FPNumBase(width
, False)
1102 self
.temp_of
= Overflow()
1103 self
.out_z
= FPNumBase(width
, False)
1104 self
.out_of
= Overflow()
def elaborate(self, platform):
    """ builds the combinatorial logic for one normalisation step.

        The step operates on either the fresh input (in_z / in_of) or
        the looped-back temporary (temp_z / temp_of), chosen by
        in_select.  The result is written to out_z / out_of, and
        out_norm is raised whenever an adjustment was made.

        * platform: unused here
        returns the Module containing the logic
    """
    m = Module()

    m.submodules.norm1_out_z = self.out_z
    m.submodules.norm1_out_overflow = self.out_of
    m.submodules.norm1_temp_z = self.temp_z
    m.submodules.norm1_temp_of = self.temp_of
    m.submodules.norm1_in_z = self.in_z
    m.submodules.norm1_in_overflow = self.in_of

    # local copies that the rest of the logic reads from
    in_z = FPNumBase(self.width, False)
    in_of = Overflow()
    m.submodules.norm1_insel_z = in_z
    m.submodules.norm1_insel_overflow = in_of

    # select which of temp or in z/of to use
    with m.If(self.in_select):
        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
    with m.Else():
        m.d.comb += in_z.eq(self.temp_z)
        m.d.comb += in_of.eq(self.temp_of)
    # initialise out from in (overridden below)
    m.d.comb += self.out_z.eq(in_z)
    m.d.comb += self.out_of.eq(in_of)
    # normalisation increase/decrease conditions
    decrease = Signal(reset_less=True)
    increase = Signal(reset_less=True)
    m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
    m.d.comb += increase.eq(in_z.exp_lt_n126)
    m.d.comb += self.out_norm.eq(decrease | increase) # loop-end

    with m.If(decrease):
        m.d.comb += [
            self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
            self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
            self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
            self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
            self.out_of.round_bit.eq(0),        # reset round bit
            self.out_of.m0.eq(in_of.guard),
        ]
    with m.Elif(increase):
        m.d.comb += [
            self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
            self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
            # bits shifted out of the mantissa ripple down through
            # guard -> round -> sticky
            self.out_of.guard.eq(in_z.m[0]),
            self.out_of.m0.eq(in_z.m[1]),
            self.out_of.round_bit.eq(in_of.guard),
            self.out_of.sticky.eq(in_of.sticky | in_of.round_bit),
        ]

    return m
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1", wrapping FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: passed to FPNorm1ModSingle and FPNumBase
            * id_wid: passed to FPID
            * single_cycle: accepted but not used by this class
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the round-up flag and moves on to rounding
        """
        # NOTE(review): idsync() and the transition to "round" were
        # missing from the visible text; restored to match the other
        # states (see FPNormToPack.action) - confirm.
        self.idsync(m)
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
        m.next = "round"
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1", wrapping FPNorm1ModMulti.

        The state stays active while the module reports out_norm,
        feeding the module's output back in through temp_z / temp_of.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPNorm1ModMulti and FPNumBase
            * id_wid: passed to FPID
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ one clock of the normalisation loop
        """
        # NOTE(review): idsync(), the body of the in_accept arm, both
        # Else arms and the transition to "round" were missing from
        # the visible text and have been reconstructed - confirm.
        self.idsync(m)
        # accept a new input on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # loop the module's result back as next cycle's temporary
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" that chains normalise, round,
        corrections and pack together, then moves to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to each chained module
            * id_wid: passed to FPID
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # setup() reads self.width for every module it creates
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width)
        n_out_z = FPNumBase(self.width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width)
        r_out_z = FPNumBase(self.width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(nmod.out_of.roundz)
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width)
        c_out_z = FPNumBase(self.width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width)
        self.out_z = FPNumBase(self.width)
        self.pmod.setup(m, c_out_z)

        # Multiplex ID
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed result and moves to "pack_put_z"
        """
        self.idsync(m) # copies incoming ID to outgoing
        # outputs packed result
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v)
        m.next = "pack_put_z"
def __init__(self, width):
    """ creates the rounding module's ports.

        * width: passed to FPNumBase for both number ports
    """
    # number in, (possibly) rounded number out
    self.out_z = FPNumBase(width, False)
    self.in_z = FPNumBase(width, False)
    # when set, elaborate() adds one to the mantissa
    self.in_roundz = Signal(reset_less=True)
def setup(self, m, in_z, roundz):
    """ registers this module as submodule "roundz" of m and
        connects its two inputs combinatorially.
    """
    m.submodules.roundz = self

    m.d.comb += [
        self.in_z.eq(in_z),
        self.in_roundz.eq(roundz),
    ]
def elaborate(self, platform):
    """ builds the rounding logic.

        out_z defaults to in_z.  When in_roundz is set the mantissa
        is incremented, and if the mantissa equalled in_z.m1s the
        exponent is incremented as well.

        * platform: unused here
        returns the Module containing the logic
    """
    m = Module()
    m.d.comb += self.out_z.eq(self.in_z)
    with m.If(self.in_roundz):
        m.d.comb += self.out_z.m.eq(self.in_z.m + 1) # mantissa rounds up
        with m.If(self.in_z.m == self.in_z.m1s): # all 1s
            m.d.comb += self.out_z.e.eq(self.in_z.e + 1) # exponent up
    return m
class FPRound(FPState, FPID):
    """ FSM state "round", wrapping FPRoundMod.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPRoundMod and FPNumBase
            * id_wid: passed to FPID
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the rounded number and moves to "corrections"
        """
        # NOTE(review): idsync() was missing from the visible text;
        # restored to match FPNormToPack.action - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ combinatorial module that replaces the exponent of a
        denormalised number with N127.
    """

    def __init__(self, width):
        """ * width: passed to FPNumOut for both ports
        """
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.eq(in_z)

    def elaborate(self, platform):
        """ builds the correction logic.

            * platform: unused here
            returns the Module containing the logic
        """
        m = Module()
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        # pass through by default; later assignment wins when active
        m.d.comb += self.out_z.eq(self.in_z)
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ FSM state "corrections", wrapping FPCorrectionsMod.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPCorrectionsMod and FPNumBase
            * id_wid: passed to FPID
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the corrected number and moves to "pack"
        """
        # NOTE(review): idsync() and the transition were missing from
        # the visible text.  "pack" is the state name FPPack
        # registers - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "pack"
def __init__(self, width):
    """ creates the pack module's ports.

        * width: passed to FPNumOut for both ports
    """
    # packed result out, unpacked number in
    self.out_z = FPNumOut(width, False)
    self.in_z = FPNumOut(width, False)
def setup(self, m, in_z):
    """ links module to inputs and outputs

        registers this module as submodule "pack" of m and connects
        in_z combinatorially.
    """
    m.submodules.pack = self
    m.d.comb += self.in_z.eq(in_z)
def elaborate(self, platform):
    """ builds the packing logic.

        When in_z.is_overflowed is set, out_z is driven with the
        statements from out_z.inf(sign); otherwise with those from
        out_z.create(sign, exponent, mantissa).

        * platform: unused here
        returns the Module containing the logic
    """
    m = Module()
    m.submodules.pack_in_z = self.in_z
    with m.If(self.in_z.is_overflowed):
        m.d.comb += self.out_z.inf(self.in_z.s)
    with m.Else():
        m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e,
                                      self.in_z.m)
    return m
class FPPack(FPState, FPID):
    """ FSM state "pack", wrapping FPPackMod.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPPackMod and FPNumOut
            * id_wid: passed to FPID
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed value and moves to "pack_put_z"
        """
        # NOTE(review): idsync() was missing from the visible text;
        # restored to match FPNormToPack.action - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents in_z.v on out_z and waits for the
        stb/ack handshake before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its .v is copied out
            * out_z: destination; uses .v, .stb and .ack
            * in_mid / out_mid: ID in and out (in_mid may be None)
            * to_state: state entered after the handshake
                        (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ copies ID and value out, then runs the stb/ack handshake
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): the enclosing "m.d.sync += [ ... ]" was
        # missing from the visible text; sync chosen to match the
        # stb handling below - confirm.
        m.d.sync += [
            self.out_z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.stb & self.out_z.ack):
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents in_z.v on out_zs[in_mid] and waits
        for that entry's stb/ack handshake before moving to to_state.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its .v is copied out
            * out_zs: indexable set of outputs (.v, .stb, .ack each)
            * in_mid: index into out_zs
            * to_state: state entered after the handshake
                        (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads self.in_z
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ copies the value to the selected output and handshakes
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # NOTE(review): the enclosing "m.d.sync += [ ... ]" was
        # missing from the visible text; sync chosen to match the
        # stb handling below - confirm.
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseMod(FPID):
    """ FP adder built as an FSM from a list of FPState objects.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        # filled by add_state(), consumed by get_fragment()
        self.states = []

    def add_state(self, state):
        """ appends state to the FSM's state list and returns it,
            so callers can go on to call state.setup(...)
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        # both builders register a "get_ops" state, so exactly one
        # of them may run
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the full chain of states: get_ops, special
            cases, denorm, align, add0, add1, normalise, round,
            corrections, pack and the two put-z states.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): the definitions of a and b were missing from
        # the visible text; out_op1/out_op2 are presumed to be the
        # FPGet2Op outputs - confirm against FPGet2Op.
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced chain of states: get_ops, combined
            special-cases/denorm, combined align/add, combined
            normalise-to-pack and the two put-z states.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): the definitions of a and b were missing from
        # the visible text; out_op1/out_op2 are presumed to be the
        # FPGet2Op outputs - confirm against FPGet2Op.
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o.a, sc.o.b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o.z, alm.a1o.of, alm.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                                    n1.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    sc.in_mid, self.out_mid))
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd" that owns an FPADDBaseMod and bridges an
        outer stb/ack handshake to the module's in_t / out_z.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        #self.out_z = FPOp(width)

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the inner adder module to the outer operands, ID and
            result, and registers it as submodule "fpadd" of m.

            * a, b: operands
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: incoming ID
            * out_z: result port (.v, .stb, .ack); stored on self
            * out_mid: outgoing ID; replaces self.out_mid
        """
        # the statements below and action() both use self.out_z
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ runs the inner adder: accepts an operand pair, waits for
            z_done, then moves to "put_z".
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            # NOTE(review): the second list entry and the transition
            # were missing from the visible text and are
            # reconstructed - confirm.
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0),
                        ]
            m.next = "put_z"
            # The visible text continued with sync assignments to
            # out_mid, out_z.v and out_z.stb.  setup() already drives
            # all three from m.d.comb, so a second driver from
            # m.d.sync would conflict; those assignments are
            # deliberately not emitted here.
def __init__(self, width, id_wid, rs_sz=2):
    """ creates an Array of rs_sz result ports plus one input port.

        * width: passed to FPOp for every port
        * id_wid: width of the in_mid signal
        * rs_sz: number of result ports.  The visible text read
                 rs_sz without defining it; it is now a keyword
                 parameter.
    """
    # NOTE(review): the default of 2 mirrors FPADD's rs_sz default;
    # confirm no module-level rs_sz was intended instead.
    self.width = width
    self.id_wid = id_wid
    res = []
    for i in range(rs_sz):
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)
    self.in_z = FPOp(width)
    self.in_mid = Signal(self.id_wid, reset_less=True)
def setup(self, m, in_z, in_mid):
    """ connects the incoming result and its ID combinatorially
    """
    comb = m.d.comb
    comb += self.in_z.eq(in_z)
    comb += self.in_mid.eq(in_mid)
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    # NOTE(review): the Module creation and return were missing from
    # the visible text, and further statements may have been lost
    # after the submodule registration - confirm.
    m = Module()
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res

    return m
1704 """ FPADD: stages as follows:
1710 FPAddBase---> FPAddBaseMod
1712 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1714 FPAddBase is tricky: it is both a stage and *has* stages.
1715 Connection to FPAddBaseMod therefore requires an in stb/ack
1716 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1717 needs to be the thing that raises the incoming stb.
def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * rs_sz: number of (in_a, in_b) operand pairs and of results
    """
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle

    #self.out_z = FPOp(width)
    self.ids = FPID(id_wid)

    # NOTE(review): the FPOp constructions, list initialisers and
    # self.rs / self.states assignments were missing from the visible
    # text; reconstructed from how rs, res and states are used
    # elsewhere in this file - confirm.
    rs = []
    for i in range(rs_sz):
        in_a = FPOp(width)
        in_b = FPOp(width)
        in_a.name = "in_a_%d" % i
        in_b.name = "in_b_%d" % i
        rs.append((in_a, in_b))
    self.rs = Array(rs)

    res = []
    for i in range(rs_sz):
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)

    # filled by add_state(), consumed by get_fragment()
    self.states = []
def add_state(self, state):
    """ appends state to self.states and returns it, so callers can
        write "s = self.add_state(...)" and then call s.setup(...)
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        States registered: "get_a", "get_b", "fpadd" (an FPADDBase)
        and "put_z" (an FPPutZIdx writing into self.res).
    """
    # NOTE(review): large parts of this method were missing from the
    # visible text (Module creation, the tails of both FPGetOp calls,
    # the definitions of a and b, the tails of ab.setup and FPPutZIdx,
    # the FSM body and the return).  The reconstruction follows the
    # visible signatures of FPADDBase.setup and FPPutZIdx.__init__.
    # The FPGetOp arguments, its setup() call and its out_op
    # attribute are presumed - confirm against FPGetOp.
    m = Module()
    m.submodules += self.rs

    in_a = self.rs[0][0]
    in_b = self.rs[0][1]

    out_z = FPOp(self.width)
    out_mid = Signal(self.id_wid, reset_less=True)
    m.submodules.out_z = out_z

    geta = self.add_state(FPGetOp("get_a", "get_b",
                                  in_a, self.width))
    geta.setup(m, in_a)
    a = geta.out_op

    getb = self.add_state(FPGetOp("get_b", "fpadd",
                                  in_b, self.width))
    getb.setup(m, in_b)
    b = getb.out_op

    ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
    ab = self.add_state(ab)
    ab.setup(m, a, b, getb.out_decode, self.ids.in_mid,
             out_z, out_mid)

    # return to "get_a" afterwards: FPPutZIdx's default "get_ops"
    # is not a state of this FSM
    pz = self.add_state(FPPutZIdx("put_z", ab.out_z, self.res,
                                  out_mid, "get_a"))

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
if __name__ == "__main__":
    # Two alternative top levels.  The visible text ran both one
    # after the other.
    # NOTE(review): the selector lines were missing from the visible
    # text.  FPADD is presumed to be the active variant, because the
    # FPADDBase variant reads alu.out_z, which FPADDBase only gains
    # inside setup() - confirm.
    build_full_adder = True
    if build_full_adder:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() + \
                        alu.rs[0][1].ports() + \
                        alu.res[0].ports() + \
                        [alu.ids.in_mid, alu.ids.out_mid])
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                        alu.in_t.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])
1811 # works... but don't use, just do "python fname.py convert -t v"
1812 #print (verilog.convert(alu, ports=[
1813 # ports=alu.in_a.ports() + \
1814 # alu.in_b.ports() + \
1815 # alu.out_z.ports())