1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember the identifier of the state this object belongs to.

    ``state_from`` is stored unmodified.  Subclasses in this file pass
    string names such as "align" or "add_0" (presumably FSM state names).
    """
    self.state_from = state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """Create the operand port and the signals derived from it.

    ``width`` is handed to ``FPOp`` and is also the bit width of
    ``out_op``.
    """
    # operand port: elaborate() reads its ack, stb and v members
    self.in_op = FPOp(width)
    # receives in_op.v while out_decode is asserted
    self.out_op = Signal(width)
    # driven in elaborate() as in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
168 def setup(self
, m
, in_op
):
169 """ links module to inputs and outputs
171 setattr(m
.submodules
, self
.state_from
, self
.mod
)
172 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
173 #m.d.comb += self.out_op.eq(self.mod.out_op)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.in_op1
= Signal(width
, reset_less
=True)
193 self
.in_op2
= Signal(width
, reset_less
=True)
194 self
.o
= FPNumBase2Ops(width
, id_wid
)
197 return FPNumBase2Ops(self
.width
, self
.id_wid
)
199 def elaborate(self
, platform
):
200 m
= Trigger
.elaborate(self
, platform
)
201 #m.submodules.get_op_in = self.in_op
202 m
.submodules
.get_op1_out
= self
.out_op1
203 m
.submodules
.get_op2_out
= self
.out_op2
204 out_op1
= FPNumIn(None, width
)
205 out_op2
= FPNumIn(None, width
)
206 with m
.If(self
.trigger
):
208 out_op1
.decode(self
.in_op1
),
209 out_op2
.decode(self
.in_op2
),
210 self
.o
.a
.eq(out_op1
),
211 self
.o
.b
.eq(out_op2
),
216 class FPGet2Op(FPState
):
220 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
, id_wid
):
221 FPState
.__init
__(self
, in_state
)
222 self
.out_state
= out_state
223 self
.mod
= FPGet2OpMod(width
, id_wid
)
226 self
.out_op1
= FPNumIn(None, width
)
227 self
.out_op2
= FPNumIn(None, width
)
228 self
.in_stb
= Signal(reset_less
=True)
229 self
.out_ack
= Signal(reset_less
=True)
230 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op1, in_op2, in_stb, in_ack):
    """Link the two-operand fetch module to its inputs and outputs.

    Registers ``self.mod`` on ``m`` as submodule ``get_ops`` and adds
    the combinatorial wiring:

    * ``in_op1`` / ``in_op2`` / ``in_stb`` drive the module's
      ``in_op1`` / ``in_op2`` / ``stb``;
    * the module's ``ack`` is mirrored to both ``self.out_ack`` and the
      caller-supplied ``in_ack``;
    * the module's ``trigger`` is mirrored to ``self.out_decode``.
    """
    mod = self.mod
    m.submodules.get_ops = mod
    m.d.comb += [
        mod.in_op1.eq(in_op1),
        mod.in_op2.eq(in_op2),
        mod.stb.eq(in_stb),
        self.out_ack.eq(mod.ack),
        self.out_decode.eq(mod.trigger),
        in_ack.eq(mod.ack),
    ]
244 with m
.If(self
.out_decode
):
245 m
.next
= self
.out_state
248 #self.out_op1.v.eq(self.mod.out_op1.v),
249 #self.out_op2.v.eq(self.mod.out_op2.v),
250 self
.out_op1
.eq(self
.mod
.out_op1
),
251 self
.out_op2
.eq(self
.mod
.out_op2
)
254 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Bundle two FP operands with a multiplexer id.

    ``width`` and ``m_extra`` are forwarded unchanged to ``FPNumBase``
    for both operands; ``id_wid`` is the bit width of ``mid``.
    """
    self.a = FPNumBase(width, m_extra)  # first operand
    self.b = FPNumBase(width, m_extra)  # second operand
    self.mid = Signal(id_wid, reset_less=True)  # multiplex id
264 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
267 class FPAddSpecialCasesMod
:
268 """ special cases: NaNs, infs, zeros, denormalised
269 NOTE: some of these are unique to add. see "Special Operations"
270 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
273 def __init__(self
, width
, id_wid
):
276 self
.i
= self
.ispec()
277 self
.o
= self
.ospec()
278 self
.out_do_z
= Signal(reset_less
=True)
281 return FPNumBase2Ops(self
.width
, self
.id_wid
)
284 return FPPackData(self
.width
, self
.id_wid
)
def setup(self, m, in_a, in_b, out_do_z):
    """Link this module to its inputs and outputs.

    Registers the module on ``m`` as submodule ``specialcases``, feeds
    ``in_a`` / ``in_b`` combinatorially into ``self.i.a`` / ``self.i.b``
    and copies ``self.out_do_z`` onto the caller's ``out_do_z``.
    """
    m.submodules.specialcases = self
    m.d.comb += [
        self.i.a.eq(in_a),
        self.i.b.eq(in_b),
        out_do_z.eq(self.out_do_z),
    ]
294 def elaborate(self
, platform
):
297 m
.submodules
.sc_in_a
= self
.i
.a
298 m
.submodules
.sc_in_b
= self
.i
.b
299 m
.submodules
.sc_out_z
= self
.o
.z
302 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
305 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
307 # if a is NaN or b is NaN return NaN
308 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
309 m
.d
.comb
+= self
.out_do_z
.eq(1)
310 m
.d
.comb
+= self
.o
.z
.nan(0)
312 # XXX WEIRDNESS for FP16 non-canonical NaN handling
315 ## if a is zero and b is NaN return -b
316 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
320 ## if b is zero and a is NaN return -a
321 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
325 ## if a is -zero and b is NaN return -b
326 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
330 ## if b is -zero and a is NaN return -a
331 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
332 # m.d.comb += self.out_do_z.eq(1)
333 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
335 # if a is inf return inf (or NaN)
336 with m
.Elif(self
.i
.a
.is_inf
):
337 m
.d
.comb
+= self
.out_do_z
.eq(1)
338 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
339 # if a is inf and signs don't match return NaN
340 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
341 m
.d
.comb
+= self
.o
.z
.nan(0)
343 # if b is inf return inf
344 with m
.Elif(self
.i
.b
.is_inf
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
348 # if a is zero and b zero return signed-a/b
349 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
350 m
.d
.comb
+= self
.out_do_z
.eq(1)
351 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
355 # if a is zero return b
356 with m
.Elif(self
.i
.a
.is_zero
):
357 m
.d
.comb
+= self
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
361 # if b is zero return a
362 with m
.Elif(self
.i
.b
.is_zero
):
363 m
.d
.comb
+= self
.out_do_z
.eq(1)
364 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
367 # if a equal to -b return zero (+ve zero)
368 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
369 m
.d
.comb
+= self
.out_do_z
.eq(1)
370 m
.d
.comb
+= self
.o
.z
.zero(0)
372 # Denormalised Number checks
374 m
.d
.comb
+= self
.out_do_z
.eq(0)
380 def __init__(self
, id_wid
):
383 self
.in_mid
= Signal(id_wid
, reset_less
=True)
384 self
.out_mid
= Signal(id_wid
, reset_less
=True)
390 if self
.id_wid
is not None:
391 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
394 class FPAddSpecialCases(FPState
, FPID
):
395 """ special cases: NaNs, infs, zeros, denormalised
396 NOTE: some of these are unique to add. see "Special Operations"
397 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Set up the "special_cases" state and its combinatorial module.

    ``width`` is the FP operand width and ``id_wid`` the width of the
    multiplexer id; both are needed by ``FPAddSpecialCasesMod``.
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid); calling it with
    # width alone raised TypeError, so id_wid is now passed through.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """Link the special-cases module to its inputs and outputs.

    Delegates the operand wiring to ``self.mod.setup`` (which also
    drives ``self.out_do_z``), then forwards ``in_mid`` to
    ``self.in_mid`` unless ``self.in_mid`` is None.
    """
    self.mod.setup(m, in_a, in_b, self.out_do_z)
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
416 with m
.If(self
.out_do_z
):
417 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
420 m
.next
= "denormalise"
423 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
424 """ special cases: NaNs, infs, zeros, denormalised
425 NOTE: some of these are unique to add. see "Special Operations"
426 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
429 def __init__(self
, width
, id_wid
):
430 FPState
.__init
__(self
, "special_cases")
431 FPID
.__init
__(self
, id_wid
)
432 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
433 self
.out_z
= self
.smod
.ospec()
434 self
.out_do_z
= Signal(reset_less
=True)
436 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
437 self
.o
= self
.dmod
.ospec()
def setup(self, m, in_a, in_b, in_mid):
    """Link both sub-modules to the same pair of operands.

    The special-cases module (``smod``) and the denormalise module
    (``dmod``) each receive ``in_a`` / ``in_b``; ``smod`` additionally
    drives ``self.out_do_z``.  ``in_mid`` is forwarded to
    ``self.in_mid`` unless ``self.in_mid`` is None.
    """
    self.smod.setup(m, in_a, in_b, self.out_do_z)
    self.dmod.setup(m, in_a, in_b)
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
449 with m
.If(self
.out_do_z
):
450 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
454 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
455 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
458 class FPAddDeNormMod(FPState
):
460 def __init__(self
, width
, id_wid
):
463 self
.i
= self
.ispec()
464 self
.o
= self
.ospec()
467 return FPNumBase2Ops(self
.width
, self
.id_wid
)
470 return FPNumBase2Ops(self
.width
, self
.id_wid
)
def setup(self, m, in_a, in_b):
    """Link this module to its inputs.

    Registers the module on ``m`` as submodule ``denormalise`` and
    feeds ``in_a`` / ``in_b`` combinatorially into ``self.i.a`` /
    ``self.i.b``.
    """
    m.submodules.denormalise = self
    m.d.comb += [
        self.i.a.eq(in_a),
        self.i.b.eq(in_b),
    ]
479 def elaborate(self
, platform
):
481 m
.submodules
.denorm_in_a
= self
.i
.a
482 m
.submodules
.denorm_in_b
= self
.i
.b
483 m
.submodules
.denorm_out_a
= self
.o
.a
484 m
.submodules
.denorm_out_b
= self
.o
.b
485 # hmmm, don't like repeating identical code
486 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
487 with m
.If(self
.i
.a
.exp_n127
):
488 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
490 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
492 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
493 with m
.If(self
.i
.b
.exp_n127
):
494 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
496 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
501 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Set up the "denormalise" state and its combinatorial module.

    ``width`` is the FP operand width and ``id_wid`` the width of the
    multiplexer id; both are needed by ``FPAddDeNormMod``.
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    # FPAddDeNormMod.__init__ takes (width, id_wid); calling it with
    # width alone raised TypeError, so id_wid is now passed through.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
def setup(self, m, in_a, in_b, in_mid):
    """Link the denormalise module to its inputs.

    Operand wiring is delegated to ``self.mod.setup``; ``in_mid`` is
    forwarded to ``self.in_mid`` unless ``self.in_mid`` is None.
    """
    self.mod.setup(m, in_a, in_b)
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
519 # Denormalised Number checks
521 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
522 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
525 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the operand records for the multi-cycle aligner.

    ``width`` is the FP operand width, shared by all four records.
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs (FPNumIn, since elaborate() calls shift_down on them)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # set to 1 by elaborate() in the "exponents equal" case
    self.exp_eq = Signal(reset_less=True)
534 def elaborate(self
, platform
):
535 # This one however (single-cycle) will do the shift
540 m
.submodules
.align_in_a
= self
.in_a
541 m
.submodules
.align_in_b
= self
.in_b
542 m
.submodules
.align_out_a
= self
.out_a
543 m
.submodules
.align_out_b
= self
.out_b
545 # NOTE: this does *not* do single-cycle multi-shifting,
546 # it *STAYS* in the align state until exponents match
548 # exponent of a greater than b: shift b down
549 m
.d
.comb
+= self
.exp_eq
.eq(0)
550 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
551 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
552 agtb
= Signal(reset_less
=True)
553 altb
= Signal(reset_less
=True)
554 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
555 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
557 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
558 # exponent of b greater than a: shift a down
560 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
561 # exponents equal: move to next stage.
563 m
.d
.comb
+= self
.exp_eq
.eq(1)
567 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Set up the multi-cycle "align" state.

    ``width`` is the FP operand width; ``id_wid`` is handed to the
    ``FPID`` base for the multiplexer id.
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # registered copies of the module's aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors self.mod.exp_eq (wired in setup)
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """Link the align module to its inputs and outputs.

    Registers ``self.mod`` on ``m`` as submodule ``align``, feeds it
    ``in_a`` / ``in_b`` and mirrors its ``exp_eq`` flag onto
    ``self.exp_eq``.  ``in_mid`` is forwarded to ``self.in_mid`` unless
    ``self.in_mid`` is None.
    """
    mod = self.mod
    m.submodules.align = mod
    # NOTE: out_a/out_b are not driven combinatorially here; the direct
    # copies from mod.out_a/mod.out_b are disabled in this method.
    m.d.comb += [
        mod.in_a.eq(in_a),
        mod.in_b.eq(in_b),
        self.exp_eq.eq(mod.exp_eq),
    ]
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
591 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
592 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
593 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Bundle two FPNumIn operands with a multiplexer id.

    ``width`` is the FP operand width; ``id_wid`` is the bit width of
    ``mid``.
    """
    self.a = FPNumIn(None, width)  # first operand
    self.b = FPNumIn(None, width)  # second operand
    self.mid = Signal(id_wid, reset_less=True)  # multiplex id
605 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
608 class FPAddAlignSingleMod
:
610 def __init__(self
, width
, id_wid
):
613 self
.i
= self
.ispec()
614 self
.o
= self
.ospec()
617 return FPNumBase2Ops(self
.width
, self
.id_wid
)
620 return FPNumIn2Ops(self
.width
, self
.id_wid
)
def setup(self, m, in_a, in_b):
    """Link this module to its inputs.

    Registers the module on ``m`` as submodule ``align`` and feeds
    ``in_a`` / ``in_b`` combinatorially into ``self.i.a`` /
    ``self.i.b``.
    """
    m.submodules.align = self
    m.d.comb += [
        self.i.a.eq(in_a),
        self.i.b.eq(in_b),
    ]
629 def elaborate(self
, platform
):
630 """ Aligns A against B or B against A, depending on which has the
631 greater exponent. This is done in a *single* cycle using
632 variable-width bit-shift
634 the shifter used here is quite expensive in terms of gates.
635 Mux A or B in (and out) into temporaries, as only one of them
636 needs to be aligned against the other
640 m
.submodules
.align_in_a
= self
.i
.a
641 m
.submodules
.align_in_b
= self
.i
.b
642 m
.submodules
.align_out_a
= self
.o
.a
643 m
.submodules
.align_out_b
= self
.o
.b
645 # temporary (muxed) input and output to be shifted
646 t_inp
= FPNumBase(self
.width
)
647 t_out
= FPNumIn(None, self
.width
)
648 espec
= (len(self
.i
.a
.e
), True)
649 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
650 m
.submodules
.align_t_in
= t_inp
651 m
.submodules
.align_t_out
= t_out
652 m
.submodules
.multishift_r
= msr
654 ediff
= Signal(espec
, reset_less
=True)
655 ediffr
= Signal(espec
, reset_less
=True)
656 tdiff
= Signal(espec
, reset_less
=True)
657 elz
= Signal(reset_less
=True)
658 egz
= Signal(reset_less
=True)
660 # connect multi-shifter to t_inp/out mantissa (and tdiff)
661 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
662 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
663 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
664 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
665 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
667 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
668 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
669 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
670 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
672 # default: A-exp == B-exp, A and B untouched (fall through)
673 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
674 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
675 # only one shifter (muxed)
676 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
677 # exponent of a greater than b: shift b down
679 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
682 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
684 # exponent of b greater than a: shift a down
686 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
689 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
694 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Set up the single-cycle "align" state.

    ``width`` is the FP operand width; ``id_wid`` is the multiplexer id
    width, shared with the ``FPID`` base and the align module.
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # registered copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
def setup(self, m, in_a, in_b, in_mid):
    """Link the single-cycle align module to its inputs.

    Operand wiring is delegated to ``self.mod.setup``; ``in_mid`` is
    forwarded to ``self.in_mid`` unless ``self.in_mid`` is None.
    """
    self.mod.setup(m, in_a, in_b)
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
712 # NOTE: could be done as comb
713 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
714 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
718 class FPAddAlignSingleAdd(FPState
, FPID
):
720 def __init__(self
, width
, id_wid
):
721 FPState
.__init
__(self
, "align")
722 FPID
.__init
__(self
, id_wid
)
723 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
724 self
.o
= self
.mod
.ospec()
726 self
.a0mod
= FPAddStage0Mod(width
, id_wid
)
727 self
.a0o
= self
.a0mod
.ospec()
729 self
.a1mod
= FPAddStage1Mod(width
, id_wid
)
730 self
.a1o
= self
.a1mod
.ospec()
732 def setup(self
, m
, in_a
, in_b
, in_mid
):
733 """ links module to inputs and outputs
735 self
.mod
.setup(m
, in_a
, in_b
)
736 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
738 self
.a0mod
.setup(m
, self
.o
.a
, self
.o
.b
)
739 m
.d
.comb
+= self
.a0o
.eq(self
.a0mod
.o
)
741 self
.a1mod
.setup(m
, self
.a0o
.tot
, self
.a0o
.z
)
743 if self
.in_mid
is not None:
744 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
748 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
749 m
.next
= "normalise_1"
752 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the output record of the first add stage.

    ``width`` is the FP width of ``z``; ``id_wid`` is the bit width of
    ``mid``.
    """
    self.z = FPNumBase(width, False)
    # mantissa total: four bits wider than z's mantissa
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)  # multiplex id
760 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
763 class FPAddStage0Mod
:
765 def __init__(self
, width
, id_wid
):
768 self
.i
= self
.ispec()
769 self
.o
= self
.ospec()
772 return FPNumBase2Ops(self
.width
, self
.id_wid
)
775 return FPAddStage0Data(self
.width
, self
.id_wid
)
def setup(self, m, in_a, in_b):
    """Link this module to its inputs.

    Registers the module on ``m`` as submodule ``add0`` and feeds
    ``in_a`` / ``in_b`` combinatorially into ``self.i.a`` /
    ``self.i.b``.
    """
    m.submodules.add0 = self
    m.d.comb += [
        self.i.a.eq(in_a),
        self.i.b.eq(in_b),
    ]
784 def elaborate(self
, platform
):
786 m
.submodules
.add0_in_a
= self
.i
.a
787 m
.submodules
.add0_in_b
= self
.i
.b
788 m
.submodules
.add0_out_z
= self
.o
.z
790 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
792 # store intermediate tests (and zero-extended mantissas)
793 seq
= Signal(reset_less
=True)
794 mge
= Signal(reset_less
=True)
795 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
796 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
797 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
798 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
799 am0
.eq(Cat(self
.i
.a
.m
, 0)),
800 bm0
.eq(Cat(self
.i
.b
.m
, 0))
802 # same-sign (both negative or both positive) add mantissas
805 self
.o
.tot
.eq(am0
+ bm0
),
806 self
.o
.z
.s
.eq(self
.i
.a
.s
)
808 # a mantissa greater than b, use a
811 self
.o
.tot
.eq(am0
- bm0
),
812 self
.o
.z
.s
.eq(self
.i
.a
.s
)
814 # b mantissa greater than a, use b
817 self
.o
.tot
.eq(bm0
- am0
),
818 self
.o
.z
.s
.eq(self
.i
.b
.s
)
823 class FPAddStage0(FPState
, FPID
):
824 """ First stage of add. covers same-sign (add) and subtract
825 special-casing when mantissas are greater or equal, to
826 give greatest accuracy.
def __init__(self, width, id_wid):
    """Set up the "add_0" state and its combinatorial module.

    ``width`` is the FP operand width and ``id_wid`` the width of the
    multiplexer id; both are needed by ``FPAddStage0Mod``.
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    # FPAddStage0Mod.__init__ takes (width, id_wid); calling it with
    # width alone raised TypeError, so id_wid is now passed through.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
def setup(self, m, in_a, in_b, in_mid):
    """Link the first add stage to its inputs.

    Operand wiring is delegated to ``self.mod.setup``; ``in_mid`` is
    forwarded to ``self.in_mid`` unless ``self.in_mid`` is None.
    """
    self.mod.setup(m, in_a, in_b)
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
844 # NOTE: these could be done as combinatorial (merge add0+add1)
845 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
849 class FPAddStage1Data
:
851 def __init__(self
, width
, id_wid
):
852 self
.z
= FPNumBase(width
, False)
854 self
.mid
= Signal(id_wid
, reset_less
=True)
857 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
861 class FPAddStage1Mod(FPState
):
862 """ Second stage of add: preparation for normalisation.
863 detects when tot sum is too big (tot[27] is kinda a carry bit)
866 def __init__(self
, width
, id_wid
):
869 self
.i
= self
.ispec()
870 self
.o
= self
.ospec()
873 return FPAddStage0Data(self
.width
, self
.id_wid
)
876 return FPAddStage1Data(self
.width
, self
.id_wid
)
878 def setup(self
, m
, in_tot
, in_z
):
879 """ links module to inputs and outputs
881 m
.submodules
.add1
= self
882 m
.submodules
.add1_out_overflow
= self
.o
.of
884 m
.d
.comb
+= self
.i
.z
.eq(in_z
)
885 m
.d
.comb
+= self
.i
.tot
.eq(in_tot
)
887 def elaborate(self
, platform
):
889 #m.submodules.norm1_in_overflow = self.in_of
890 #m.submodules.norm1_out_overflow = self.out_of
891 #m.submodules.norm1_in_z = self.in_z
892 #m.submodules.norm1_out_z = self.out_z
893 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
894 # tot[-1] (MSB) gets set when the sum overflows. shift result down
895 with m
.If(self
.i
.tot
[-1]):
897 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
898 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
899 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
900 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
901 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
902 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
904 # tot[-1] (MSB) zero case
907 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
908 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
909 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
910 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
911 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
916 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Set up the "add_1" state and its combinatorial module.

    ``width`` is the FP operand width and ``id_wid`` the width of the
    multiplexer id; both are needed by ``FPAddStage1Mod``.
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    # FPAddStage1Mod.__init__ takes (width, id_wid); calling it with
    # width alone raised TypeError, so id_wid is now passed through.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # cleared in setup() and set to 1 together with the output copies
    self.norm_stb = Signal()
926 def setup(self
, m
, in_tot
, in_z
, in_mid
):
927 """ links module to inputs and outputs
929 self
.mod
.setup(m
, in_tot
, in_z
)
931 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
933 if self
.in_mid
is not None:
934 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
938 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
939 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
940 m
.d
.sync
+= self
.norm_stb
.eq(1)
941 m
.next
= "normalise_1"
944 class FPNormaliseModSingle
:
946 def __init__(self
, width
):
948 self
.in_z
= self
.ispec()
949 self
.out_z
= self
.ospec()
952 return FPNumBase(self
.width
, False)
955 return FPNumBase(self
.width
, False)
def setup(self, m, in_z, out_z):
    """Link this module to its input and output.

    Registers the module on ``m`` as submodule ``normalise``, feeds
    ``in_z`` into ``self.in_z`` and copies ``self.out_z`` onto the
    caller's ``out_z`` (both combinatorially).
    """
    m.submodules.normalise = self
    m.d.comb += [
        self.in_z.eq(in_z),
        out_z.eq(self.out_z),
    ]
964 def elaborate(self
, platform
):
967 mwid
= self
.out_z
.m_width
+2
968 pe
= PriorityEncoder(mwid
)
969 m
.submodules
.norm_pe
= pe
971 m
.submodules
.norm1_out_z
= self
.out_z
972 m
.submodules
.norm1_in_z
= self
.in_z
974 in_z
= FPNumBase(self
.width
, False)
976 m
.submodules
.norm1_insel_z
= in_z
977 m
.submodules
.norm1_insel_overflow
= in_of
979 espec
= (len(in_z
.e
), True)
980 ediff_n126
= Signal(espec
, reset_less
=True)
981 msr
= MultiShiftRMerge(mwid
, espec
)
982 m
.submodules
.multishift_r
= msr
984 m
.d
.comb
+= in_z
.eq(self
.in_z
)
985 m
.d
.comb
+= in_of
.eq(self
.in_of
)
986 # initialise out from in (overridden below)
987 m
.d
.comb
+= self
.out_z
.eq(in_z
)
988 m
.d
.comb
+= self
.out_of
.eq(in_of
)
989 # normalisation decrease condition
990 decrease
= Signal(reset_less
=True)
991 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
994 # *sigh* not entirely obvious: count leading zeros (clz)
995 # with a PriorityEncoder: to find from the MSB
996 # we reverse the order of the bits.
997 temp_m
= Signal(mwid
, reset_less
=True)
998 temp_s
= Signal(mwid
+1, reset_less
=True)
999 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1001 # cat round and guard bits back into the mantissa
1002 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1003 pe
.i
.eq(temp_m
[::-1]), # inverted
1004 clz
.eq(pe
.o
), # count zeros from MSB down
1005 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1006 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1007 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Create the output record of the normalisation stage.

    ``width`` is the FP width of ``z``; ``id_wid`` is the bit width of
    ``mid``.
    """
    self.roundz = Signal(reset_less=True)  # single-bit rounding flag
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)  # multiplex id
1020 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1023 class FPNorm1ModSingle
:
1025 def __init__(self
, width
, id_wid
):
1027 self
.id_wid
= id_wid
1028 self
.i
= self
.ispec()
1029 self
.o
= self
.ospec()
1032 return FPAddStage1Data(self
.width
, self
.id_wid
)
1035 return FPNorm1Data(self
.width
, self
.id_wid
)
1037 def setup(self
, m
, in_z
, in_of
, out_z
):
1038 """ links module to inputs and outputs
1040 m
.submodules
.normalise_1
= self
1042 m
.d
.comb
+= self
.i
.z
.eq(in_z
)
1043 m
.d
.comb
+= self
.i
.of
.eq(in_of
)
1045 m
.d
.comb
+= out_z
.eq(self
.o
.z
)
1047 def elaborate(self
, platform
):
1050 mwid
= self
.o
.z
.m_width
+2
1051 pe
= PriorityEncoder(mwid
)
1052 m
.submodules
.norm_pe
= pe
1055 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1057 m
.submodules
.norm1_out_z
= self
.o
.z
1058 m
.submodules
.norm1_out_overflow
= of
1059 m
.submodules
.norm1_in_z
= self
.i
.z
1060 m
.submodules
.norm1_in_overflow
= self
.i
.of
1063 m
.submodules
.norm1_insel_z
= i
.z
1064 m
.submodules
.norm1_insel_overflow
= i
.of
1066 espec
= (len(i
.z
.e
), True)
1067 ediff_n126
= Signal(espec
, reset_less
=True)
1068 msr
= MultiShiftRMerge(mwid
, espec
)
1069 m
.submodules
.multishift_r
= msr
1071 m
.d
.comb
+= i
.eq(self
.i
)
1072 # initialise out from in (overridden below)
1073 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1074 m
.d
.comb
+= of
.eq(i
.of
)
1075 # normalisation increase/decrease conditions
1076 decrease
= Signal(reset_less
=True)
1077 increase
= Signal(reset_less
=True)
1078 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1079 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1081 with m
.If(decrease
):
1082 # *sigh* not entirely obvious: count leading zeros (clz)
1083 # with a PriorityEncoder: to find from the MSB
1084 # we reverse the order of the bits.
1085 temp_m
= Signal(mwid
, reset_less
=True)
1086 temp_s
= Signal(mwid
+1, reset_less
=True)
1087 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1088 # make sure that the amount to decrease by does NOT
1089 # go below the minimum non-INF/NaN exponent
1090 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1093 # cat round and guard bits back into the mantissa
1094 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1095 pe
.i
.eq(temp_m
[::-1]), # inverted
1096 clz
.eq(limclz
), # count zeros from MSB down
1097 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1098 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1099 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1100 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1101 # overflow in bits 0..1: got shifted too (leave sticky)
1102 of
.guard
.eq(temp_s
[1]), # guard
1103 of
.round_bit
.eq(temp_s
[0]), # round
1106 with m
.Elif(increase
):
1107 temp_m
= Signal(mwid
+1, reset_less
=True)
1109 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1111 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1112 # connect multi-shifter to inp/out mantissa (and ediff)
1114 msr
.diff
.eq(ediff_n126
),
1115 self
.o
.z
.m
.eq(msr
.m
[3:]),
1116 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1117 # overflow in bits 0..1: got shifted too (leave sticky)
1118 of
.guard
.eq(temp_s
[2]), # guard
1119 of
.round_bit
.eq(temp_s
[1]), # round
1120 of
.sticky
.eq(temp_s
[0]), # sticky
1121 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
class FPNorm1ModMulti:
    """Combinatorial normaliser that moves the operand one bit per step.

    The operand is chosen by ``in_select``: ``in_z``/``in_of`` when it is
    set, ``temp_z``/``temp_of`` otherwise.  The (possibly shifted) result
    is presented on ``out_z``/``out_of``; ``out_norm`` is raised whenever
    this step had to adjust the exponent.
    """

    def __init__(self, width, single_cycle=True):
        """Create the module ports.

        * width: bit-width handed to every FPNumBase created here
        * single_cycle: accepted but not used by this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # elaborate() drives this flag, so it has to exist before
        # elaboration (previously the attribute lookup there failed).
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """Build and return the Module holding the normalisation logic."""
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies of whichever operand in_select picks
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState, FPID):
    """FSM state "normalise_1" built on the single-cycle normaliser.

    setup() wires the normalisation module to its inputs; action()
    registers the module's ``roundz`` flag and advances to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """Create the state.

        * width: bit-width passed to the normaliser and to ``out_z``
        * id_wid: identifier width, passed to FPID and to the normaliser
        * single_cycle: accepted but not used by this class
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # Same two-argument construction that FPNormToPack uses for
        # FPNorm1ModSingle.
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Per-state FSM behaviour: latch roundz, then go to "round"."""
        self.idsync(m)
        # The module exposes its result record as ``o`` (FPNormToPack
        # reads ``nmod.o.roundz`` the same way).
        m.d.sync += self.out_roundz.eq(self.mod.o.roundz)
        m.next = "round"
class FPNorm1Multi(FPState, FPID):
    """FSM state "normalise_1" built on the multi-cycle normaliser.

    The state stays in "normalise_1" while ``out_norm`` is high and
    moves on to "round" once it drops.
    """

    def __init__(self, width, id_wid):
        """Create the state.

        * width: bit-width passed to the normaliser and the FP numbers
        * id_wid: identifier width, passed to FPID
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        # NOTE(review): relies on FPNorm1ModMulti providing a setup()
        # with this nine-argument form -- confirm it exists.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Per-state FSM behaviour for the normalisation loop."""
        self.idsync(m)
        # accept when the strobe is high and no ack is pending
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed this step's result back for the next iteration
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """FSM state "normalise_1" for the compact pipeline.

    setup() chains normalisation, rounding, corrections and packing
    combinatorially; action() registers the packed value and moves to
    "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """Create the state.

        * width: bit-width handed to every chained module
        * id_wid: identifier width, passed to FPID
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        width, id_wid = self.width, self.id_wid

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(width, id_wid)
        n_out = nmod.ospec()
        nmod.setup(m, in_z, in_of, n_out.z)
        m.d.comb += n_out.roundz.eq(nmod.o.roundz)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(width, id_wid)
        r_out_z = rmod.ospec()
        rmod.setup(m, n_out.z, n_out.roundz)
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(width, id_wid)
        c_out_z = cmod.ospec()
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections); kept on self because action()
        # reads the packed value from it
        self.pmod = FPPackMod(width, id_wid)
        self.out_z = self.pmod.ospec()
        self.pmod.setup(m, c_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Per-state FSM behaviour: latch the packed value and move on."""
        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v)  # outputs packed result
        m.next = "pack_put_z"
1310 def __init__(self
, width
, id_wid
):
1311 self
.z
= FPNumBase(width
, False)
1312 self
.mid
= Signal(id_wid
, reset_less
=True)
1315 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1320 def __init__(self
, width
, id_wid
):
1322 self
.id_wid
= id_wid
1323 self
.i
= self
.ispec()
1324 self
.out_z
= self
.ospec()
1327 return FPNorm1Data(self
.width
, self
.id_wid
)
1330 return FPRoundData(self
.width
, self
.id_wid
)
1332 def setup(self
, m
, in_z
, roundz
):
1333 m
.submodules
.roundz
= self
1335 m
.d
.comb
+= self
.i
.z
.eq(in_z
)
1336 m
.d
.comb
+= self
.i
.roundz
.eq(roundz
)
1338 def elaborate(self
, platform
):
1340 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1341 with m
.If(self
.i
.roundz
):
1342 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1343 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1344 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
class FPRound(FPState, FPID):
    """FSM state "round": registers the rounded number, then moves to
    "corrections".
    """

    def __init__(self, width, id_wid):
        """Create the state.

        * width: bit-width passed to the rounding module
        * id_wid: identifier width, passed to FPID and the rounding module
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod takes (width, id_wid), as in FPNormToPack; passing
        # only the width would fail at construction time.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Per-state FSM behaviour: latch the module output and move on."""
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """Combinatorial exponent correction.

    The output is a copy of the input, except that the exponent is
    replaced by the input's ``N127`` constant when the input reports
    ``is_denormalised``.
    """

    def __init__(self, width, id_wid):
        """Store the widths and create the input/output records."""
        # both widths must be set first: ispec()/ospec() read them
        self.width = width
        self.id_wid = id_wid
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """Return a fresh FPRoundData record for the input side."""
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """Return a fresh FPRoundData record for the output side."""
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.eq(in_z)

    def elaborate(self, platform):
        """Build and return the Module holding the correction logic."""
        m = Module()
        num_in = self.in_z.z
        num_out = self.out_z.z
        m.submodules.corr_in_z = num_in
        m.submodules.corr_out_z = num_out
        m.d.comb += self.out_z.eq(self.in_z)
        with m.If(num_in.is_denormalised):
            m.d.comb += num_out.e.eq(num_in.N127)
        return m
class FPCorrections(FPState, FPID):
    """FSM state "corrections": registers the corrected number, then
    moves to "pack".
    """

    def __init__(self, width, id_wid):
        """Create the state.

        * width: bit-width passed to the corrections module
        * id_wid: identifier width, passed to FPID and the module
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod.__init__ requires both width and id_wid;
        # passing only the width raised a TypeError.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Per-state FSM behaviour: latch the module output and move on."""
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "pack"
1423 def __init__(self
, width
, id_wid
):
1424 self
.z
= FPNumOut(width
, False)
1425 self
.mid
= Signal(id_wid
, reset_less
=True)
1428 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1433 def __init__(self
, width
, id_wid
):
1435 self
.id_wid
= id_wid
1436 self
.i
= self
.ispec()
1437 self
.o
= self
.ospec()
1440 return FPRoundData(self
.width
, self
.id_wid
)
1443 return FPPackData(self
.width
, self
.id_wid
)
1445 def setup(self
, m
, in_z
):
1446 """ links module to inputs and outputs
1448 m
.submodules
.pack
= self
1449 m
.d
.comb
+= self
.i
.eq(in_z
)
1451 def elaborate(self
, platform
):
1453 m
.submodules
.pack_in_z
= self
.i
.z
1454 with m
.If(self
.i
.z
.is_overflowed
):
1455 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1457 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1462 def __init__(self
, width
, id_wid
):
1463 self
.z
= FPNumOut(width
, False)
1464 self
.mid
= Signal(id_wid
, reset_less
=True)
1467 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState, FPID):
    """FSM state "pack": registers the packed value, then moves to
    "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """Create the state.

        * width: bit-width passed to the pack module
        * id_wid: identifier width, passed to FPID and the pack module
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod takes (width, id_wid), as in FPNormToPack.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """Input record type: delegated to the pack module."""
        return self.mod.ispec()

    def ospec(self):
        """Output record type: delegated to the pack module."""
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Per-state FSM behaviour: latch the packed value and move on."""
        self.idsync(m)
        # The records returned by ospec() hold the number under ``z`` and
        # the pack module exposes its result as ``o`` (the same access
        # FPNormToPack.action uses); neither has a top-level ``v``.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """FSM state that presents ``in_z`` on ``out_z`` with a stb/ack
    handshake and then moves to ``to_state``.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """Create the state.

        * state: FSM state name handled by this object
        * in_z / out_z: source and destination; both are accessed via ``.v``
        * in_mid / out_mid: identifier source and destination (in_mid
          may be None, in which case no identifier is copied)
        * to_state: next FSM state; defaults to "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """Per-state FSM behaviour: copy the value and run the handshake."""
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += self.out_z.v.eq(self.in_z.v)
        handshake_done = self.out_z.stb & self.out_z.ack
        with m.If(handshake_done):
            # receiver took the value: drop the strobe and leave
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
class FPPutZIdx(FPState):
    """FSM state that presents ``in_z`` on the entry of ``out_zs``
    indexed by ``in_mid``, using a stb/ack handshake.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """Create the state.

        * state: FSM state name handled by this object
        * in_z: source; accessed via ``.v``
        * out_zs: indexable collection of destinations
        * in_mid: index into ``out_zs``
        * to_state: next FSM state; defaults to "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """Per-state FSM behaviour: copy the value and run the handshake."""
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # local copies of the selected destination's handshake lines
        m.d.comb += outz_stb.eq(self.out_zs[self.in_mid].stb)
        m.d.comb += outz_ack.eq(self.out_zs[self.in_mid].ack)
        m.d.sync += self.out_zs[self.in_mid].v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseMod(FPID):
    """FP adder core: a list of FSM states assembled into one Module."""

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """Append ``state`` to the state list and return it."""
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """Add the one-stage-per-state pipeline to ``m``."""
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): operand attribute names on FPGet2Op are assumed
        # to be out_op1/out_op2 -- confirm against its definition.
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # FPPutZ reads ``in_z.v``; FPPack.out_z is a record holding the
        # number under ``z``, so hand over the number itself (the
        # compact path does the same with ``n1.out_z.z``).
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z.z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """Add the reduced-stage pipeline to ``m``."""
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): operand attribute names on FPGet2Op are assumed
        # to be out_op1/out_op2 -- confirm against its definition.
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o.a, sc.o.b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o.z, alm.a1o.of, alm.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.out_z,
                                    n1.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.out_z,
                                    sc.in_mid, self.out_mid))
class FPADDBase(FPState, FPID):
    """FSM state "fpadd" that wraps an FPADDBaseMod.

    setup() connects the wrapped adder's operands, identifier, trigger
    and result combinatorially; action() runs the stb/ack handshake that
    starts an addition and moves to "put_z" once the adder signals
    completion.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """Connect the wrapped adder and register it as a submodule.

        * m: parent Module
        * a, b: operands copied into in_a / in_b
        * add_stb: strobe registered into ``add_stb``
        * in_mid: identifier copied into ``in_mid``
        * out_z, out_mid: destination objects, stored on self and driven
          combinatorially from the wrapped adder
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """Per-state FSM behaviour: input handshake and completion."""
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge and hand over to the output state.
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
            # out_mid, out_z.v and out_z.stb are already driven
            # combinatorially in setup(), so nothing else is registered
            # here.
1760 def __init__(self
, width
, id_wid
):
1762 self
.id_wid
= id_wid
1764 for i
in range(rs_sz
):
1766 out_z
.name
= "out_z_%d" % i
1768 self
.res
= Array(res
)
1769 self
.in_z
= FPOp(width
)
1770 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1772 def setup(self
, m
, in_z
, in_mid
):
1773 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1774 self
.in_mid
.eq(in_mid
)]
1776 def get_fragment(self
, platform
=None):
1777 """ creates the HDL code-fragment for FPAdd
1780 m
.submodules
.res_in_z
= self
.in_z
1781 m
.submodules
+= self
.res
1793 """ FPADD: stages as follows:
1799 FPAddBase---> FPAddBaseMod
1801 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1803 FPAddBase is tricky: it is both a stage and *has* stages.
1804 Connection to FPAddBaseMod therefore requires an in stb/ack
1805 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1806 needs to be the thing that raises the incoming stb.
1809 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1812 * width: bit-width of IEEE754. supported: 16, 32, 64
1813 * id_wid: an identifier that is sync-connected to the input
1814 * single_cycle: True indicates each stage to complete in 1 clock
1817 self
.id_wid
= id_wid
1818 self
.single_cycle
= single_cycle
1820 #self.out_z = FPOp(width)
1821 self
.ids
= FPID(id_wid
)
1824 for i
in range(rs_sz
):
1827 in_a
.name
= "in_a_%d" % i
1828 in_b
.name
= "in_b_%d" % i
1829 rs
.append((in_a
, in_b
))
1833 for i
in range(rs_sz
):
1835 out_z
.name
= "out_z_%d" % i
1837 self
.res
= Array(res
)
1841 def add_state(self
, state
):
1842 self
.states
.append(state
)
1845 def get_fragment(self
, platform
=None):
1846 """ creates the HDL code-fragment for FPAdd
1849 m
.submodules
+= self
.rs
1851 in_a
= self
.rs
[0][0]
1852 in_b
= self
.rs
[0][1]
1854 out_z
= FPOp(self
.width
)
1855 out_mid
= Signal(self
.id_wid
, reset_less
=True)
1856 m
.submodules
.out_z
= out_z
1858 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1863 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1868 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1869 ab
= self
.add_state(ab
)
1870 ab
.setup(m
, a
, b
, getb
.out_decode
, self
.ids
.in_mid
,
1873 pz
= self
.add_state(FPPutZIdx("put_z", ab
.out_z
, self
.res
,
1876 with m
.FSM() as fsm
:
1878 for state
in self
.states
:
1879 with m
.State(state
.state_from
):
if __name__ == "__main__":
    # The first branch (full FPADD) is the one that runs; the second
    # builds the bare FPADDBase instead.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports() +
                 alu.rs[0][1].ports() +
                 alu.res[0].ports() +
                 [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=ports)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b] +
                 alu.in_t.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
        main(alu, ports=ports)

    # Calling verilog.convert() directly also works, but prefer the
    # command line: "python fname.py convert -t v"