1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 #from fpbase import FPNumShiftMultiRight
15 class FPState(FPBase
):
16 def __init__(self
, state_from
):
17 self
.state_from
= state_from
19 def set_inputs(self
, inputs
):
21 for k
,v
in inputs
.items():
24 def set_outputs(self
, outputs
):
25 self
.outputs
= outputs
26 for k
,v
in outputs
.items():
30 class FPGetSyncOpsMod
:
31 def __init__(self
, width
, num_ops
=2):
33 self
.num_ops
= num_ops
36 for i
in range(num_ops
):
37 inops
.append(Signal(width
, reset_less
=True))
38 outops
.append(Signal(width
, reset_less
=True))
41 self
.stb
= Signal(num_ops
)
43 self
.ready
= Signal(reset_less
=True)
44 self
.out_decode
= Signal(reset_less
=True)
46 def elaborate(self
, platform
):
48 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
49 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
50 with m
.If(self
.out_decode
):
51 for i
in range(self
.num_ops
):
53 self
.out_op
[i
].eq(self
.in_op
[i
]),
58 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
62 def __init__(self
, width
, num_ops
):
63 Trigger
.__init
__(self
)
65 self
.num_ops
= num_ops
68 for i
in range(num_ops
):
69 res
.append(Signal(width
))
74 for i
in range(self
.num_ops
):
82 def __init__(self
, width
, num_ops
=2, num_rows
=4):
84 self
.num_ops
= num_ops
85 self
.num_rows
= num_rows
86 self
.mmax
= int(log(self
.num_rows
) / log(2))
88 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
89 for i
in range(num_rows
):
90 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
91 self
.rs
= Array(self
.rs
)
93 self
.out_op
= FPOps(width
, num_ops
)
95 def elaborate(self
, platform
):
98 pe
= PriorityEncoder(self
.num_rows
)
99 m
.submodules
.selector
= pe
100 m
.submodules
.out_op
= self
.out_op
101 m
.submodules
+= self
.rs
103 # connect priority encoder
105 for i
in range(self
.num_rows
):
106 in_ready
.append(self
.rs
[i
].ready
)
107 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
109 active
= Signal(reset_less
=True)
110 out_en
= Signal(reset_less
=True)
111 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
112 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
114 # encoder active: ack relevant input, record MID, pass output
117 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
118 m
.d
.sync
+= rs
.ack
.eq(0)
119 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
120 for j
in range(self
.num_ops
):
121 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
123 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
124 # acks all default to zero
125 for i
in range(self
.num_rows
):
126 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
132 for i
in range(self
.num_rows
):
134 res
+= inop
.in_op
+ [inop
.stb
]
135 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand input port, the captured-value signal and
        the decode flag.

        width: passed to FPOp and used as the shape of out_op
    """
    # incoming operand: its .stb, .ack and .v are read by elaborate()
    self.in_op = FPOp(width)
    # receives in_op.v when out_decode is set (see elaborate())
    self.out_op = Signal(width)
    # driven combinatorially from in_op.ack & in_op.stb in elaborate()
    self.out_decode = Signal(reset_less=True)
144 def elaborate(self
, platform
):
146 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
147 m
.submodules
.get_op_in
= self
.in_op
148 #m.submodules.get_op_out = self.out_op
149 with m
.If(self
.out_decode
):
151 self
.out_op
.eq(self
.in_op
.v
),
156 class FPGetOp(FPState
):
160 def __init__(self
, in_state
, out_state
, in_op
, width
):
161 FPState
.__init
__(self
, in_state
)
162 self
.out_state
= out_state
163 self
.mod
= FPGetOpMod(width
)
165 self
.out_op
= Signal(width
)
166 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op):
    """ attaches the operand-fetch module to m and wires it up

        The module is registered as a submodule under the name held in
        self.state_from.  in_op is connected to the module's input and
        the module's decode flag is mirrored onto self.out_decode.
    """
    setattr(m.submodules, self.state_from, self.mod)
    #m.d.comb += self.out_op.eq(self.mod.out_op)
    m.d.comb += [
        self.mod.in_op.eq(in_op),
        self.out_decode.eq(self.mod.out_decode),
    ]
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 #m.submodules.get_op_in = self.in_op
204 m
.submodules
.get_op1_out
= self
.o
.a
205 m
.submodules
.get_op2_out
= self
.o
.b
206 out_op1
= FPNumIn(None, self
.width
)
207 out_op2
= FPNumIn(None, self
.width
)
208 with m
.If(self
.trigger
):
210 out_op1
.decode(self
.i
.a
),
211 out_op2
.decode(self
.i
.b
),
212 self
.o
.a
.eq(out_op1
),
213 self
.o
.b
.eq(out_op2
),
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ builds the two-operand fetch state

        in_state:  state name handed to FPState.__init__
        out_state: stored as self.out_state (assigned to m.next once
                   out_decode is set)
        width, id_wid: forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # a second output record, created by the module's own ospec()
    self.o = self.mod.ospec()
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
def setup(self, m, i, in_stb, in_ack):
    """ registers the fetch module as m.submodules.get_ops and wires it

        i and in_stb drive the module's input record and strobe; the
        module's ack is copied to both self.out_ack and the caller's
        in_ack, and its trigger is copied to self.out_decode.
    """
    fetch = self.mod
    m.submodules.get_ops = fetch
    m.d.comb += [
        fetch.i.eq(i),
        fetch.stb.eq(in_stb),
        self.out_ack.eq(fetch.ack),
        self.out_decode.eq(fetch.trigger),
        in_ack.eq(fetch.ack),
    ]
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ creates the a/b operand pair plus the accompanying mid signal

        width, m_extra: forwarded unchanged to FPNumBase for both operands
        id_wid: shape of the mid signal
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # presumably the multiplexer id travelling with the operands - confirm
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
263 class FPAddSpecialCasesMod
:
264 """ special cases: NaNs, infs, zeros, denormalised
265 NOTE: some of these are unique to add. see "Special Operations"
266 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
269 def __init__(self
, width
, id_wid
):
272 self
.i
= self
.ispec()
273 self
.o
= self
.ospec()
274 self
.out_do_z
= Signal(reset_less
=True)
277 return FPNumBase2Ops(self
.width
, self
.id_wid
)
280 return FPPackData(self
.width
, self
.id_wid
)
def setup(self, m, i, out_do_z):
    """ registers this module as m.submodules.specialcases, feeds it
        from i and exposes its out_do_z flag on the caller's out_do_z
    """
    m.submodules.specialcases = self
    m.d.comb += [
        self.i.eq(i),
        out_do_z.eq(self.out_do_z),
    ]
289 def elaborate(self
, platform
):
292 m
.submodules
.sc_in_a
= self
.i
.a
293 m
.submodules
.sc_in_b
= self
.i
.b
294 m
.submodules
.sc_out_z
= self
.o
.z
297 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
300 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
302 # if a is NaN or b is NaN return NaN
303 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
304 m
.d
.comb
+= self
.out_do_z
.eq(1)
305 m
.d
.comb
+= self
.o
.z
.nan(0)
307 # XXX WEIRDNESS for FP16 non-canonical NaN handling
310 ## if a is zero and b is NaN return -b
311 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
312 # m.d.comb += self.out_do_z.eq(1)
313 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
315 ## if b is zero and a is NaN return -a
316 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
317 # m.d.comb += self.out_do_z.eq(1)
318 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
320 ## if a is -zero and b is NaN return -b
321 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
322 # m.d.comb += self.out_do_z.eq(1)
323 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
325 ## if b is -zero and a is NaN return -a
326 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
327 # m.d.comb += self.out_do_z.eq(1)
328 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
330 # if a is inf return inf (or NaN)
331 with m
.Elif(self
.i
.a
.is_inf
):
332 m
.d
.comb
+= self
.out_do_z
.eq(1)
333 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
334 # if a is inf and signs don't match return NaN
335 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
336 m
.d
.comb
+= self
.o
.z
.nan(0)
338 # if b is inf return inf
339 with m
.Elif(self
.i
.b
.is_inf
):
340 m
.d
.comb
+= self
.out_do_z
.eq(1)
341 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
343 # if a is zero and b zero return signed-a/b
344 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
345 m
.d
.comb
+= self
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
350 # if a is zero return b
351 with m
.Elif(self
.i
.a
.is_zero
):
352 m
.d
.comb
+= self
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
356 # if b is zero return a
357 with m
.Elif(self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
362 # if a equal to -b return zero (+ve zero)
363 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
364 m
.d
.comb
+= self
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.zero(0)
367 # Denormalised Number checks
369 m
.d
.comb
+= self
.out_do_z
.eq(0)
375 def __init__(self
, id_wid
):
378 self
.in_mid
= Signal(id_wid
, reset_less
=True)
379 self
.out_mid
= Signal(id_wid
, reset_less
=True)
385 if self
.id_wid
is not None:
386 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
389 class FPAddSpecialCases(FPState
, FPID
):
390 """ special cases: NaNs, infs, zeros, denormalised
391 NOTE: some of these are unique to add. see "Special Operations"
392 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ builds the "special_cases" state

        width:  operand width, forwarded to FPAddSpecialCasesMod
        id_wid: forwarded to FPID.__init__ and to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid) with no default
    # for id_wid, so it has to be passed here as well.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """ links module to inputs and outputs

        m:      module being built
        in_a, in_b: operands handed on to self.mod.setup
        in_mid: connected to self.in_mid when that attribute is not None
    """
    # NOTE(review): FPAddSpecialCasesMod.setup in this file is declared as
    # setup(self, m, i, out_do_z); this call passes one argument more than
    # that signature accepts - confirm which side is current.
    self.mod.setup(m, in_a, in_b, self.out_do_z)
    if self.in_mid is not None:
        m.d.comb += self.in_mid.eq(in_mid)
411 with m
.If(self
.out_do_z
):
412 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
415 m
.next
= "denormalise"
418 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
419 """ special cases: NaNs, infs, zeros, denormalised
420 NOTE: some of these are unique to add. see "Special Operations"
421 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ builds a state holding both a special-cases module (smod) and a
        denormalise module (dmod)

        width, id_wid: forwarded to both sub-modules; id_wid also goes
                       to FPID.__init__
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    # handed to smod.setup() as the flag it drives
    self.out_do_z = Signal(reset_less=True)

    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
def setup(self, m, i, in_mid):
    """ sets up both sub-modules from the same input i and, when this
        state carries an in_mid signal, connects in_mid to it
    """
    self.smod.setup(m, i, self.out_do_z)
    self.dmod.setup(m, i)
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
444 with m
.If(self
.out_do_z
):
445 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
449 m
.d
.sync
+= self
.o
.a
.eq(self
.dmod
.o
.a
)
450 m
.d
.sync
+= self
.o
.b
.eq(self
.dmod
.o
.b
)
453 class FPAddDeNormMod(FPState
):
455 def __init__(self
, width
, id_wid
):
458 self
.i
= self
.ispec()
459 self
.o
= self
.ospec()
462 return FPNumBase2Ops(self
.width
, self
.id_wid
)
465 return FPNumBase2Ops(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ registers this module as m.submodules.denormalise and drives
        its input record from i
    """
    m.submodules.denormalise = self
    input_link = self.i.eq(i)
    m.d.comb += input_link
473 def elaborate(self
, platform
):
475 m
.submodules
.denorm_in_a
= self
.i
.a
476 m
.submodules
.denorm_in_b
= self
.i
.b
477 m
.submodules
.denorm_out_a
= self
.o
.a
478 m
.submodules
.denorm_out_b
= self
.o
.b
479 # hmmm, don't like repeating identical code
480 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
481 with m
.If(self
.i
.a
.exp_n127
):
482 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
484 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
486 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
487 with m
.If(self
.i
.b
.exp_n127
):
488 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
490 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
495 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the "denormalise" state

        width:  operand width, forwarded to FPAddDeNormMod and FPNumBase
        id_wid: forwarded to FPID.__init__ and to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)
    # FPAddDeNormMod.__init__ takes (width, id_wid) with no default for
    # id_wid, so it has to be passed here as well.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
def setup(self, m, in_a, in_b, in_mid):
    """ links module to inputs and outputs

        m:      module being built
        in_a, in_b: operands handed on to self.mod.setup
        in_mid: connected to self.in_mid when that attribute is not None
    """
    # NOTE(review): FPAddDeNormMod.setup in this file is declared as
    # setup(self, m, i); this call passes a and b separately - confirm
    # which side is current.
    self.mod.setup(m, in_a, in_b)
    if self.in_mid is not None:
        m.d.comb += self.in_mid.eq(in_mid)
513 # Denormalised Number checks
515 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
516 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
519 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the a/b inputs and outputs and the exponents-equal flag

        width: forwarded to FPNumBase (inputs) and FPNumIn (outputs)
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # elaborate() defaults this to 0 and also contains an assignment of 1
    # (see the "exponents equal" comment there)
    self.exp_eq = Signal(reset_less=True)
528 def elaborate(self
, platform
):
529 # This one however (single-cycle) will do the shift
534 m
.submodules
.align_in_a
= self
.in_a
535 m
.submodules
.align_in_b
= self
.in_b
536 m
.submodules
.align_out_a
= self
.out_a
537 m
.submodules
.align_out_b
= self
.out_b
539 # NOTE: this does *not* do single-cycle multi-shifting,
540 # it *STAYS* in the align state until exponents match
542 # exponent of a greater than b: shift b down
543 m
.d
.comb
+= self
.exp_eq
.eq(0)
544 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
545 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
546 agtb
= Signal(reset_less
=True)
547 altb
= Signal(reset_less
=True)
548 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
549 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
551 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
552 # exponent of b greater than a: shift a down
554 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
555 # exponents equal: move to next stage.
557 m
.d
.comb
+= self
.exp_eq
.eq(1)
561 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the multi-cycle "align" state

        width:  forwarded to FPAddAlignMultiMod and FPNumIn
        id_wid: forwarded to FPID.__init__
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors self.mod.exp_eq (connected in setup())
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """ registers the align module as m.submodules.align, feeds it in_a
        and in_b, mirrors its exp_eq flag, and connects in_mid when this
        state carries an in_mid signal
    """
    aligner = self.mod
    m.submodules.align = aligner
    #m.d.comb += self.out_a.eq(self.mod.out_a)
    #m.d.comb += self.out_b.eq(self.mod.out_b)
    m.d.comb += [
        aligner.in_a.eq(in_a),
        aligner.in_b.eq(in_b),
        self.exp_eq.eq(aligner.exp_eq),
    ]
    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
585 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
586 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
587 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ creates the a/b operand pair (as FPNumIn) plus the mid signal

        width:  forwarded to FPNumIn for both operands
        id_wid: shape of the mid signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # presumably the multiplexer id travelling with the operands - confirm
    self.mid = Signal(id_wid, reset_less=True)
599 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
602 class FPAddAlignSingleMod
:
604 def __init__(self
, width
, id_wid
):
607 self
.i
= self
.ispec()
608 self
.o
= self
.ospec()
611 return FPNumBase2Ops(self
.width
, self
.id_wid
)
614 return FPNumIn2Ops(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ registers this module as m.submodules.align and drives its
        input record from i
    """
    m.submodules.align = self
    input_link = self.i.eq(i)
    m.d.comb += input_link
622 def elaborate(self
, platform
):
623 """ Aligns A against B or B against A, depending on which has the
624 greater exponent. This is done in a *single* cycle using
625 variable-width bit-shift
627 the shifter used here is quite expensive in terms of gates.
628 Mux A or B in (and out) into temporaries, as only one of them
629 needs to be aligned against the other
633 m
.submodules
.align_in_a
= self
.i
.a
634 m
.submodules
.align_in_b
= self
.i
.b
635 m
.submodules
.align_out_a
= self
.o
.a
636 m
.submodules
.align_out_b
= self
.o
.b
638 # temporary (muxed) input and output to be shifted
639 t_inp
= FPNumBase(self
.width
)
640 t_out
= FPNumIn(None, self
.width
)
641 espec
= (len(self
.i
.a
.e
), True)
642 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
643 m
.submodules
.align_t_in
= t_inp
644 m
.submodules
.align_t_out
= t_out
645 m
.submodules
.multishift_r
= msr
647 ediff
= Signal(espec
, reset_less
=True)
648 ediffr
= Signal(espec
, reset_less
=True)
649 tdiff
= Signal(espec
, reset_less
=True)
650 elz
= Signal(reset_less
=True)
651 egz
= Signal(reset_less
=True)
653 # connect multi-shifter to t_inp/out mantissa (and tdiff)
654 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
655 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
656 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
657 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
658 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
660 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
661 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
662 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
663 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
665 # default: A-exp == B-exp, A and B untouched (fall through)
666 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
667 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
668 # only one shifter (muxed)
669 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
670 # exponent of a greater than b: shift b down
672 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
675 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
677 # exponent of b greater than a: shift a down
679 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
682 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
687 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the single-cycle "align" state

        width:  forwarded to FPAddAlignSingleMod and FPNumIn
        id_wid: forwarded to FPID.__init__ and FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)
    self.mod = FPAddAlignSingleMod(width, id_wid)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
def setup(self, m, in_a, in_b, in_mid):
    """ links module to inputs and outputs

        m:      module being built
        in_a, in_b: operands handed on to self.mod.setup
        in_mid: connected to self.in_mid when that attribute is not None
    """
    # NOTE(review): FPAddAlignSingleMod.setup in this file is declared as
    # setup(self, m, i); this call passes a and b separately - confirm
    # which side is current.
    self.mod.setup(m, in_a, in_b)
    if self.in_mid is not None:
        m.d.comb += self.in_mid.eq(in_mid)
705 # NOTE: could be done as comb
706 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
707 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
711 class FPAddAlignSingleAdd(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds one "align" state that owns three modules: the single-cycle
        aligner, add stage 0 and add stage 1

        width, id_wid: forwarded to all three modules; id_wid also goes
                       to FPID.__init__

        For each module an extra output record is created by calling the
        module's own ospec().
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)
    self.mod = FPAddAlignSingleMod(width, id_wid)
    self.o = self.mod.ospec()

    self.a0mod = FPAddStage0Mod(width, id_wid)
    self.a0o = self.a0mod.ospec()

    self.a1mod = FPAddStage1Mod(width, id_wid)
    self.a1o = self.a1mod.ospec()
725 def setup(self
, m
, i
, in_mid
):
726 """ links module to inputs and outputs
729 m
.d
.comb
+= self
.o
.eq(self
.mod
.o
)
731 self
.a0mod
.setup(m
, self
.o
)
732 m
.d
.comb
+= self
.a0o
.eq(self
.a0mod
.o
)
734 self
.a1mod
.setup(m
, self
.a0o
.tot
, self
.a0o
.z
)
736 if self
.in_mid
is not None:
737 m
.d
.comb
+= self
.in_mid
.eq(in_mid
)
741 m
.d
.sync
+= self
.a1o
.eq(self
.a1mod
.o
)
742 m
.next
= "normalise_1"
745 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ creates the result number z, the mantissa total and the mid signal

        width:  forwarded to FPNumBase
        id_wid: shape of the mid signal
    """
    self.z = FPNumBase(width, False)
    # four bits wider than z's m_width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
753 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
756 class FPAddStage0Mod
:
758 def __init__(self
, width
, id_wid
):
761 self
.i
= self
.ispec()
762 self
.o
= self
.ospec()
765 return FPNumBase2Ops(self
.width
, self
.id_wid
)
768 return FPAddStage0Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ registers this module as m.submodules.add0 and drives its input
        record from i
    """
    m.submodules.add0 = self
    input_link = self.i.eq(i)
    m.d.comb += input_link
776 def elaborate(self
, platform
):
778 m
.submodules
.add0_in_a
= self
.i
.a
779 m
.submodules
.add0_in_b
= self
.i
.b
780 m
.submodules
.add0_out_z
= self
.o
.z
782 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
784 # store intermediate tests (and zero-extended mantissas)
785 seq
= Signal(reset_less
=True)
786 mge
= Signal(reset_less
=True)
787 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
788 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
789 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
790 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
791 am0
.eq(Cat(self
.i
.a
.m
, 0)),
792 bm0
.eq(Cat(self
.i
.b
.m
, 0))
794 # same-sign (both negative or both positive) add mantissas
797 self
.o
.tot
.eq(am0
+ bm0
),
798 self
.o
.z
.s
.eq(self
.i
.a
.s
)
800 # a mantissa greater than b, use a
803 self
.o
.tot
.eq(am0
- bm0
),
804 self
.o
.z
.s
.eq(self
.i
.a
.s
)
806 # b mantissa greater than a, use b
809 self
.o
.tot
.eq(bm0
- am0
),
810 self
.o
.z
.s
.eq(self
.i
.b
.s
)
815 class FPAddStage0(FPState
, FPID
):
816 """ First stage of add. covers same-sign (add) and subtract
817 special-casing when mantissas are greater or equal, to
818 give greatest accuracy.
def __init__(self, width, id_wid):
    """ builds the "add_0" state

        width:  operand width, forwarded to FPAddStage0Mod
        id_wid: forwarded to FPID.__init__ and to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)
    # FPAddStage0Mod.__init__ takes (width, id_wid) with no default for
    # id_wid, so it has to be passed here as well.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
def setup(self, m, in_a, in_b, in_mid):
    """ links module to inputs and outputs

        m:      module being built
        in_a, in_b: operands handed on to self.mod.setup
        in_mid: connected to self.in_mid when that attribute is not None
    """
    # NOTE(review): FPAddStage0Mod.setup in this file is declared as
    # setup(self, m, i); this call passes a and b separately - confirm
    # which side is current.
    self.mod.setup(m, in_a, in_b)
    if self.in_mid is not None:
        m.d.comb += self.in_mid.eq(in_mid)
836 # NOTE: these could be done as combinatorial (merge add0+add1)
837 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
841 class FPAddStage1Data
:
843 def __init__(self
, width
, id_wid
):
844 self
.z
= FPNumBase(width
, False)
846 self
.mid
= Signal(id_wid
, reset_less
=True)
849 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
853 class FPAddStage1Mod(FPState
):
854 """ Second stage of add: preparation for normalisation.
855 detects when tot sum is too big (tot[27] is kinda a carry bit)
858 def __init__(self
, width
, id_wid
):
861 self
.i
= self
.ispec()
862 self
.o
= self
.ospec()
865 return FPAddStage0Data(self
.width
, self
.id_wid
)
868 return FPAddStage1Data(self
.width
, self
.id_wid
)
def setup(self, m, in_tot, in_z):
    """ registers this module (as add1) and its output overflow record
        (as add1_out_overflow) on m, then drives the input record's z and
        tot fields from in_z and in_tot
    """
    m.submodules.add1 = self
    m.submodules.add1_out_overflow = self.o.of

    stage_in = self.i
    m.d.comb += [
        stage_in.z.eq(in_z),
        stage_in.tot.eq(in_tot),
    ]
879 def elaborate(self
, platform
):
881 #m.submodules.norm1_in_overflow = self.in_of
882 #m.submodules.norm1_out_overflow = self.out_of
883 #m.submodules.norm1_in_z = self.in_z
884 #m.submodules.norm1_out_z = self.out_z
885 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
886 # tot[-1] (MSB) gets set when the sum overflows. shift result down
887 with m
.If(self
.i
.tot
[-1]):
889 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
890 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
891 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
892 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
893 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
894 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
896 # tot[-1] (MSB) zero case
899 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
900 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
901 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
902 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
903 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
908 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """ builds the "add_1" state

        width:  operand width, forwarded to FPAddStage1Mod and FPNumBase
        id_wid: forwarded to FPID.__init__ and to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)
    # FPAddStage1Mod.__init__ takes (width, id_wid) with no default for
    # id_wid, so it has to be passed here as well.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # cleared every cycle by setup(); set to 1 where this state hands
    # over to "normalise_1"
    self.norm_stb = Signal()
def setup(self, m, in_tot, in_z, in_mid):
    """ sets up the stage-1 module from in_tot/in_z, adds the default
        clearing of norm_stb, and connects in_mid when this state carries
        an in_mid signal
    """
    self.mod.setup(m, in_tot, in_z)

    # sets to zero when not in add1 state
    clear_strobe = self.norm_stb.eq(0)
    m.d.sync += clear_strobe

    if self.in_mid is None:
        return
    m.d.comb += self.in_mid.eq(in_mid)
930 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
931 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
932 m
.d
.sync
+= self
.norm_stb
.eq(1)
933 m
.next
= "normalise_1"
936 class FPNormaliseModSingle
:
938 def __init__(self
, width
):
940 self
.in_z
= self
.ispec()
941 self
.out_z
= self
.ospec()
944 return FPNumBase(self
.width
, False)
947 return FPNumBase(self
.width
, False)
def setup(self, m, in_z, out_z):
    """ registers this module as m.submodules.normalise, drives its
        in_z from the caller's in_z and copies its out_z to the caller's
        out_z
    """
    m.submodules.normalise = self
    m.d.comb += [
        self.in_z.eq(in_z),
        out_z.eq(self.out_z),
    ]
956 def elaborate(self
, platform
):
959 mwid
= self
.out_z
.m_width
+2
960 pe
= PriorityEncoder(mwid
)
961 m
.submodules
.norm_pe
= pe
963 m
.submodules
.norm1_out_z
= self
.out_z
964 m
.submodules
.norm1_in_z
= self
.in_z
966 in_z
= FPNumBase(self
.width
, False)
968 m
.submodules
.norm1_insel_z
= in_z
969 m
.submodules
.norm1_insel_overflow
= in_of
971 espec
= (len(in_z
.e
), True)
972 ediff_n126
= Signal(espec
, reset_less
=True)
973 msr
= MultiShiftRMerge(mwid
, espec
)
974 m
.submodules
.multishift_r
= msr
976 m
.d
.comb
+= in_z
.eq(self
.in_z
)
977 m
.d
.comb
+= in_of
.eq(self
.in_of
)
978 # initialise out from in (overridden below)
979 m
.d
.comb
+= self
.out_z
.eq(in_z
)
980 m
.d
.comb
+= self
.out_of
.eq(in_of
)
981 # normalisation decrease condition
982 decrease
= Signal(reset_less
=True)
983 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
986 # *sigh* not entirely obvious: count leading zeros (clz)
987 # with a PriorityEncoder: to find from the MSB
988 # we reverse the order of the bits.
989 temp_m
= Signal(mwid
, reset_less
=True)
990 temp_s
= Signal(mwid
+1, reset_less
=True)
991 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
993 # cat round and guard bits back into the mantissa
994 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
995 pe
.i
.eq(temp_m
[::-1]), # inverted
996 clz
.eq(pe
.o
), # count zeros from MSB down
997 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
998 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
999 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ creates the roundz flag, the number z and the mid signal

        width:  forwarded to FPNumBase
        id_wid: shape of the mid signal
    """
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1012 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1015 class FPNorm1ModSingle
:
1017 def __init__(self
, width
, id_wid
):
1019 self
.id_wid
= id_wid
1020 self
.i
= self
.ispec()
1021 self
.o
= self
.ospec()
1024 return FPAddStage1Data(self
.width
, self
.id_wid
)
1027 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ registers this module as m.submodules.normalise_1 and drives its
        input record from i
    """
    m.submodules.normalise_1 = self
    input_link = self.i.eq(i)
    m.d.comb += input_link
1035 def elaborate(self
, platform
):
1038 mwid
= self
.o
.z
.m_width
+2
1039 pe
= PriorityEncoder(mwid
)
1040 m
.submodules
.norm_pe
= pe
1043 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1045 m
.submodules
.norm1_out_z
= self
.o
.z
1046 m
.submodules
.norm1_out_overflow
= of
1047 m
.submodules
.norm1_in_z
= self
.i
.z
1048 m
.submodules
.norm1_in_overflow
= self
.i
.of
1051 m
.submodules
.norm1_insel_z
= i
.z
1052 m
.submodules
.norm1_insel_overflow
= i
.of
1054 espec
= (len(i
.z
.e
), True)
1055 ediff_n126
= Signal(espec
, reset_less
=True)
1056 msr
= MultiShiftRMerge(mwid
, espec
)
1057 m
.submodules
.multishift_r
= msr
1059 m
.d
.comb
+= i
.eq(self
.i
)
1060 # initialise out from in (overridden below)
1061 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1062 m
.d
.comb
+= of
.eq(i
.of
)
1063 # normalisation increase/decrease conditions
1064 decrease
= Signal(reset_less
=True)
1065 increase
= Signal(reset_less
=True)
1066 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1067 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1069 with m
.If(decrease
):
1070 # *sigh* not entirely obvious: count leading zeros (clz)
1071 # with a PriorityEncoder: to find from the MSB
1072 # we reverse the order of the bits.
1073 temp_m
= Signal(mwid
, reset_less
=True)
1074 temp_s
= Signal(mwid
+1, reset_less
=True)
1075 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1076 # make sure that the amount to decrease by does NOT
1077 # go below the minimum non-INF/NaN exponent
1078 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1081 # cat round and guard bits back into the mantissa
1082 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1083 pe
.i
.eq(temp_m
[::-1]), # inverted
1084 clz
.eq(limclz
), # count zeros from MSB down
1085 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1086 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1087 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1088 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1089 # overflow in bits 0..1: got shifted too (leave sticky)
1090 of
.guard
.eq(temp_s
[1]), # guard
1091 of
.round_bit
.eq(temp_s
[0]), # round
1094 with m
.Elif(increase
):
1095 temp_m
= Signal(mwid
+1, reset_less
=True)
1097 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1099 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1100 # connect multi-shifter to inp/out mantissa (and ediff)
1102 msr
.diff
.eq(ediff_n126
),
1103 self
.o
.z
.m
.eq(msr
.m
[3:]),
1104 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1105 # overflow in bits 0..1: got shifted too (leave sticky)
1106 of
.guard
.eq(temp_s
[2]), # guard
1107 of
.round_bit
.eq(temp_s
[1]), # round
1108 of
.sticky
.eq(temp_s
[0]), # sticky
1109 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1115 class FPNorm1ModMulti
:
1117 def __init__(self
, width
, single_cycle
=True):
1119 self
.in_select
= Signal(reset_less
=True)
1120 self
.in_z
= FPNumBase(width
, False)
1121 self
.in_of
= Overflow()
1122 self
.temp_z
= FPNumBase(width
, False)
1123 self
.temp_of
= Overflow()
1124 self
.out_z
= FPNumBase(width
, False)
1125 self
.out_of
= Overflow()
def elaborate(self, platform):
    """Build the combinatorial logic for one normalisation step.

    Either the fresh input (in_z/in_of) or the looped-back intermediate
    (temp_z/temp_of) is selected by in_select, copied to out_z/out_of,
    and that copy is then overridden with a single exponent decrease
    (mantissa shifted up) or increase (mantissa shifted down).  out_norm
    is driven high while either adjustment applies.

    Reads self.width and self.out_norm, which are not assigned by the
    visible part of the constructor and must be provided elsewhere.

    NOTE(review): the text under review had lost the statements binding
    `m` and `in_of`, the Else arm of the input select, the list wrappers
    around both assignment groups and the final return.  They are
    restored here from how the names are used -- confirm against the
    upstream file.
    """
    m = Module()
    m.submodules.norm1_out_z = self.out_z
    m.submodules.norm1_out_overflow = self.out_of
    m.submodules.norm1_temp_z = self.temp_z
    m.submodules.norm1_temp_of = self.temp_of
    m.submodules.norm1_in_z = self.in_z
    m.submodules.norm1_in_overflow = self.in_of

    # local copies that hold whichever source was selected
    in_z = FPNumBase(self.width, False)
    in_of = Overflow()
    m.submodules.norm1_insel_z = in_z
    m.submodules.norm1_insel_overflow = in_of

    # select which of temp or in z/of to use
    with m.If(self.in_select):
        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
    with m.Else():
        m.d.comb += in_z.eq(self.temp_z)
        m.d.comb += in_of.eq(self.temp_of)
    # initialise out from in (overridden below)
    m.d.comb += self.out_z.eq(in_z)
    m.d.comb += self.out_of.eq(in_of)
    # normalisation increase/decrease conditions
    decrease = Signal(reset_less=True)
    increase = Signal(reset_less=True)
    m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
    m.d.comb += increase.eq(in_z.exp_lt_n126)
    m.d.comb += self.out_norm.eq(decrease | increase) # loop-end

    with m.If(decrease):
        m.d.comb += [
            self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
            self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
            self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
            self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
            self.out_of.round_bit.eq(0),        # reset round bit
            self.out_of.m0.eq(in_of.guard),
        ]
    with m.Elif(increase):
        m.d.comb += [
            self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
            self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
            self.out_of.guard.eq(in_z.m[0]),
            self.out_of.m0.eq(in_z.m[1]),
            self.out_of.round_bit.eq(in_of.guard),
            self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
        ]

    return m
class FPNorm1Single(FPState, FPID):
    # FSM state registered as "normalise_1" that wraps an FPNorm1ModSingle.
    #
    # NOTE(review): `single_cycle` is accepted but not used in the visible
    # code.  FPNorm1ModSingle is built here from `width` alone, whereas
    # FPNormToPack builds it from (width, id_wid) -- confirm which
    # signature is current.

    def __init__(self, width, id_wid, single_cycle=True):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_z = FPNumBase(width, False)
        # registered copy of the module's round-up decision
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        # the id is only wired up when one is in use
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        # latch the module's roundz flag on the clock edge.
        # NOTE(review): no `m.next` is visible here, unlike the sibling
        # states -- presumably a line was lost; confirm against upstream.
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNorm1Multi(FPState, FPID):
    """FSM state "normalise_1" that iterates an FPNorm1ModMulti module.

    Each clock the module's outputs are latched into temp_z/temp_of so
    that the next step can continue from them; out_norm (driven by the
    module) says whether another step is still needed.

    NOTE(review): in the text under review the branch structure of
    action() had been lost (the body of the accept branch and both Else
    arms).  It is restored here following the identical accept/ack
    handshake that is visible in FPADDBase.action -- confirm against
    upstream.  No `m.next` or `idsync` call was visible, so none is
    added; the state will not advance until that is confirmed.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """Latch the step result and manage the ack handshake."""
        # accept when the strobe is high and we have not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState, FPID):
    """FSM state that chains normalise, round, corrections and pack.

    All four modules are connected combinatorially in setup(); action()
    registers the packed value and moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        # setup() reads self.width; it was never assigned in the text
        # under review, which would raise AttributeError.
        # NOTE(review): self.id_wid is assumed to be provided by
        # FPID.__init__ -- confirm.
        self.width = width

    def setup(self, m, i, in_mid):
        """ links module to inputs and outputs

        NOTE(review): `i` is not referenced by any visible statement, so
        the normalisation module's input is not connected here; a line
        that wires it up was presumably lost -- confirm against upstream.
        """

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        n_out = nmod.ospec()
        m.d.comb += n_out.eq(nmod.o)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(self.width, self.id_wid)
        r_out_z = rmod.ospec()
        rmod.setup(m, n_out.z, n_out.roundz)
        m.d.comb += r_out_z.eq(rmod.out_z)

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        c_out_z = cmod.ospec()
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.eq(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(self.width, self.id_wid)
        self.out_z = self.pmod.ospec()
        self.pmod.setup(m, c_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)  # copies incoming ID to outgoing
        m.d.sync += self.out_z.z.v.eq(self.pmod.o.z.v)  # outputs packed result
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """Create the record: an FPNumBase `z` and an `id_wid`-bit `mid`."""
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)

def eq(self, i):
    """Return the assignments that copy `i.z` and `i.mid` into this record.

    NOTE(review): the `def` line was absent from the text under review
    (the return referenced an unbound `i`); the name `eq` is taken from
    how these records are used elsewhere in the file, e.g.
    `c_out_z.eq(cmod.out_z)` -- confirm against upstream.
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """Create the module's input record `i` and output record `out_z`."""
    # the spec methods below read self.width; it was not assigned in the
    # text under review, so constructing the module would have failed.
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
def ispec(self):
    """Return a fresh input record (FPNorm1Data)."""
    # NOTE(review): both `def` lines were absent from the text under
    # review; the names and their order follow the constructor, which
    # calls ispec() for `i` and ospec() for `out_z` -- confirm.
    return FPNorm1Data(self.width, self.id_wid)

def ospec(self):
    """Return a fresh output record (FPRoundData)."""
    return FPRoundData(self.width, self.id_wid)
def setup(self, m, in_z, roundz):
    """Register this module as submodule `roundz` and drive its inputs.

    `in_z` feeds the number being rounded and `roundz` the round-up
    flag; both are connected combinatorially.
    """
    m.submodules.roundz = self

    m.d.comb += [
        self.i.z.eq(in_z),
        self.i.roundz.eq(roundz),
    ]
def elaborate(self, platform):
    """Build the rounding logic.

    The output defaults to a copy of the input.  When the input's
    roundz flag is set the mantissa is incremented, and if the mantissa
    was already all ones the exponent is incremented as well.

    NOTE(review): the statements creating `m` and returning it were
    absent from the text under review and are restored here.
    """
    m = Module()
    m.d.comb += self.out_z.eq(self.i)
    with m.If(self.i.roundz):
        m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa rounds up
        with m.If(self.i.z.m == self.i.z.m1s): # all 1s
            m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
    return m
class FPRound(FPState, FPID):
    """FSM state "round": registers the output of an FPRoundMod."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        # FPRoundMod is constructed with (width, id_wid) elsewhere in
        # this file; passing width alone would raise TypeError.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        # NOTE(review): FPNormToPack.action calls self.idsync(m) first;
        # no such call is visible here -- confirm whether one was lost.
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """Combinatorial module that corrects the exponent of denormals.

    The output is a copy of the input, except that when the input number
    reports is_denormalised its exponent is replaced by the number's
    N127 constant.

    NOTE(review): the text under review had lost the `self.width`
    assignment, the ispec/ospec `def` lines, and the creation and return
    of `m`; they are restored here from how the names are used --
    confirm against upstream.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.in_z = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """Return a fresh input record."""
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """Return a fresh output record."""
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.in_z.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.in_z)
        with m.If(self.in_z.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.in_z.z.N127)
        return m
class FPCorrections(FPState, FPID):
    """FSM state "corrections": registers the output of FPCorrectionsMod."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        # FPCorrectionsMod.__init__ requires (width, id_wid); passing
        # width alone would raise TypeError.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        # NOTE(review): no `m.next` (and no idsync call) is visible for
        # this state, unlike FPRound and FPPack -- presumably lost from
        # the text; confirm the intended next state against upstream.
        m.d.sync += self.out_z.eq(self.mod.out_z)
def __init__(self, width, id_wid):
    """Create the record: an FPNumOut `z` and an `id_wid`-bit `mid`."""
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)

def eq(self, i):
    """Return the assignments that copy `i.z` and `i.mid` into this record.

    NOTE(review): the `def` line was absent from the text under review
    (the return referenced an unbound `i`); the name `eq` follows how
    these records are used elsewhere in the file -- confirm.
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """Create the module's input record `i` and output record `o`."""
    # the spec methods below read self.width; it was not assigned in the
    # text under review, so constructing the module would have failed.
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
def ispec(self):
    """Return a fresh input record (FPRoundData)."""
    # NOTE(review): both `def` lines were absent from the text under
    # review; the names and their order follow the constructor, which
    # calls ispec() for `i` and ospec() for `o` -- confirm.
    return FPRoundData(self.width, self.id_wid)

def ospec(self):
    """Return a fresh output record (FPPackData)."""
    return FPPackData(self.width, self.id_wid)
def setup(self, m, in_z):
    """ links module to inputs and outputs
    """
    # register this module under the name "pack" in the parent
    m.submodules.pack = self
    # drive the input record combinatorially from the caller's record
    m.d.comb += self.i.eq(in_z)
def elaborate(self, platform):
    """Build the packing logic.

    When the input number reports is_overflowed the output is set to
    infinity with the input's sign; otherwise the output is created from
    the input's sign, exponent and mantissa.

    NOTE(review): the creation and return of `m` and the Else arm were
    absent from the text under review; without the Else both
    assignments would apply at once.  Restored here -- confirm against
    upstream.
    """
    m = Module()
    m.submodules.pack_in_z = self.i.z
    with m.If(self.i.z.is_overflowed):
        m.d.comb += self.o.z.inf(self.i.z.s)
    with m.Else():
        m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    return m
def __init__(self, width, id_wid):
    """Create the record: an FPNumOut `z` and an `id_wid`-bit `mid`."""
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)

def eq(self, i):
    """Return the assignments that copy `i.z` and `i.mid` into this record.

    NOTE(review): the `def` line was absent from the text under review
    (the return referenced an unbound `i`); the name `eq` follows how
    these records are used elsewhere in the file -- confirm.
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPack(FPState, FPID):
    """FSM state "pack": registers the packed value from an FPPackMod."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        # FPPackMod is constructed with (width, id_wid) elsewhere in
        # this file; passing width alone would raise TypeError.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    # NOTE(review): the two `def` lines below were absent from the text
    # under review; names follow the `self.ospec()` call above.
    def ispec(self):
        """Return a fresh input record, as defined by the module."""
        return self.mod.ispec()

    def ospec(self):
        """Return a fresh output record, as defined by the module."""
        return self.mod.ospec()

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        # The pack module exposes its result as `o` (a record holding
        # `z`), and self.out_z is the same kind of record, so the value
        # lives at `.z.v` on both sides -- the same form that
        # FPNormToPack.action uses.  The previous `self.out_z.v` /
        # `self.mod.out_z.v` named attributes that do not exist.
        # NOTE(review): no idsync call is visible here; confirm.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """FSM state that presents a result on an output and waits for ack.

    The value (and id, when in use) is registered every cycle; the
    output strobe is held high until strobe and ack are seen together,
    at which point the strobe is dropped and the FSM moves to
    `to_state` ("get_ops" by default).

    NOTE(review): the text under review had lost the assignments that
    store in_z/out_z (both are read in action), the statement wrapper
    around the value copy and the Else arm; restored here from usage --
    confirm against upstream.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """FSM state that presents a result on one of several outputs.

    The output is chosen by indexing `out_zs` with `in_mid`.  Its strobe
    is held high until strobe and ack are seen together, then dropped,
    and the FSM moves to `to_state` ("get_ops" by default).

    NOTE(review): the text under review had lost the assignment that
    stores in_z (read in action), the closing bracket of the comb list,
    the statement wrapper around the value copy and the Else arm;
    restored here from usage -- confirm against upstream.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # local copies of the selected output's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """Input record for the adder: operands `a`, `b` and an id `mid`."""

    def __init__(self, width, id_wid):
        # NOTE(review): the sibling classes in this file store `width`
        # next to `id_wid`; that assignment was absent from the text
        # under review and is restored here -- confirm against upstream.
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments that copy `i` into this record.

        NOTE(review): the `def` line was absent from the text under
        review (the return referenced an unbound `i`); the name `eq`
        follows how these records are used elsewhere in the file, e.g.
        `self.i.eq(i)` -- confirm.
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """Create the record: an FPOp `z` and an `id_wid`-bit `mid`."""
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)

def eq(self, i):
    """Return the assignments that copy `i.z` and `i.mid` into this record.

    NOTE(review): the `def` line was absent from the text under review
    (the return referenced an unbound `i`); the name `eq` follows how
    these records are used elsewhere in the file -- confirm.
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
1558 class FPADDBaseMod(FPID
):
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    FPID.__init__(self, id_wid)
    # ispec()/ospec() read self.width and add_state() appends to
    # self.states; neither was assigned in the text under review, so
    # both are restored here (width before the spec calls below).
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    self.states = []
def ispec(self):
    """Return a fresh input record (FPADDBaseData)."""
    # NOTE(review): both `def` lines were absent from the text under
    # review; the names and their order follow the constructor, which
    # calls ispec() for `i` and ospec() for `o` -- confirm.
    return FPADDBaseData(self.width, self.id_wid)

def ospec(self):
    """Return a fresh output record (FPOpData)."""
    return FPOpData(self.width, self.id_wid)
def add_state(self, state):
    """Append `state` to the list of FSM states and return it.

    Every caller in this file uses the result directly, e.g.
    `sc = self.add_state(...)` followed by `sc.setup(...)`, so the state
    must be handed back; without the return those calls would operate
    on None.
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

    Builds either the compact or the longer chain of states, then emits
    one FSM state per registered state object, named by its state_from.

    NOTE(review): the text under review had lost the creation and return
    of `m`, the condition choosing between the two builders (restored
    here as a test of self.compact, the only flag set for that purpose)
    and the body of the per-state block (restored as a call to the
    state's action method, which every state class defines) -- confirm
    against upstream.
    """
    m = Module()
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
def get_longer_fragment(self, m, platform=None):
    # Registers the long (one state per stage) chain: get_ops, special
    # cases, denormalise, align, add stage 0/1, normalise, round,
    # corrections, pack, and two put-z states.
    #
    # NOTE(review): this method is incomplete in the text under review
    # and is reproduced as found:
    #  * the first add_state call is cut off after its second argument;
    #  * `a` and `b` are used but never bound here;
    #  * self.in_a, self.in_b and self.out_z are read although the
    #    constructor creates self.i and self.o instead;
    #  * the two `if self.single_cycle:` tests have lost their else
    #    arms, so the layout of the statements after each test is a
    #    guess;
    #  * the "put_z" state is given pa.in_mid whereas the compact
    #    builder gives it sc.in_mid, and the pack stage is given
    #    rn.in_mid rather than cor.in_mid -- confirm which is intended.

    get = self.add_state(FPGet2Op("get_ops", "special_cases",
    # NOTE(review): remaining arguments and closing brackets are missing
    get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)

    sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
    sc.setup(m, a, b, self.in_mid)

    dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
    dn.setup(m, a, b, sc.in_mid)

    if self.single_cycle:
        alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
    # NOTE(review): presumably the alternative (multi-cycle) arm
    alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
    alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

    add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
    add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

    add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
    add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

    if self.single_cycle:
        n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
        n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
    # NOTE(review): presumably the alternative (multi-cycle) arm
    n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
    n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

    rn = self.add_state(FPRound(self.width, self.id_wid))
    rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

    cor = self.add_state(FPCorrections(self.width, self.id_wid))
    cor.setup(m, rn.out_z, rn.in_mid)

    pa = self.add_state(FPPack(self.width, self.id_wid))
    pa.setup(m, cor.out_z, rn.in_mid)

    ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                pa.in_mid, self.out_mid))

    pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                pa.in_mid, self.out_mid))
def get_compact_fragment(self, m, platform=None):
    """Register the reduced chain of states and wire them together.

    Order of registration: get_ops, special cases with denormalise,
    align-and-add, normalise-to-pack, then the two put-z states (one fed
    from the normalise-to-pack stage, one from the special-cases stage).
    """
    width, id_wid = self.width, self.id_wid

    fetch = self.add_state(FPGet2Op("get_ops", "special_cases",
                                    width, id_wid))
    fetch.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    special = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
    special.setup(m, fetch.o, self.in_mid)

    align_add = self.add_state(FPAddAlignSingleAdd(width, id_wid))
    align_add.setup(m, special.o, special.in_mid)

    norm_pack = self.add_state(FPNormToPack(width, id_wid))
    norm_pack.setup(m, align_add.a1o, align_add.in_mid)

    # output of the normal path
    self.add_state(FPPutZ("pack_put_z", norm_pack.out_z.z, self.o,
                          norm_pack.in_mid, self.out_mid))

    # output of the special-cases path
    self.add_state(FPPutZ("put_z", special.out_z.z, self.o,
                          special.in_mid, self.out_mid))
1680 class FPADDBase(FPState
, FPID
):
def __init__(self, width, id_wid=None, single_cycle=False):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "fpadd")
    self.single_cycle = single_cycle
    # the wrapped adder; its spec methods define this state's i/o records
    self.mod = FPADDBaseMod(width, id_wid, single_cycle)
    self.o = self.ospec()

    self.in_t = Trigger()
    self.i = self.ispec()

    self.z_done = Signal(reset_less=True) # connects to out_z Strobe
    self.in_accept = Signal(reset_less=True)
    self.add_stb = Signal(reset_less=True)
    self.add_ack = Signal(reset=0, reset_less=True)
def ispec(self):
    """Return a fresh input record, as defined by the wrapped module."""
    # NOTE(review): both `def` lines were absent from the text under
    # review; the names follow the constructor's self.ispec() and
    # self.ospec() calls -- confirm.
    return self.mod.ispec()

def ospec(self):
    """Return a fresh output record, as defined by the wrapped module."""
    return self.mod.ospec()
def setup(self, m, i, add_stb, in_mid):
    """Wire this state to its inputs and to the wrapped adder module.

    Inputs, id, trigger and output signals are connected
    combinatorially between this state and self.mod; add_stb is
    registered, and add_ack / the output ack are cleared every clock
    here so that they read zero unless action() overrides them.

    NOTE(review): the closing bracket of the combinatorial list was
    absent from the text under review and is restored here.
    """
    m.d.comb += [self.i.eq(i),
                 self.mod.i.eq(self.i),
                 self.in_mid.eq(in_mid),
                 self.mod.in_mid.eq(self.in_mid),
                 self.z_done.eq(self.mod.o.z.trigger),
                 #self.add_stb.eq(add_stb),
                 self.mod.in_t.stb.eq(self.in_t.stb),
                 self.in_t.ack.eq(self.mod.in_t.ack),
                 self.o.mid.eq(self.mod.o.mid),
                 self.o.z.v.eq(self.mod.o.z.v),
                 self.o.z.stb.eq(self.mod.o.z.stb),
                 self.mod.o.z.ack.eq(self.o.z.ack),
                ]

    m.d.sync += self.add_stb.eq(add_stb)
    m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
    m.d.sync += self.o.z.ack.eq(0) # likewise
    #m.d.sync += self.in_t.stb.eq(0)

    m.submodules.fpadd = self.mod
def action(self, m):
    # Handshake for the wrapped adder: accept an operand pair while the
    # result is not yet done, then acknowledge once z_done is seen.
    #
    # NOTE(review): this method is incomplete in the text under review
    # and is reproduced as found.  Statement openers, closing brackets
    # and Else arms are missing, so the nesting shown below is a guess.
    # The tail reads self.out_mid, self.mod.out_mid, self.out_z and
    # self.mod.out_z, while setup() in this class uses self.o and
    # self.mod.o -- confirm against upstream before relying on any of it.

    # in_accept is set on incoming strobe HIGH and ack LOW.
    m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

    #with m.If(self.in_t.ack):
    #    m.d.sync += self.in_t.stb.eq(0)
    with m.If(~self.z_done):
        # not done: test for accepting an incoming operand pair
        with m.If(self.in_accept):
            # NOTE(review): the statement these two items belong to is
            # missing
                self.add_ack.eq(1), # acknowledge receipt...
                self.in_t.stb.eq(1), # initiate add
        # NOTE(review): presumably the not-accepted arm; the list is
        # not closed in the text
            m.d.sync += [self.add_ack.eq(0),
                         self.in_t.stb.eq(0),
    # NOTE(review): presumably the arm for z_done being set; the list is
    # not closed in the text
        # done: acknowledge, and write out id and value
        m.d.sync += [self.add_ack.eq(1),
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.mod.out_mid)
        # NOTE(review): the statement this item belongs to is missing
          self.out_z.v.eq(self.mod.out_z.v)
        # move to output state on detecting z ack
        with m.If(self.out_z.trigger):
            m.d.sync += self.out_z.stb.eq(0)
        # NOTE(review): presumably the alternative arm
            m.d.sync += self.out_z.stb.eq(1)
def __init__(self, width, id_wid):
    # Builds an Array of named result objects plus an input operand
    # (in_z) and an input id (in_mid).
    #
    # NOTE(review): reproduced as found.  In the visible text `rs_sz`,
    # `res` and `out_z` are never bound in this method (and `rs_sz` is
    # not a parameter), so the loop cannot run as shown; the lines that
    # create and collect each `out_z` appear to have been lost --
    # confirm against upstream.
    self.id_wid = id_wid
    for i in range(rs_sz):
        # each result is given a distinct, index-based name
        out_z.name = "out_z_%d" % i
    self.res = Array(res)
    self.in_z = FPOp(width)
    self.in_mid = Signal(self.id_wid, reset_less=True)
def setup(self, m, in_z, in_mid):
    """Drive this object's in_z and in_mid combinatorially from the caller."""
    m.d.comb += self.in_z.eq(in_z)
    m.d.comb += self.in_mid.eq(in_mid)
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

    Registers in_z and every entry of res as submodules.

    NOTE(review): the creation and return of `m` were absent from the
    text under review and are restored here; any further statements that
    may have followed are unknown -- confirm against upstream.
    """
    m = Module()
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res

    return m
1807 """ FPADD: stages as follows:
1813 FPAddBase---> FPAddBaseMod
1815 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1817 FPAddBase is tricky: it is both a stage and *has* stages.
1818 Connection to FPAddBaseMod therefore requires an in stb/ack
1819 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1820 needs to be the thing that raises the incoming stb.
def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    # NOTE(review): reproduced as found.  In the visible text `rs`,
    # `res`, `in_a`, `in_b` and `out_z` are never bound, and neither
    # self.width, self.rs nor self.states (all read by other methods) is
    # assigned; the lines doing so appear to have been lost -- confirm
    # against upstream.  `rs_sz` is the number of operand pairs and of
    # results created by the two loops below.
    self.id_wid = id_wid
    self.single_cycle = single_cycle

    #self.out_z = FPOp(width)
    self.ids = FPID(id_wid)

    for i in range(rs_sz):
        # each operand is given a distinct, index-based name
        in_a.name = "in_a_%d" % i
        in_b.name = "in_b_%d" % i
        rs.append((in_a, in_b))

    for i in range(rs_sz):
        out_z.name = "out_z_%d" % i
    self.res = Array(res)
def add_state(self, state):
    """Append `state` to the list of FSM states and return it.

    Callers in this class use the result directly, e.g.
    `ab = self.add_state(ab)` followed by `ab.ispec()`, so the state
    must be handed back; without the return those calls would operate
    on None.
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    # NOTE(review): this method is incomplete in the text under review
    # and is reproduced as found.  `m`, `a`, `b` and `o` are never bound
    # here; the two FPGetOp calls and the FPPutZIdx call are cut off
    # part-way through their argument lists; the body of the per-state
    # block and the return are missing -- confirm against upstream.
    m.submodules += self.rs

    # only the first operand pair is used
    in_a = self.rs[0][0]
    in_b = self.rs[0][1]

    geta = self.add_state(FPGetOp("get_a", "get_b",
    # NOTE(review): remaining arguments and closing brackets are missing

    getb = self.add_state(FPGetOp("get_b", "fpadd",
    # NOTE(review): remaining arguments and closing brackets are missing

    ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
    ab = self.add_state(ab)
    abd = ab.ispec() # create an input spec object for FPADDBase
    # operands and id are registered into the spec object each clock
    m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
    ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

    pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
    # NOTE(review): remaining arguments and closing brackets are missing

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                # NOTE(review): the body of this block is missing
if __name__ == "__main__":
    # Script entry: build an adder and hand it, with its port list, to
    # the nmigen command-line `main`.
    #
    # NOTE(review): reproduced as found.  Two builds appear back to
    # back; presumably a condition selecting one of them was lost.  The
    # second reads alu.in_a, alu.in_b and alu.out_z, which the visible
    # FPADDBase constructor does not create -- confirm against upstream.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=alu.rs[0][0].ports() + \
                     alu.rs[0][1].ports() + \
                     alu.res[0].ports() + \
                     [alu.ids.in_mid, alu.ids.out_mid])

    alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=[alu.in_a, alu.in_b] + \
                     alu.in_t.ports() + \
                     alu.out_z.ports() + \
                     [alu.in_mid, alu.out_mid])


    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())