1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
17 def __init__(self
, state_from
):
18 self
.state_from
= state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
140 def __init__(self
, width
):
141 self
.in_op
= FPOp(width
)
142 self
.out_op
= Signal(width
)
143 self
.out_decode
= Signal(reset_less
=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ Build the two-operand fetch state.

        in_state  -- forwarded unchanged to FPState.__init__
        out_state -- kept as self.out_state; the action code of this
                     class assigns it to m.next once out_decode is set
        width     -- forwarded to FPGet2OpMod
        id_wid    -- forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # worker module, plus an output record of the same spec as the worker's
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # three single-bit signals, none of them tied to reset
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ Create a two-operand record: FP numbers a and b, plus mid.

        width   -- passed to FPNumBase for both operands
        id_wid  -- bit-width of the mid signal
        m_extra -- passed to FPNumBase as its second argument
                   (NOTE(review): meaning is defined in fpbase, not here)
    """
    # the two operands share identical construction parameters
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)

    # presumably the "multiplex id" that travels with the data -- confirm
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ Create the record carrying operands a/b together with the
        early-out result fields (z, oz, out_do_z) and mid.

        width  -- FP number width; also the bit-width of oz
        id_wid -- bit-width of the mid signal
    """
    # operands: FPNumBase built with second argument True
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)

    # result number, its width-bit value, and a 1-bit flag
    self.z = FPNumOut(width, False)
    self.oz = Signal(width, reset_less=True)
    self.out_do_z = Signal(reset_less=True)

    # presumably the "multiplex id" that travels with the data -- confirm
    self.mid = Signal(id_wid, reset_less=True)
274 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
275 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
278 class FPAddSpecialCasesMod
:
279 """ special cases: NaNs, infs, zeros, denormalised
280 NOTE: some of these are unique to add. see "Special Operations"
281 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
284 def __init__(self
, width
, id_wid
):
287 self
.i
= self
.ispec()
288 self
.o
= self
.ospec()
291 return FPNumBase2Ops(self
.width
, self
.id_wid
)
294 return FPSCData(self
.width
, self
.id_wid
)
296 def setup(self
, m
, i
):
297 """ links module to inputs and outputs
299 m
.submodules
.specialcases
= self
300 m
.d
.comb
+= self
.i
.eq(i
)
302 def elaborate(self
, platform
):
305 m
.submodules
.sc_in_a
= self
.i
.a
306 m
.submodules
.sc_in_b
= self
.i
.b
307 m
.submodules
.sc_out_z
= self
.o
.z
310 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
313 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
315 # if a is NaN or b is NaN return NaN
316 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
317 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
318 m
.d
.comb
+= self
.o
.z
.nan(0)
320 # XXX WEIRDNESS for FP16 non-canonical NaN handling
323 ## if a is zero and b is NaN return -b
324 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
325 # m.d.comb += self.o.out_do_z.eq(1)
326 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
328 ## if b is zero and a is NaN return -a
329 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
330 # m.d.comb += self.o.out_do_z.eq(1)
331 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
333 ## if a is -zero and b is NaN return -b
334 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
335 # m.d.comb += self.o.out_do_z.eq(1)
336 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
338 ## if b is -zero and a is NaN return -a
339 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
340 # m.d.comb += self.o.out_do_z.eq(1)
341 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
343 # if a is inf return inf (or NaN)
344 with m
.Elif(self
.i
.a
.is_inf
):
345 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
347 # if a is inf and signs don't match return NaN
348 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
349 m
.d
.comb
+= self
.o
.z
.nan(0)
351 # if b is inf return inf
352 with m
.Elif(self
.i
.b
.is_inf
):
353 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
354 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
356 # if a is zero and b zero return signed-a/b
357 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
363 # if a is zero return b
364 with m
.Elif(self
.i
.a
.is_zero
):
365 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
366 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
369 # if b is zero return a
370 with m
.Elif(self
.i
.b
.is_zero
):
371 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
372 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
375 # if a equal to -b return zero (+ve zero)
376 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
377 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
378 m
.d
.comb
+= self
.o
.z
.zero(0)
380 # Denormalised Number checks next, so pass a/b data through
382 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
383 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
384 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
386 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
387 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
393 def __init__(self
, id_wid
):
396 self
.in_mid
= Signal(id_wid
, reset_less
=True)
397 self
.out_mid
= Signal(id_wid
, reset_less
=True)
403 if self
.id_wid
is not None:
404 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
407 class FPAddSpecialCases(FPState
):
408 """ special cases: NaNs, infs, zeros, denormalised
409 NOTE: some of these are unique to add. see "Special Operations"
410 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Build the "special_cases" state.

        width  -- forwarded to FPAddSpecialCasesMod
        id_wid -- forwarded to FPAddSpecialCasesMod

        FPAddSpecialCasesMod.__init__ takes (width, id_wid); both are
        passed here, since constructing it with width alone raises
        TypeError.
    """
    FPState.__init__(self, "special_cases")

    # worker module and a result record with the worker's output spec
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()

    # single-bit, reset-less flag
    self.out_do_z = Signal(reset_less=True)
419 def setup(self
, m
, i
):
420 """ links module to inputs and outputs
422 self
.mod
.setup(m
, i
, self
.out_do_z
)
423 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
424 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
428 with m
.If(self
.out_do_z
):
431 m
.next
= "denormalise"
434 class FPAddSpecialCasesDeNorm(FPState
):
435 """ special cases: NaNs, infs, zeros, denormalised
436 NOTE: some of these are unique to add. see "Special Operations"
437 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Build the combined special-cases + denormalise state.

        width  -- forwarded to both worker modules
        id_wid -- forwarded to both worker modules

        Registers under the state name "special_cases".
    """
    FPState.__init__(self, "special_cases")

    # special-cases worker, its output-spec record, and a 1-bit flag
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise worker and this state's output record
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
449 def setup(self
, m
, i
):
450 """ links module to inputs and outputs
452 self
.smod
.setup(m
, i
)
453 self
.dmod
.setup(m
, self
.smod
.o
)
454 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
457 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
458 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
460 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
463 with m
.If(self
.out_do_z
):
469 class FPAddDeNormMod(FPState
):
471 def __init__(self
, width
, id_wid
):
474 self
.i
= self
.ispec()
475 self
.o
= self
.ospec()
478 return FPSCData(self
.width
, self
.id_wid
)
481 return FPSCData(self
.width
, self
.id_wid
)
483 def setup(self
, m
, i
):
484 """ links module to inputs and outputs
486 m
.submodules
.denormalise
= self
487 m
.d
.comb
+= self
.i
.eq(i
)
489 def elaborate(self
, platform
):
491 m
.submodules
.denorm_in_a
= self
.i
.a
492 m
.submodules
.denorm_in_b
= self
.i
.b
493 m
.submodules
.denorm_out_a
= self
.o
.a
494 m
.submodules
.denorm_out_b
= self
.o
.b
496 with m
.If(~self
.i
.out_do_z
):
497 # XXX hmmm, don't like repeating identical code
498 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
499 with m
.If(self
.i
.a
.exp_n127
):
500 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
502 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
504 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
505 with m
.If(self
.i
.b
.exp_n127
):
506 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
508 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
510 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
511 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
512 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
513 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
518 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ Build the "denormalise" state.

        width  -- forwarded to FPAddDeNormMod and to the FPNumBase outputs
        id_wid -- forwarded to FPAddDeNormMod

        FPAddDeNormMod.__init__ takes (width, id_wid); both are passed
        here, since constructing it with width alone raises TypeError.
    """
    FPState.__init__(self, "denormalise")
    self.mod = FPAddDeNormMod(width, id_wid)

    # per-operand output numbers held by this state
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
526 def setup(self
, m
, i
):
527 """ links module to inputs and outputs
531 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
532 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
535 # Denormalised Number checks
539 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ Create the input/output numbers for the alignment module.

        width -- FP number width used for all four numbers
    """
    # inputs are plain FPNumBase ...
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # ... outputs are FPNumIn (constructed with None as first argument)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # 1-bit flag; elaborate() drives it to 1 in its "exponents equal" branch
    self.exp_eq = Signal(reset_less=True)
548 def elaborate(self
, platform
):
549 # This one however (single-cycle) will do the shift
554 m
.submodules
.align_in_a
= self
.in_a
555 m
.submodules
.align_in_b
= self
.in_b
556 m
.submodules
.align_out_a
= self
.out_a
557 m
.submodules
.align_out_b
= self
.out_b
559 # NOTE: this does *not* do single-cycle multi-shifting,
560 # it *STAYS* in the align state until exponents match
562 # exponent of a greater than b: shift b down
563 m
.d
.comb
+= self
.exp_eq
.eq(0)
564 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
565 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
566 agtb
= Signal(reset_less
=True)
567 altb
= Signal(reset_less
=True)
568 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
569 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
571 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
572 # exponent of b greater than a: shift a down
574 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
575 # exponents equal: move to next stage.
577 m
.d
.comb
+= self
.exp_eq
.eq(1)
581 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ Build the "align" state around FPAddAlignMultiMod.

        width  -- forwarded to the worker module and the output numbers
        id_wid -- accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)

    # this state's own copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # 1-bit flag; setup() copies the worker's exp_eq into it
    self.exp_eq = Signal(reset_less=True)
590 def setup(self
, m
, in_a
, in_b
):
591 """ links module to inputs and outputs
593 m
.submodules
.align
= self
.mod
594 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
595 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
596 #m.d.comb += self.out_a.eq(self.mod.out_a)
597 #m.d.comb += self.out_b.eq(self.mod.out_b)
598 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
599 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
600 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
603 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ Create the record carrying FPNumIn operands a/b together with
        the early-out result fields (z, out_do_z, oz) and mid.

        width  -- FP number width; also the bit-width of oz
        id_wid -- bit-width of the mid signal
    """
    # operands (FPNumIn constructed with None as first argument)
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)

    # result number, 1-bit flag, and width-bit value
    self.z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # presumably the "multiplex id" that travels with the data -- confirm
    self.mid = Signal(id_wid, reset_less=True)
618 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
619 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
622 class FPAddAlignSingleMod
:
624 def __init__(self
, width
, id_wid
):
627 self
.i
= self
.ispec()
628 self
.o
= self
.ospec()
631 return FPSCData(self
.width
, self
.id_wid
)
634 return FPNumIn2Ops(self
.width
, self
.id_wid
)
636 def process(self
, i
):
639 def setup(self
, m
, i
):
640 """ links module to inputs and outputs
642 m
.submodules
.align
= self
643 m
.d
.comb
+= self
.i
.eq(i
)
645 def elaborate(self
, platform
):
646 """ Aligns A against B or B against A, depending on which has the
647 greater exponent. This is done in a *single* cycle using
648 variable-width bit-shift
650 the shifter used here is quite expensive in terms of gates.
651 Mux A or B in (and out) into temporaries, as only one of them
652 needs to be aligned against the other
656 m
.submodules
.align_in_a
= self
.i
.a
657 m
.submodules
.align_in_b
= self
.i
.b
658 m
.submodules
.align_out_a
= self
.o
.a
659 m
.submodules
.align_out_b
= self
.o
.b
661 # temporary (muxed) input and output to be shifted
662 t_inp
= FPNumBase(self
.width
)
663 t_out
= FPNumIn(None, self
.width
)
664 espec
= (len(self
.i
.a
.e
), True)
665 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
666 m
.submodules
.align_t_in
= t_inp
667 m
.submodules
.align_t_out
= t_out
668 m
.submodules
.multishift_r
= msr
670 ediff
= Signal(espec
, reset_less
=True)
671 ediffr
= Signal(espec
, reset_less
=True)
672 tdiff
= Signal(espec
, reset_less
=True)
673 elz
= Signal(reset_less
=True)
674 egz
= Signal(reset_less
=True)
676 # connect multi-shifter to t_inp/out mantissa (and tdiff)
677 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
678 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
679 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
680 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
681 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
683 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
684 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
685 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
686 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
688 # default: A-exp == B-exp, A and B untouched (fall through)
689 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
690 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
691 # only one shifter (muxed)
692 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
693 # exponent of a greater than b: shift b down
694 with m
.If(~self
.i
.out_do_z
):
696 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
699 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
701 # exponent of b greater than a: shift a down
703 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
706 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
709 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
710 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
711 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
712 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
717 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ Build the "align" state around FPAddAlignSingleMod.

        width  -- forwarded to the worker module and the output numbers
        id_wid -- forwarded to the worker module
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # this state's own copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
725 def setup(self
, m
, i
):
726 """ links module to inputs and outputs
730 # NOTE: could be done as comb
731 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
732 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
738 class FPAddAlignSingleAdd(FPState
):
740 def __init__(self
, width
, id_wid
):
741 FPState
.__init
__(self
, "align")
744 self
.a1o
= self
.ospec()
747 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
750 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
752 def setup(self
, m
, i
):
753 """ links module to inputs and outputs
756 # chain AddAlignSingle, AddStage0 and AddStage1
757 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
758 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
759 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
761 chain
= StageChain([mod
, a0mod
, a1mod
])
764 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
767 m
.next
= "normalise_1"
770 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ Create the add-stage-0 output record.

        width  -- FP number width; also the bit-width of oz
        id_wid -- bit-width of the mid signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # tot is 4 bits wider than z's m_width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)

    # presumably the "multiplex id" that travels with the data -- confirm
    self.mid = Signal(id_wid, reset_less=True)
780 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
781 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
784 class FPAddStage0Mod
:
786 def __init__(self
, width
, id_wid
):
789 self
.i
= self
.ispec()
790 self
.o
= self
.ospec()
793 return FPSCData(self
.width
, self
.id_wid
)
796 return FPAddStage0Data(self
.width
, self
.id_wid
)
798 def process(self
, i
):
801 def setup(self
, m
, i
):
802 """ links module to inputs and outputs
804 m
.submodules
.add0
= self
805 m
.d
.comb
+= self
.i
.eq(i
)
807 def elaborate(self
, platform
):
809 m
.submodules
.add0_in_a
= self
.i
.a
810 m
.submodules
.add0_in_b
= self
.i
.b
811 m
.submodules
.add0_out_z
= self
.o
.z
813 # store intermediate tests (and zero-extended mantissas)
814 seq
= Signal(reset_less
=True)
815 mge
= Signal(reset_less
=True)
816 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
817 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
818 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
819 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
820 am0
.eq(Cat(self
.i
.a
.m
, 0)),
821 bm0
.eq(Cat(self
.i
.b
.m
, 0))
823 # same-sign (both negative or both positive) add mantissas
824 with m
.If(~self
.i
.out_do_z
):
825 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
828 self
.o
.tot
.eq(am0
+ bm0
),
829 self
.o
.z
.s
.eq(self
.i
.a
.s
)
831 # a mantissa greater than b, use a
834 self
.o
.tot
.eq(am0
- bm0
),
835 self
.o
.z
.s
.eq(self
.i
.a
.s
)
837 # b mantissa greater than a, use b
840 self
.o
.tot
.eq(bm0
- am0
),
841 self
.o
.z
.s
.eq(self
.i
.b
.s
)
844 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.z
.v
)
846 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
847 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
851 class FPAddStage0(FPState
):
852 """ First stage of add. covers same-sign (add) and subtract
853 special-casing when mantissas are greater or equal, to
854 give greatest accuracy.
def __init__(self, width, id_wid):
    """ Build the "add_0" state.

        width  -- forwarded to FPAddStage0Mod
        id_wid -- forwarded to FPAddStage0Mod

        FPAddStage0Mod.__init__ takes (width, id_wid); both are passed
        here, since constructing it with width alone raises TypeError.
    """
    FPState.__init__(self, "add_0")
    self.mod = FPAddStage0Mod(width, id_wid)

    # output record with the same spec as the worker's output
    self.o = self.mod.ospec()
862 def setup(self
, m
, i
):
863 """ links module to inputs and outputs
867 # NOTE: these could be done as combinatorial (merge add0+add1)
868 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
874 class FPAddStage1Data
:
876 def __init__(self
, width
, id_wid
):
877 self
.z
= FPNumBase(width
, False)
878 self
.out_do_z
= Signal(reset_less
=True)
879 self
.oz
= Signal(width
, reset_less
=True)
881 self
.mid
= Signal(id_wid
, reset_less
=True)
884 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
885 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
889 class FPAddStage1Mod(FPState
):
890 """ Second stage of add: preparation for normalisation.
891 detects when tot sum is too big (tot[27] is kinda a carry bit)
894 def __init__(self
, width
, id_wid
):
897 self
.i
= self
.ispec()
898 self
.o
= self
.ospec()
901 return FPAddStage0Data(self
.width
, self
.id_wid
)
904 return FPAddStage1Data(self
.width
, self
.id_wid
)
906 def process(self
, i
):
909 def setup(self
, m
, i
):
910 """ links module to inputs and outputs
912 m
.submodules
.add1
= self
913 m
.submodules
.add1_out_overflow
= self
.o
.of
915 m
.d
.comb
+= self
.i
.eq(i
)
917 def elaborate(self
, platform
):
919 #m.submodules.norm1_in_overflow = self.in_of
920 #m.submodules.norm1_out_overflow = self.out_of
921 #m.submodules.norm1_in_z = self.in_z
922 #m.submodules.norm1_out_z = self.out_z
923 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
924 # tot[-1] (MSB) gets set when the sum overflows. shift result down
925 with m
.If(~self
.i
.out_do_z
):
926 with m
.If(self
.i
.tot
[-1]):
928 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
929 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
930 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
931 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
932 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
933 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
935 # tot[-1] (MSB) zero case
938 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
939 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
940 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
941 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
942 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
945 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
946 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
947 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
952 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ Build the "add_1" state.

        width  -- forwarded to FPAddStage1Mod and to the out_z number
        id_wid -- forwarded to FPAddStage1Mod

        FPAddStage1Mod.__init__ takes (width, id_wid); both are passed
        here, since constructing it with width alone raises TypeError.
    """
    FPState.__init__(self, "add_1")
    self.mod = FPAddStage1Mod(width, id_wid)

    # result number and overflow record held by this state
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()

    # 1-bit signal; unlike most flags in this file it is NOT reset_less
    self.norm_stb = Signal()
961 def setup(self
, m
, i
):
962 """ links module to inputs and outputs
966 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
968 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
969 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
970 m
.d
.sync
+= self
.norm_stb
.eq(1)
973 m
.next
= "normalise_1"
976 class FPNormaliseModSingle
:
978 def __init__(self
, width
):
980 self
.in_z
= self
.ispec()
981 self
.out_z
= self
.ospec()
984 return FPNumBase(self
.width
, False)
987 return FPNumBase(self
.width
, False)
989 def setup(self
, m
, i
):
990 """ links module to inputs and outputs
992 m
.submodules
.normalise
= self
993 m
.d
.comb
+= self
.i
.eq(i
)
995 def elaborate(self
, platform
):
998 mwid
= self
.out_z
.m_width
+2
999 pe
= PriorityEncoder(mwid
)
1000 m
.submodules
.norm_pe
= pe
1002 m
.submodules
.norm1_out_z
= self
.out_z
1003 m
.submodules
.norm1_in_z
= self
.in_z
1005 in_z
= FPNumBase(self
.width
, False)
1007 m
.submodules
.norm1_insel_z
= in_z
1008 m
.submodules
.norm1_insel_overflow
= in_of
1010 espec
= (len(in_z
.e
), True)
1011 ediff_n126
= Signal(espec
, reset_less
=True)
1012 msr
= MultiShiftRMerge(mwid
, espec
)
1013 m
.submodules
.multishift_r
= msr
1015 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1016 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1017 # initialise out from in (overridden below)
1018 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1019 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1020 # normalisation decrease condition
1021 decrease
= Signal(reset_less
=True)
1022 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1024 with m
.If(decrease
):
1025 # *sigh* not entirely obvious: count leading zeros (clz)
1026 # with a PriorityEncoder: to find from the MSB
1027 # we reverse the order of the bits.
1028 temp_m
= Signal(mwid
, reset_less
=True)
1029 temp_s
= Signal(mwid
+1, reset_less
=True)
1030 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1032 # cat round and guard bits back into the mantissa
1033 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1034 pe
.i
.eq(temp_m
[::-1]), # inverted
1035 clz
.eq(pe
.o
), # count zeros from MSB down
1036 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1037 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1038 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ Create the record carrying roundz, the number z, the early-out
        fields (out_do_z, oz) and mid.

        width  -- FP number width; also the bit-width of oz
        id_wid -- bit-width of the mid signal
    """
    # 1-bit flag
    self.roundz = Signal(reset_less=True)

    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)

    # presumably the "multiplex id" that travels with the data -- confirm
    self.mid = Signal(id_wid, reset_less=True)
1053 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1054 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1057 class FPNorm1ModSingle
:
1059 def __init__(self
, width
, id_wid
):
1061 self
.id_wid
= id_wid
1062 self
.i
= self
.ispec()
1063 self
.o
= self
.ospec()
1066 return FPAddStage1Data(self
.width
, self
.id_wid
)
1069 return FPNorm1Data(self
.width
, self
.id_wid
)
1071 def setup(self
, m
, i
):
1072 """ links module to inputs and outputs
1074 m
.submodules
.normalise_1
= self
1075 m
.d
.comb
+= self
.i
.eq(i
)
1077 def process(self
, i
):
1080 def elaborate(self
, platform
):
1083 mwid
= self
.o
.z
.m_width
+2
1084 pe
= PriorityEncoder(mwid
)
1085 m
.submodules
.norm_pe
= pe
1088 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1090 m
.submodules
.norm1_out_z
= self
.o
.z
1091 m
.submodules
.norm1_out_overflow
= of
1092 m
.submodules
.norm1_in_z
= self
.i
.z
1093 m
.submodules
.norm1_in_overflow
= self
.i
.of
1096 m
.submodules
.norm1_insel_z
= i
.z
1097 m
.submodules
.norm1_insel_overflow
= i
.of
1099 espec
= (len(i
.z
.e
), True)
1100 ediff_n126
= Signal(espec
, reset_less
=True)
1101 msr
= MultiShiftRMerge(mwid
, espec
)
1102 m
.submodules
.multishift_r
= msr
1104 m
.d
.comb
+= i
.eq(self
.i
)
1105 # initialise out from in (overridden below)
1106 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1107 m
.d
.comb
+= of
.eq(i
.of
)
1108 # normalisation increase/decrease conditions
1109 decrease
= Signal(reset_less
=True)
1110 increase
= Signal(reset_less
=True)
1111 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1112 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1114 with m
.If(~self
.i
.out_do_z
):
1115 with m
.If(decrease
):
1116 # *sigh* not entirely obvious: count leading zeros (clz)
1117 # with a PriorityEncoder: to find from the MSB
1118 # we reverse the order of the bits.
1119 temp_m
= Signal(mwid
, reset_less
=True)
1120 temp_s
= Signal(mwid
+1, reset_less
=True)
1121 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1122 # make sure that the amount to decrease by does NOT
1123 # go below the minimum non-INF/NaN exponent
1124 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1127 # cat round and guard bits back into the mantissa
1128 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1129 pe
.i
.eq(temp_m
[::-1]), # inverted
1130 clz
.eq(limclz
), # count zeros from MSB down
1131 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1132 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1133 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1134 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1135 # overflow in bits 0..1: got shifted too (leave sticky)
1136 of
.guard
.eq(temp_s
[1]), # guard
1137 of
.round_bit
.eq(temp_s
[0]), # round
1140 with m
.Elif(increase
):
1141 temp_m
= Signal(mwid
+1, reset_less
=True)
1143 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1145 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1146 # connect multi-shifter to inp/out mantissa (and ediff)
1148 msr
.diff
.eq(ediff_n126
),
1149 self
.o
.z
.m
.eq(msr
.m
[3:]),
1150 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1151 # overflow in bits 0..1: got shifted too (leave sticky)
1152 of
.guard
.eq(temp_s
[2]), # guard
1153 of
.round_bit
.eq(temp_s
[1]), # round
1154 of
.sticky
.eq(temp_s
[0]), # sticky
1155 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1158 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1159 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1160 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
class FPNorm1ModMulti:
    """ One-bit-per-pass normalisation logic (combinatorial).

        Each evaluation moves the mantissa by at most one bit position and
        adjusts the exponent by one to match.  out_norm is raised whenever
        either adjustment condition holds on this pass.

        in_select chooses the value to work on: when set, in_z / in_of are
        used, otherwise temp_z / temp_of.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: passed to FPNumBase for every z operand
            * single_cycle: accepted for interface compatibility; unused here
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # elaborate() assigns to out_norm, so it must exist on the instance
        # (previously it was never created, giving an AttributeError).
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """ builds the combinatorial normalisation step and returns the Module
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local working copies: these hold whichever input was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)

        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)

        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end

        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                # assigned after the shift, so this wins for bit 0
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                # bits shifted out of the mantissa move down the
                # guard -> round -> sticky chain
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState):
    """ FSM state "normalise_1": thin wrapper around FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: operand width, passed to the module and to FPNumBase
            * id_wid: identifier width, passed to the module
            * single_cycle: accepted for interface compatibility; unused here
        """
        FPState.__init__(self, "normalise_1")
        # id_wid is passed through, the same way FPNormToPack constructs
        # FPNorm1ModSingle (previously only width was supplied).
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM transition: hand over to the "round" state
        """
        m.next = "round"
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1", multi-pass variant built on FPNorm1ModMulti.

        The state registers the module's result into temp_z / temp_of on
        every clock, and stays put while out_norm is raised.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        # handshake and loop-control signals
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        # registered intermediate value, and the final result
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        self.mod.setup(
            m, in_z, in_of, norm_stb,
            self.in_accept, self.temp_z, self.temp_of,
            self.out_z, self.out_norm,
        )

        m.d.comb += self.stb.eq(norm_stb)
        # default: sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ per-clock behaviour while the FSM is in this state
        """
        # accept new data on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            # an adjustment happened this pass: remain in this state
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
class FPNormToPack(FPState):
    """ FSM state "normalise_1" that runs normalise, round, corrections
        and pack as one chained block, registering the packed result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        """ input spec: same data as the first stage in the chain takes
        """
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        """ output spec: same data as the last stage in the chain produces
        """
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        pmod = stages[-1]
        chain = StageChain(stages)
        chain.setup(m, i)

        # out_z only exists once setup() has been called
        self.out_z = pmod.ospec()

        m.d.sync += [
            self.out_z.mid.eq(pmod.o.mid),
            self.out_z.z.v.eq(pmod.o.z.v), # outputs packed result
        ]

    def action(self, m):
        """ FSM transition: go straight to the output state
        """
        m.next = "pack_put_z"
class FPRoundData:
    """ Data record carried between the round / corrections / pack stages:
        the number z, a bypass flag with its bypass value, and the id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)    # bypass flag
        self.oz = Signal(width, reset_less=True)   # value used on bypass
        self.mid = Signal(id_wid, reset_less=True) # identifier

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        return [
            self.z.eq(i.z),
            self.out_do_z.eq(i.out_do_z),
            self.oz.eq(i.oz),
            self.mid.eq(i.mid),
        ]
class FPRoundMod:
    """ Rounding stage (combinatorial): copies the input record and, when
        not bypassed and roundz is set, increments the mantissa.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record type
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record type
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns this stage's output record (i itself is not read here)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this stage as a submodule of m and wires i to its input
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z
        m.d.comb += dst.eq(src) # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                m.d.comb += dst.z.m.eq(src.z.m + 1) # mantissa up
                # incrementing an all-ones mantissa carries into the exponent
                with m.If(src.z.m == src.z.m1s): # all 1s
                    m.d.comb += dst.z.e.eq(src.z.e + 1) # exponent up

        return m
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM transition: hand over to the "corrections" state
        """
        m.next = "corrections"
class FPCorrectionsMod:
    """ Corrections stage (combinatorial): copies the input record and, when
        not bypassed and z is flagged is_denormalised, sets the exponent
        to z.N127.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record type
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record type (same as the input record type)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns this stage's output record (i itself is not read here)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        m.d.comb += dst.eq(src) # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM transition: hand over to the "pack" state
        """
        m.next = "pack"
class FPPackData:
    """ Output record of the pack stage: the packed number z plus the id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True) # identifier

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        copies = [self.z.eq(i.z)]
        copies.append(self.mid.eq(i.mid))
        return copies
class FPPackMod:
    """ Pack stage (combinatorial).

        When the input is not bypassed, the output number is built with
        z.inf() if the input is flagged is_overflowed, otherwise with
        z.create() from sign, exponent and mantissa.  When bypassed, the
        input's oz value is copied to the output value directly.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record type
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record type
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns this stage's output record (i itself is not read here)
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        packed = self.o.z
        m.submodules.pack_in_z = src.z
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_overflowed):
                m.d.comb += packed.inf(src.z.s)
            with m.Else():
                m.d.comb += packed.create(src.z.s, src.z.e, src.z.m)
        with m.Else():
            # bypass: use the precomputed value as-is
            m.d.comb += packed.v.eq(src.oz)

        return m
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its output.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # out_z is an FPPackData (fields: z, mid) and FPPackMod publishes
        # its result as "o", so the packed value lives at .z.v on both
        # sides - the same path FPNormToPack uses.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM transition: go to the output state
        """
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ Output state: copies in_z (and optionally the id) to out_z and
        holds the output strobe until it is acknowledged.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z / out_z: source value and destination record
            * in_mid / out_mid: id source and destination (in_mid may be None)
            * to_state: state entered after the handshake, default "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ per-clock behaviour while the FSM is in this state
        """
        dest = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z.v)
        with m.If(dest.stb & dest.ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
class FPPutZIdx(FPState):
    """ Output state that writes in_z to the entry of out_zs selected by
        in_mid, holding that entry's strobe until it is acknowledged.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source value
            * out_zs: indexable collection of destinations
            * in_mid: index into out_zs
            * to_state: state entered after the handshake, default "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ per-clock behaviour while the FSM is in this state
        """
        # local copies of the selected entry's handshake lines
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        target = self.out_zs[self.in_mid]
        m.d.comb += [
            outz_stb.eq(target.stb),
            outz_ack.eq(target.ack),
        ]
        m.d.sync += target.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += target.stb.eq(1)
class FPADDBaseData:
    """ Input record for the adder: operands a and b, plus the id.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)                     # first operand
        self.b = Signal(width)                     # second operand
        self.mid = Signal(id_wid, reset_less=True) # identifier

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        copies = [self.a.eq(i.a), self.b.eq(i.b)]
        copies.append(self.mid.eq(i.mid))
        return copies
class FPOpData:
    """ Output record for the adder: the result z (an FPOp) plus the id.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True) # identifier

    def eq(self, i):
        """ returns the list of assignments copying record i into this one
        """
        copies = [self.z.eq(i.z)]
        copies.append(self.mid.eq(i.mid))
        return copies
1609 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1612 * width: bit-width of IEEE754. supported: 16, 32, 64
1613 * id_wid: an identifier that is sync-connected to the input
1614 * single_cycle: True indicates each stage to complete in 1 clock
1615 * compact: True indicates a reduced number of stages
1618 self
.id_wid
= id_wid
1619 self
.single_cycle
= single_cycle
1620 self
.compact
= compact
1622 self
.in_t
= Trigger()
1623 self
.i
= self
.ispec()
1624 self
.o
= self
.ospec()
1629 return FPADDBaseData(self
.width
, self
.id_wid
)
1632 return FPOpData(self
.width
, self
.id_wid
)
1634 def add_state(self
, state
):
1635 self
.states
.append(state
)
1638 def get_fragment(self
, platform
=None):
1639 """ creates the HDL code-fragment for FPAdd
1642 m
.submodules
.out_z
= self
.o
.z
1643 m
.submodules
.in_t
= self
.in_t
1645 self
.get_compact_fragment(m
, platform
)
1647 self
.get_longer_fragment(m
, platform
)
1649 with m
.FSM() as fsm
:
1651 for state
in self
.states
:
1652 with m
.State(state
.state_from
):
1657 def get_longer_fragment(self
, m
, platform
=None):
1659 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1661 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1665 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1666 sc
.setup(m
, a
, b
, self
.in_mid
)
1668 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1669 dn
.setup(m
, a
, b
, sc
.in_mid
)
1671 if self
.single_cycle
:
1672 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1673 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1675 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1676 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1678 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1679 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1681 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1682 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1684 if self
.single_cycle
:
1685 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1686 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1688 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1689 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1691 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1692 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1694 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1695 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1697 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1698 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1700 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1701 pa
.in_mid
, self
.out_mid
))
1703 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1704 pa
.in_mid
, self
.out_mid
))
1706 def get_compact_fragment(self
, m
, platform
=None):
1708 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1709 self
.width
, self
.id_wid
))
1710 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1712 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1715 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1718 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1719 n1
.setup(m
, alm
.a1o
)
1721 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1722 n1
.out_z
.mid
, self
.o
.mid
))
1724 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1725 sc
.o
.mid
, self
.o
.mid
))
1728 class FPADDBase(FPState
):
1730 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1733 * width: bit-width of IEEE754. supported: 16, 32, 64
1734 * id_wid: an identifier that is sync-connected to the input
1735 * single_cycle: True indicates each stage to complete in 1 clock
1737 FPState
.__init
__(self
, "fpadd")
1739 self
.single_cycle
= single_cycle
1740 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1741 self
.o
= self
.ospec()
1743 self
.in_t
= Trigger()
1744 self
.i
= self
.ispec()
1746 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1747 self
.in_accept
= Signal(reset_less
=True)
1748 self
.add_stb
= Signal(reset_less
=True)
1749 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1752 return self
.mod
.ispec()
1755 return self
.mod
.ospec()
1757 def setup(self
, m
, i
, add_stb
, in_mid
):
1758 m
.d
.comb
+= [self
.i
.eq(i
),
1759 self
.mod
.i
.eq(self
.i
),
1760 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1761 #self.add_stb.eq(add_stb),
1762 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1763 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1764 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1765 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1766 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1767 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1770 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1771 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1772 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1773 #m.d.sync += self.in_t.stb.eq(0)
1775 m
.submodules
.fpadd
= self
.mod
1777 def action(self
, m
):
1779 # in_accept is set on incoming strobe HIGH and ack LOW.
1780 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1782 #with m.If(self.in_t.ack):
1783 # m.d.sync += self.in_t.stb.eq(0)
1784 with m
.If(~self
.z_done
):
1785 # not done: test for accepting an incoming operand pair
1786 with m
.If(self
.in_accept
):
1788 self
.add_ack
.eq(1), # acknowledge receipt...
1789 self
.in_t
.stb
.eq(1), # initiate add
1792 m
.d
.sync
+= [self
.add_ack
.eq(0),
1793 self
.in_t
.stb
.eq(0),
1797 # done: acknowledge, and write out id and value
1798 m
.d
.sync
+= [self
.add_ack
.eq(1),
1805 if self
.in_mid
is not None:
1806 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1809 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1811 # move to output state on detecting z ack
1812 with m
.If(self
.out_z
.trigger
):
1813 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1816 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1820 def __init__(self
, width
, id_wid
):
1822 self
.id_wid
= id_wid
1824 for i
in range(rs_sz
):
1826 out_z
.name
= "out_z_%d" % i
1828 self
.res
= Array(res
)
1829 self
.in_z
= FPOp(width
)
1830 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1832 def setup(self
, m
, in_z
, in_mid
):
1833 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1834 self
.in_mid
.eq(in_mid
)]
1836 def get_fragment(self
, platform
=None):
1837 """ creates the HDL code-fragment for FPAdd
1840 m
.submodules
.res_in_z
= self
.in_z
1841 m
.submodules
+= self
.res
1853 """ FPADD: stages as follows:
1859 FPAddBase---> FPAddBaseMod
1861 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1863 FPAddBase is tricky: it is both a stage and *has* stages.
1864 Connection to FPAddBaseMod therefore requires an in stb/ack
1865 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1866 needs to be the thing that raises the incoming stb.
1869 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1872 * width: bit-width of IEEE754. supported: 16, 32, 64
1873 * id_wid: an identifier that is sync-connected to the input
1874 * single_cycle: True indicates each stage to complete in 1 clock
1877 self
.id_wid
= id_wid
1878 self
.single_cycle
= single_cycle
1880 #self.out_z = FPOp(width)
1881 self
.ids
= FPID(id_wid
)
1884 for i
in range(rs_sz
):
1887 in_a
.name
= "in_a_%d" % i
1888 in_b
.name
= "in_b_%d" % i
1889 rs
.append((in_a
, in_b
))
1893 for i
in range(rs_sz
):
1895 out_z
.name
= "out_z_%d" % i
1897 self
.res
= Array(res
)
1901 def add_state(self
, state
):
1902 self
.states
.append(state
)
1905 def get_fragment(self
, platform
=None):
1906 """ creates the HDL code-fragment for FPAdd
1909 m
.submodules
+= self
.rs
1911 in_a
= self
.rs
[0][0]
1912 in_b
= self
.rs
[0][1]
1914 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1919 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1924 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1925 ab
= self
.add_state(ab
)
1926 abd
= ab
.ispec() # create an input spec object for FPADDBase
1927 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1928 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1931 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1934 with m
.FSM() as fsm
:
1936 for state
in self
.states
:
1937 with m
.State(state
.state_from
):
1943 if __name__
== "__main__":
1945 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1946 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1947 alu
.rs
[0][1].ports() + \
1948 alu
.res
[0].ports() + \
1949 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1951 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1952 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1953 alu
.in_t
.ports() + \
1954 alu
.out_z
.ports() + \
1955 [alu
.in_mid
, alu
.out_mid
])
1958 # works... but don't use, just do "python fname.py convert -t v"
1959 #print (verilog.convert(alu, ports=[
1960 # ports=alu.in_a.ports() + \
1961 # alu.in_b.ports() + \
1962 # alu.out_z.ports())