1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """ Base class for one named state of the adder state machine.

        Keeps the name of the state and offers helpers that publish a
        dictionary of inputs / outputs as attributes of the instance.
    """

    def __init__(self, state_from):
        """ state_from: name of the state this object represents
        """
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the inputs dict and exposes each entry as an attribute
        """
        # NOTE(review): this assignment mirrors set_outputs; the original
        # statement was lost from the extracted text - confirm.
        self.inputs = inputs
        for k, v in inputs.items():
            # NOTE(review): loop body restored (was missing) - confirm.
            setattr(self, k, v)

    def set_outputs(self, outputs):
        """ stores the outputs dict and exposes each entry as an attribute
        """
        self.outputs = outputs
        for k, v in outputs.items():
            # NOTE(review): loop body restored (was missing) - confirm.
            setattr(self, k, v)
class FPGetSyncOpsMod:
    """ Passes a group of operands through once every one of them has
        been strobed and the group is acknowledged.

        Attributes:
        * in_op      - list of num_ops input Signals (width bits each)
        * out_op     - list of num_ops output Signals (width bits each)
        * stb        - one strobe bit per operand
        * ack        - acknowledge input
        * ready      - high when every bit of stb is set
        * out_decode - high when ack and ready are both high
    """

    def __init__(self, width, num_ops=2):
        """ width: bit-width of each operand
            num_ops: number of operands handled together
        """
        self.width = width
        self.num_ops = num_ops
        inops = []
        outops = []
        for i in range(num_ops):
            inops.append(Signal(width, reset_less=True))
            outops.append(Signal(width, reset_less=True))
        self.in_op = inops
        self.out_op = outops
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # Const(-1, (num_ops, False)) is an unsigned all-ones value of
        # num_ops bits: ready only when every strobe bit is set.
        m.d.comb += self.ready.eq(self.stb == Const(-1, (self.num_ops, False)))
        m.d.comb += self.out_decode.eq(self.ack & self.ready)
        with m.If(self.out_decode):
            for i in range(self.num_ops):
                # NOTE(review): clock domain of this assignment was lost
                # from the extracted text; comb assumed - confirm.
                m.d.comb += [
                        self.out_op[i].eq(self.in_op[i]),
                ]
        return m

    def ports(self):
        """ returns the list of signals forming the module's interface
        """
        return self.in_op + self.out_op + [self.stb, self.ack]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
140 def __init__(self
, width
):
141 self
.in_op
= FPOp(width
)
142 self
.out_op
= Signal(width
)
143 self
.out_decode
= Signal(reset_less
=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
class FPGet2OpMod(Trigger):
    """ Decodes the two raw operands of the input data into FP numbers
        when the Trigger base class signals `trigger`, and passes the
        multiplex id (mid) through alongside them.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
        """
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data specification
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data specification
        """
        return FPNumBase2Ops(self.width, self.id_wid)

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.o.a
        m.submodules.get_op2_out = self.o.b
        # temporaries that perform the decode of each operand
        out_op1 = FPNumIn(None, self.width)
        out_op2 = FPNumIn(None, self.width)
        with m.If(self.trigger):
            m.d.comb += [
                out_op1.decode(self.i.a),
                out_op2.decode(self.i.b),
                self.o.a.eq(out_op1),
                self.o.b.eq(out_op2),
                self.o.mid.eq(self.i.mid)
            ]
        return m
218 class FPGet2Op(FPState
):
222 def __init__(self
, in_state
, out_state
, width
, id_wid
):
223 FPState
.__init
__(self
, in_state
)
224 self
.out_state
= out_state
225 self
.mod
= FPGet2OpMod(width
, id_wid
)
226 self
.o
= self
.mod
.ospec()
227 self
.in_stb
= Signal(reset_less
=True)
228 self
.out_ack
= Signal(reset_less
=True)
229 self
.out_decode
= Signal(reset_less
=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
# NOTE(review): the class header was missing from the extracted text; the
# name is taken from the FPNumBase2Ops(width, id_wid) call sites in this
# file, whose users read .a, .b and .mid - confirm.
class FPNumBase2Ops:
    """ Data bundle: two FPNumBase operands (a, b) plus a multiplex id.
    """

    def __init__(self, width, id_wid, m_extra=True):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
            m_extra: forwarded as second argument to FPNumBase
        """
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
# NOTE(review): the class header was missing from the extracted text; the
# name is taken from FPAddSpecialCasesMod.ospec(), whose users read
# .out_do_z, .a, .b, .z and .mid - confirm.
class FPSCData:
    """ Data bundle produced by the special-cases stage: operands a and b,
        result z, the out_do_z flag and the multiplex id.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
        """
        self.out_do_z = Signal(reset_less=True)
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z),
                self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
276 class FPAddSpecialCasesMod
:
277 """ special cases: NaNs, infs, zeros, denormalised
278 NOTE: some of these are unique to add. see "Special Operations"
279 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
282 def __init__(self
, width
, id_wid
):
285 self
.i
= self
.ispec()
286 self
.o
= self
.ospec()
289 return FPNumBase2Ops(self
.width
, self
.id_wid
)
292 return FPSCData(self
.width
, self
.id_wid
)
294 def setup(self
, m
, i
):
295 """ links module to inputs and outputs
297 m
.submodules
.specialcases
= self
298 m
.d
.comb
+= self
.i
.eq(i
)
300 def elaborate(self
, platform
):
303 m
.submodules
.sc_in_a
= self
.i
.a
304 m
.submodules
.sc_in_b
= self
.i
.b
305 m
.submodules
.sc_out_z
= self
.o
.z
308 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
311 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
313 # if a is NaN or b is NaN return NaN
314 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
315 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
316 m
.d
.comb
+= self
.o
.z
.nan(0)
318 # XXX WEIRDNESS for FP16 non-canonical NaN handling
321 ## if a is zero and b is NaN return -b
322 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
323 # m.d.comb += self.o.out_do_z.eq(1)
324 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
326 ## if b is zero and a is NaN return -a
327 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
328 # m.d.comb += self.o.out_do_z.eq(1)
329 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
331 ## if a is -zero and b is NaN return -b
332 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
333 # m.d.comb += self.o.out_do_z.eq(1)
334 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
336 ## if b is -zero and a is NaN return -a
337 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
338 # m.d.comb += self.o.out_do_z.eq(1)
339 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
341 # if a is inf return inf (or NaN)
342 with m
.Elif(self
.i
.a
.is_inf
):
343 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
344 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
345 # if a is inf and signs don't match return NaN
346 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
347 m
.d
.comb
+= self
.o
.z
.nan(0)
349 # if b is inf return inf
350 with m
.Elif(self
.i
.b
.is_inf
):
351 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
352 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
354 # if a is zero and b zero return signed-a/b
355 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
356 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
357 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
361 # if a is zero return b
362 with m
.Elif(self
.i
.a
.is_zero
):
363 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
364 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
367 # if b is zero return a
368 with m
.Elif(self
.i
.b
.is_zero
):
369 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
370 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
373 # if a equal to -b return zero (+ve zero)
374 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
375 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
376 m
.d
.comb
+= self
.o
.z
.zero(0)
378 # Denormalised Number checks next, so pass a/b data through
380 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
381 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
382 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
384 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
390 def __init__(self
, id_wid
):
393 self
.in_mid
= Signal(id_wid
, reset_less
=True)
394 self
.out_mid
= Signal(id_wid
, reset_less
=True)
400 if self
.id_wid
is not None:
401 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
404 class FPAddSpecialCases(FPState
):
405 """ special cases: NaNs, infs, zeros, denormalised
406 NOTE: some of these are unique to add. see "Special Operations"
407 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
410 def __init__(self
, width
, id_wid
):
411 FPState
.__init
__(self
, "special_cases")
412 self
.mod
= FPAddSpecialCasesMod(width
)
413 self
.out_z
= self
.mod
.ospec()
414 self
.out_do_z
= Signal(reset_less
=True)
416 def setup(self
, m
, i
):
417 """ links module to inputs and outputs
419 self
.mod
.setup(m
, i
, self
.out_do_z
)
420 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
421 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
425 with m
.If(self
.out_do_z
):
428 m
.next
= "denormalise"
431 class FPAddSpecialCasesDeNorm(FPState
):
432 """ special cases: NaNs, infs, zeros, denormalised
433 NOTE: some of these are unique to add. see "Special Operations"
434 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
437 def __init__(self
, width
, id_wid
):
438 FPState
.__init
__(self
, "special_cases")
439 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
440 self
.out_z
= self
.smod
.ospec()
441 self
.out_do_z
= Signal(reset_less
=True)
443 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
444 self
.o
= self
.dmod
.ospec()
446 def setup(self
, m
, i
):
447 """ links module to inputs and outputs
449 self
.smod
.setup(m
, i
)
450 self
.dmod
.setup(m
, self
.smod
.o
)
451 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
454 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
455 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
457 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
460 with m
.If(self
.out_do_z
):
class FPAddDeNormMod(FPState):
    """ Denormalised-number handling for both operands.

        Each operand is copied from input to output; when its exp_n127
        flag is set the output exponent is replaced by N126, otherwise
        the top mantissa bit of the output is set.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data specification
        """
        return FPNumBase2Ops(self.width, self.id_wid)

    def ospec(self):
        """ output data specification
        """
        return FPNumBase2Ops(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        # a and b receive identical treatment, so handle them in a loop
        for inp, out in ((self.i.a, self.o.a), (self.i.b, self.o.b)):
            m.d.comb += out.eq(inp)
            with m.If(inp.exp_n127):
                m.d.comb += out.e.eq(inp.N126) # limit exponent
            # NOTE(review): the Else header was missing from the extracted
            # text; restored because the two statements are alternatives.
            with m.Else():
                m.d.comb += out.m[-1].eq(1) # set top mantissa bit

        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
510 class FPAddDeNorm(FPState
):
512 def __init__(self
, width
, id_wid
):
513 FPState
.__init
__(self
, "denormalise")
514 self
.mod
= FPAddDeNormMod(width
)
515 self
.out_a
= FPNumBase(width
)
516 self
.out_b
= FPNumBase(width
)
518 def setup(self
, m
, i
):
519 """ links module to inputs and outputs
523 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
524 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
527 # Denormalised Number checks
class FPAddAlignMultiMod(FPState):
    """ One step of exponent alignment.

        Compares the exponents of in_a and in_b: the operand with the
        smaller exponent is passed through shift_down, the other is
        copied unchanged.  exp_eq is raised when the exponents are equal.
    """

    def __init__(self, width):
        """ width: operand bit-width
        """
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: exponents not (yet) equal, operands passed through
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # NOTE(review): the three branch headers below were missing from
        # the extracted text; restored from the branch comments and the
        # otherwise-unused agtb / altb signals - confirm.
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
573 class FPAddAlignMulti(FPState
):
575 def __init__(self
, width
, id_wid
):
576 FPState
.__init
__(self
, "align")
577 self
.mod
= FPAddAlignMultiMod(width
)
578 self
.out_a
= FPNumIn(None, width
)
579 self
.out_b
= FPNumIn(None, width
)
580 self
.exp_eq
= Signal(reset_less
=True)
582 def setup(self
, m
, in_a
, in_b
):
583 """ links module to inputs and outputs
585 m
.submodules
.align
= self
.mod
586 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
587 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
588 #m.d.comb += self.out_a.eq(self.mod.out_a)
589 #m.d.comb += self.out_b.eq(self.mod.out_b)
590 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
591 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
592 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
595 with m
.If(self
.exp_eq
):
# NOTE(review): the class header was missing from the extracted text; the
# name is taken from FPAddAlignSingleMod.ospec(), whose users read
# .a, .b and .mid - confirm.
class FPNumIn2Ops:
    """ Data bundle: two FPNumIn operands (a, b) plus a multiplex id.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
        """
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
610 class FPAddAlignSingleMod
:
612 def __init__(self
, width
, id_wid
):
615 self
.i
= self
.ispec()
616 self
.o
= self
.ospec()
619 return FPNumBase2Ops(self
.width
, self
.id_wid
)
622 return FPNumIn2Ops(self
.width
, self
.id_wid
)
624 def process(self
, i
):
627 def setup(self
, m
, i
):
628 """ links module to inputs and outputs
630 m
.submodules
.align
= self
631 m
.d
.comb
+= self
.i
.eq(i
)
633 def elaborate(self
, platform
):
634 """ Aligns A against B or B against A, depending on which has the
635 greater exponent. This is done in a *single* cycle using
636 variable-width bit-shift
638 the shifter used here is quite expensive in terms of gates.
639 Mux A or B in (and out) into temporaries, as only one of them
640 needs to be aligned against the other
644 m
.submodules
.align_in_a
= self
.i
.a
645 m
.submodules
.align_in_b
= self
.i
.b
646 m
.submodules
.align_out_a
= self
.o
.a
647 m
.submodules
.align_out_b
= self
.o
.b
649 # temporary (muxed) input and output to be shifted
650 t_inp
= FPNumBase(self
.width
)
651 t_out
= FPNumIn(None, self
.width
)
652 espec
= (len(self
.i
.a
.e
), True)
653 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
654 m
.submodules
.align_t_in
= t_inp
655 m
.submodules
.align_t_out
= t_out
656 m
.submodules
.multishift_r
= msr
658 ediff
= Signal(espec
, reset_less
=True)
659 ediffr
= Signal(espec
, reset_less
=True)
660 tdiff
= Signal(espec
, reset_less
=True)
661 elz
= Signal(reset_less
=True)
662 egz
= Signal(reset_less
=True)
664 # connect multi-shifter to t_inp/out mantissa (and tdiff)
665 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
666 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
667 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
668 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
669 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
671 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
672 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
673 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
674 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
676 # default: A-exp == B-exp, A and B untouched (fall through)
677 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
678 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
679 # only one shifter (muxed)
680 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
681 # exponent of a greater than b: shift b down
683 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
686 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
688 # exponent of b greater than a: shift a down
690 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
693 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
696 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
701 class FPAddAlignSingle(FPState
):
703 def __init__(self
, width
, id_wid
):
704 FPState
.__init
__(self
, "align")
705 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
706 self
.out_a
= FPNumIn(None, width
)
707 self
.out_b
= FPNumIn(None, width
)
709 def setup(self
, m
, i
):
710 """ links module to inputs and outputs
714 # NOTE: could be done as comb
715 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
716 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
722 class FPAddAlignSingleAdd(FPState
):
724 def __init__(self
, width
, id_wid
):
725 FPState
.__init
__(self
, "align")
728 self
.a1o
= self
.ospec()
731 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
734 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
736 def setup(self
, m
, i
):
737 """ links module to inputs and outputs
740 # chain AddAlignSingle, AddStage0 and AddStage1
741 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
742 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
743 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
745 chain
= StageChain([mod
, a0mod
, a1mod
])
748 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
751 m
.next
= "normalise_1"
class FPAddStage0Data:
    """ Data bundle produced by the first add stage: the partial result
        z, the mantissa total (tot) and the multiplex id.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
        """
        self.z = FPNumBase(width, False)
        # total is 4 bits wider than the mantissa of z
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        return [self.z.eq(i.z), self.tot.eq(i.tot), self.mid.eq(i.mid)]
class FPAddStage0Mod:
    """ First stage of add: adds the mantissas when the signs match,
        otherwise subtracts the smaller mantissa from the larger and
        takes the sign of the operand with the larger mantissa.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data specification
        """
        return FPNumBase2Ops(self.width, self.id_wid)

    def ospec(self):
        """ output data specification
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): the body of process() was missing from the
        # extracted text; returning the output bundle is assumed - confirm.
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.e.eq(self.i.a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
                     mge.eq(self.i.a.m >= self.i.b.m),
                     am0.eq(Cat(self.i.a.m, 0)),
                     bm0.eq(Cat(self.i.b.m, 0))
                    ]
        # NOTE(review): the If/Elif/Else headers were missing from the
        # extracted text; restored from the branch comments and the
        # otherwise-unused seq / mge signals - confirm.
        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            m.d.comb += [
                self.o.tot.eq(am0 + bm0),
                self.o.z.s.eq(self.i.a.s)
            ]
        # a mantissa greater than b, use a
        with m.Elif(mge):
            m.d.comb += [
                self.o.tot.eq(am0 - bm0),
                self.o.z.s.eq(self.i.a.s)
            ]
        # b mantissa greater than a, use b
        with m.Else():
            m.d.comb += [
                self.o.tot.eq(bm0 - am0),
                self.o.z.s.eq(self.i.b.s)
            ]
        return m
828 class FPAddStage0(FPState
):
829 """ First stage of add. covers same-sign (add) and subtract
830 special-casing when mantissas are greater or equal, to
831 give greatest accuracy.
834 def __init__(self
, width
, id_wid
):
835 FPState
.__init
__(self
, "add_0")
836 self
.mod
= FPAddStage0Mod(width
)
837 self
.o
= self
.mod
.ospec()
839 def setup(self
, m
, i
):
840 """ links module to inputs and outputs
844 # NOTE: these could be done as combinatorial (merge add0+add1)
845 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
class FPAddStage1Data:
    """ Data bundle produced by the second add stage: the result z, the
        overflow/rounding bits (of) and the multiplex id.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
        """
        self.z = FPNumBase(width, False)
        # eq() below and the users of this bundle read .of, so it must
        # be created here
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        return [self.z.eq(i.z), self.of.eq(i.of), self.mid.eq(i.mid)]
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data specification
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output data specification
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): the body of process() was missing from the
        # extracted text; returning the output bundle is assumed - confirm.
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        # tot[-1] (MSB) gets set when the sum overflows. shift result down
        with m.If(self.i.tot[-1]):
            m.d.comb += [
                self.o.z.m.eq(self.i.tot[4:]),
                self.o.of.m0.eq(self.i.tot[4]),
                self.o.of.guard.eq(self.i.tot[3]),
                self.o.of.round_bit.eq(self.i.tot[2]),
                # both bits dropped off the bottom fold into sticky
                self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
                self.o.z.e.eq(self.i.z.e + 1)
            ]
        # tot[-1] (MSB) zero case
        with m.Else():
            m.d.comb += [
                self.o.z.m.eq(self.i.tot[3:]),
                self.o.of.m0.eq(self.i.tot[3]),
                self.o.of.guard.eq(self.i.tot[2]),
                self.o.of.round_bit.eq(self.i.tot[1]),
                self.o.of.sticky.eq(self.i.tot[0])
            ]
        return m
921 class FPAddStage1(FPState
):
923 def __init__(self
, width
, id_wid
):
924 FPState
.__init
__(self
, "add_1")
925 self
.mod
= FPAddStage1Mod(width
)
926 self
.out_z
= FPNumBase(width
, False)
927 self
.out_of
= Overflow()
928 self
.norm_stb
= Signal()
930 def setup(self
, m
, i
):
931 """ links module to inputs and outputs
935 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
937 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
938 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
939 m
.d
.sync
+= self
.norm_stb
.eq(1)
942 m
.next
= "normalise_1"
945 class FPNormaliseModSingle
:
947 def __init__(self
, width
):
949 self
.in_z
= self
.ispec()
950 self
.out_z
= self
.ospec()
953 return FPNumBase(self
.width
, False)
956 return FPNumBase(self
.width
, False)
958 def setup(self
, m
, i
):
959 """ links module to inputs and outputs
961 m
.submodules
.normalise
= self
962 m
.d
.comb
+= self
.i
.eq(i
)
964 def elaborate(self
, platform
):
967 mwid
= self
.out_z
.m_width
+2
968 pe
= PriorityEncoder(mwid
)
969 m
.submodules
.norm_pe
= pe
971 m
.submodules
.norm1_out_z
= self
.out_z
972 m
.submodules
.norm1_in_z
= self
.in_z
974 in_z
= FPNumBase(self
.width
, False)
976 m
.submodules
.norm1_insel_z
= in_z
977 m
.submodules
.norm1_insel_overflow
= in_of
979 espec
= (len(in_z
.e
), True)
980 ediff_n126
= Signal(espec
, reset_less
=True)
981 msr
= MultiShiftRMerge(mwid
, espec
)
982 m
.submodules
.multishift_r
= msr
984 m
.d
.comb
+= in_z
.eq(self
.in_z
)
985 m
.d
.comb
+= in_of
.eq(self
.in_of
)
986 # initialise out from in (overridden below)
987 m
.d
.comb
+= self
.out_z
.eq(in_z
)
988 m
.d
.comb
+= self
.out_of
.eq(in_of
)
989 # normalisation decrease condition
990 decrease
= Signal(reset_less
=True)
991 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
994 # *sigh* not entirely obvious: count leading zeros (clz)
995 # with a PriorityEncoder: to find from the MSB
996 # we reverse the order of the bits.
997 temp_m
= Signal(mwid
, reset_less
=True)
998 temp_s
= Signal(mwid
+1, reset_less
=True)
999 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1001 # cat round and guard bits back into the mantissa
1002 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1003 pe
.i
.eq(temp_m
[::-1]), # inverted
1004 clz
.eq(pe
.o
), # count zeros from MSB down
1005 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1006 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1007 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
# NOTE(review): the class header was missing from the extracted text; the
# name is taken from FPNorm1ModSingle.ospec(), whose users read
# .roundz, .z and .mid - confirm.
class FPNorm1Data:
    """ Data bundle produced by normalisation: the result z, the roundz
        flag and the multiplex id.
    """

    def __init__(self, width, id_wid):
        """ width: operand bit-width
            id_wid: bit-width of the multiplex id
        """
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i into this bundle
        """
        return [self.z.eq(i.z), self.roundz.eq(i.roundz), self.mid.eq(i.mid)]
1023 class FPNorm1ModSingle
:
1025 def __init__(self
, width
, id_wid
):
1027 self
.id_wid
= id_wid
1028 self
.i
= self
.ispec()
1029 self
.o
= self
.ospec()
1032 return FPAddStage1Data(self
.width
, self
.id_wid
)
1035 return FPNorm1Data(self
.width
, self
.id_wid
)
1037 def setup(self
, m
, i
):
1038 """ links module to inputs and outputs
1040 m
.submodules
.normalise_1
= self
1041 m
.d
.comb
+= self
.i
.eq(i
)
1043 def process(self
, i
):
1046 def elaborate(self
, platform
):
1049 mwid
= self
.o
.z
.m_width
+2
1050 pe
= PriorityEncoder(mwid
)
1051 m
.submodules
.norm_pe
= pe
1054 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1056 m
.submodules
.norm1_out_z
= self
.o
.z
1057 m
.submodules
.norm1_out_overflow
= of
1058 m
.submodules
.norm1_in_z
= self
.i
.z
1059 m
.submodules
.norm1_in_overflow
= self
.i
.of
1062 m
.submodules
.norm1_insel_z
= i
.z
1063 m
.submodules
.norm1_insel_overflow
= i
.of
1065 espec
= (len(i
.z
.e
), True)
1066 ediff_n126
= Signal(espec
, reset_less
=True)
1067 msr
= MultiShiftRMerge(mwid
, espec
)
1068 m
.submodules
.multishift_r
= msr
1070 m
.d
.comb
+= i
.eq(self
.i
)
1071 # initialise out from in (overridden below)
1072 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1073 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1074 m
.d
.comb
+= of
.eq(i
.of
)
1075 # normalisation increase/decrease conditions
1076 decrease
= Signal(reset_less
=True)
1077 increase
= Signal(reset_less
=True)
1078 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1079 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1081 with m
.If(decrease
):
1082 # *sigh* not entirely obvious: count leading zeros (clz)
1083 # with a PriorityEncoder: to find from the MSB
1084 # we reverse the order of the bits.
1085 temp_m
= Signal(mwid
, reset_less
=True)
1086 temp_s
= Signal(mwid
+1, reset_less
=True)
1087 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1088 # make sure that the amount to decrease by does NOT
1089 # go below the minimum non-INF/NaN exponent
1090 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1093 # cat round and guard bits back into the mantissa
1094 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1095 pe
.i
.eq(temp_m
[::-1]), # inverted
1096 clz
.eq(limclz
), # count zeros from MSB down
1097 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1098 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1099 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1100 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1101 # overflow in bits 0..1: got shifted too (leave sticky)
1102 of
.guard
.eq(temp_s
[1]), # guard
1103 of
.round_bit
.eq(temp_s
[0]), # round
1106 with m
.Elif(increase
):
1107 temp_m
= Signal(mwid
+1, reset_less
=True)
1109 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1111 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1112 # connect multi-shifter to inp/out mantissa (and ediff)
1114 msr
.diff
.eq(ediff_n126
),
1115 self
.o
.z
.m
.eq(msr
.m
[3:]),
1116 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1117 # overflow in bits 0..1: got shifted too (leave sticky)
1118 of
.guard
.eq(temp_s
[2]), # guard
1119 of
.round_bit
.eq(temp_s
[1]), # round
1120 of
.sticky
.eq(temp_s
[0]), # sticky
1121 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1127 class FPNorm1ModMulti
:
1129 def __init__(self
, width
, single_cycle
=True):
1131 self
.in_select
= Signal(reset_less
=True)
1132 self
.in_z
= FPNumBase(width
, False)
1133 self
.in_of
= Overflow()
1134 self
.temp_z
= FPNumBase(width
, False)
1135 self
.temp_of
= Overflow()
1136 self
.out_z
= FPNumBase(width
, False)
1137 self
.out_of
= Overflow()
1139 def elaborate(self
, platform
):
1142 m
.submodules
.norm1_out_z
= self
.out_z
1143 m
.submodules
.norm1_out_overflow
= self
.out_of
1144 m
.submodules
.norm1_temp_z
= self
.temp_z
1145 m
.submodules
.norm1_temp_of
= self
.temp_of
1146 m
.submodules
.norm1_in_z
= self
.in_z
1147 m
.submodules
.norm1_in_overflow
= self
.in_of
1149 in_z
= FPNumBase(self
.width
, False)
1151 m
.submodules
.norm1_insel_z
= in_z
1152 m
.submodules
.norm1_insel_overflow
= in_of
1154 # select which of temp or in z/of to use
1155 with m
.If(self
.in_select
):
1156 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1157 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1159 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1160 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1161 # initialise out from in (overridden below)
1162 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1163 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1164 # normalisation increase/decrease conditions
1165 decrease
= Signal(reset_less
=True)
1166 increase
= Signal(reset_less
=True)
1167 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1168 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1169 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1171 with m
.If(decrease
):
1173 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1174 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1175 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1176 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1177 self
.out_of
.round_bit
.eq(0), # reset round bit
1178 self
.out_of
.m0
.eq(in_of
.guard
),
1181 with m
.Elif(increase
):
1183 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1184 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1185 self
.out_of
.guard
.eq(in_z
.m
[0]),
1186 self
.out_of
.m0
.eq(in_z
.m
[1]),
1187 self
.out_of
.round_bit
.eq(in_of
.guard
),
1188 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
class FPNorm1Single(FPState):
    """ FSM state "normalise_1": thin wrapper around FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width handed to the module and to FPNumBase
            * id_wid: width of the identifier handed to the module
            * single_cycle: accepted for interface parity; not used here
        """
        FPState.__init__(self, "normalise_1")
        # FPNormToPack builds this module as (width, id_wid); do the same
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input specification: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        # NOTE(review): body was absent from the extracted source; "round"
        # is the state FPNorm1Multi moves to -- confirm.
        m.next = "round"
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1": multi-cycle normalisation.

        Each cycle the module output is latched into temp_z / temp_of;
        the state is left once out_norm goes low.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                # NOTE(review): this branch body was absent from the
                # extracted source; acknowledging here is presumed.
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            # NOTE(review): the transition line was absent from the
            # extracted source; "round" is presumed -- confirm.
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact form): normalisation, rounding,
        corrections and packing chained together, result registered.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width  # read by ispec(), ospec() and setup()

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # NOTE(review): this call was absent from the extracted source;
        # without it the chain is built but never connected -- confirm the
        # StageChain.setup signature.
        chain.setup(m, i)
        self.out_z = pmod.ospec()

        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(pmod.o.z.v) # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
    def __init__(self, width, id_wid):
        """ Data record with a number "z" and an identifier "mid".

            * width: passed to FPNumBase
            * id_wid: bit-width of the mid Signal
        """
        self.z = FPNumBase(width, False)
        self.mid = Signal(id_wid, reset_less=True)
1300 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1305 def __init__(self
, width
, id_wid
):
1307 self
.id_wid
= id_wid
1308 self
.i
= self
.ispec()
1309 self
.out_z
= self
.ospec()
1312 return FPNorm1Data(self
.width
, self
.id_wid
)
1315 return FPRoundData(self
.width
, self
.id_wid
)
1317 def process(self
, i
):
    def setup(self, m, i):
        """ registers this object as submodule "roundz" of m and
            combinatorially connects i to self.i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)
1324 def elaborate(self
, platform
):
1326 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1327 with m
.If(self
.i
.roundz
):
1328 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1329 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1330 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
class FPRound(FPState):
    """ FSM state "round": registers the output of an FPRoundMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod.__init__ requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod names its output "out_z"; it has no "o" attribute
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial corrections stage: copies input to output, replacing
        the exponent with z.N127 when the input is denormalised.
    """

    def __init__(self, width, id_wid):
        self.width = width  # must be set before ispec()/ospec() are called
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body was absent from the extracted source; returning
        # the module's output object is presumed -- confirm.
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of FPCorrectionsMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod names its output "out_z"; it has no "o" attribute
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # NOTE(review): body was absent from the extracted source; "pack" is
        # the state name FPPack registers under -- confirm.
        m.next = "pack"
    def __init__(self, width, id_wid):
        """ Data record with an output number "z" and an identifier "mid".

            * width: passed to FPNumOut
            * id_wid: bit-width of the mid Signal
        """
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)
1425 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1430 def __init__(self
, width
, id_wid
):
1432 self
.id_wid
= id_wid
1433 self
.i
= self
.ispec()
1434 self
.o
= self
.ospec()
1437 return FPRoundData(self
.width
, self
.id_wid
)
1440 return FPPackData(self
.width
, self
.id_wid
)
1442 def process(self
, i
):
    def setup(self, m, in_z):
        """ links module to inputs and outputs: registers this object as
            submodule "pack" of m and connects in_z to self.i
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)
1451 def elaborate(self
, platform
):
1453 m
.submodules
.pack_in_z
= self
.i
.z
1454 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1455 with m
.If(self
.i
.z
.is_overflowed
):
1456 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1458 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
    def __init__(self, width, id_wid):
        """ Data record with an output number "z" and an identifier "mid".

            * width: passed to FPNumOut
            * id_wid: bit-width of the mid Signal
        """
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)
1468 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """ FSM state "pack": registers the output of an FPPackMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input specification: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o" (an FPPackData holding z and
        # mid); the packed value lives at o.z.v, as used in FPNormToPack.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM output state: copies in_z.v (and optionally in_mid) to the
        output and holds stb high until the receiver acknowledges.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z / out_z: source value and destination object
            * in_mid / out_mid: identifier source and destination;
              in_mid may be None, in which case no id is copied
            * to_state: state entered after the handshake
              (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z    # read by action()
        self.out_z = out_z  # read by action()
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM output state: like FPPutZ, but the destination is selected
        from the array out_zs by in_mid.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source value
            * out_zs: indexable collection of destination objects
            * in_mid: index into out_zs
            * to_state: state entered after the handshake
              (default "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z  # read by action()
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ Input record for the adder: operands a and b plus identifier mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the a and b Signals
            * id_wid: bit-width of the mid Signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying i.a, i.b, i.mid to self
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
    def __init__(self, width, id_wid):
        """ Data record with an FPOp "z" and an identifier "mid".

            * width: passed to FPOp
            * id_wid: bit-width of the mid Signal
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)
1566 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1571 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1574 * width: bit-width of IEEE754. supported: 16, 32, 64
1575 * id_wid: an identifier that is sync-connected to the input
1576 * single_cycle: True indicates each stage to complete in 1 clock
1577 * compact: True indicates a reduced number of stages
1580 self
.id_wid
= id_wid
1581 self
.single_cycle
= single_cycle
1582 self
.compact
= compact
1584 self
.in_t
= Trigger()
1585 self
.i
= self
.ispec()
1586 self
.o
= self
.ospec()
1591 return FPADDBaseData(self
.width
, self
.id_wid
)
1594 return FPOpData(self
.width
, self
.id_wid
)
1596 def add_state(self
, state
):
1597 self
.states
.append(state
)
1600 def get_fragment(self
, platform
=None):
1601 """ creates the HDL code-fragment for FPAdd
1604 m
.submodules
.out_z
= self
.o
.z
1605 m
.submodules
.in_t
= self
.in_t
1607 self
.get_compact_fragment(m
, platform
)
1609 self
.get_longer_fragment(m
, platform
)
1611 with m
.FSM() as fsm
:
1613 for state
in self
.states
:
1614 with m
.State(state
.state_from
):
1619 def get_longer_fragment(self
, m
, platform
=None):
1621 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1623 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1627 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1628 sc
.setup(m
, a
, b
, self
.in_mid
)
1630 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1631 dn
.setup(m
, a
, b
, sc
.in_mid
)
1633 if self
.single_cycle
:
1634 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1635 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1637 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1638 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1640 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1641 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1643 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1644 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1646 if self
.single_cycle
:
1647 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1648 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1650 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1651 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1653 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1654 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1656 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1657 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1659 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1660 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1662 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1663 pa
.in_mid
, self
.out_mid
))
1665 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1666 pa
.in_mid
, self
.out_mid
))
1668 def get_compact_fragment(self
, m
, platform
=None):
1670 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1671 self
.width
, self
.id_wid
))
1672 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1674 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1677 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1680 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1681 n1
.setup(m
, alm
.a1o
)
1683 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1684 n1
.out_z
.mid
, self
.o
.mid
))
1686 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1687 sc
.o
.mid
, self
.o
.mid
))
class FPADDBase(FPState):
    """ FSM state "fpadd": owns an FPADDBaseMod and bridges its in/out
        strobe-acknowledge handshakes to the enclosing state machine.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input specification: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires i into the module, mirrors the module's output and
            handshake signals, and registers the module as submodule
            "fpadd".  in_mid is accepted but not used here.
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                # NOTE(review): the last entry of this list was absent from
                # the extracted source; raising o.z.ack is presumed.
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            # NOTE(review): the second list entry and the transition were
            # absent from the extracted source; presumed -- confirm.
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
            # The original continued with code copying self.mod.out_mid /
            # self.mod.out_z and toggling self.out_z.stb.  This class never
            # defines in_mid, out_mid or out_z, so that tail could not
            # elaborate and has been dropped; the value and id are already
            # mirrored combinatorially in setup().
1782 def __init__(self
, width
, id_wid
):
1784 self
.id_wid
= id_wid
1786 for i
in range(rs_sz
):
1788 out_z
.name
= "out_z_%d" % i
1790 self
.res
= Array(res
)
1791 self
.in_z
= FPOp(width
)
1792 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
    def setup(self, m, in_z, in_mid):
        """ combinatorially connects in_z and in_mid to this object's
            own in_z / in_mid
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]
1798 def get_fragment(self
, platform
=None):
1799 """ creates the HDL code-fragment for FPAdd
1802 m
.submodules
.res_in_z
= self
.in_z
1803 m
.submodules
+= self
.res
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """
1831 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1834 * width: bit-width of IEEE754. supported: 16, 32, 64
1835 * id_wid: an identifier that is sync-connected to the input
1836 * single_cycle: True indicates each stage to complete in 1 clock
1839 self
.id_wid
= id_wid
1840 self
.single_cycle
= single_cycle
1842 #self.out_z = FPOp(width)
1843 self
.ids
= FPID(id_wid
)
1846 for i
in range(rs_sz
):
1849 in_a
.name
= "in_a_%d" % i
1850 in_b
.name
= "in_b_%d" % i
1851 rs
.append((in_a
, in_b
))
1855 for i
in range(rs_sz
):
1857 out_z
.name
= "out_z_%d" % i
1859 self
.res
= Array(res
)
1863 def add_state(self
, state
):
1864 self
.states
.append(state
)
1867 def get_fragment(self
, platform
=None):
1868 """ creates the HDL code-fragment for FPAdd
1871 m
.submodules
+= self
.rs
1873 in_a
= self
.rs
[0][0]
1874 in_b
= self
.rs
[0][1]
1876 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1881 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1886 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1887 ab
= self
.add_state(ab
)
1888 abd
= ab
.ispec() # create an input spec object for FPADDBase
1889 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1890 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1893 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1896 with m
.FSM() as fsm
:
1898 for state
in self
.states
:
1899 with m
.State(state
.state_from
):
if __name__ == "__main__":
    # Build the full FPADD and hand it to the nmigen command-line driver.
    # The original also carried an FPADDBase variant reading alu.in_a,
    # alu.in_b, alu.out_z, alu.in_mid and alu.out_mid; FPADDBase defines
    # none of those, so that variant could not run and has been dropped.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=alu.rs[0][0].ports() + \
                     alu.rs[0][1].ports() + \
                     alu.res[0].ports() + \
                     [alu.ids.in_mid, alu.ids.out_mid])
1920 # works... but don't use, just do "python fname.py convert -t v"
1921 #print (verilog.convert(alu, ports=[
1922 # ports=alu.in_a.ports() + \
1923 # alu.in_b.ports() + \
1924 # alu.out_z.ports())