1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
, UnbufferedPipeline
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """ records the name of the state this object stands for

        state_from -- state identifier kept on the instance; subclasses
                      in this file pass names such as "align" or "add_0"
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand input port and the latched output

        width -- passed to FPOp and used as the shape of out_op
    """
    # incoming operand (exposes v / stb / ack, used by elaborate)
    self.in_op = FPOp(width)
    # copy of the operand value
    self.out_op = Signal(width)
    # driven in elaborate from in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ sets up the two-operand fetch state

        in_state  -- name of this state (handed to FPState)
        out_state -- state to move to once the operands are decoded
        width     -- forwarded to FPGet2OpMod
        id_wid    -- forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # module doing the work, plus an output record of the module's own
    # output type (as produced by its ospec)
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # handshake / decode flags, all created without reset logic
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ creates a pair of FP operands plus the multiplexer id

        width   -- passed to FPNumBase for both operands
        id_wid  -- shape of the mid signal
        m_extra -- second argument given to FPNumBase (default True)
    """
    # the two operands
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ creates the operands, result and bypass fields

        width  -- passed to the FP number objects; also the shape of oz
        id_wid -- shape of the mid signal
    """
    # operands
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    # result number
    self.z = FPNumOut(width, False)
    # result value and its "result already decided" flag; the
    # special-cases module in this file sets out_do_z and copies z.v
    # into oz
    self.oz = Signal(width, reset_less=True)
    self.out_do_z = Signal(reset_less=True)
    # id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
274 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
275 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
278 class FPAddSpecialCasesMod
:
279 """ special cases: NaNs, infs, zeros, denormalised
280 NOTE: some of these are unique to add. see "Special Operations"
281 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
284 def __init__(self
, width
, id_wid
):
287 self
.i
= self
.ispec()
288 self
.o
= self
.ospec()
291 return FPNumBase2Ops(self
.width
, self
.id_wid
)
294 return FPSCData(self
.width
, self
.id_wid
)
296 def setup(self
, m
, i
):
297 """ links module to inputs and outputs
299 m
.submodules
.specialcases
= self
300 m
.d
.comb
+= self
.i
.eq(i
)
302 def elaborate(self
, platform
):
305 m
.submodules
.sc_in_a
= self
.i
.a
306 m
.submodules
.sc_in_b
= self
.i
.b
307 m
.submodules
.sc_out_z
= self
.o
.z
310 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
313 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
315 # if a is NaN or b is NaN return NaN
316 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
317 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
318 m
.d
.comb
+= self
.o
.z
.nan(0)
320 # XXX WEIRDNESS for FP16 non-canonical NaN handling
323 ## if a is zero and b is NaN return -b
324 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
325 # m.d.comb += self.o.out_do_z.eq(1)
326 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
328 ## if b is zero and a is NaN return -a
329 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
330 # m.d.comb += self.o.out_do_z.eq(1)
331 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
333 ## if a is -zero and b is NaN return -b
334 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
335 # m.d.comb += self.o.out_do_z.eq(1)
336 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
338 ## if b is -zero and a is NaN return -a
339 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
340 # m.d.comb += self.o.out_do_z.eq(1)
341 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
343 # if a is inf return inf (or NaN)
344 with m
.Elif(self
.i
.a
.is_inf
):
345 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
347 # if a is inf and signs don't match return NaN
348 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
349 m
.d
.comb
+= self
.o
.z
.nan(0)
351 # if b is inf return inf
352 with m
.Elif(self
.i
.b
.is_inf
):
353 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
354 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
356 # if a is zero and b zero return signed-a/b
357 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
363 # if a is zero return b
364 with m
.Elif(self
.i
.a
.is_zero
):
365 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
366 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
369 # if b is zero return a
370 with m
.Elif(self
.i
.b
.is_zero
):
371 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
372 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
375 # if a equal to -b return zero (+ve zero)
376 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
377 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
378 m
.d
.comb
+= self
.o
.z
.zero(0)
380 # Denormalised Number checks next, so pass a/b data through
382 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
383 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
384 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
386 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
387 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
393 def __init__(self
, id_wid
):
396 self
.in_mid
= Signal(id_wid
, reset_less
=True)
397 self
.out_mid
= Signal(id_wid
, reset_less
=True)
403 if self
.id_wid
is not None:
404 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
407 class FPAddSpecialCases(FPState
):
408 """ special cases: NaNs, infs, zeros, denormalised
409 NOTE: some of these are unique to add. see "Special Operations"
410 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ sets up the "special_cases" state

        width  -- forwarded to FPAddSpecialCasesMod
        id_wid -- forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod takes (width, id_wid): leaving id_wid out
    # raised TypeError as soon as this state was constructed
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output record of the module's own output type
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
419 def setup(self
, m
, i
):
420 """ links module to inputs and outputs
422 self
.mod
.setup(m
, i
, self
.out_do_z
)
423 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
424 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
428 with m
.If(self
.out_do_z
):
431 m
.next
= "denormalise"
434 class FPAddSpecialCasesDeNorm(FPState
):
435 """ special cases: NaNs, infs, zeros, denormalised
436 NOTE: some of these are unique to add. see "Special Operations"
437 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
440 def __init__(self
, width
, id_wid
):
441 FPState
.__init
__(self
, "special_cases")
442 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
443 self
.out_z
= self
.smod
.ospec()
444 self
.out_do_z
= Signal(reset_less
=True)
446 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
447 self
.o
= self
.dmod
.ospec()
449 def setup(self
, m
, i
):
450 """ links module to inputs and outputs
452 self
.smod
.setup(m
, i
)
453 self
.dmod
.setup(m
, self
.smod
.o
)
454 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
457 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
458 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
460 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
463 #with m.If(self.out_do_z):
469 class FPAddDeNormMod(FPState
):
471 def __init__(self
, width
, id_wid
):
474 self
.i
= self
.ispec()
475 self
.o
= self
.ospec()
478 return FPSCData(self
.width
, self
.id_wid
)
481 return FPSCData(self
.width
, self
.id_wid
)
483 def setup(self
, m
, i
):
484 """ links module to inputs and outputs
486 m
.submodules
.denormalise
= self
487 m
.d
.comb
+= self
.i
.eq(i
)
489 def elaborate(self
, platform
):
491 m
.submodules
.denorm_in_a
= self
.i
.a
492 m
.submodules
.denorm_in_b
= self
.i
.b
493 m
.submodules
.denorm_out_a
= self
.o
.a
494 m
.submodules
.denorm_out_b
= self
.o
.b
496 with m
.If(~self
.i
.out_do_z
):
497 # XXX hmmm, don't like repeating identical code
498 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
499 with m
.If(self
.i
.a
.exp_n127
):
500 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
502 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
504 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
505 with m
.If(self
.i
.b
.exp_n127
):
506 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
508 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
510 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
511 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
512 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
513 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
518 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "denormalise" state

        width  -- forwarded to FPAddDeNormMod and to the output numbers
        id_wid -- forwarded to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod takes (width, id_wid): leaving id_wid out raised
    # TypeError as soon as this state was constructed
    self.mod = FPAddDeNormMod(width, id_wid)
    # output operands of this state
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
526 def setup(self
, m
, i
):
527 """ links module to inputs and outputs
531 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
532 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
535 # Denormalised Number checks
539 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the operand inputs/outputs for alignment

        width -- passed to the FP number objects
    """
    # operands coming in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # operands going out
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # driven in elaborate: cleared by default, set on the
    # "exponents equal" path
    self.exp_eq = Signal(reset_less=True)
548 def elaborate(self
, platform
):
549 # This one however (single-cycle) will do the shift
554 m
.submodules
.align_in_a
= self
.in_a
555 m
.submodules
.align_in_b
= self
.in_b
556 m
.submodules
.align_out_a
= self
.out_a
557 m
.submodules
.align_out_b
= self
.out_b
559 # NOTE: this does *not* do single-cycle multi-shifting,
560 # it *STAYS* in the align state until exponents match
562 # exponent of a greater than b: shift b down
563 m
.d
.comb
+= self
.exp_eq
.eq(0)
564 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
565 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
566 agtb
= Signal(reset_less
=True)
567 altb
= Signal(reset_less
=True)
568 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
569 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
571 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
572 # exponent of b greater than a: shift a down
574 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
575 # exponents equal: move to next stage.
577 m
.d
.comb
+= self
.exp_eq
.eq(1)
581 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "align" state around FPAddAlignMultiMod

        width  -- forwarded to the module and the output numbers
        id_wid -- accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # operands produced by this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors the module's exp_eq (connected in setup)
    self.exp_eq = Signal(reset_less=True)
590 def setup(self
, m
, in_a
, in_b
):
591 """ links module to inputs and outputs
593 m
.submodules
.align
= self
.mod
594 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
595 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
596 #m.d.comb += self.out_a.eq(self.mod.out_a)
597 #m.d.comb += self.out_b.eq(self.mod.out_b)
598 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
599 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
600 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
603 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ creates the operands, result and bypass fields

        width  -- passed to the FP number objects; also the shape of oz
        id_wid -- shape of the mid signal
    """
    # operands
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # result number
    self.z = FPNumOut(width, False)
    # bypass flag and bypass value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
618 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
619 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
622 class FPAddAlignSingleMod
:
624 def __init__(self
, width
, id_wid
):
627 self
.i
= self
.ispec()
628 self
.o
= self
.ospec()
631 return FPSCData(self
.width
, self
.id_wid
)
634 return FPNumIn2Ops(self
.width
, self
.id_wid
)
636 def process(self
, i
):
639 def setup(self
, m
, i
):
640 """ links module to inputs and outputs
642 m
.submodules
.align
= self
643 m
.d
.comb
+= self
.i
.eq(i
)
645 def elaborate(self
, platform
):
646 """ Aligns A against B or B against A, depending on which has the
647 greater exponent. This is done in a *single* cycle using
648 variable-width bit-shift
650 the shifter used here is quite expensive in terms of gates.
651 Mux A or B in (and out) into temporaries, as only one of them
652 needs to be aligned against the other
656 m
.submodules
.align_in_a
= self
.i
.a
657 m
.submodules
.align_in_b
= self
.i
.b
658 m
.submodules
.align_out_a
= self
.o
.a
659 m
.submodules
.align_out_b
= self
.o
.b
661 # temporary (muxed) input and output to be shifted
662 t_inp
= FPNumBase(self
.width
)
663 t_out
= FPNumIn(None, self
.width
)
664 espec
= (len(self
.i
.a
.e
), True)
665 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
666 m
.submodules
.align_t_in
= t_inp
667 m
.submodules
.align_t_out
= t_out
668 m
.submodules
.multishift_r
= msr
670 ediff
= Signal(espec
, reset_less
=True)
671 ediffr
= Signal(espec
, reset_less
=True)
672 tdiff
= Signal(espec
, reset_less
=True)
673 elz
= Signal(reset_less
=True)
674 egz
= Signal(reset_less
=True)
676 # connect multi-shifter to t_inp/out mantissa (and tdiff)
677 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
678 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
679 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
680 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
681 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
683 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
684 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
685 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
686 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
688 # default: A-exp == B-exp, A and B untouched (fall through)
689 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
690 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
691 # only one shifter (muxed)
692 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
693 # exponent of a greater than b: shift b down
694 with m
.If(~self
.i
.out_do_z
):
696 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
699 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
701 # exponent of b greater than a: shift a down
703 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
706 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
709 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
710 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
711 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
712 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
717 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "align" state around FPAddAlignSingleMod

        width  -- forwarded to the module and the output numbers
        id_wid -- forwarded to the module
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # operands produced by this state
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
725 def setup(self
, m
, i
):
726 """ links module to inputs and outputs
730 # NOTE: could be done as comb
731 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
732 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
738 class FPAddAlignSingleAdd(FPState
):
740 def __init__(self
, width
, id_wid
):
741 FPState
.__init
__(self
, "align")
744 self
.a1o
= self
.ospec()
747 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
750 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
752 def setup(self
, m
, i
):
753 """ links module to inputs and outputs
756 # chain AddAlignSingle, AddStage0 and AddStage1
757 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
758 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
759 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
761 chain
= StageChain([mod
, a0mod
, a1mod
])
764 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
767 m
.next
= "normalise_1"
770 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ creates the fields output by the first add stage

        width  -- passed to FPNumBase; also the shape of oz
        id_wid -- shape of the mid signal
    """
    # result number
    self.z = FPNumBase(width, False)
    # bypass flag and bypass value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # mantissa total: four bits wider than z's mantissa
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    # id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
780 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
781 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
784 class FPAddStage0Mod
:
786 def __init__(self
, width
, id_wid
):
789 self
.i
= self
.ispec()
790 self
.o
= self
.ospec()
793 return FPSCData(self
.width
, self
.id_wid
)
796 return FPAddStage0Data(self
.width
, self
.id_wid
)
798 def process(self
, i
):
801 def setup(self
, m
, i
):
802 """ links module to inputs and outputs
804 m
.submodules
.add0
= self
805 m
.d
.comb
+= self
.i
.eq(i
)
807 def elaborate(self
, platform
):
809 m
.submodules
.add0_in_a
= self
.i
.a
810 m
.submodules
.add0_in_b
= self
.i
.b
811 m
.submodules
.add0_out_z
= self
.o
.z
813 # store intermediate tests (and zero-extended mantissas)
814 seq
= Signal(reset_less
=True)
815 mge
= Signal(reset_less
=True)
816 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
817 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
818 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
819 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
820 am0
.eq(Cat(self
.i
.a
.m
, 0)),
821 bm0
.eq(Cat(self
.i
.b
.m
, 0))
823 # same-sign (both negative or both positive) add mantissas
824 with m
.If(~self
.i
.out_do_z
):
825 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
828 self
.o
.tot
.eq(am0
+ bm0
),
829 self
.o
.z
.s
.eq(self
.i
.a
.s
)
831 # a mantissa greater than b, use a
834 self
.o
.tot
.eq(am0
- bm0
),
835 self
.o
.z
.s
.eq(self
.i
.a
.s
)
837 # b mantissa greater than a, use b
840 self
.o
.tot
.eq(bm0
- am0
),
841 self
.o
.z
.s
.eq(self
.i
.b
.s
)
844 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
845 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
846 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
850 class FPAddStage0(FPState
):
851 """ First stage of add. covers same-sign (add) and subtract
852 special-casing when mantissas are greater or equal, to
853 give greatest accuracy.
def __init__(self, width, id_wid):
    """ sets up the "add_0" state

        width  -- forwarded to FPAddStage0Mod
        id_wid -- forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod takes (width, id_wid): leaving id_wid out raised
    # TypeError as soon as this state was constructed
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record of the module's own output type
    self.o = self.mod.ospec()
861 def setup(self
, m
, i
):
862 """ links module to inputs and outputs
866 # NOTE: these could be done as combinatorial (merge add0+add1)
867 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
873 class FPAddStage1Data
:
875 def __init__(self
, width
, id_wid
):
876 self
.z
= FPNumBase(width
, False)
877 self
.out_do_z
= Signal(reset_less
=True)
878 self
.oz
= Signal(width
, reset_less
=True)
880 self
.mid
= Signal(id_wid
, reset_less
=True)
883 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
884 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
888 class FPAddStage1Mod(FPState
):
889 """ Second stage of add: preparation for normalisation.
890 detects when tot sum is too big (tot[27] is kinda a carry bit)
893 def __init__(self
, width
, id_wid
):
896 self
.i
= self
.ispec()
897 self
.o
= self
.ospec()
900 return FPAddStage0Data(self
.width
, self
.id_wid
)
903 return FPAddStage1Data(self
.width
, self
.id_wid
)
905 def process(self
, i
):
908 def setup(self
, m
, i
):
909 """ links module to inputs and outputs
911 m
.submodules
.add1
= self
912 m
.submodules
.add1_out_overflow
= self
.o
.of
914 m
.d
.comb
+= self
.i
.eq(i
)
916 def elaborate(self
, platform
):
918 #m.submodules.norm1_in_overflow = self.in_of
919 #m.submodules.norm1_out_overflow = self.out_of
920 #m.submodules.norm1_in_z = self.in_z
921 #m.submodules.norm1_out_z = self.out_z
922 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
923 # tot[-1] (MSB) gets set when the sum overflows. shift result down
924 with m
.If(~self
.i
.out_do_z
):
925 with m
.If(self
.i
.tot
[-1]):
927 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
928 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
929 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
930 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
931 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
932 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
934 # tot[-1] (MSB) zero case
937 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
938 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
939 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
940 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
941 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
944 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
945 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
946 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
951 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ sets up the "add_1" state

        width  -- forwarded to FPAddStage1Mod and to the output number
        id_wid -- forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod takes (width, id_wid): leaving id_wid out raised
    # TypeError as soon as this state was constructed
    self.mod = FPAddStage1Mod(width, id_wid)
    # outputs of this state
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe written by setup (0 by default, 1 when results are stored)
    self.norm_stb = Signal()
960 def setup(self
, m
, i
):
961 """ links module to inputs and outputs
965 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
967 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
968 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
969 m
.d
.sync
+= self
.norm_stb
.eq(1)
972 m
.next
= "normalise_1"
975 class FPNormaliseModSingle
:
977 def __init__(self
, width
):
979 self
.in_z
= self
.ispec()
980 self
.out_z
= self
.ospec()
983 return FPNumBase(self
.width
, False)
986 return FPNumBase(self
.width
, False)
988 def setup(self
, m
, i
):
989 """ links module to inputs and outputs
991 m
.submodules
.normalise
= self
992 m
.d
.comb
+= self
.i
.eq(i
)
994 def elaborate(self
, platform
):
997 mwid
= self
.out_z
.m_width
+2
998 pe
= PriorityEncoder(mwid
)
999 m
.submodules
.norm_pe
= pe
1001 m
.submodules
.norm1_out_z
= self
.out_z
1002 m
.submodules
.norm1_in_z
= self
.in_z
1004 in_z
= FPNumBase(self
.width
, False)
1006 m
.submodules
.norm1_insel_z
= in_z
1007 m
.submodules
.norm1_insel_overflow
= in_of
1009 espec
= (len(in_z
.e
), True)
1010 ediff_n126
= Signal(espec
, reset_less
=True)
1011 msr
= MultiShiftRMerge(mwid
, espec
)
1012 m
.submodules
.multishift_r
= msr
1014 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1015 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1016 # initialise out from in (overridden below)
1017 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1018 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1019 # normalisation decrease condition
1020 decrease
= Signal(reset_less
=True)
1021 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1023 with m
.If(decrease
):
1024 # *sigh* not entirely obvious: count leading zeros (clz)
1025 # with a PriorityEncoder: to find from the MSB
1026 # we reverse the order of the bits.
1027 temp_m
= Signal(mwid
, reset_less
=True)
1028 temp_s
= Signal(mwid
+1, reset_less
=True)
1029 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1031 # cat round and guard bits back into the mantissa
1032 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1033 pe
.i
.eq(temp_m
[::-1]), # inverted
1034 clz
.eq(pe
.o
), # count zeros from MSB down
1035 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1036 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1037 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ creates the fields output by normalisation

        width  -- passed to FPNumBase; also the shape of oz
        id_wid -- shape of the mid signal
    """
    # rounding flag
    self.roundz = Signal(reset_less=True)
    # result number
    self.z = FPNumBase(width, False)
    # bypass flag and bypass value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
1052 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1053 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1056 class FPNorm1ModSingle
:
1058 def __init__(self
, width
, id_wid
):
1060 self
.id_wid
= id_wid
1061 self
.i
= self
.ispec()
1062 self
.o
= self
.ospec()
1065 return FPAddStage1Data(self
.width
, self
.id_wid
)
1068 return FPNorm1Data(self
.width
, self
.id_wid
)
1070 def setup(self
, m
, i
):
1071 """ links module to inputs and outputs
1073 m
.submodules
.normalise_1
= self
1074 m
.d
.comb
+= self
.i
.eq(i
)
1076 def process(self
, i
):
1079 def elaborate(self
, platform
):
1082 mwid
= self
.o
.z
.m_width
+2
1083 pe
= PriorityEncoder(mwid
)
1084 m
.submodules
.norm_pe
= pe
1087 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1089 m
.submodules
.norm1_out_z
= self
.o
.z
1090 m
.submodules
.norm1_out_overflow
= of
1091 m
.submodules
.norm1_in_z
= self
.i
.z
1092 m
.submodules
.norm1_in_overflow
= self
.i
.of
1095 m
.submodules
.norm1_insel_z
= i
.z
1096 m
.submodules
.norm1_insel_overflow
= i
.of
1098 espec
= (len(i
.z
.e
), True)
1099 ediff_n126
= Signal(espec
, reset_less
=True)
1100 msr
= MultiShiftRMerge(mwid
, espec
)
1101 m
.submodules
.multishift_r
= msr
1103 m
.d
.comb
+= i
.eq(self
.i
)
1104 # initialise out from in (overridden below)
1105 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1106 m
.d
.comb
+= of
.eq(i
.of
)
1107 # normalisation increase/decrease conditions
1108 decrease
= Signal(reset_less
=True)
1109 increase
= Signal(reset_less
=True)
1110 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1111 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1113 with m
.If(~self
.i
.out_do_z
):
1114 with m
.If(decrease
):
1115 # *sigh* not entirely obvious: count leading zeros (clz)
1116 # with a PriorityEncoder: to find from the MSB
1117 # we reverse the order of the bits.
1118 temp_m
= Signal(mwid
, reset_less
=True)
1119 temp_s
= Signal(mwid
+1, reset_less
=True)
1120 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1121 # make sure that the amount to decrease by does NOT
1122 # go below the minimum non-INF/NaN exponent
1123 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1126 # cat round and guard bits back into the mantissa
1127 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1128 pe
.i
.eq(temp_m
[::-1]), # inverted
1129 clz
.eq(limclz
), # count zeros from MSB down
1130 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1131 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1132 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1133 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1134 # overflow in bits 0..1: got shifted too (leave sticky)
1135 of
.guard
.eq(temp_s
[1]), # guard
1136 of
.round_bit
.eq(temp_s
[0]), # round
1139 with m
.Elif(increase
):
1140 temp_m
= Signal(mwid
+1, reset_less
=True)
1142 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1144 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1145 # connect multi-shifter to inp/out mantissa (and ediff)
1147 msr
.diff
.eq(ediff_n126
),
1148 self
.o
.z
.m
.eq(msr
.m
[3:]),
1149 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1150 # overflow in bits 0..1: got shifted too (leave sticky)
1151 of
.guard
.eq(temp_s
[2]), # guard
1152 of
.round_bit
.eq(temp_s
[1]), # round
1153 of
.sticky
.eq(temp_s
[0]), # sticky
1154 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1157 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1158 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1159 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
1164 class FPNorm1ModMulti
:
1166 def __init__(self
, width
, single_cycle
=True):
1168 self
.in_select
= Signal(reset_less
=True)
1169 self
.in_z
= FPNumBase(width
, False)
1170 self
.in_of
= Overflow()
1171 self
.temp_z
= FPNumBase(width
, False)
1172 self
.temp_of
= Overflow()
1173 self
.out_z
= FPNumBase(width
, False)
1174 self
.out_of
= Overflow()
1176 def elaborate(self
, platform
):
1179 m
.submodules
.norm1_out_z
= self
.out_z
1180 m
.submodules
.norm1_out_overflow
= self
.out_of
1181 m
.submodules
.norm1_temp_z
= self
.temp_z
1182 m
.submodules
.norm1_temp_of
= self
.temp_of
1183 m
.submodules
.norm1_in_z
= self
.in_z
1184 m
.submodules
.norm1_in_overflow
= self
.in_of
1186 in_z
= FPNumBase(self
.width
, False)
1188 m
.submodules
.norm1_insel_z
= in_z
1189 m
.submodules
.norm1_insel_overflow
= in_of
1191 # select which of temp or in z/of to use
1192 with m
.If(self
.in_select
):
1193 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1194 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1196 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1197 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1198 # initialise out from in (overridden below)
1199 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1200 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1201 # normalisation increase/decrease conditions
1202 decrease
= Signal(reset_less
=True)
1203 increase
= Signal(reset_less
=True)
1204 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1205 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1206 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1208 with m
.If(decrease
):
1210 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1211 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1212 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1213 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1214 self
.out_of
.round_bit
.eq(0), # reset round bit
1215 self
.out_of
.m0
.eq(in_of
.guard
),
1218 with m
.Elif(increase
):
1220 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1221 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1222 self
.out_of
.guard
.eq(in_z
.m
[0]),
1223 self
.out_of
.m0
.eq(in_z
.m
[1]),
1224 self
.out_of
.round_bit
.eq(in_of
.guard
),
1225 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1231 class FPNorm1Single(FPState
):
1233 def __init__(self
, width
, id_wid
, single_cycle
=True):
1234 FPState
.__init
__(self
, "normalise_1")
1235 self
.mod
= FPNorm1ModSingle(width
)
1236 self
.o
= self
.ospec()
1237 self
.out_z
= FPNumBase(width
, False)
1238 self
.out_roundz
= Signal(reset_less
=True)
1241 return self
.mod
.ispec()
1244 return self
.mod
.ospec()
1246 def setup(self
, m
, i
):
1247 """ links module to inputs and outputs
1249 self
.mod
.setup(m
, i
)
1251 def action(self
, m
):
1255 class FPNorm1Multi(FPState
):
1257 def __init__(self
, width
, id_wid
):
1258 FPState
.__init
__(self
, "normalise_1")
1259 self
.mod
= FPNorm1ModMulti(width
)
1260 self
.stb
= Signal(reset_less
=True)
1261 self
.ack
= Signal(reset
=0, reset_less
=True)
1262 self
.out_norm
= Signal(reset_less
=True)
1263 self
.in_accept
= Signal(reset_less
=True)
1264 self
.temp_z
= FPNumBase(width
)
1265 self
.temp_of
= Overflow()
1266 self
.out_z
= FPNumBase(width
)
1267 self
.out_roundz
= Signal(reset_less
=True)
1269 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1270 """ links module to inputs and outputs
1272 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1273 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1274 self
.out_z
, self
.out_norm
)
1276 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1277 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1279 def action(self
, m
):
1280 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1281 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1282 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1283 with m
.If(self
.out_norm
):
1284 with m
.If(self
.in_accept
):
1289 m
.d
.sync
+= self
.ack
.eq(0)
1291 # normalisation not required (or done).
1293 m
.d
.sync
+= self
.ack
.eq(1)
1294 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1297 class FPNormToPack(FPState
):
1299 def __init__(self
, width
, id_wid
):
1300 FPState
.__init
__(self
, "normalise_1")
1301 self
.id_wid
= id_wid
1305 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1308 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1310 def setup(self
, m
, i
):
1311 """ links module to inputs and outputs
1314 # Normalisation, Rounding Corrections, Pack - in a chain
1315 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1316 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1317 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1318 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1319 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1321 self
.out_z
= pmod
.ospec()
1323 m
.d
.sync
+= self
.out_z
.mid
.eq(pmod
.o
.mid
)
1324 m
.d
.sync
+= self
.out_z
.z
.v
.eq(pmod
.o
.z
.v
) # outputs packed result
1326 def action(self
, m
):
1327 m
.next
= "pack_put_z"
1332 def __init__(self
, width
, id_wid
):
1333 self
.z
= FPNumBase(width
, False)
1334 self
.out_do_z
= Signal(reset_less
=True)
1335 self
.oz
= Signal(width
, reset_less
=True)
1336 self
.mid
= Signal(id_wid
, reset_less
=True)
1339 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1345 def __init__(self
, width
, id_wid
):
1347 self
.id_wid
= id_wid
1348 self
.i
= self
.ispec()
1349 self
.out_z
= self
.ospec()
1352 return FPNorm1Data(self
.width
, self
.id_wid
)
1355 return FPRoundData(self
.width
, self
.id_wid
)
1357 def process(self
, i
):
1360 def setup(self
, m
, i
):
1361 m
.submodules
.roundz
= self
1362 m
.d
.comb
+= self
.i
.eq(i
)
1364 def elaborate(self
, platform
):
1366 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1367 with m
.If(~self
.i
.out_do_z
):
1368 with m
.If(self
.i
.roundz
):
1369 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
1370 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1371 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1376 class FPRound(FPState
):
1378 def __init__(self
, width
, id_wid
):
1379 FPState
.__init
__(self
, "round")
1380 self
.mod
= FPRoundMod(width
)
1381 self
.out_z
= self
.ospec()
1384 return self
.mod
.ispec()
1387 return self
.mod
.ospec()
1389 def setup(self
, m
, i
):
1390 """ links module to inputs and outputs
1392 self
.mod
.setup(m
, i
)
1395 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1396 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1398 def action(self
, m
):
1399 m
.next
= "corrections"
1402 class FPCorrectionsMod
:
1404 def __init__(self
, width
, id_wid
):
1406 self
.id_wid
= id_wid
1407 self
.i
= self
.ispec()
1408 self
.out_z
= self
.ospec()
1411 return FPRoundData(self
.width
, self
.id_wid
)
1414 return FPRoundData(self
.width
, self
.id_wid
)
1416 def process(self
, i
):
1419 def setup(self
, m
, i
):
1420 """ links module to inputs and outputs
1422 m
.submodules
.corrections
= self
1423 m
.d
.comb
+= self
.i
.eq(i
)
1425 def elaborate(self
, platform
):
1427 m
.submodules
.corr_in_z
= self
.i
.z
1428 m
.submodules
.corr_out_z
= self
.out_z
.z
1429 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1430 with m
.If(~self
.i
.out_do_z
):
1431 with m
.If(self
.i
.z
.is_denormalised
):
1432 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1436 class FPCorrections(FPState
):
1438 def __init__(self
, width
, id_wid
):
1439 FPState
.__init
__(self
, "corrections")
1440 self
.mod
= FPCorrectionsMod(width
)
1441 self
.out_z
= self
.ospec()
1444 return self
.mod
.ispec()
1447 return self
.mod
.ospec()
1449 def setup(self
, m
, in_z
):
1450 """ links module to inputs and outputs
1452 self
.mod
.setup(m
, in_z
)
1454 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1455 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1457 def action(self
, m
):
1463 def __init__(self
, width
, id_wid
):
1464 self
.z
= FPNumOut(width
, False)
1465 self
.mid
= Signal(id_wid
, reset_less
=True)
1468 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1473 def __init__(self
, width
, id_wid
):
1475 self
.id_wid
= id_wid
1476 self
.i
= self
.ispec()
1477 self
.o
= self
.ospec()
1480 return FPRoundData(self
.width
, self
.id_wid
)
1483 return FPPackData(self
.width
, self
.id_wid
)
1485 def process(self
, i
):
1488 def setup(self
, m
, in_z
):
1489 """ links module to inputs and outputs
1491 m
.submodules
.pack
= self
1492 m
.d
.comb
+= self
.i
.eq(in_z
)
1494 def elaborate(self
, platform
):
1496 m
.submodules
.pack_in_z
= self
.i
.z
1497 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1498 with m
.If(~self
.i
.out_do_z
):
1499 with m
.If(self
.i
.z
.is_overflowed
):
1500 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1502 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1504 m
.d
.comb
+= self
.o
.z
.v
.eq(self
.i
.oz
)
1508 class FPPack(FPState
):
1510 def __init__(self
, width
, id_wid
):
1511 FPState
.__init
__(self
, "pack")
1512 self
.mod
= FPPackMod(width
)
1513 self
.out_z
= self
.ospec()
1516 return self
.mod
.ispec()
1519 return self
.mod
.ospec()
1521 def setup(self
, m
, in_z
):
1522 """ links module to inputs and outputs
1524 self
.mod
.setup(m
, in_z
)
1526 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1527 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1529 def action(self
, m
):
1530 m
.next
= "pack_put_z"
1533 class FPPutZ(FPState
):
1535 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1536 FPState
.__init
__(self
, state
)
1537 if to_state
is None:
1538 to_state
= "get_ops"
1539 self
.to_state
= to_state
1542 self
.in_mid
= in_mid
1543 self
.out_mid
= out_mid
1545 def action(self
, m
):
1546 if self
.in_mid
is not None:
1547 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1549 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1551 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1552 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1553 m
.next
= self
.to_state
1555 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1558 class FPPutZIdx(FPState
):
1560 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1561 FPState
.__init
__(self
, state
)
1562 if to_state
is None:
1563 to_state
= "get_ops"
1564 self
.to_state
= to_state
1566 self
.out_zs
= out_zs
1567 self
.in_mid
= in_mid
1569 def action(self
, m
):
1570 outz_stb
= Signal(reset_less
=True)
1571 outz_ack
= Signal(reset_less
=True)
1572 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1573 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1576 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1578 with m
.If(outz_stb
& outz_ack
):
1579 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1580 m
.next
= self
.to_state
1582 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1584 class FPADDBaseData
:
1586 def __init__(self
, width
, id_wid
):
1588 self
.id_wid
= id_wid
1589 self
.a
= Signal(width
)
1590 self
.b
= Signal(width
)
1591 self
.mid
= Signal(id_wid
, reset_less
=True)
1594 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
1598 def __init__(self
, width
, id_wid
):
1599 self
.z
= FPOp(width
)
1600 self
.mid
= Signal(id_wid
, reset_less
=True)
1603 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1608 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1611 * width: bit-width of IEEE754. supported: 16, 32, 64
1612 * id_wid: an identifier that is sync-connected to the input
1613 * single_cycle: True indicates each stage to complete in 1 clock
1614 * compact: True indicates a reduced number of stages
1617 self
.id_wid
= id_wid
1618 self
.single_cycle
= single_cycle
1619 self
.compact
= compact
1621 self
.in_t
= Trigger()
1622 self
.i
= self
.ispec()
1623 self
.o
= self
.ospec()
1628 return FPADDBaseData(self
.width
, self
.id_wid
)
1631 return FPOpData(self
.width
, self
.id_wid
)
1633 def add_state(self
, state
):
1634 self
.states
.append(state
)
1637 def get_fragment(self
, platform
=None):
1638 """ creates the HDL code-fragment for FPAdd
1641 m
.submodules
.out_z
= self
.o
.z
1642 m
.submodules
.in_t
= self
.in_t
1644 self
.get_compact_fragment(m
, platform
)
1646 self
.get_longer_fragment(m
, platform
)
1648 with m
.FSM() as fsm
:
1650 for state
in self
.states
:
1651 with m
.State(state
.state_from
):
1656 def get_longer_fragment(self
, m
, platform
=None):
1658 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1660 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1664 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1665 sc
.setup(m
, a
, b
, self
.in_mid
)
1667 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1668 dn
.setup(m
, a
, b
, sc
.in_mid
)
1670 if self
.single_cycle
:
1671 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1672 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1674 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1675 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1677 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1678 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1680 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1681 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1683 if self
.single_cycle
:
1684 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1685 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1687 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1688 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1690 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1691 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1693 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1694 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1696 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1697 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1699 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1700 pa
.in_mid
, self
.out_mid
))
1702 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1703 pa
.in_mid
, self
.out_mid
))
1705 def get_compact_fragment(self
, m
, platform
=None):
1707 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1708 self
.width
, self
.id_wid
))
1709 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1711 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1714 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1717 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1718 n1
.setup(m
, alm
.a1o
)
1720 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1721 n1
.out_z
.mid
, self
.o
.mid
))
1723 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1724 # sc.o.mid, self.o.mid))
1727 class FPADDBase(FPState
):
1729 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1732 * width: bit-width of IEEE754. supported: 16, 32, 64
1733 * id_wid: an identifier that is sync-connected to the input
1734 * single_cycle: True indicates each stage to complete in 1 clock
1736 FPState
.__init
__(self
, "fpadd")
1738 self
.single_cycle
= single_cycle
1739 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1740 self
.o
= self
.ospec()
1742 self
.in_t
= Trigger()
1743 self
.i
= self
.ispec()
1745 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1746 self
.in_accept
= Signal(reset_less
=True)
1747 self
.add_stb
= Signal(reset_less
=True)
1748 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1751 return self
.mod
.ispec()
1754 return self
.mod
.ospec()
1756 def setup(self
, m
, i
, add_stb
, in_mid
):
1757 m
.d
.comb
+= [self
.i
.eq(i
),
1758 self
.mod
.i
.eq(self
.i
),
1759 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1760 #self.add_stb.eq(add_stb),
1761 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1762 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1763 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1764 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1765 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1766 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1769 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1770 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1771 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1772 #m.d.sync += self.in_t.stb.eq(0)
1774 m
.submodules
.fpadd
= self
.mod
1776 def action(self
, m
):
1778 # in_accept is set on incoming strobe HIGH and ack LOW.
1779 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1781 #with m.If(self.in_t.ack):
1782 # m.d.sync += self.in_t.stb.eq(0)
1783 with m
.If(~self
.z_done
):
1784 # not done: test for accepting an incoming operand pair
1785 with m
.If(self
.in_accept
):
1787 self
.add_ack
.eq(1), # acknowledge receipt...
1788 self
.in_t
.stb
.eq(1), # initiate add
1791 m
.d
.sync
+= [self
.add_ack
.eq(0),
1792 self
.in_t
.stb
.eq(0),
1796 # done: acknowledge, and write out id and value
1797 m
.d
.sync
+= [self
.add_ack
.eq(1),
1804 if self
.in_mid
is not None:
1805 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1808 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1810 # move to output state on detecting z ack
1811 with m
.If(self
.out_z
.trigger
):
1812 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1815 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1818 def __init__(self
, width
, id_wid
):
1819 self
.a
= Signal(width
)
1820 self
.b
= Signal(width
)
1821 self
.mid
= Signal(id_wid
, reset_less
=True)
1824 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
1827 class FPADDStageOut
:
1828 def __init__(self
, width
, id_wid
):
1829 self
.z
= Signal(width
)
1830 self
.mid
= Signal(id_wid
, reset_less
=True)
1833 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1836 # matches the format of FPADDStageOut, allows eq function to do assignments
1837 class PlaceHolder
: pass
1840 class FPAddBaseStage
:
1841 def __init__(self
, width
, id_wid
):
1843 self
.id_wid
= id_wid
1846 return FPADDStageIn(self
.width
, self
.id_wid
)
1849 return FPADDStageOut(self
.width
, self
.id_wid
)
1851 def process(self
, i
):
1858 class FPADDBasePipe
:
1859 def __init__(self
, width
, id_wid
):
1860 stage1
= FPAddBaseStage(width
, id_wid
)
1861 self
.pipe
= UnbufferedPipeline(stage1
)
1863 def elaborate(self
, platform
):
1864 return self
.pipe
.elaborate(platform
)
1867 return self
.pipe
.ports()
1870 def __init__(self
, width
, id_wid
):
1872 self
.id_wid
= id_wid
1874 for i
in range(rs_sz
):
1876 out_z
.name
= "out_z_%d" % i
1878 self
.res
= Array(res
)
1879 self
.in_z
= FPOp(width
)
1880 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1882 def setup(self
, m
, in_z
, in_mid
):
1883 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1884 self
.in_mid
.eq(in_mid
)]
1886 def get_fragment(self
, platform
=None):
1887 """ creates the HDL code-fragment for FPAdd
1890 m
.submodules
.res_in_z
= self
.in_z
1891 m
.submodules
+= self
.res
1903 """ FPADD: stages as follows:
1909 FPAddBase---> FPAddBaseMod
1911 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1913 FPAddBase is tricky: it is both a stage and *has* stages.
1914 Connection to FPAddBaseMod therefore requires an in stb/ack
1915 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1916 needs to be the thing that raises the incoming stb.
1919 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1922 * width: bit-width of IEEE754. supported: 16, 32, 64
1923 * id_wid: an identifier that is sync-connected to the input
1924 * single_cycle: True indicates each stage to complete in 1 clock
1927 self
.id_wid
= id_wid
1928 self
.single_cycle
= single_cycle
1930 #self.out_z = FPOp(width)
1931 self
.ids
= FPID(id_wid
)
1934 for i
in range(rs_sz
):
1937 in_a
.name
= "in_a_%d" % i
1938 in_b
.name
= "in_b_%d" % i
1939 rs
.append((in_a
, in_b
))
1943 for i
in range(rs_sz
):
1945 out_z
.name
= "out_z_%d" % i
1947 self
.res
= Array(res
)
1951 def add_state(self
, state
):
1952 self
.states
.append(state
)
1955 def get_fragment(self
, platform
=None):
1956 """ creates the HDL code-fragment for FPAdd
1959 m
.submodules
+= self
.rs
1961 in_a
= self
.rs
[0][0]
1962 in_b
= self
.rs
[0][1]
1964 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1969 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1974 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1975 ab
= self
.add_state(ab
)
1976 abd
= ab
.ispec() # create an input spec object for FPADDBase
1977 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1978 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1981 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1984 with m
.FSM() as fsm
:
1986 for state
in self
.states
:
1987 with m
.State(state
.state_from
):
1993 if __name__
== "__main__":
1995 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1996 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1997 alu
.rs
[0][1].ports() + \
1998 alu
.res
[0].ports() + \
1999 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
2001 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
2002 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
2003 alu
.in_t
.ports() + \
2004 alu
.out_z
.ports() + \
2005 [alu
.in_mid
, alu
.out_mid
])
2008 # works... but don't use, just do "python fname.py convert -t v"
2009 #print (verilog.convert(alu, ports=[
2010 # ports=alu.in_a.ports() + \
2011 # alu.in_b.ports() + \
2012 # alu.out_z.ports())