1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """ stores the identifier this state object is registered under

        state_from: kept unchanged as self.state_from (subclasses in this
                    file pass strings such as "align" or "add_0";
                    presumably the FSM state name -- confirm with callers)
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand port, the captured value and the decode flag

        width: passed to FPOp for the input and used as the bit-width
               of the out_op Signal
    """
    self.in_op = FPOp(width)
    self.out_op = Signal(width)
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op):
    """ links module to inputs and outputs

        registers self.mod as a submodule of m under the name held in
        self.state_from, then combinatorially feeds in_op into the
        module and mirrors the module's out_decode flag.
    """
    setattr(m.submodules, self.state_from, self.mod)
    m.d.comb += [
        self.mod.in_op.eq(in_op),
        self.out_decode.eq(self.mod.out_decode),
    ]
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ builds the two-operand "get" state

        in_state:  handed to FPState.__init__ (stored as state_from)
        out_state: stored as self.out_state
        width, id_wid: forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # output record has the same layout as the module's output
    self.o = self.mod.ospec()
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
def setup(self, m, i, in_stb, in_ack):
    """ links module to inputs and outputs

        registers self.mod as m.submodules.get_ops and wires, all in the
        comb domain: i -> mod.i, in_stb -> mod.stb, mod.ack -> out_ack
        and in_ack, mod.trigger -> out_decode.
    """
    m.submodules.get_ops = self.mod
    m.d.comb += [
        self.mod.i.eq(i),
        self.mod.stb.eq(in_stb),
        self.out_ack.eq(self.mod.ack),
        self.out_decode.eq(self.mod.trigger),
        in_ack.eq(self.mod.ack),
    ]
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ creates two FPNumBase operands and an id Signal

        width, m_extra: forwarded to FPNumBase for both a and b
        id_wid:         bit-width of the mid Signal
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ creates operands a and b, result z, the out_do_z flag and mid

        width:  forwarded to FPNumBase (a, b) and FPNumOut (z)
        id_wid: bit-width of the mid Signal
    """
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    self.z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
273 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
274 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
277 class FPAddSpecialCasesMod
:
278 """ special cases: NaNs, infs, zeros, denormalised
279 NOTE: some of these are unique to add. see "Special Operations"
280 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
283 def __init__(self
, width
, id_wid
):
286 self
.i
= self
.ispec()
287 self
.o
= self
.ospec()
290 return FPNumBase2Ops(self
.width
, self
.id_wid
)
293 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        adds this object to m as submodule "specialcases" and drives
        self.i combinatorially from i.
    """
    m.submodules.specialcases = self
    m.d.comb += self.i.eq(i)
301 def elaborate(self
, platform
):
304 m
.submodules
.sc_in_a
= self
.i
.a
305 m
.submodules
.sc_in_b
= self
.i
.b
306 m
.submodules
.sc_out_z
= self
.o
.z
309 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
312 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
314 # if a is NaN or b is NaN return NaN
315 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
316 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
317 m
.d
.comb
+= self
.o
.z
.nan(0)
319 # XXX WEIRDNESS for FP16 non-canonical NaN handling
322 ## if a is zero and b is NaN return -b
323 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
324 # m.d.comb += self.o.out_do_z.eq(1)
325 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
327 ## if b is zero and a is NaN return -a
328 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
329 # m.d.comb += self.o.out_do_z.eq(1)
330 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
332 ## if a is -zero and b is NaN return -b
333 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
334 # m.d.comb += self.o.out_do_z.eq(1)
335 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
337 ## if b is -zero and a is NaN return -a
338 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
339 # m.d.comb += self.o.out_do_z.eq(1)
340 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
342 # if a is inf return inf (or NaN)
343 with m
.Elif(self
.i
.a
.is_inf
):
344 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
346 # if a is inf and signs don't match return NaN
347 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
348 m
.d
.comb
+= self
.o
.z
.nan(0)
350 # if b is inf return inf
351 with m
.Elif(self
.i
.b
.is_inf
):
352 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
355 # if a is zero and b zero return signed-a/b
356 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
362 # if a is zero return b
363 with m
.Elif(self
.i
.a
.is_zero
):
364 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
368 # if b is zero return a
369 with m
.Elif(self
.i
.b
.is_zero
):
370 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
371 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
374 # if a equal to -b return zero (+ve zero)
375 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
376 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
377 m
.d
.comb
+= self
.o
.z
.zero(0)
379 # Denormalised Number checks next, so pass a/b data through
381 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
382 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
383 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
385 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
391 def __init__(self
, id_wid
):
394 self
.in_mid
= Signal(id_wid
, reset_less
=True)
395 self
.out_mid
= Signal(id_wid
, reset_less
=True)
401 if self
.id_wid
is not None:
402 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
405 class FPAddSpecialCases(FPState
):
406 """ special cases: NaNs, infs, zeros, denormalised
407 NOTE: some of these are unique to add. see "Special Operations"
408 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ builds the "special_cases" state around FPAddSpecialCasesMod

        width, id_wid: both forwarded to FPAddSpecialCasesMod, whose
                       constructor takes (width, id_wid).  The earlier
                       one-argument call could not construct the module.
    """
    FPState.__init__(self, "special_cases")
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # registered copy of the module output, same layout as mod.o
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, i):
    """ links module to inputs and outputs

        FPAddSpecialCasesMod.setup accepts only (m, i) and exposes its
        results through its "o" record, so out_do_z is picked up from
        mod.o rather than being passed in, and the registered copy is
        taken from mod.o.z / mod.o.mid.  This mirrors the wiring used
        by FPAddSpecialCasesDeNorm.setup.
    """
    self.mod.setup(m, i)
    m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
    m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
    m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
426 with m
.If(self
.out_do_z
):
429 m
.next
= "denormalise"
432 class FPAddSpecialCasesDeNorm(FPState
):
433 """ special cases: NaNs, infs, zeros, denormalised
434 NOTE: some of these are unique to add. see "Special Operations"
435 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ builds the combined special-cases + denormalise state

        width, id_wid: forwarded to both FPAddSpecialCasesMod (smod)
                       and FPAddDeNormMod (dmod)
    """
    FPState.__init__(self, "special_cases")
    # special-cases module and a record shaped like its output
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise module and a record shaped like its output
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
447 def setup(self
, m
, i
):
448 """ links module to inputs and outputs
450 self
.smod
.setup(m
, i
)
451 self
.dmod
.setup(m
, self
.smod
.o
)
452 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
455 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
456 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
458 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
461 with m
.If(self
.out_do_z
):
467 class FPAddDeNormMod(FPState
):
469 def __init__(self
, width
, id_wid
):
472 self
.i
= self
.ispec()
473 self
.o
= self
.ospec()
476 return FPSCData(self
.width
, self
.id_wid
)
479 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        adds this object to m as submodule "denormalise" and drives
        self.i combinatorially from i.
    """
    m.submodules.denormalise = self
    m.d.comb += self.i.eq(i)
487 def elaborate(self
, platform
):
489 m
.submodules
.denorm_in_a
= self
.i
.a
490 m
.submodules
.denorm_in_b
= self
.i
.b
491 m
.submodules
.denorm_out_a
= self
.o
.a
492 m
.submodules
.denorm_out_b
= self
.o
.b
494 with m
.If(~self
.i
.out_do_z
):
495 # XXX hmmm, don't like repeating identical code
496 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
497 with m
.If(self
.i
.a
.exp_n127
):
498 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
500 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
502 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
503 with m
.If(self
.i
.b
.exp_n127
):
504 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
506 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
508 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
509 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
510 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
515 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ builds the "denormalise" state around FPAddDeNormMod

        width, id_wid: both forwarded to FPAddDeNormMod, whose
                       constructor takes (width, id_wid).  The earlier
                       one-argument call could not construct the module.
    """
    FPState.__init__(self, "denormalise")
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
523 def setup(self
, m
, i
):
524 """ links module to inputs and outputs
528 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
529 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
532 # Denormalised Number checks
536 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the a/b inputs, a/b outputs and the exp_eq flag

        width: forwarded to FPNumBase (inputs) and FPNumIn (outputs)
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
545 def elaborate(self
, platform
):
546 # This one however (single-cycle) will do the shift
551 m
.submodules
.align_in_a
= self
.in_a
552 m
.submodules
.align_in_b
= self
.in_b
553 m
.submodules
.align_out_a
= self
.out_a
554 m
.submodules
.align_out_b
= self
.out_b
556 # NOTE: this does *not* do single-cycle multi-shifting,
557 # it *STAYS* in the align state until exponents match
559 # exponent of a greater than b: shift b down
560 m
.d
.comb
+= self
.exp_eq
.eq(0)
561 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
562 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
563 agtb
= Signal(reset_less
=True)
564 altb
= Signal(reset_less
=True)
565 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
566 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
568 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
569 # exponent of b greater than a: shift a down
571 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
572 # exponents equal: move to next stage.
574 m
.d
.comb
+= self
.exp_eq
.eq(1)
578 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ builds the "align" state around FPAddAlignMultiMod

        width:  forwarded to the module and to the out_a/out_b FPNumIn
        id_wid: accepted but not used in this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        registers self.mod as m.submodules.align.  in_a/in_b and the
        exp_eq flag are wired in the comb domain; the module's out_a and
        out_b are copied to self.out_a/out_b in the sync domain.
    """
    m.submodules.align = self.mod
    m.d.comb += [
        self.mod.in_a.eq(in_a),
        self.mod.in_b.eq(in_b),
        self.exp_eq.eq(self.mod.exp_eq),
    ]
    # outputs are registered (a comb copy was tried and left disabled)
    m.d.sync += [
        self.out_a.eq(self.mod.out_a),
        self.out_b.eq(self.mod.out_b),
    ]
600 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ creates FPNumIn operands a and b, result z, out_do_z and mid

        width:  forwarded to FPNumIn (a, b) and FPNumOut (z)
        id_wid: bit-width of the mid Signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
614 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
615 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
618 class FPAddAlignSingleMod
:
620 def __init__(self
, width
, id_wid
):
623 self
.i
= self
.ispec()
624 self
.o
= self
.ospec()
627 return FPSCData(self
.width
, self
.id_wid
)
630 return FPNumIn2Ops(self
.width
, self
.id_wid
)
632 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        adds this object to m as submodule "align" and drives self.i
        combinatorially from i.
    """
    m.submodules.align = self
    m.d.comb += self.i.eq(i)
641 def elaborate(self
, platform
):
642 """ Aligns A against B or B against A, depending on which has the
643 greater exponent. This is done in a *single* cycle using
644 variable-width bit-shift
646 the shifter used here is quite expensive in terms of gates.
647 Mux A or B in (and out) into temporaries, as only one of them
648 needs to be aligned against the other
652 m
.submodules
.align_in_a
= self
.i
.a
653 m
.submodules
.align_in_b
= self
.i
.b
654 m
.submodules
.align_out_a
= self
.o
.a
655 m
.submodules
.align_out_b
= self
.o
.b
657 # temporary (muxed) input and output to be shifted
658 t_inp
= FPNumBase(self
.width
)
659 t_out
= FPNumIn(None, self
.width
)
660 espec
= (len(self
.i
.a
.e
), True)
661 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
662 m
.submodules
.align_t_in
= t_inp
663 m
.submodules
.align_t_out
= t_out
664 m
.submodules
.multishift_r
= msr
666 ediff
= Signal(espec
, reset_less
=True)
667 ediffr
= Signal(espec
, reset_less
=True)
668 tdiff
= Signal(espec
, reset_less
=True)
669 elz
= Signal(reset_less
=True)
670 egz
= Signal(reset_less
=True)
672 # connect multi-shifter to t_inp/out mantissa (and tdiff)
673 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
674 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
675 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
676 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
677 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
679 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
680 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
681 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
682 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
684 # default: A-exp == B-exp, A and B untouched (fall through)
685 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
686 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
687 # only one shifter (muxed)
688 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
689 # exponent of a greater than b: shift b down
690 with m
.If(~self
.i
.out_do_z
):
692 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
695 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
697 # exponent of b greater than a: shift a down
699 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
702 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
705 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
706 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
707 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
712 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ builds the "align" state around FPAddAlignSingleMod

        width, id_wid: forwarded to FPAddAlignSingleMod; width is also
                       used for the out_a/out_b FPNumIn
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
720 def setup(self
, m
, i
):
721 """ links module to inputs and outputs
725 # NOTE: could be done as comb
726 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
727 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
733 class FPAddAlignSingleAdd(FPState
):
735 def __init__(self
, width
, id_wid
):
736 FPState
.__init
__(self
, "align")
739 self
.a1o
= self
.ospec()
742 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
745 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
747 def setup(self
, m
, i
):
748 """ links module to inputs and outputs
751 # chain AddAlignSingle, AddStage0 and AddStage1
752 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
753 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
754 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
756 chain
= StageChain([mod
, a0mod
, a1mod
])
759 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
762 m
.next
= "normalise_1"
765 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ creates z, the out_do_z flag, the tot sum and mid

        width:  forwarded to FPNumBase for z
        id_wid: bit-width of the mid Signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    # tot is four bits wider than z's m_width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
774 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
775 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
778 class FPAddStage0Mod
:
780 def __init__(self
, width
, id_wid
):
783 self
.i
= self
.ispec()
784 self
.o
= self
.ospec()
787 return FPSCData(self
.width
, self
.id_wid
)
790 return FPAddStage0Data(self
.width
, self
.id_wid
)
792 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        adds this object to m as submodule "add0" and drives self.i
        combinatorially from i.
    """
    m.submodules.add0 = self
    m.d.comb += self.i.eq(i)
801 def elaborate(self
, platform
):
803 m
.submodules
.add0_in_a
= self
.i
.a
804 m
.submodules
.add0_in_b
= self
.i
.b
805 m
.submodules
.add0_out_z
= self
.o
.z
807 # store intermediate tests (and zero-extended mantissas)
808 seq
= Signal(reset_less
=True)
809 mge
= Signal(reset_less
=True)
810 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
811 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
812 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
813 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
814 am0
.eq(Cat(self
.i
.a
.m
, 0)),
815 bm0
.eq(Cat(self
.i
.b
.m
, 0))
817 # same-sign (both negative or both positive) add mantissas
818 with m
.If(~self
.i
.out_do_z
):
819 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
822 self
.o
.tot
.eq(am0
+ bm0
),
823 self
.o
.z
.s
.eq(self
.i
.a
.s
)
825 # a mantissa greater than b, use a
828 self
.o
.tot
.eq(am0
- bm0
),
829 self
.o
.z
.s
.eq(self
.i
.a
.s
)
831 # b mantissa greater than a, use b
834 self
.o
.tot
.eq(bm0
- am0
),
835 self
.o
.z
.s
.eq(self
.i
.b
.s
)
838 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
840 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
841 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
845 class FPAddStage0(FPState
):
846 """ First stage of add. covers same-sign (add) and subtract
847 special-casing when mantissas are greater or equal, to
848 give greatest accuracy.
def __init__(self, width, id_wid):
    """ builds the "add_0" state around FPAddStage0Mod

        width, id_wid: both forwarded to FPAddStage0Mod, whose
                       constructor takes (width, id_wid).  The earlier
                       one-argument call could not construct the module.
    """
    FPState.__init__(self, "add_0")
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record has the same layout as the module's output
    self.o = self.mod.ospec()
856 def setup(self
, m
, i
):
857 """ links module to inputs and outputs
861 # NOTE: these could be done as combinatorial (merge add0+add1)
862 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
868 class FPAddStage1Data
:
870 def __init__(self
, width
, id_wid
):
871 self
.z
= FPNumBase(width
, False)
872 self
.out_do_z
= Signal(reset_less
=True)
874 self
.mid
= Signal(id_wid
, reset_less
=True)
877 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
878 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
882 class FPAddStage1Mod(FPState
):
883 """ Second stage of add: preparation for normalisation.
884 detects when tot sum is too big (tot[27] is kinda a carry bit)
887 def __init__(self
, width
, id_wid
):
890 self
.i
= self
.ispec()
891 self
.o
= self
.ospec()
894 return FPAddStage0Data(self
.width
, self
.id_wid
)
897 return FPAddStage1Data(self
.width
, self
.id_wid
)
899 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        adds this object to m as submodule "add1", adds its output
        overflow record (self.o.of) as "add1_out_overflow", and drives
        self.i combinatorially from i.
    """
    m.submodules.add1 = self
    m.submodules.add1_out_overflow = self.o.of

    m.d.comb += self.i.eq(i)
910 def elaborate(self
, platform
):
912 #m.submodules.norm1_in_overflow = self.in_of
913 #m.submodules.norm1_out_overflow = self.out_of
914 #m.submodules.norm1_in_z = self.in_z
915 #m.submodules.norm1_out_z = self.out_z
916 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
917 # tot[-1] (MSB) gets set when the sum overflows. shift result down
918 with m
.If(~self
.i
.out_do_z
):
919 with m
.If(self
.i
.tot
[-1]):
921 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
922 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
923 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
924 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
925 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
926 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
928 # tot[-1] (MSB) zero case
931 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
932 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
933 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
934 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
935 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
938 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
939 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
944 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ builds the "add_1" state around FPAddStage1Mod

        width, id_wid: both forwarded to FPAddStage1Mod, whose
                       constructor takes (width, id_wid).  The earlier
                       one-argument call could not construct the module.
    """
    FPState.__init__(self, "add_1")
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
953 def setup(self
, m
, i
):
954 """ links module to inputs and outputs
958 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
960 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
961 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
962 m
.d
.sync
+= self
.norm_stb
.eq(1)
965 m
.next
= "normalise_1"
968 class FPNormaliseModSingle
:
970 def __init__(self
, width
):
972 self
.in_z
= self
.ispec()
973 self
.out_z
= self
.ospec()
976 return FPNumBase(self
.width
, False)
979 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """ links module to inputs and outputs

        adds this object to m as submodule "normalise" and drives the
        input number combinatorially from i.  The input of this class
        is named in_z (that is what elaborate reads), so the assignment
        targets self.in_z rather than an "i" attribute.
    """
    m.submodules.normalise = self
    m.d.comb += self.in_z.eq(i)
987 def elaborate(self
, platform
):
990 mwid
= self
.out_z
.m_width
+2
991 pe
= PriorityEncoder(mwid
)
992 m
.submodules
.norm_pe
= pe
994 m
.submodules
.norm1_out_z
= self
.out_z
995 m
.submodules
.norm1_in_z
= self
.in_z
997 in_z
= FPNumBase(self
.width
, False)
999 m
.submodules
.norm1_insel_z
= in_z
1000 m
.submodules
.norm1_insel_overflow
= in_of
1002 espec
= (len(in_z
.e
), True)
1003 ediff_n126
= Signal(espec
, reset_less
=True)
1004 msr
= MultiShiftRMerge(mwid
, espec
)
1005 m
.submodules
.multishift_r
= msr
1007 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1008 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1009 # initialise out from in (overridden below)
1010 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1011 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1012 # normalisation decrease condition
1013 decrease
= Signal(reset_less
=True)
1014 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1016 with m
.If(decrease
):
1017 # *sigh* not entirely obvious: count leading zeros (clz)
1018 # with a PriorityEncoder: to find from the MSB
1019 # we reverse the order of the bits.
1020 temp_m
= Signal(mwid
, reset_less
=True)
1021 temp_s
= Signal(mwid
+1, reset_less
=True)
1022 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1024 # cat round and guard bits back into the mantissa
1025 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1026 pe
.i
.eq(temp_m
[::-1]), # inverted
1027 clz
.eq(pe
.o
), # count zeros from MSB down
1028 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1029 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1030 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ creates the roundz flag, the z number and mid

        width:  forwarded to FPNumBase for z
        id_wid: bit-width of the mid Signal
    """
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1043 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1046 class FPNorm1ModSingle
:
1048 def __init__(self
, width
, id_wid
):
1050 self
.id_wid
= id_wid
1051 self
.i
= self
.ispec()
1052 self
.o
= self
.ospec()
1055 return FPAddStage1Data(self
.width
, self
.id_wid
)
1058 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        adds this object to m as submodule "normalise_1" and drives
        self.i combinatorially from i.
    """
    m.submodules.normalise_1 = self
    m.d.comb += self.i.eq(i)
1066 def process(self
, i
):
1069 def elaborate(self
, platform
):
1072 mwid
= self
.o
.z
.m_width
+2
1073 pe
= PriorityEncoder(mwid
)
1074 m
.submodules
.norm_pe
= pe
1077 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1079 m
.submodules
.norm1_out_z
= self
.o
.z
1080 m
.submodules
.norm1_out_overflow
= of
1081 m
.submodules
.norm1_in_z
= self
.i
.z
1082 m
.submodules
.norm1_in_overflow
= self
.i
.of
1085 m
.submodules
.norm1_insel_z
= i
.z
1086 m
.submodules
.norm1_insel_overflow
= i
.of
1088 espec
= (len(i
.z
.e
), True)
1089 ediff_n126
= Signal(espec
, reset_less
=True)
1090 msr
= MultiShiftRMerge(mwid
, espec
)
1091 m
.submodules
.multishift_r
= msr
1093 m
.d
.comb
+= i
.eq(self
.i
)
1094 # initialise out from in (overridden below)
1095 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1096 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1097 m
.d
.comb
+= of
.eq(i
.of
)
1098 # normalisation increase/decrease conditions
1099 decrease
= Signal(reset_less
=True)
1100 increase
= Signal(reset_less
=True)
1101 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1102 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1104 with m
.If(decrease
):
1105 # *sigh* not entirely obvious: count leading zeros (clz)
1106 # with a PriorityEncoder: to find from the MSB
1107 # we reverse the order of the bits.
1108 temp_m
= Signal(mwid
, reset_less
=True)
1109 temp_s
= Signal(mwid
+1, reset_less
=True)
1110 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1111 # make sure that the amount to decrease by does NOT
1112 # go below the minimum non-INF/NaN exponent
1113 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1116 # cat round and guard bits back into the mantissa
1117 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1118 pe
.i
.eq(temp_m
[::-1]), # inverted
1119 clz
.eq(limclz
), # count zeros from MSB down
1120 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1121 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1122 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1123 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1124 # overflow in bits 0..1: got shifted too (leave sticky)
1125 of
.guard
.eq(temp_s
[1]), # guard
1126 of
.round_bit
.eq(temp_s
[0]), # round
1129 with m
.Elif(increase
):
1130 temp_m
= Signal(mwid
+1, reset_less
=True)
1132 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1134 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1135 # connect multi-shifter to inp/out mantissa (and ediff)
1137 msr
.diff
.eq(ediff_n126
),
1138 self
.o
.z
.m
.eq(msr
.m
[3:]),
1139 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1140 # overflow in bits 0..1: got shifted too (leave sticky)
1141 of
.guard
.eq(temp_s
[2]), # guard
1142 of
.round_bit
.eq(temp_s
[1]), # round
1143 of
.sticky
.eq(temp_s
[0]), # sticky
1144 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
class FPNorm1ModMulti:
    """ Combinatorial single-bit normalisation step.

        One evaluation shifts the mantissa by exactly one bit (and moves
        the exponent by one) when normalisation is still needed, and raises
        out_norm while a further step is required.  in_select chooses
        whether the step operates on in_z/in_of or on temp_z/temp_of.

        NOTE(review): FPNorm1Multi calls self.mod.setup(...), but no setup()
        method is visible on this class - confirm against the full file.
    """

    def __init__(self, width, single_cycle=True):
        """ creates the input, feedback and output records

            * width: passed to FPNumBase for every number record
            * single_cycle: accepted but not read by this class
        """
        self.width = width  # read back by elaborate()
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # elaborate() drives out_norm, so it has to exist on the instance
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """ builds the combinatorial logic and returns the Module
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies carrying whichever source in_select picks
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end

        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping an FPNorm1ModSingle instance.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: passed to the module and to FPNumBase
            * id_wid: passed to the module
            * single_cycle: accepted but not read here
        """
        FPState.__init__(self, "normalise_1")
        # id_wid is forwarded, matching how FPNormToPack builds the same
        # module: FPNorm1ModSingle(self.width, self.id_wid)
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input spec: whatever the wrapped module declares """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module declares """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        # NOTE(review): the body of action() is absent from the reviewed
        # text; "round" is the state name FPRound registers - confirm.
        m.next = "round"
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" for normalisation spread over several clocks.

        Owns the stb/ack handshake signals and the temp_z / temp_of
        registers that are reloaded from the module outputs on every clock
        spent in this state.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPNorm1ModMulti and FPNumBase
            * id_wid: accepted but not read here
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ per-clock behaviour while the FSM is in this state
        """
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                # NOTE(review): the body of this branch and the Else
                # keywords are absent from the reviewed text; raising ack
                # here is presumed from the ack.eq(0) in the other branch.
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            # NOTE(review): transition restored; "round" is the state name
            # FPRound registers - confirm.
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" that runs normalisation, rounding,
        corrections and packing as one chained stage, then hands over to
        the "pack_put_z" state.
    """

    def __init__(self, width, id_wid):
        """ * width: forwarded to every chained module and data spec
            * id_wid: forwarded likewise
        """
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width  # ispec()/ospec()/setup() all read self.width

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # NOTE(review): this call is absent from the reviewed text; without
        # it neither `chain` nor `i` is used.  Confirm StageChain.setup(m, i).
        chain.setup(m, i)

        self.out_z = pmod.ospec()

        # register the packed result and its id
        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(pmod.o.z.v) # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ data record: a number `z` plus its identifier `mid`

        * width: passed to FPNumBase
        * id_wid: bit-width of the mid Signal
    """
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)

def eq(self, i):
    """ returns the assignments copying z and mid from record i
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ * width: stored; read by ispec()/ospec()
        * id_wid: stored; read by ispec()/ospec()
    """
    self.width = width  # must be set before ispec()/ospec() are called
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
def ispec(self):
    """ input record type of the rounding stage """
    return FPNorm1Data(self.width, self.id_wid)

def ospec(self):
    """ output record type of the rounding stage """
    return FPRoundData(self.width, self.id_wid)
def process(self, i):
    # NOTE(review): the body is absent from the reviewed text; returning the
    # stage's output record (out_z, as created in __init__) is presumed -
    # confirm against StageChain's expectations.
    return self.out_z
1343 def setup(self
, m
, i
):
1344 m
.submodules
.roundz
= self
1345 m
.d
.comb
+= self
.i
.eq(i
)
def elaborate(self, platform):
    """ rounding logic: out_z copies the input; when the input's roundz
        flag is set the mantissa is incremented, and if the mantissa was
        all ones the exponent is incremented as well
    """
    m = Module()
    m.d.comb += self.out_z.eq(self.i)
    with m.If(self.i.roundz):
        m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa rounds up
        with m.If(self.i.z.m == self.i.z.m1s): # all 1s
            m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
    return m
class FPRound(FPState):
    """ FSM state "round": registers the output of an FPRoundMod and moves
        on to the "corrections" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod is built with (width, id_wid) elsewhere in this file
        # (see FPNormToPack.setup), so id_wid is forwarded here too
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the rounding module's output record is `out_z` (it has no `o`)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial exponent correction: the output copies the input
        record, except that the exponent is replaced by the input's N127
        value when the input reports is_denormalised.
    """

    def __init__(self, width, id_wid):
        self.width = width  # read by ispec()/ospec()
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body absent from the reviewed text; returning the
        # output record is presumed - confirm.
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of an
        FPCorrectionsMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as `out_z`; it has no `o`
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # NOTE(review): body absent from the reviewed text; "pack" is the
        # state name FPPack registers - confirm.
        m.next = "pack"
def __init__(self, width, id_wid):
    """ data record: an FPNumOut `z` plus its identifier `mid`

        * width: passed to FPNumOut
        * id_wid: bit-width of the mid Signal
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)

def eq(self, i):
    """ returns the assignments copying z and mid from record i
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ * width: stored; read by ispec()/ospec()
        * id_wid: stored; read by ispec()/ospec()
    """
    self.width = width  # must be set before ispec()/ospec() are called
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
def ispec(self):
    """ input record type of the pack stage """
    return FPRoundData(self.width, self.id_wid)

def ospec(self):
    """ output record type of the pack stage """
    return FPPackData(self.width, self.id_wid)
def process(self, i):
    # NOTE(review): the body is absent from the reviewed text; returning the
    # stage's output record (o, as created in __init__) is presumed - confirm.
    return self.o
1468 def setup(self
, m
, in_z
):
1469 """ links module to inputs and outputs
1471 m
.submodules
.pack
= self
1472 m
.d
.comb
+= self
.i
.eq(in_z
)
def elaborate(self, platform):
    """ packing logic: forwards mid; the output number is set through
        inf(sign) when the input reports is_overflowed, otherwise through
        create(sign, exponent, mantissa)
    """
    m = Module()
    m.submodules.pack_in_z = self.i.z
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(self.i.z.is_overflowed):
        m.d.comb += self.o.z.inf(self.i.z.s)
    with m.Else():
        m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    return m
def __init__(self, width, id_wid):
    """ data record: an FPNumOut `z` plus its identifier `mid`

        * width: passed to FPNumOut
        * id_wid: bit-width of the mid Signal
    """
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)

def eq(self, i):
    """ returns the assignments copying z and mid from record i
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
class FPPack(FPState):
    """ FSM state "pack": registers the packed value and id produced by an
        FPPackMod, then moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod is built with (width, id_wid) elsewhere in this file
        # (see FPNormToPack.setup), so id_wid is forwarded here too
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the pack module's result record is `o`, and the packed bits live
        # at .z.v - the same path FPNormToPack.setup reads (pmod.o.z.v)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z and waits for the
        stb/ack handshake before moving to `to_state`.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its .v is copied out
            * out_z: destination; .z.v / .z.stb / .z.ack are used
            * in_mid, out_mid: id source/destination (in_mid may be None)
            * to_state: next state name, "get_ops" when None
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z    # read by action()
        self.out_z = out_z  # read by action()
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z.v)
        ]
        # strobe is held high until it is acknowledged, then dropped
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ Like FPPutZ, but the destination is picked out of the out_zs
        collection by indexing it with in_mid.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its .v is copied out
            * out_zs: indexable collection of destinations
            * in_mid: index into out_zs
            * to_state: next state name, "get_ops" when None
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z  # read by action()
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        # strobe is held high until it is acknowledged, then dropped
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ Input record of the adder: operands a and b plus identifier mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the a and b Signals
            * id_wid: bit-width of the mid Signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying a, b and mid from record i
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ data record: an FPOp `z` plus its identifier `mid`

        * width: passed to FPOp
        * id_wid: bit-width of the mid Signal
    """
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)

def eq(self, i):
    """ returns the assignments copying z and mid from record i
    """
    return [self.z.eq(i.z), self.mid.eq(i.mid)]
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    self.width = width  # read by ispec()/ospec() and the fragment builders
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    self.states = []  # filled by add_state()
def ispec(self):
    """ input record type: operands a, b and mid """
    return FPADDBaseData(self.width, self.id_wid)

def ospec(self):
    """ output record type """
    return FPOpData(self.width, self.id_wid)
def add_state(self, state):
    """ appends state to self.states and returns it, so callers can write
        `x = self.add_state(...)` and keep using the state object
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    m = Module()
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t
    # the two builders register states under the same names, so exactly
    # one of them is used, chosen by the `compact` constructor flag
    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                # NOTE(review): body absent from the reviewed text; every
                # state class in this file provides action(m).
                state.action(m)

    return m
def get_longer_fragment(self, m, platform=None):
    """ registers the long (one FSM state per processing step) chain:
        get_ops, special cases, denormalise, align, add0, add1, normalise,
        round, corrections, pack and the two put-z states.

        NOTE(review): several calls here do not match what is visible
        elsewhere in this file - self.in_mid / self.out_z / self.out_mid
        are not created by __init__, and FPRound.setup / FPCorrections.setup
        / FPPack.setup / FPNorm1Single.setup take fewer arguments than are
        passed below.  This path looks stale; confirm before relying on it.
    """

    # NOTE(review): the tail of this call is absent from the reviewed text;
    # the arguments are presumed to match get_compact_fragment.
    get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                  self.width, self.id_wid))
    get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
    # NOTE(review): the definitions of a and b are absent from the reviewed
    # text; the attribute names below are presumed - verify against FPGet2Op.
    a = get.out_op1
    b = get.out_op2

    sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
    sc.setup(m, a, b, self.in_mid)

    dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
    dn.setup(m, a, b, sc.in_mid)

    # alignment: single-cycle or multi-cycle flavour
    if self.single_cycle:
        alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
    else:
        alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

    add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
    add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

    add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
    add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

    # normalisation: single-cycle or multi-cycle flavour
    if self.single_cycle:
        n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
        n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
    else:
        n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
        n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

    rn = self.add_state(FPRound(self.width, self.id_wid))
    rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

    cor = self.add_state(FPCorrections(self.width, self.id_wid))
    cor.setup(m, rn.out_z, rn.in_mid)

    pa = self.add_state(FPPack(self.width, self.id_wid))
    pa.setup(m, cor.out_z, rn.in_mid)

    ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                pa.in_mid, self.out_mid))

    pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                               pa.in_mid, self.out_mid))
def get_compact_fragment(self, m, platform=None):
    """ registers the reduced chain of FSM states: get_ops, combined
        special-cases/denormalise, combined align/add, combined
        normalise-to-pack, and the two put-z states
    """

    get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                  self.width, self.id_wid))
    get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
    # NOTE(review): this setup call is absent from the reviewed text; the
    # argument (the previous state's output record) is presumed - confirm.
    sc.setup(m, get.o)

    alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
    # NOTE(review): likewise absent and presumed; sc.o is referenced below.
    alm.setup(m, sc.o)

    n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
    n1.setup(m, alm.a1o)

    # normal result path
    ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                n1.out_z.mid, self.o.mid))

    # special-case result path
    pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
                               sc.o.mid, self.o.mid))
class FPADDBase(FPState):
    """ FSM state "fpadd" that owns an FPADDBaseMod (itself an FSM) and
        bridges the outer stb/ack handshake to the module's in_t trigger
        and output record.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires the wrapped module: inputs, trigger and output record are
            forwarded combinatorially; add_stb is registered.  in_mid is
            accepted but not read here.
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ per-clock behaviour while the outer FSM is in "fpadd"

            NOTE(review): parts of this method are absent from the reviewed
            text and were restored as marked.  The statements that followed
            (copying self.mod.out_mid / self.mod.out_z.v and toggling
            self.out_z.stb) are dropped: they use attributes this class and
            FPADDBaseMod never create, and setup() already forwards mid, v
            and stb combinatorially.
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             # NOTE(review): presumed - setup() clears
                             # o.z.ack "when not in active state", so it is
                             # expected to be raised here; confirm.
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         # NOTE(review): presumed list tail; confirm.
                         self.in_t.stb.eq(0),
                        ]
            # NOTE(review): transition restored; "put_z" is the state FPADD
            # registers after this one - confirm.
            m.next = "put_z"
def __init__(self, width, id_wid, rs_sz=2):
    """ * width: passed to FPOp
        * id_wid: bit-width of the in_mid Signal
        * rs_sz: number of result entries to create.  Added with a default
          because the loop below reads rs_sz and nothing else in this
          method defines it.
    """
    self.width = width
    self.id_wid = id_wid
    res = []
    for i in range(rs_sz):
        # NOTE(review): entry construction is absent from the reviewed
        # text; FPOp is presumed from self.in_z below - confirm.
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)
    self.in_z = FPOp(width)
    self.in_mid = Signal(self.id_wid, reset_less=True)
1817 def setup(self
, m
, in_z
, in_mid
):
1818 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1819 self
.in_mid
.eq(in_mid
)]
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd
    """
    m = Module()
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res

    return m
1838 """ FPADD: stages as follows:
1844 FPAddBase---> FPAddBaseMod
1846 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
        FPADDBase is tricky: it is both a stage and *has* stages.
        Connection to FPADDBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with the Add1-Norm1 interaction,
        FPGetOp needs to be the thing that raises the incoming stb.
def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * rs_sz: number of operand pairs / result entries to create
    """
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle

    #self.out_z = FPOp(width)
    self.ids = FPID(id_wid)

    # operand pairs
    # NOTE(review): construction of in_a/in_b/out_z and the list/Array
    # bookkeeping are absent from the reviewed text; FPOp is presumed from
    # the .ports() calls in the script section - confirm.
    rs = []
    for i in range(rs_sz):
        in_a = FPOp(width)
        in_b = FPOp(width)
        in_a.name = "in_a_%d" % i
        in_b.name = "in_b_%d" % i
        rs.append((in_a, in_b))
    self.rs = Array(rs)

    # results
    res = []
    for i in range(rs_sz):
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)

    self.states = []  # filled by add_state()
def add_state(self, state):
    """ appends state to self.states and returns it, so callers can write
        `x = self.add_state(...)` and keep using the state object
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        States registered, in order: get_a, get_b, fpadd (an FPADDBase),
        put_z (an FPPutZIdx writing into self.res).
    """
    m = Module()
    m.submodules += self.rs

    in_a = self.rs[0][0]
    in_b = self.rs[0][1]

    # NOTE(review): the trailing FPGetOp arguments, the setup() calls and
    # the definitions of a and b are absent from the reviewed text; the
    # forms below are presumed - verify against FPGetOp.
    geta = self.add_state(FPGetOp("get_a", "get_b",
                                  in_a, self.width))
    geta.setup(m, in_a)
    a = geta.out_op

    getb = self.add_state(FPGetOp("get_b", "fpadd",
                                  in_b, self.width))
    getb.setup(m, in_b)
    b = getb.out_op

    ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
    ab = self.add_state(ab)
    abd = ab.ispec() # create an input spec object for FPADDBase
    m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
    ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
    # NOTE(review): definition of `o` is absent; FPADDBase's output record
    # is its `o` attribute.
    o = ab.o

    # NOTE(review): the trailing arguments are absent; (in_mid, to_state)
    # follow FPPutZIdx's signature, with values presumed.
    pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                  o.mid, "get_a"))

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                state.action(m)

    return m
if __name__ == "__main__":
    # Build the top-level adder and hand it to the nmigen command line.
    # An alternative invocation on FPADDBase used to follow here; it read
    # alu.in_a / in_b / out_z / in_mid / out_mid, none of which FPADDBase
    # defines (it exposes i, o and in_t), so it could not run and was
    # removed.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    ports = (alu.rs[0][0].ports() +
             alu.rs[0][1].ports() +
             alu.res[0].ports() +
             [alu.ids.in_mid, alu.ids.out_mid])
    main(alu, ports=ports)
1943 # works... but don't use, just do "python fname.py convert -t v"
1944 #print (verilog.convert(alu, ports=[
1945 # ports=alu.in_a.ports() + \
1946 # alu.in_b.ports() + \
1947 # alu.out_z.ports())