1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """Record the identifier of the state this object stands for.

    state_from: stored unchanged on ``self.state_from``; subclasses in
                this file pass strings such as "align" or "add_0".
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """Create the operand-fetch ports.

    width: passed to FPOp for the incoming operand and used as the
           shape of the outgoing raw signal.
    """
    # incoming operand (an FPOp of the requested width)
    self.in_op = FPOp(width)
    # outgoing operand value
    self.out_op = Signal(width)
    # reset-less flag; driven in elaborate() from in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Build the two-operand fetch state.

    in_state:  forwarded to FPState.__init__ as this state's identifier
    out_state: kept on self.out_state (assigned to m.next elsewhere in
               this class once out_decode is set)
    width, id_wid: forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # worker module plus a second record of the module's output type
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # reset-less handshake / decode flags
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Allocate the two operand numbers and the multiplex-id signal.

    width:   forwarded to FPNumBase for both operands
    id_wid:  shape of the reset-less ``mid`` signal
    m_extra: forwarded unchanged as FPNumBase's second argument
    """
    self.a = FPNumBase(width, m_extra)    # first operand
    self.b = FPNumBase(width, m_extra)    # second operand
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """Allocate operands, candidate result, bypass flag and id.

    width:  forwarded to FPNumBase (a, b) and FPNumOut (z)
    id_wid: shape of the reset-less ``mid`` signal
    """
    # the two operands, both created with FPNumBase's second arg True
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    # result number, created with FPNumOut's second arg False
    self.z = FPNumOut(width, False)
    # reset-less flag carried alongside z
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
273 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
274 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
277 class FPAddSpecialCasesMod
:
278 """ special cases: NaNs, infs, zeros, denormalised
279 NOTE: some of these are unique to add. see "Special Operations"
280 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
283 def __init__(self
, width
, id_wid
):
286 self
.i
= self
.ispec()
287 self
.o
= self
.ospec()
290 return FPNumBase2Ops(self
.width
, self
.id_wid
)
293 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ registers this module under m and wires its input record

        m: parent Module; self becomes its ``specialcases`` submodule
        i: source record, combinatorially copied into self.i
    """
    setattr(m.submodules, "specialcases", self)
    m.d.comb += self.i.eq(i)
301 def elaborate(self
, platform
):
304 m
.submodules
.sc_in_a
= self
.i
.a
305 m
.submodules
.sc_in_b
= self
.i
.b
306 m
.submodules
.sc_out_z
= self
.o
.z
309 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
312 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
314 # if a is NaN or b is NaN return NaN
315 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
316 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
317 m
.d
.comb
+= self
.o
.z
.nan(0)
319 # XXX WEIRDNESS for FP16 non-canonical NaN handling
322 ## if a is zero and b is NaN return -b
323 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
324 # m.d.comb += self.o.out_do_z.eq(1)
325 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
327 ## if b is zero and a is NaN return -a
328 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
329 # m.d.comb += self.o.out_do_z.eq(1)
330 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
332 ## if a is -zero and b is NaN return -b
333 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
334 # m.d.comb += self.o.out_do_z.eq(1)
335 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
337 ## if b is -zero and a is NaN return -a
338 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
339 # m.d.comb += self.o.out_do_z.eq(1)
340 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
342 # if a is inf return inf (or NaN)
343 with m
.Elif(self
.i
.a
.is_inf
):
344 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
346 # if a is inf and signs don't match return NaN
347 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
348 m
.d
.comb
+= self
.o
.z
.nan(0)
350 # if b is inf return inf
351 with m
.Elif(self
.i
.b
.is_inf
):
352 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
355 # if a is zero and b zero return signed-a/b
356 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
362 # if a is zero return b
363 with m
.Elif(self
.i
.a
.is_zero
):
364 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
368 # if b is zero return a
369 with m
.Elif(self
.i
.b
.is_zero
):
370 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
371 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
374 # if a equal to -b return zero (+ve zero)
375 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
376 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
377 m
.d
.comb
+= self
.o
.z
.zero(0)
379 # Denormalised Number checks next, so pass a/b data through
381 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
382 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
383 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
385 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
391 def __init__(self
, id_wid
):
394 self
.in_mid
= Signal(id_wid
, reset_less
=True)
395 self
.out_mid
= Signal(id_wid
, reset_less
=True)
401 if self
.id_wid
is not None:
402 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
405 class FPAddSpecialCases(FPState
):
406 """ special cases: NaNs, infs, zeros, denormalised
407 NOTE: some of these are unique to add. see "Special Operations"
408 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the special-cases state around FPAddSpecialCasesMod.

    width, id_wid: forwarded to FPAddSpecialCasesMod.  The module's
                   constructor requires both; passing only ``width``
                   (as this code previously did) raises TypeError.
    """
    FPState.__init__(self, "special_cases")
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # a second record of the module's output type
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
417 def setup(self
, m
, i
):
418 """ links module to inputs and outputs
420 self
.mod
.setup(m
, i
, self
.out_do_z
)
421 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
422 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
426 with m
.If(self
.out_do_z
):
429 m
.next
= "denormalise"
432 class FPAddSpecialCasesDeNorm(FPState
):
433 """ special cases: NaNs, infs, zeros, denormalised
434 NOTE: some of these are unique to add. see "Special Operations"
435 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
438 def __init__(self
, width
, id_wid
):
439 FPState
.__init
__(self
, "special_cases")
440 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
441 self
.out_z
= self
.smod
.ospec()
442 self
.out_do_z
= Signal(reset_less
=True)
444 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
445 self
.o
= self
.dmod
.ospec()
447 def setup(self
, m
, i
):
448 """ links module to inputs and outputs
450 self
.smod
.setup(m
, i
)
451 self
.dmod
.setup(m
, self
.smod
.o
)
452 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
455 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
456 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
458 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
461 with m
.If(self
.out_do_z
):
467 class FPAddDeNormMod(FPState
):
469 def __init__(self
, width
, id_wid
):
472 self
.i
= self
.ispec()
473 self
.o
= self
.ospec()
476 return FPSCData(self
.width
, self
.id_wid
)
479 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ registers this module under m and wires its input record

        m: parent Module; self becomes its ``denormalise`` submodule
        i: source record, combinatorially copied into self.i
    """
    setattr(m.submodules, "denormalise", self)
    m.d.comb += self.i.eq(i)
487 def elaborate(self
, platform
):
489 m
.submodules
.denorm_in_a
= self
.i
.a
490 m
.submodules
.denorm_in_b
= self
.i
.b
491 m
.submodules
.denorm_out_a
= self
.o
.a
492 m
.submodules
.denorm_out_b
= self
.o
.b
494 with m
.If(~self
.i
.out_do_z
):
495 # XXX hmmm, don't like repeating identical code
496 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
497 with m
.If(self
.i
.a
.exp_n127
):
498 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
500 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
502 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
503 with m
.If(self
.i
.b
.exp_n127
):
504 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
506 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
508 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
509 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
510 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
515 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Build the denormalise state around FPAddDeNormMod.

    width, id_wid: forwarded to FPAddDeNormMod.  The module's
                   constructor requires both; passing only ``width``
                   (as this code previously did) raises TypeError.
    """
    FPState.__init__(self, "denormalise")
    self.mod = FPAddDeNormMod(width, id_wid)
    # numbers that setup() loads from the module in the sync domain
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
523 def setup(self
, m
, i
):
524 """ links module to inputs and outputs
528 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
529 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
532 # Denormalised Number checks
536 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the ports of the multi-cycle alignment module.

    width: forwarded to FPNumBase (inputs) and FPNumIn (outputs)
    """
    # operands as received
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # operands after this alignment step
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # reset-less flag; elaborate() drives it (comment there: set when
    # the exponents are equal)
    self.exp_eq = Signal(reset_less=True)
545 def elaborate(self
, platform
):
546 # This one however (single-cycle) will do the shift
551 m
.submodules
.align_in_a
= self
.in_a
552 m
.submodules
.align_in_b
= self
.in_b
553 m
.submodules
.align_out_a
= self
.out_a
554 m
.submodules
.align_out_b
= self
.out_b
556 # NOTE: this does *not* do single-cycle multi-shifting,
557 # it *STAYS* in the align state until exponents match
559 # exponent of a greater than b: shift b down
560 m
.d
.comb
+= self
.exp_eq
.eq(0)
561 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
562 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
563 agtb
= Signal(reset_less
=True)
564 altb
= Signal(reset_less
=True)
565 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
566 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
568 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
569 # exponent of b greater than a: shift a down
571 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
572 # exponents equal: move to next stage.
574 m
.d
.comb
+= self
.exp_eq
.eq(1)
578 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Build the multi-cycle "align" state.

    width:  forwarded to FPAddAlignMultiMod and to the FPNumIn outputs
    id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # copies of the module outputs, loaded in setup() via m.d.sync
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors the module's exp_eq flag (wired in setup())
    self.exp_eq = Signal(reset_less=True)
587 def setup(self
, m
, in_a
, in_b
):
588 """ links module to inputs and outputs
590 m
.submodules
.align
= self
.mod
591 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
592 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
593 #m.d.comb += self.out_a.eq(self.mod.out_a)
594 #m.d.comb += self.out_b.eq(self.mod.out_b)
595 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
596 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
597 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
600 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Allocate FPNumIn operands, candidate result, bypass flag and id.

    width:  forwarded to FPNumIn (a, b) and FPNumOut (z)
    id_wid: shape of the reset-less ``mid`` signal
    """
    self.a = FPNumIn(None, width)      # first operand
    self.b = FPNumIn(None, width)      # second operand
    self.z = FPNumOut(width, False)    # result number
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
614 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
615 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
618 class FPAddAlignSingleMod
:
620 def __init__(self
, width
, id_wid
):
623 self
.i
= self
.ispec()
624 self
.o
= self
.ospec()
627 return FPSCData(self
.width
, self
.id_wid
)
630 return FPNumIn2Ops(self
.width
, self
.id_wid
)
632 def process(self
, i
):
def setup(self, m, i):
    """ registers this module under m and wires its input record

        m: parent Module; self becomes its ``align`` submodule
        i: source record, combinatorially copied into self.i
    """
    setattr(m.submodules, "align", self)
    m.d.comb += self.i.eq(i)
641 def elaborate(self
, platform
):
642 """ Aligns A against B or B against A, depending on which has the
643 greater exponent. This is done in a *single* cycle using
644 variable-width bit-shift
646 the shifter used here is quite expensive in terms of gates.
647 Mux A or B in (and out) into temporaries, as only one of them
648 needs to be aligned against the other
652 m
.submodules
.align_in_a
= self
.i
.a
653 m
.submodules
.align_in_b
= self
.i
.b
654 m
.submodules
.align_out_a
= self
.o
.a
655 m
.submodules
.align_out_b
= self
.o
.b
657 # temporary (muxed) input and output to be shifted
658 t_inp
= FPNumBase(self
.width
)
659 t_out
= FPNumIn(None, self
.width
)
660 espec
= (len(self
.i
.a
.e
), True)
661 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
662 m
.submodules
.align_t_in
= t_inp
663 m
.submodules
.align_t_out
= t_out
664 m
.submodules
.multishift_r
= msr
666 ediff
= Signal(espec
, reset_less
=True)
667 ediffr
= Signal(espec
, reset_less
=True)
668 tdiff
= Signal(espec
, reset_less
=True)
669 elz
= Signal(reset_less
=True)
670 egz
= Signal(reset_less
=True)
672 # connect multi-shifter to t_inp/out mantissa (and tdiff)
673 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
674 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
675 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
676 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
677 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
679 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
680 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
681 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
682 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
684 # default: A-exp == B-exp, A and B untouched (fall through)
685 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
686 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
687 # only one shifter (muxed)
688 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
689 # exponent of a greater than b: shift b down
690 with m
.If(~self
.i
.out_do_z
):
692 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
695 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
697 # exponent of b greater than a: shift a down
699 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
702 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
705 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
706 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
707 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
712 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Build the single-cycle "align" state.

    width, id_wid: forwarded to FPAddAlignSingleMod; width is also
                   used for the two FPNumIn outputs.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # numbers that setup() loads from the module in the sync domain
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
720 def setup(self
, m
, i
):
721 """ links module to inputs and outputs
725 # NOTE: could be done as comb
726 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
727 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
733 class FPAddAlignSingleAdd(FPState
):
735 def __init__(self
, width
, id_wid
):
736 FPState
.__init
__(self
, "align")
739 self
.a1o
= self
.ospec()
742 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
745 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
747 def setup(self
, m
, i
):
748 """ links module to inputs and outputs
751 # chain AddAlignSingle, AddStage0 and AddStage1
752 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
753 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
754 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
756 chain
= StageChain([mod
, a0mod
, a1mod
])
759 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
762 m
.next
= "normalise_1"
765 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Allocate the add-stage-0 result record.

    width:  forwarded to FPNumBase for ``z``
    id_wid: shape of the reset-less ``mid`` signal
    """
    self.z = FPNumBase(width, False)
    # total is four bits wider than z's mantissa width
    tot_width = self.z.m_width + 4
    self.tot = Signal(tot_width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
773 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
776 class FPAddStage0Mod
:
778 def __init__(self
, width
, id_wid
):
781 self
.i
= self
.ispec()
782 self
.o
= self
.ospec()
785 return FPNumBase2Ops(self
.width
, self
.id_wid
)
788 return FPAddStage0Data(self
.width
, self
.id_wid
)
790 def process(self
, i
):
def setup(self, m, i):
    """ registers this module under m and wires its input record

        m: parent Module; self becomes its ``add0`` submodule
        i: source record, combinatorially copied into self.i
    """
    setattr(m.submodules, "add0", self)
    m.d.comb += self.i.eq(i)
799 def elaborate(self
, platform
):
801 m
.submodules
.add0_in_a
= self
.i
.a
802 m
.submodules
.add0_in_b
= self
.i
.b
803 m
.submodules
.add0_out_z
= self
.o
.z
805 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
806 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
808 # store intermediate tests (and zero-extended mantissas)
809 seq
= Signal(reset_less
=True)
810 mge
= Signal(reset_less
=True)
811 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
812 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
813 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
814 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
815 am0
.eq(Cat(self
.i
.a
.m
, 0)),
816 bm0
.eq(Cat(self
.i
.b
.m
, 0))
818 # same-sign (both negative or both positive) add mantissas
821 self
.o
.tot
.eq(am0
+ bm0
),
822 self
.o
.z
.s
.eq(self
.i
.a
.s
)
824 # a mantissa greater than b, use a
827 self
.o
.tot
.eq(am0
- bm0
),
828 self
.o
.z
.s
.eq(self
.i
.a
.s
)
830 # b mantissa greater than a, use b
833 self
.o
.tot
.eq(bm0
- am0
),
834 self
.o
.z
.s
.eq(self
.i
.b
.s
)
839 class FPAddStage0(FPState
):
840 """ First stage of add. covers same-sign (add) and subtract
841 special-casing when mantissas are greater or equal, to
842 give greatest accuracy.
def __init__(self, width, id_wid):
    """Build the "add_0" state around FPAddStage0Mod.

    width, id_wid: forwarded to FPAddStage0Mod.  The module's
                   constructor requires both; passing only ``width``
                   (as this code previously did) raises TypeError.
    """
    FPState.__init__(self, "add_0")
    self.mod = FPAddStage0Mod(width, id_wid)
    # a second record of the module's output type, loaded in setup()
    self.o = self.mod.ospec()
850 def setup(self
, m
, i
):
851 """ links module to inputs and outputs
855 # NOTE: these could be done as combinatorial (merge add0+add1)
856 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
862 class FPAddStage1Data
:
864 def __init__(self
, width
, id_wid
):
865 self
.z
= FPNumBase(width
, False)
867 self
.mid
= Signal(id_wid
, reset_less
=True)
870 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
874 class FPAddStage1Mod(FPState
):
875 """ Second stage of add: preparation for normalisation.
876 detects when tot sum is too big (tot[27] is kinda a carry bit)
879 def __init__(self
, width
, id_wid
):
882 self
.i
= self
.ispec()
883 self
.o
= self
.ospec()
886 return FPAddStage0Data(self
.width
, self
.id_wid
)
889 return FPAddStage1Data(self
.width
, self
.id_wid
)
891 def process(self
, i
):
894 def setup(self
, m
, i
):
895 """ links module to inputs and outputs
897 m
.submodules
.add1
= self
898 m
.submodules
.add1_out_overflow
= self
.o
.of
900 m
.d
.comb
+= self
.i
.eq(i
)
902 def elaborate(self
, platform
):
904 #m.submodules.norm1_in_overflow = self.in_of
905 #m.submodules.norm1_out_overflow = self.out_of
906 #m.submodules.norm1_in_z = self.in_z
907 #m.submodules.norm1_out_z = self.out_z
908 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
909 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
910 # tot[-1] (MSB) gets set when the sum overflows. shift result down
911 with m
.If(self
.i
.tot
[-1]):
913 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
914 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
915 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
916 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
917 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
918 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
920 # tot[-1] (MSB) zero case
923 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
924 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
925 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
926 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
927 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
932 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Build the "add_1" state around FPAddStage1Mod.

    width, id_wid: forwarded to FPAddStage1Mod.  The module's
                   constructor requires both; passing only ``width``
                   (as this code previously did) raises TypeError.
    """
    FPState.__init__(self, "add_1")
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe towards normalisation; setup() clears it by default
    self.norm_stb = Signal()
941 def setup(self
, m
, i
):
942 """ links module to inputs and outputs
946 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
948 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
949 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
950 m
.d
.sync
+= self
.norm_stb
.eq(1)
953 m
.next
= "normalise_1"
956 class FPNormaliseModSingle
:
958 def __init__(self
, width
):
960 self
.in_z
= self
.ispec()
961 self
.out_z
= self
.ospec()
964 return FPNumBase(self
.width
, False)
967 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: parent Module; self becomes its ``normalise`` submodule
        i: source number, combinatorially copied into self.in_z

        The constructor stores the ispec() record as ``self.in_z``;
        this class has no ``self.i``, so the input is wired to in_z.
    """
    m.submodules.normalise = self
    m.d.comb += self.in_z.eq(i)
975 def elaborate(self
, platform
):
978 mwid
= self
.out_z
.m_width
+2
979 pe
= PriorityEncoder(mwid
)
980 m
.submodules
.norm_pe
= pe
982 m
.submodules
.norm1_out_z
= self
.out_z
983 m
.submodules
.norm1_in_z
= self
.in_z
985 in_z
= FPNumBase(self
.width
, False)
987 m
.submodules
.norm1_insel_z
= in_z
988 m
.submodules
.norm1_insel_overflow
= in_of
990 espec
= (len(in_z
.e
), True)
991 ediff_n126
= Signal(espec
, reset_less
=True)
992 msr
= MultiShiftRMerge(mwid
, espec
)
993 m
.submodules
.multishift_r
= msr
995 m
.d
.comb
+= in_z
.eq(self
.in_z
)
996 m
.d
.comb
+= in_of
.eq(self
.in_of
)
997 # initialise out from in (overridden below)
998 m
.d
.comb
+= self
.out_z
.eq(in_z
)
999 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1000 # normalisation decrease condition
1001 decrease
= Signal(reset_less
=True)
1002 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1004 with m
.If(decrease
):
1005 # *sigh* not entirely obvious: count leading zeros (clz)
1006 # with a PriorityEncoder: to find from the MSB
1007 # we reverse the order of the bits.
1008 temp_m
= Signal(mwid
, reset_less
=True)
1009 temp_s
= Signal(mwid
+1, reset_less
=True)
1010 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1012 # cat round and guard bits back into the mantissa
1013 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1014 pe
.i
.eq(temp_m
[::-1]), # inverted
1015 clz
.eq(pe
.o
), # count zeros from MSB down
1016 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1017 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1018 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Allocate the rounding flag, result number and id signal.

    width:  forwarded to FPNumBase for ``z``
    id_wid: shape of the reset-less ``mid`` signal
    """
    self.roundz = Signal(reset_less=True)   # reset-less flag
    self.z = FPNumBase(width, False)        # result number
    self.mid = Signal(id_wid, reset_less=True)
1031 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1034 class FPNorm1ModSingle
:
1036 def __init__(self
, width
, id_wid
):
1038 self
.id_wid
= id_wid
1039 self
.i
= self
.ispec()
1040 self
.o
= self
.ospec()
1043 return FPAddStage1Data(self
.width
, self
.id_wid
)
1046 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ registers this module under m and wires its input record

        m: parent Module; self becomes its ``normalise_1`` submodule
        i: source record, combinatorially copied into self.i
    """
    setattr(m.submodules, "normalise_1", self)
    m.d.comb += self.i.eq(i)
1054 def process(self
, i
):
1057 def elaborate(self
, platform
):
1060 mwid
= self
.o
.z
.m_width
+2
1061 pe
= PriorityEncoder(mwid
)
1062 m
.submodules
.norm_pe
= pe
1065 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1067 m
.submodules
.norm1_out_z
= self
.o
.z
1068 m
.submodules
.norm1_out_overflow
= of
1069 m
.submodules
.norm1_in_z
= self
.i
.z
1070 m
.submodules
.norm1_in_overflow
= self
.i
.of
1073 m
.submodules
.norm1_insel_z
= i
.z
1074 m
.submodules
.norm1_insel_overflow
= i
.of
1076 espec
= (len(i
.z
.e
), True)
1077 ediff_n126
= Signal(espec
, reset_less
=True)
1078 msr
= MultiShiftRMerge(mwid
, espec
)
1079 m
.submodules
.multishift_r
= msr
1081 m
.d
.comb
+= i
.eq(self
.i
)
1082 # initialise out from in (overridden below)
1083 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1084 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1085 m
.d
.comb
+= of
.eq(i
.of
)
1086 # normalisation increase/decrease conditions
1087 decrease
= Signal(reset_less
=True)
1088 increase
= Signal(reset_less
=True)
1089 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1090 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1092 with m
.If(decrease
):
1093 # *sigh* not entirely obvious: count leading zeros (clz)
1094 # with a PriorityEncoder: to find from the MSB
1095 # we reverse the order of the bits.
1096 temp_m
= Signal(mwid
, reset_less
=True)
1097 temp_s
= Signal(mwid
+1, reset_less
=True)
1098 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1099 # make sure that the amount to decrease by does NOT
1100 # go below the minimum non-INF/NaN exponent
1101 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1104 # cat round and guard bits back into the mantissa
1105 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1106 pe
.i
.eq(temp_m
[::-1]), # inverted
1107 clz
.eq(limclz
), # count zeros from MSB down
1108 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1109 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1110 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1111 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1112 # overflow in bits 0..1: got shifted too (leave sticky)
1113 of
.guard
.eq(temp_s
[1]), # guard
1114 of
.round_bit
.eq(temp_s
[0]), # round
1117 with m
.Elif(increase
):
1118 temp_m
= Signal(mwid
+1, reset_less
=True)
1120 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1122 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1123 # connect multi-shifter to inp/out mantissa (and ediff)
1125 msr
.diff
.eq(ediff_n126
),
1126 self
.o
.z
.m
.eq(msr
.m
[3:]),
1127 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1128 # overflow in bits 0..1: got shifted too (leave sticky)
1129 of
.guard
.eq(temp_s
[2]), # guard
1130 of
.round_bit
.eq(temp_s
[1]), # round
1131 of
.sticky
.eq(temp_s
[0]), # sticky
1132 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
class FPNorm1ModMulti:
    """ Single-step normalisation module (multi-cycle variant).

        One evaluation performs at most one normalisation step on the
        selected input (in_z/in_of when in_select is set, otherwise
        temp_z/temp_of):

        * decrease: mantissa MSB is zero and exp_gt_n126 is set, so the
          mantissa is shifted up by one and the exponent decremented.
        * increase: exp_lt_n126 is set, so the mantissa is shifted down
          by one and the exponent incremented.

        out_norm is raised while either condition holds, i.e. while a
        further step is still required.
    """

    def __init__(self, width, single_cycle=True):
        # NOTE: single_cycle is accepted for interface compatibility but
        # is not used by this module.
        self.width = width  # read by elaborate()
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven in elaborate(); must exist before it is assigned there
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """ builds the combinatorial one-step normaliser and returns it
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies holding whichever source was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)

        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)

        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end

        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState):
    """ FSM state "normalise_1": wraps FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # NOTE: single_cycle is accepted but not used here.
        FPState.__init__(self, "normalise_1")
        # id_wid is forwarded so that construction matches the
        # FPNorm1ModSingle(width, id_wid) call made by FPNormToPack.
        # NOTE(review): FPNorm1ModSingle is defined outside this view -
        # confirm its constructor signature.
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        # NOTE(review): the body of this method is absent from the
        # reviewed text; "round" is the state name FPRound registers
        # under - confirm.
        m.next = "round"
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1", multi-cycle variant: wraps
        FPNorm1ModMulti and repeats while out_norm is raised.
    """

    def __init__(self, width, id_wid):
        # NOTE: id_wid is accepted but not used by this class.
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): no setup() method is visible on FPNorm1ModMulti
        # in the reviewed text - confirm this call resolves.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        # accept new input on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # latch this step's result so it can be fed back next cycle
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                # NOTE(review): this statement is absent from the
                # reviewed text; restored by analogy with the Else
                # branch below - confirm.
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            # NOTE(review): the next-state assignment is absent from the
            # reviewed text; "round" is FPRound's state name - confirm.
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact form): chains normalisation,
        rounding, corrections and packing into a single state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width  # read by ispec(), ospec() and setup()

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # without this call the chain (and the input i) is never wired in.
        # NOTE(review): StageChain is imported from example_buf_pipe;
        # confirm its setup(m, i) signature.
        chain.setup(m, i)

        self.out_z = pmod.ospec()

        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(pmod.o.z.v) # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ creates the record's two fields

        * width: passed to FPNumBase for the number field "z"
        * id_wid: bit-width of the identifier signal "mid"
    """
    # floating-point number field
    self.z = FPNumBase(width, False)
    # identifier carried alongside the number
    self.mid = Signal(id_wid, reset_less=True)
1311 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ creates the rounding module's input (i) and output (out_z)

        * width: passed on to the input/output spec constructors
        * id_wid: bit-width of the identifier carried with the data
    """
    # width must be stored before ispec()/ospec() run: both read it
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1323 return FPNorm1Data(self
.width
, self
.id_wid
)
1326 return FPRoundData(self
.width
, self
.id_wid
)
1328 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        registers this object as submodule "roundz" of m and connects
        the supplied input i to self.i combinatorially.
    """
    m.submodules.roundz = self
    connect_input = self.i.eq(i)
    m.d.comb += connect_input
def elaborate(self, platform):
    """ builds the rounding logic and returns the module

        the output defaults to a copy of the input.  when the input's
        roundz flag is set the mantissa is incremented, and if the
        mantissa was all 1s the exponent is incremented as well.
    """
    m = Module()
    m.d.comb += self.out_z.eq(self.i)
    with m.If(self.i.roundz):
        m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa rounds up
        with m.If(self.i.z.m == self.i.z.m1s): # all 1s
            m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
    return m
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod is constructed with (width, id_wid) elsewhere in
        # this file (see FPNormToPack); passing width alone would fail.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the rounding module exposes its result as "out_z" (it has no
        # attribute "o"), so the id is taken from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
class FPCorrectionsMod:
    """ Corrections module: copies the input to the output, except that
        the exponent is replaced by N127 when the input number reports
        is_denormalised.
    """

    def __init__(self, width, id_wid):
        # width must be stored before ispec()/ospec() run: both read it
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): the body of this method is absent from the
        # reviewed text; returning the module's output object is
        # presumed to be what StageChain expects - confirm.
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the corrections logic and returns the module
        """
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        # default: pass the input straight through
        m.d.comb += self.out_z.eq(self.i)
        with m.If(self.i.z.is_denormalised):
            m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as "out_z" (it has no
        # attribute "o"), so the id is taken from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # NOTE(review): the body of this method is absent from the
        # reviewed text; "pack" is the state name FPPack registers
        # under - confirm.
        m.next = "pack"
def __init__(self, width, id_wid):
    """ creates the record's two fields

        * width: passed to FPNumOut for the number field "z"
        * id_wid: bit-width of the identifier signal "mid"
    """
    # output-format floating-point number field
    self.z = FPNumOut(width, False)
    # identifier carried alongside the number
    self.mid = Signal(id_wid, reset_less=True)
1436 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ creates the pack module's input (i) and output (o)

        * width: passed on to the input/output spec constructors
        * id_wid: bit-width of the identifier carried with the data
    """
    # width must be stored before ispec()/ospec() run: both read it
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1448 return FPRoundData(self
.width
, self
.id_wid
)
1451 return FPPackData(self
.width
, self
.id_wid
)
1453 def process(self
, i
):
def setup(self, m, in_z):
    """ links module to inputs and outputs

        registers this object as submodule "pack" of m and connects
        the supplied input in_z to self.i combinatorially.
    """
    m.submodules.pack = self
    connect_input = self.i.eq(in_z)
    m.d.comb += connect_input
def elaborate(self, platform):
    """ builds the packing logic and returns the module

        the identifier is passed through unchanged.  when the input
        number reports is_overflowed the output is set via inf() using
        the input sign; otherwise it is built via create() from the
        input sign, exponent and mantissa.
    """
    m = Module()
    m.submodules.pack_in_z = self.i.z
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(self.i.z.is_overflowed):
        m.d.comb += self.o.z.inf(self.i.z.s)
    with m.Else():
        m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    return m
def __init__(self, width, id_wid):
    """ creates the record's two fields

        * width: passed to FPNumOut for the number field "z"
        * id_wid: bit-width of the identifier signal "mid"
    """
    # output-format floating-point number field
    self.z = FPNumOut(width, False)
    # identifier carried alongside the number
    self.mid = Signal(id_wid, reset_less=True)
1479 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod is constructed with (width, id_wid) elsewhere in
        # this file (see FPNormToPack); passing width alone would fail.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the pack module exposes its result as "o", and the packed
        # value lives at .z.v of the output record (same access path as
        # used by FPNormToPack).
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM output state: presents in_z on out_z with a stb/ack
        handshake, then moves to to_state (default "get_ops").
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # both are read by action(), so they must be kept
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        # the identifier is optional: only copied when one was supplied
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): the clock domain of this assignment is not
        # visible in the reviewed text; sync matches every other
        # assignment in this method - confirm.
        m.d.sync += [
          self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # receiver acknowledged: drop the strobe and move on
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM output state: presents in_z on the entry of out_zs selected
        by in_mid with a stb/ack handshake, then moves to to_state
        (default "get_ops").
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z  # read by action(), so it must be kept
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # local copies of the selected output's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # NOTE(review): the clock domain of this assignment is not
        # visible in the reviewed text; sync matches the strobe
        # assignments below - confirm.
        m.d.sync += [
          self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # receiver acknowledged: drop the strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ Input record for the adder: two operands (a, b) plus an
        identifier (mid).
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ creates the record's two fields

        * width: passed to FPOp for the operand field "z"
        * id_wid: bit-width of the identifier signal "mid"
    """
    # operand field
    self.z = FPOp(width)
    # identifier carried alongside the operand
    self.mid = Signal(id_wid, reset_less=True)
1577 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    # width must be stored before ispec()/ospec() run: both read it
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    # FSM states, filled in by add_state()
    self.states = []
1602 return FPADDBaseData(self
.width
, self
.id_wid
)
1605 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ registers an FSM state and hands it back

        the state is returned so that callers can write
        "st = self.add_state(SomeState(...))" and then call st.setup().
    """
    self.states.append(state)
    return state
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd

        builds either the compact or the longer stage list (selected by
        self.compact), then emits one FSM state per registered stage.
    """
    m = Module()
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t

    if self.compact:
        self.get_compact_fragment(m, platform)
    else:
        self.get_longer_fragment(m, platform)

    with m.FSM() as fsm:

        for state in self.states:
            with m.State(state.state_from):
                # NOTE(review): the state body is absent from the
                # reviewed text; every state class here provides
                # action(m) - confirm.
                state.action(m)

    return m
1630 def get_longer_fragment(self
, m
, platform
=None):
1632 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1634 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1638 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1639 sc
.setup(m
, a
, b
, self
.in_mid
)
1641 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1642 dn
.setup(m
, a
, b
, sc
.in_mid
)
1644 if self
.single_cycle
:
1645 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1646 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1648 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1649 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1651 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1652 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1654 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1655 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1657 if self
.single_cycle
:
1658 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1659 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1661 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1662 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1664 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1665 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1667 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1668 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1670 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1671 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1673 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1674 pa
.in_mid
, self
.out_mid
))
1676 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1677 pa
.in_mid
, self
.out_mid
))
1679 def get_compact_fragment(self
, m
, platform
=None):
1681 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1682 self
.width
, self
.id_wid
))
1683 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1685 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1688 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1691 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1692 n1
.setup(m
, alm
.a1o
)
1694 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1695 n1
.out_z
.mid
, self
.o
.mid
))
1697 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1698 sc
.o
.mid
, self
.o
.mid
))
1701 class FPADDBase(FPState
):
1703 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1706 * width: bit-width of IEEE754. supported: 16, 32, 64
1707 * id_wid: an identifier that is sync-connected to the input
1708 * single_cycle: True indicates each stage to complete in 1 clock
1710 FPState
.__init
__(self
, "fpadd")
1712 self
.single_cycle
= single_cycle
1713 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1714 self
.o
= self
.ospec()
1716 self
.in_t
= Trigger()
1717 self
.i
= self
.ispec()
1719 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1720 self
.in_accept
= Signal(reset_less
=True)
1721 self
.add_stb
= Signal(reset_less
=True)
1722 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1725 return self
.mod
.ispec()
1728 return self
.mod
.ospec()
1730 def setup(self
, m
, i
, add_stb
, in_mid
):
1731 m
.d
.comb
+= [self
.i
.eq(i
),
1732 self
.mod
.i
.eq(self
.i
),
1733 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1734 #self.add_stb.eq(add_stb),
1735 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1736 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1737 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1738 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1739 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1740 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1743 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1744 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1745 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1746 #m.d.sync += self.in_t.stb.eq(0)
1748 m
.submodules
.fpadd
= self
.mod
1750 def action(self
, m
):
1752 # in_accept is set on incoming strobe HIGH and ack LOW.
1753 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1755 #with m.If(self.in_t.ack):
1756 # m.d.sync += self.in_t.stb.eq(0)
1757 with m
.If(~self
.z_done
):
1758 # not done: test for accepting an incoming operand pair
1759 with m
.If(self
.in_accept
):
1761 self
.add_ack
.eq(1), # acknowledge receipt...
1762 self
.in_t
.stb
.eq(1), # initiate add
1765 m
.d
.sync
+= [self
.add_ack
.eq(0),
1766 self
.in_t
.stb
.eq(0),
1770 # done: acknowledge, and write out id and value
1771 m
.d
.sync
+= [self
.add_ack
.eq(1),
1778 if self
.in_mid
is not None:
1779 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1782 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1784 # move to output state on detecting z ack
1785 with m
.If(self
.out_z
.trigger
):
1786 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1789 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1793 def __init__(self
, width
, id_wid
):
1795 self
.id_wid
= id_wid
1797 for i
in range(rs_sz
):
1799 out_z
.name
= "out_z_%d" % i
1801 self
.res
= Array(res
)
1802 self
.in_z
= FPOp(width
)
1803 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
def setup(self, m, in_z, in_mid):
    """ connects the supplied in_z and in_mid to this object's own
        in_z and in_mid, combinatorially.
    """
    connections = [
        self.in_z.eq(in_z),
        self.in_mid.eq(in_mid),
    ]
    m.d.comb += connections
1809 def get_fragment(self
, platform
=None):
1810 """ creates the HDL code-fragment for FPAdd
1813 m
.submodules
.res_in_z
= self
.in_z
1814 m
.submodules
+= self
.res
1826 """ FPADD: stages as follows:
1832 FPAddBase---> FPAddBaseMod
1834 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1836 FPAddBase is tricky: it is both a stage and *has* stages.
1837 Connection to FPAddBaseMod therefore requires an in stb/ack
1838 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1839 needs to be the thing that raises the incoming stb.
1842 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1845 * width: bit-width of IEEE754. supported: 16, 32, 64
1846 * id_wid: an identifier that is sync-connected to the input
1847 * single_cycle: True indicates each stage to complete in 1 clock
1850 self
.id_wid
= id_wid
1851 self
.single_cycle
= single_cycle
1853 #self.out_z = FPOp(width)
1854 self
.ids
= FPID(id_wid
)
1857 for i
in range(rs_sz
):
1860 in_a
.name
= "in_a_%d" % i
1861 in_b
.name
= "in_b_%d" % i
1862 rs
.append((in_a
, in_b
))
1866 for i
in range(rs_sz
):
1868 out_z
.name
= "out_z_%d" % i
1870 self
.res
= Array(res
)
def add_state(self, state):
    """ registers an FSM state and hands it back

        the state is returned so that callers can write
        "st = self.add_state(SomeState(...))" and keep using st.
    """
    self.states.append(state)
    return state
1878 def get_fragment(self
, platform
=None):
1879 """ creates the HDL code-fragment for FPAdd
1882 m
.submodules
+= self
.rs
1884 in_a
= self
.rs
[0][0]
1885 in_b
= self
.rs
[0][1]
1887 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1892 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1897 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1898 ab
= self
.add_state(ab
)
1899 abd
= ab
.ispec() # create an input spec object for FPADDBase
1900 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1901 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1904 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1907 with m
.FSM() as fsm
:
1909 for state
in self
.states
:
1910 with m
.State(state
.state_from
):
1916 if __name__
== "__main__":
1918 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1919 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1920 alu
.rs
[0][1].ports() + \
1921 alu
.res
[0].ports() + \
1922 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1924 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1925 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1926 alu
.in_t
.ports() + \
1927 alu
.out_z
.ports() + \
1928 [alu
.in_mid
, alu
.out_mid
])
1931 # works... but don't use, just do "python fname.py convert -t v"
1932 #print (verilog.convert(alu, ports=[
1933 # ports=alu.in_a.ports() + \
1934 # alu.in_b.ports() + \
1935 # alu.out_z.ports())