1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember the label of the state this object stands for.

    state_from: label stored verbatim on the instance.  Subclasses in
        this file pass strings such as "special_cases", "align" or
        "add_0"; FPGetOp.setup also uses it as a submodule attribute
        name.
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """Create the operand port and the registers of a single-operand getter.

    NOTE(review): the enclosing class header is not visible here;
    presumably this is FPGetOpMod (FPGetOp constructs FPGetOpMod(width)).

    width: forwarded to FPOp and used as the width of out_op.
    """
    # incoming operand; elaborate() reads its .ack, .stb and .v members
    self.in_op = FPOp(width)
    # receives in_op.v when out_decode is asserted
    self.out_op = Signal(width)
    # driven in elaborate() as in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Build the two-operand fetch state around an FPGet2OpMod.

    in_state:  label handed to FPState.__init__ (stored as state_from).
    out_state: value assigned to m.next once out_decode is asserted.
    width, id_wid: forwarded unchanged to FPGet2OpMod.
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # state-level output record, same layout as the module's output
    self.o = self.mod.ospec()
    self.in_stb = Signal(reset_less=True)
    # setup() drives this from self.mod.ack
    self.out_ack = Signal(reset_less=True)
    # setup() drives this from self.mod.trigger
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Create a record of two FPNumBase operands plus a multiplex id.

    NOTE(review): the enclosing class header is not visible here;
    presumably this is FPNumBase2Ops.

    width:   forwarded to both FPNumBase instances.
    id_wid:  width of the mid signal.
    m_extra: forwarded as FPNumBase's second positional argument.
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """Create the special-cases record: operands a/b, result z, flag, id.

    NOTE(review): the enclosing class header is not visible here;
    presumably this is FPSCData.

    width:  forwarded to the FPNumBase / FPNumOut members.
    id_wid: width of the mid signal.
    """
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    self.z = FPNumOut(width, False)
    # set to 1 by the special-cases logic when z already holds the result
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
273 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
274 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
277 class FPAddSpecialCasesMod
:
278 """ special cases: NaNs, infs, zeros, denormalised
279 NOTE: some of these are unique to add. see "Special Operations"
280 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
283 def __init__(self
, width
, id_wid
):
286 self
.i
= self
.ispec()
287 self
.o
= self
.ospec()
290 return FPNumBase2Ops(self
.width
, self
.id_wid
)
293 return FPSCData(self
.width
, self
.id_wid
)
295 def setup(self
, m
, i
):
296 """ links module to inputs and outputs
298 m
.submodules
.specialcases
= self
299 m
.d
.comb
+= self
.i
.eq(i
)
301 def elaborate(self
, platform
):
304 m
.submodules
.sc_in_a
= self
.i
.a
305 m
.submodules
.sc_in_b
= self
.i
.b
306 m
.submodules
.sc_out_z
= self
.o
.z
309 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
312 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
314 # if a is NaN or b is NaN return NaN
315 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
316 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
317 m
.d
.comb
+= self
.o
.z
.nan(0)
319 # XXX WEIRDNESS for FP16 non-canonical NaN handling
322 ## if a is zero and b is NaN return -b
323 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
324 # m.d.comb += self.o.out_do_z.eq(1)
325 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
327 ## if b is zero and a is NaN return -a
328 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
329 # m.d.comb += self.o.out_do_z.eq(1)
330 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
332 ## if a is -zero and b is NaN return -b
333 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
334 # m.d.comb += self.o.out_do_z.eq(1)
335 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
337 ## if b is -zero and a is NaN return -a
338 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
339 # m.d.comb += self.o.out_do_z.eq(1)
340 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
342 # if a is inf return inf (or NaN)
343 with m
.Elif(self
.i
.a
.is_inf
):
344 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
346 # if a is inf and signs don't match return NaN
347 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
348 m
.d
.comb
+= self
.o
.z
.nan(0)
350 # if b is inf return inf
351 with m
.Elif(self
.i
.b
.is_inf
):
352 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
355 # if a is zero and b zero return signed-a/b
356 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
362 # if a is zero return b
363 with m
.Elif(self
.i
.a
.is_zero
):
364 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
368 # if b is zero return a
369 with m
.Elif(self
.i
.b
.is_zero
):
370 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
371 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
374 # if a equal to -b return zero (+ve zero)
375 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
376 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
377 m
.d
.comb
+= self
.o
.z
.zero(0)
379 # Denormalised Number checks next, so pass a/b data through
381 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
382 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
383 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
385 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
391 def __init__(self
, id_wid
):
394 self
.in_mid
= Signal(id_wid
, reset_less
=True)
395 self
.out_mid
= Signal(id_wid
, reset_less
=True)
401 if self
.id_wid
is not None:
402 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
405 class FPAddSpecialCases(FPState
):
406 """ special cases: NaNs, infs, zeros, denormalised
407 NOTE: some of these are unique to add. see "Special Operations"
408 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the "special_cases" state around an FPAddSpecialCasesMod.

    width, id_wid: forwarded to FPAddSpecialCasesMod.

    FPAddSpecialCasesMod.__init__ takes (width, id_wid); the previous
    call passed only width and therefore raised TypeError on
    construction.
    """
    FPState.__init__(self, "special_cases")
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output record with the same layout as the module's output
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
417 def setup(self
, m
, i
):
418 """ links module to inputs and outputs
420 self
.mod
.setup(m
, i
, self
.out_do_z
)
421 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
422 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
426 with m
.If(self
.out_do_z
):
429 m
.next
= "denormalise"
432 class FPAddSpecialCasesDeNorm(FPState
):
433 """ special cases: NaNs, infs, zeros, denormalised
434 NOTE: some of these are unique to add. see "Special Operations"
435 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the combined special-cases + denormalise state.

    width, id_wid: forwarded to both FPAddSpecialCasesMod and
        FPAddDeNormMod.
    """
    FPState.__init__(self, "special_cases")
    # special-cases module and a record shaped like its output
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    # setup() drives this from self.smod.o.out_do_z
    self.out_do_z = Signal(reset_less=True)

    # denormalise module (setup() feeds it self.smod.o) and its output record
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
447 def setup(self
, m
, i
):
448 """ links module to inputs and outputs
450 self
.smod
.setup(m
, i
)
451 self
.dmod
.setup(m
, self
.smod
.o
)
452 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
455 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
456 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
458 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
461 with m
.If(self
.out_do_z
):
467 class FPAddDeNormMod(FPState
):
469 def __init__(self
, width
, id_wid
):
472 self
.i
= self
.ispec()
473 self
.o
= self
.ospec()
476 return FPSCData(self
.width
, self
.id_wid
)
479 return FPSCData(self
.width
, self
.id_wid
)
481 def setup(self
, m
, i
):
482 """ links module to inputs and outputs
484 m
.submodules
.denormalise
= self
485 m
.d
.comb
+= self
.i
.eq(i
)
487 def elaborate(self
, platform
):
489 m
.submodules
.denorm_in_a
= self
.i
.a
490 m
.submodules
.denorm_in_b
= self
.i
.b
491 m
.submodules
.denorm_out_a
= self
.o
.a
492 m
.submodules
.denorm_out_b
= self
.o
.b
494 with m
.If(~self
.i
.out_do_z
):
495 # XXX hmmm, don't like repeating identical code
496 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
497 with m
.If(self
.i
.a
.exp_n127
):
498 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
500 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
502 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
503 with m
.If(self
.i
.b
.exp_n127
):
504 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
506 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
508 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
509 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
510 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
515 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Build the "denormalise" state around an FPAddDeNormMod.

    width:  forwarded to FPAddDeNormMod and to the out_a / out_b numbers.
    id_wid: forwarded to FPAddDeNormMod.

    FPAddDeNormMod.__init__ takes (width, id_wid); the previous call
    passed only width and therefore raised TypeError on construction.
    """
    FPState.__init__(self, "denormalise")
    self.mod = FPAddDeNormMod(width, id_wid)
    # registered copies of the module outputs (assigned in setup())
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
523 def setup(self
, m
, i
):
524 """ links module to inputs and outputs
528 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
529 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
532 # Denormalised Number checks
536 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the operand ports for the multi-cycle alignment module.

    width: forwarded to every FPNumBase / FPNumIn created here.
    """
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # elaborate() defaults this to 0 and sets it to 1 in its
    # "exponents equal" branch
    self.exp_eq = Signal(reset_less=True)
545 def elaborate(self
, platform
):
546 # This one however (single-cycle) will do the shift
551 m
.submodules
.align_in_a
= self
.in_a
552 m
.submodules
.align_in_b
= self
.in_b
553 m
.submodules
.align_out_a
= self
.out_a
554 m
.submodules
.align_out_b
= self
.out_b
556 # NOTE: this does *not* do single-cycle multi-shifting,
557 # it *STAYS* in the align state until exponents match
559 # exponent of a greater than b: shift b down
560 m
.d
.comb
+= self
.exp_eq
.eq(0)
561 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
562 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
563 agtb
= Signal(reset_less
=True)
564 altb
= Signal(reset_less
=True)
565 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
566 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
568 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
569 # exponent of b greater than a: shift a down
571 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
572 # exponents equal: move to next stage.
574 m
.d
.comb
+= self
.exp_eq
.eq(1)
578 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Build the "align" state around an FPAddAlignMultiMod.

    width:  forwarded to FPAddAlignMultiMod and to out_a / out_b.
    id_wid: accepted but not used by this constructor.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # registered copies of the module outputs (assigned in setup())
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # setup() drives this from self.mod.exp_eq
    self.exp_eq = Signal(reset_less=True)
587 def setup(self
, m
, in_a
, in_b
):
588 """ links module to inputs and outputs
590 m
.submodules
.align
= self
.mod
591 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
592 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
593 #m.d.comb += self.out_a.eq(self.mod.out_a)
594 #m.d.comb += self.out_b.eq(self.mod.out_b)
595 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
596 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
597 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
600 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Create a record of two FPNumIn operands, result z, flag and id.

    NOTE(review): the enclosing class header is not visible here;
    presumably this is FPNumIn2Ops.

    width:  forwarded to the FPNumIn / FPNumOut members.
    id_wid: width of the mid signal.
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
614 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
615 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
618 class FPAddAlignSingleMod
:
620 def __init__(self
, width
, id_wid
):
623 self
.i
= self
.ispec()
624 self
.o
= self
.ospec()
627 return FPNumBase2Ops(self
.width
, self
.id_wid
)
630 return FPNumIn2Ops(self
.width
, self
.id_wid
)
632 def process(self
, i
):
635 def setup(self
, m
, i
):
636 """ links module to inputs and outputs
638 m
.submodules
.align
= self
639 m
.d
.comb
+= self
.i
.eq(i
)
641 def elaborate(self
, platform
):
642 """ Aligns A against B or B against A, depending on which has the
643 greater exponent. This is done in a *single* cycle using
644 variable-width bit-shift
646 the shifter used here is quite expensive in terms of gates.
647 Mux A or B in (and out) into temporaries, as only one of them
648 needs to be aligned against the other
652 m
.submodules
.align_in_a
= self
.i
.a
653 m
.submodules
.align_in_b
= self
.i
.b
654 m
.submodules
.align_out_a
= self
.o
.a
655 m
.submodules
.align_out_b
= self
.o
.b
657 # temporary (muxed) input and output to be shifted
658 t_inp
= FPNumBase(self
.width
)
659 t_out
= FPNumIn(None, self
.width
)
660 espec
= (len(self
.i
.a
.e
), True)
661 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
662 m
.submodules
.align_t_in
= t_inp
663 m
.submodules
.align_t_out
= t_out
664 m
.submodules
.multishift_r
= msr
666 ediff
= Signal(espec
, reset_less
=True)
667 ediffr
= Signal(espec
, reset_less
=True)
668 tdiff
= Signal(espec
, reset_less
=True)
669 elz
= Signal(reset_less
=True)
670 egz
= Signal(reset_less
=True)
672 # connect multi-shifter to t_inp/out mantissa (and tdiff)
673 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
674 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
675 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
676 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
677 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
679 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
680 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
681 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
682 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
684 # default: A-exp == B-exp, A and B untouched (fall through)
685 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
686 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
687 # only one shifter (muxed)
688 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
689 # exponent of a greater than b: shift b down
691 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
694 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
696 # exponent of b greater than a: shift a down
698 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
701 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
704 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
709 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Build the "align" state around an FPAddAlignSingleMod.

    width:  forwarded to FPAddAlignSingleMod and to out_a / out_b.
    id_wid: forwarded to FPAddAlignSingleMod.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # registered copies of the aligned operands (assigned in setup())
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
717 def setup(self
, m
, i
):
718 """ links module to inputs and outputs
722 # NOTE: could be done as comb
723 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
724 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
730 class FPAddAlignSingleAdd(FPState
):
732 def __init__(self
, width
, id_wid
):
733 FPState
.__init
__(self
, "align")
736 self
.a1o
= self
.ospec()
739 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
742 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
744 def setup(self
, m
, i
):
745 """ links module to inputs and outputs
748 # chain AddAlignSingle, AddStage0 and AddStage1
749 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
750 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
751 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
753 chain
= StageChain([mod
, a0mod
, a1mod
])
756 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
759 m
.next
= "normalise_1"
762 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the add-stage-0 output record: z, mantissa total and id.

    width:  forwarded to FPNumBase for z.
    id_wid: width of the mid signal.
    """
    self.z = FPNumBase(width, False)
    # mantissa sum/difference, four bits wider than z's mantissa
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
770 return [self
.z
.eq(i
.z
), self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
773 class FPAddStage0Mod
:
775 def __init__(self
, width
, id_wid
):
778 self
.i
= self
.ispec()
779 self
.o
= self
.ospec()
782 return FPNumBase2Ops(self
.width
, self
.id_wid
)
785 return FPAddStage0Data(self
.width
, self
.id_wid
)
787 def process(self
, i
):
790 def setup(self
, m
, i
):
791 """ links module to inputs and outputs
793 m
.submodules
.add0
= self
794 m
.d
.comb
+= self
.i
.eq(i
)
796 def elaborate(self
, platform
):
798 m
.submodules
.add0_in_a
= self
.i
.a
799 m
.submodules
.add0_in_b
= self
.i
.b
800 m
.submodules
.add0_out_z
= self
.o
.z
802 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
803 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
805 # store intermediate tests (and zero-extended mantissas)
806 seq
= Signal(reset_less
=True)
807 mge
= Signal(reset_less
=True)
808 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
809 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
810 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
811 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
812 am0
.eq(Cat(self
.i
.a
.m
, 0)),
813 bm0
.eq(Cat(self
.i
.b
.m
, 0))
815 # same-sign (both negative or both positive) add mantissas
818 self
.o
.tot
.eq(am0
+ bm0
),
819 self
.o
.z
.s
.eq(self
.i
.a
.s
)
821 # a mantissa greater than b, use a
824 self
.o
.tot
.eq(am0
- bm0
),
825 self
.o
.z
.s
.eq(self
.i
.a
.s
)
827 # b mantissa greater than a, use b
830 self
.o
.tot
.eq(bm0
- am0
),
831 self
.o
.z
.s
.eq(self
.i
.b
.s
)
836 class FPAddStage0(FPState
):
837 """ First stage of add. covers same-sign (add) and subtract
838 special-casing when mantissas are greater or equal, to
839 give greatest accuracy.
def __init__(self, width, id_wid):
    """Build the "add_0" state around an FPAddStage0Mod.

    width, id_wid: forwarded to FPAddStage0Mod.

    FPAddStage0Mod.__init__ takes (width, id_wid); the previous call
    passed only width and therefore raised TypeError on construction.
    """
    FPState.__init__(self, "add_0")
    self.mod = FPAddStage0Mod(width, id_wid)
    # registered copy of the module output (assigned in setup())
    self.o = self.mod.ospec()
847 def setup(self
, m
, i
):
848 """ links module to inputs and outputs
852 # NOTE: these could be done as combinatorial (merge add0+add1)
853 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
859 class FPAddStage1Data
:
861 def __init__(self
, width
, id_wid
):
862 self
.z
= FPNumBase(width
, False)
864 self
.mid
= Signal(id_wid
, reset_less
=True)
867 return [self
.z
.eq(i
.z
), self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
871 class FPAddStage1Mod(FPState
):
872 """ Second stage of add: preparation for normalisation.
873 detects when tot sum is too big (tot[27] is kinda a carry bit)
876 def __init__(self
, width
, id_wid
):
879 self
.i
= self
.ispec()
880 self
.o
= self
.ospec()
883 return FPAddStage0Data(self
.width
, self
.id_wid
)
886 return FPAddStage1Data(self
.width
, self
.id_wid
)
888 def process(self
, i
):
891 def setup(self
, m
, i
):
892 """ links module to inputs and outputs
894 m
.submodules
.add1
= self
895 m
.submodules
.add1_out_overflow
= self
.o
.of
897 m
.d
.comb
+= self
.i
.eq(i
)
899 def elaborate(self
, platform
):
901 #m.submodules.norm1_in_overflow = self.in_of
902 #m.submodules.norm1_out_overflow = self.out_of
903 #m.submodules.norm1_in_z = self.in_z
904 #m.submodules.norm1_out_z = self.out_z
905 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
906 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
907 # tot[-1] (MSB) gets set when the sum overflows. shift result down
908 with m
.If(self
.i
.tot
[-1]):
910 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
911 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
912 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
913 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
914 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
915 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
917 # tot[-1] (MSB) zero case
920 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
921 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
922 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
923 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
924 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
929 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Build the "add_1" state around an FPAddStage1Mod.

    width:  forwarded to FPAddStage1Mod and to out_z.
    id_wid: forwarded to FPAddStage1Mod.

    FPAddStage1Mod.__init__ takes (width, id_wid); the previous call
    passed only width and therefore raised TypeError on construction.
    """
    FPState.__init__(self, "add_1")
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # setup() clears this; a later statement in this class sets it to 1
    self.norm_stb = Signal()
938 def setup(self
, m
, i
):
939 """ links module to inputs and outputs
943 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
945 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
946 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
947 m
.d
.sync
+= self
.norm_stb
.eq(1)
950 m
.next
= "normalise_1"
953 class FPNormaliseModSingle
:
955 def __init__(self
, width
):
957 self
.in_z
= self
.ispec()
958 self
.out_z
= self
.ospec()
961 return FPNumBase(self
.width
, False)
964 return FPNumBase(self
.width
, False)
966 def setup(self
, m
, i
):
967 """ links module to inputs and outputs
969 m
.submodules
.normalise
= self
970 m
.d
.comb
+= self
.i
.eq(i
)
972 def elaborate(self
, platform
):
975 mwid
= self
.out_z
.m_width
+2
976 pe
= PriorityEncoder(mwid
)
977 m
.submodules
.norm_pe
= pe
979 m
.submodules
.norm1_out_z
= self
.out_z
980 m
.submodules
.norm1_in_z
= self
.in_z
982 in_z
= FPNumBase(self
.width
, False)
984 m
.submodules
.norm1_insel_z
= in_z
985 m
.submodules
.norm1_insel_overflow
= in_of
987 espec
= (len(in_z
.e
), True)
988 ediff_n126
= Signal(espec
, reset_less
=True)
989 msr
= MultiShiftRMerge(mwid
, espec
)
990 m
.submodules
.multishift_r
= msr
992 m
.d
.comb
+= in_z
.eq(self
.in_z
)
993 m
.d
.comb
+= in_of
.eq(self
.in_of
)
994 # initialise out from in (overridden below)
995 m
.d
.comb
+= self
.out_z
.eq(in_z
)
996 m
.d
.comb
+= self
.out_of
.eq(in_of
)
997 # normalisation decrease condition
998 decrease
= Signal(reset_less
=True)
999 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1001 with m
.If(decrease
):
1002 # *sigh* not entirely obvious: count leading zeros (clz)
1003 # with a PriorityEncoder: to find from the MSB
1004 # we reverse the order of the bits.
1005 temp_m
= Signal(mwid
, reset_less
=True)
1006 temp_s
= Signal(mwid
+1, reset_less
=True)
1007 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1009 # cat round and guard bits back into the mantissa
1010 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1011 pe
.i
.eq(temp_m
[::-1]), # inverted
1012 clz
.eq(pe
.o
), # count zeros from MSB down
1013 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1014 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1015 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Create the normalisation output record: roundz flag, z and id.

    NOTE(review): the enclosing class header is not visible here;
    presumably this is FPNorm1Data.

    width:  forwarded to FPNumBase for z.
    id_wid: width of the mid signal.
    """
    # FPNorm1ModSingle.elaborate drives this from its overflow's roundz
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1028 return [self
.z
.eq(i
.z
), self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1031 class FPNorm1ModSingle
:
1033 def __init__(self
, width
, id_wid
):
1035 self
.id_wid
= id_wid
1036 self
.i
= self
.ispec()
1037 self
.o
= self
.ospec()
1040 return FPAddStage1Data(self
.width
, self
.id_wid
)
1043 return FPNorm1Data(self
.width
, self
.id_wid
)
1045 def setup(self
, m
, i
):
1046 """ links module to inputs and outputs
1048 m
.submodules
.normalise_1
= self
1049 m
.d
.comb
+= self
.i
.eq(i
)
1051 def process(self
, i
):
1054 def elaborate(self
, platform
):
1057 mwid
= self
.o
.z
.m_width
+2
1058 pe
= PriorityEncoder(mwid
)
1059 m
.submodules
.norm_pe
= pe
1062 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1064 m
.submodules
.norm1_out_z
= self
.o
.z
1065 m
.submodules
.norm1_out_overflow
= of
1066 m
.submodules
.norm1_in_z
= self
.i
.z
1067 m
.submodules
.norm1_in_overflow
= self
.i
.of
1070 m
.submodules
.norm1_insel_z
= i
.z
1071 m
.submodules
.norm1_insel_overflow
= i
.of
1073 espec
= (len(i
.z
.e
), True)
1074 ediff_n126
= Signal(espec
, reset_less
=True)
1075 msr
= MultiShiftRMerge(mwid
, espec
)
1076 m
.submodules
.multishift_r
= msr
1078 m
.d
.comb
+= i
.eq(self
.i
)
1079 # initialise out from in (overridden below)
1080 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1081 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1082 m
.d
.comb
+= of
.eq(i
.of
)
1083 # normalisation increase/decrease conditions
1084 decrease
= Signal(reset_less
=True)
1085 increase
= Signal(reset_less
=True)
1086 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1087 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1089 with m
.If(decrease
):
1090 # *sigh* not entirely obvious: count leading zeros (clz)
1091 # with a PriorityEncoder: to find from the MSB
1092 # we reverse the order of the bits.
1093 temp_m
= Signal(mwid
, reset_less
=True)
1094 temp_s
= Signal(mwid
+1, reset_less
=True)
1095 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1096 # make sure that the amount to decrease by does NOT
1097 # go below the minimum non-INF/NaN exponent
1098 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1101 # cat round and guard bits back into the mantissa
1102 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1103 pe
.i
.eq(temp_m
[::-1]), # inverted
1104 clz
.eq(limclz
), # count zeros from MSB down
1105 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1106 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1107 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1108 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1109 # overflow in bits 0..1: got shifted too (leave sticky)
1110 of
.guard
.eq(temp_s
[1]), # guard
1111 of
.round_bit
.eq(temp_s
[0]), # round
1114 with m
.Elif(increase
):
1115 temp_m
= Signal(mwid
+1, reset_less
=True)
1117 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1119 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1120 # connect multi-shifter to inp/out mantissa (and ediff)
1122 msr
.diff
.eq(ediff_n126
),
1123 self
.o
.z
.m
.eq(msr
.m
[3:]),
1124 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1125 # overflow in bits 0..1: got shifted too (leave sticky)
1126 of
.guard
.eq(temp_s
[2]), # guard
1127 of
.round_bit
.eq(temp_s
[1]), # round
1128 of
.sticky
.eq(temp_s
[0]), # sticky
1129 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1135 class FPNorm1ModMulti
:
1137 def __init__(self
, width
, single_cycle
=True):
1139 self
.in_select
= Signal(reset_less
=True)
1140 self
.in_z
= FPNumBase(width
, False)
1141 self
.in_of
= Overflow()
1142 self
.temp_z
= FPNumBase(width
, False)
1143 self
.temp_of
= Overflow()
1144 self
.out_z
= FPNumBase(width
, False)
1145 self
.out_of
= Overflow()
def elaborate(self, platform):
    """ creates the combinatorial logic for one normalisation step

        The working value is taken from (in_z, in_of) when in_select is
        set, otherwise from (temp_z, temp_of).  The outputs default to a
        copy of that working value and are then overridden by at most
        one adjustment:

        * decrease (in_z.m_msbzero & in_z.exp_gt_n126): exponent - 1,
          mantissa shifted up by one, guard bit moved into mantissa bit 0
        * increase (in_z.exp_lt_n126): exponent + 1, mantissa shifted
          down by one, shifted-out bits folded into guard/round/sticky

        out_norm is high whenever either adjustment applies, i.e. when
        a further normalisation step is still required.

        platform is not used by this method.
    """
    m = Module()

    m.submodules.norm1_out_z = self.out_z
    m.submodules.norm1_out_overflow = self.out_of
    m.submodules.norm1_temp_z = self.temp_z
    m.submodules.norm1_temp_of = self.temp_of
    m.submodules.norm1_in_z = self.in_z
    m.submodules.norm1_in_overflow = self.in_of

    # local copies holding whichever source was selected
    in_z = FPNumBase(self.width, False)
    in_of = Overflow()
    m.submodules.norm1_insel_z = in_z
    m.submodules.norm1_insel_overflow = in_of

    # select which of temp or in z/of to use
    with m.If(self.in_select):
        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
    with m.Else():
        m.d.comb += in_z.eq(self.temp_z)
        m.d.comb += in_of.eq(self.temp_of)
    # initialise out from in (overridden below)
    m.d.comb += self.out_z.eq(in_z)
    m.d.comb += self.out_of.eq(in_of)
    # normalisation increase/decrease conditions
    decrease = Signal(reset_less=True)
    increase = Signal(reset_less=True)
    m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
    m.d.comb += increase.eq(in_z.exp_lt_n126)
    m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
    # the overrides are in the comb domain, the same domain as the
    # defaults above; later assignments take priority over earlier ones
    with m.If(decrease):
        m.d.comb += [
            self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
            self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
            self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
            self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
            self.out_of.round_bit.eq(0),        # reset round bit
            self.out_of.m0.eq(in_of.guard),
        ]
    with m.Elif(increase):
        m.d.comb += [
            self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
            self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
            self.out_of.guard.eq(in_z.m[0]),
            self.out_of.m0.eq(in_z.m[1]),
            self.out_of.round_bit.eq(in_of.guard),
            self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
        ]

    return m
1202 class FPNorm1Single(FPState
):
1204 def __init__(self
, width
, id_wid
, single_cycle
=True):
1205 FPState
.__init
__(self
, "normalise_1")
1206 self
.mod
= FPNorm1ModSingle(width
)
1207 self
.o
= self
.ospec()
1208 self
.out_z
= FPNumBase(width
, False)
1209 self
.out_roundz
= Signal(reset_less
=True)
1212 return self
.mod
.ispec()
1215 return self
.mod
.ospec()
1217 def setup(self
, m
, i
):
1218 """ links module to inputs and outputs
1220 self
.mod
.setup(m
, i
)
1222 def action(self
, m
):
1226 class FPNorm1Multi(FPState
):
1228 def __init__(self
, width
, id_wid
):
1229 FPState
.__init
__(self
, "normalise_1")
1230 self
.mod
= FPNorm1ModMulti(width
)
1231 self
.stb
= Signal(reset_less
=True)
1232 self
.ack
= Signal(reset
=0, reset_less
=True)
1233 self
.out_norm
= Signal(reset_less
=True)
1234 self
.in_accept
= Signal(reset_less
=True)
1235 self
.temp_z
= FPNumBase(width
)
1236 self
.temp_of
= Overflow()
1237 self
.out_z
= FPNumBase(width
)
1238 self
.out_roundz
= Signal(reset_less
=True)
1240 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1241 """ links module to inputs and outputs
1243 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1244 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1245 self
.out_z
, self
.out_norm
)
1247 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1248 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1250 def action(self
, m
):
1251 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1252 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1253 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1254 with m
.If(self
.out_norm
):
1255 with m
.If(self
.in_accept
):
1260 m
.d
.sync
+= self
.ack
.eq(0)
1262 # normalisation not required (or done).
1264 m
.d
.sync
+= self
.ack
.eq(1)
1265 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" for the compact pipeline: runs
        normalisation, rounding, corrections and packing as one chain
        and registers the packed result, then moves to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ width and id_wid are stored and passed unchanged to every
            module and data spec created by this state
        """
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width # read by ispec(), ospec() and setup()

    def ispec(self):
        """ returns a new input data object for the chain
        """
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        """ returns a new output data object for the chain
        """
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs

            m is the parent module; i is the input data fed to the
            first module of the chain.  Creates self.out_z, which
            receives the packed value and id in the sync domain.
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # NOTE(review): without this call neither m nor i is used and
        # pmod.o is never driven; StageChain.setup(m, i) is assumed to
        # do the wiring - confirm against example_buf_pipe.
        chain.setup(m, i)
        self.out_z = pmod.ospec()

        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(pmod.o.z.v) # outputs packed result

    def action(self, m):
        """ FSM action: always proceed to the "pack_put_z" state
        """
        m.next = "pack_put_z"
1303 def __init__(self
, width
, id_wid
):
1304 self
.z
= FPNumBase(width
, False)
1305 self
.mid
= Signal(id_wid
, reset_less
=True)
1308 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1313 def __init__(self
, width
, id_wid
):
1315 self
.id_wid
= id_wid
1316 self
.i
= self
.ispec()
1317 self
.out_z
= self
.ospec()
1320 return FPNorm1Data(self
.width
, self
.id_wid
)
1323 return FPRoundData(self
.width
, self
.id_wid
)
1325 def process(self
, i
):
1328 def setup(self
, m
, i
):
1329 m
.submodules
.roundz
= self
1330 m
.d
.comb
+= self
.i
.eq(i
)
1332 def elaborate(self
, platform
):
1334 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1335 with m
.If(self
.i
.roundz
):
1336 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1337 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1338 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1342 class FPRound(FPState
):
1344 def __init__(self
, width
, id_wid
):
1345 FPState
.__init
__(self
, "round")
1346 self
.mod
= FPRoundMod(width
)
1347 self
.out_z
= self
.ospec()
1350 return self
.mod
.ispec()
1353 return self
.mod
.ospec()
1355 def setup(self
, m
, i
):
1356 """ links module to inputs and outputs
1358 self
.mod
.setup(m
, i
)
1361 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1362 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1364 def action(self
, m
):
1365 m
.next
= "corrections"
1368 class FPCorrectionsMod
:
1370 def __init__(self
, width
, id_wid
):
1372 self
.id_wid
= id_wid
1373 self
.i
= self
.ispec()
1374 self
.out_z
= self
.ospec()
1377 return FPRoundData(self
.width
, self
.id_wid
)
1380 return FPRoundData(self
.width
, self
.id_wid
)
1382 def process(self
, i
):
1385 def setup(self
, m
, i
):
1386 """ links module to inputs and outputs
1388 m
.submodules
.corrections
= self
1389 m
.d
.comb
+= self
.i
.eq(i
)
1391 def elaborate(self
, platform
):
1393 m
.submodules
.corr_in_z
= self
.i
.z
1394 m
.submodules
.corr_out_z
= self
.out_z
.z
1395 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1396 with m
.If(self
.i
.z
.is_denormalised
):
1397 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1401 class FPCorrections(FPState
):
1403 def __init__(self
, width
, id_wid
):
1404 FPState
.__init
__(self
, "corrections")
1405 self
.mod
= FPCorrectionsMod(width
)
1406 self
.out_z
= self
.ospec()
1409 return self
.mod
.ispec()
1412 return self
.mod
.ospec()
1414 def setup(self
, m
, in_z
):
1415 """ links module to inputs and outputs
1417 self
.mod
.setup(m
, in_z
)
1419 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1420 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1422 def action(self
, m
):
1428 def __init__(self
, width
, id_wid
):
1429 self
.z
= FPNumOut(width
, False)
1430 self
.mid
= Signal(id_wid
, reset_less
=True)
1433 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1438 def __init__(self
, width
, id_wid
):
1440 self
.id_wid
= id_wid
1441 self
.i
= self
.ispec()
1442 self
.o
= self
.ospec()
1445 return FPRoundData(self
.width
, self
.id_wid
)
1448 return FPPackData(self
.width
, self
.id_wid
)
1450 def process(self
, i
):
1453 def setup(self
, m
, in_z
):
1454 """ links module to inputs and outputs
1456 m
.submodules
.pack
= self
1457 m
.d
.comb
+= self
.i
.eq(in_z
)
1459 def elaborate(self
, platform
):
1461 m
.submodules
.pack_in_z
= self
.i
.z
1462 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1463 with m
.If(self
.i
.z
.is_overflowed
):
1464 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1466 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1471 def __init__(self
, width
, id_wid
):
1472 self
.z
= FPNumOut(width
, False)
1473 self
.mid
= Signal(id_wid
, reset_less
=True)
1476 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """ FSM state "pack": wraps an FPPackMod, registers its packed
        output and id, then moves to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ width and id_wid are both passed to FPPackMod, whose
            constructor requires the two of them
        """
        FPState.__init__(self, "pack")
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs

            m is the parent module; in_z is the data handed to
            FPPackMod.setup.
        """
        self.mod.setup(m, in_z)

        # FPPackMod publishes its result as .o (not .out_z), and the
        # ospec data object keeps the packed value under .z.v - the
        # same access path FPNormToPack.setup uses.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: always proceed to the "pack_put_z" state
        """
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents a result on an output with stb/ack
        handshaking, then moves on once the output is acknowledged.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source of the value (its .v is read)
            * out_z: destination; .z.v, .z.stb and .z.ack are used
            * in_mid, out_mid: id source/destination; in_mid may be
              None, in which case no id is copied
            * to_state: state entered after the handshake completes
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # both are read by action(), so they must be kept
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ copies id and value to the output and runs the handshake:
            stb is raised until stb and ack are both seen high, at
            which point stb is dropped and the FSM moves to to_state
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): sync domain assumed for the value write, in
        # line with the id and stb writes in this method - confirm.
        m.d.sync += [
          self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # must be the alternative branch: an unconditional write
            # here would override the stb.eq(0) above
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents a result on one of several outputs,
        chosen by in_mid, with stb/ack handshaking.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source of the value (its .v is read)
            * out_zs: indexable collection of outputs, each providing
              .v, .stb and .ack
            * in_mid: index selecting the output to drive
            * to_state: state entered after the handshake completes
              (defaults to "get_ops")
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z # read by action(), so it must be kept
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ copies the value to the selected output and runs the
            handshake: stb is raised until stb and ack of the selected
            output are both seen high, at which point stb is dropped
            and the FSM moves to to_state
        """
        # combinatorial view of the selected output's handshake pair
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # NOTE(review): sync domain assumed for the value write, in
        # line with the stb writes below - confirm.
        m.d.sync += [
          self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # must be the alternative branch: an unconditional write
            # here would override the stb.eq(0) above
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ input data bundle for the adder: two operands and an id
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of each operand signal
            * id_wid: bit-width of the id signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy a, b and mid
            from another object i with the same three attributes
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
1569 def __init__(self
, width
, id_wid
):
1570 self
.z
= FPOp(width
)
1571 self
.mid
= Signal(id_wid
, reset_less
=True)
1574 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1579 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1582 * width: bit-width of IEEE754. supported: 16, 32, 64
1583 * id_wid: an identifier that is sync-connected to the input
1584 * single_cycle: True indicates each stage to complete in 1 clock
1585 * compact: True indicates a reduced number of stages
1588 self
.id_wid
= id_wid
1589 self
.single_cycle
= single_cycle
1590 self
.compact
= compact
1592 self
.in_t
= Trigger()
1593 self
.i
= self
.ispec()
1594 self
.o
= self
.ospec()
1599 return FPADDBaseData(self
.width
, self
.id_wid
)
1602 return FPOpData(self
.width
, self
.id_wid
)
1604 def add_state(self
, state
):
1605 self
.states
.append(state
)
1608 def get_fragment(self
, platform
=None):
1609 """ creates the HDL code-fragment for FPAdd
1612 m
.submodules
.out_z
= self
.o
.z
1613 m
.submodules
.in_t
= self
.in_t
1615 self
.get_compact_fragment(m
, platform
)
1617 self
.get_longer_fragment(m
, platform
)
1619 with m
.FSM() as fsm
:
1621 for state
in self
.states
:
1622 with m
.State(state
.state_from
):
1627 def get_longer_fragment(self
, m
, platform
=None):
1629 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1631 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1635 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1636 sc
.setup(m
, a
, b
, self
.in_mid
)
1638 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1639 dn
.setup(m
, a
, b
, sc
.in_mid
)
1641 if self
.single_cycle
:
1642 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1643 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1645 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1646 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1648 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1649 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1651 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1652 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1654 if self
.single_cycle
:
1655 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1656 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1658 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1659 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1661 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1662 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1664 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1665 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1667 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1668 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1670 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1671 pa
.in_mid
, self
.out_mid
))
1673 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1674 pa
.in_mid
, self
.out_mid
))
1676 def get_compact_fragment(self
, m
, platform
=None):
1678 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1679 self
.width
, self
.id_wid
))
1680 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1682 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1685 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1688 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1689 n1
.setup(m
, alm
.a1o
)
1691 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1692 n1
.out_z
.mid
, self
.o
.mid
))
1694 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1695 sc
.o
.mid
, self
.o
.mid
))
1698 class FPADDBase(FPState
):
1700 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1703 * width: bit-width of IEEE754. supported: 16, 32, 64
1704 * id_wid: an identifier that is sync-connected to the input
1705 * single_cycle: True indicates each stage to complete in 1 clock
1707 FPState
.__init
__(self
, "fpadd")
1709 self
.single_cycle
= single_cycle
1710 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1711 self
.o
= self
.ospec()
1713 self
.in_t
= Trigger()
1714 self
.i
= self
.ispec()
1716 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1717 self
.in_accept
= Signal(reset_less
=True)
1718 self
.add_stb
= Signal(reset_less
=True)
1719 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1722 return self
.mod
.ispec()
1725 return self
.mod
.ospec()
1727 def setup(self
, m
, i
, add_stb
, in_mid
):
1728 m
.d
.comb
+= [self
.i
.eq(i
),
1729 self
.mod
.i
.eq(self
.i
),
1730 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1731 #self.add_stb.eq(add_stb),
1732 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1733 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1734 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1735 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1736 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1737 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1740 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1741 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1742 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1743 #m.d.sync += self.in_t.stb.eq(0)
1745 m
.submodules
.fpadd
= self
.mod
1747 def action(self
, m
):
1749 # in_accept is set on incoming strobe HIGH and ack LOW.
1750 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1752 #with m.If(self.in_t.ack):
1753 # m.d.sync += self.in_t.stb.eq(0)
1754 with m
.If(~self
.z_done
):
1755 # not done: test for accepting an incoming operand pair
1756 with m
.If(self
.in_accept
):
1758 self
.add_ack
.eq(1), # acknowledge receipt...
1759 self
.in_t
.stb
.eq(1), # initiate add
1762 m
.d
.sync
+= [self
.add_ack
.eq(0),
1763 self
.in_t
.stb
.eq(0),
1767 # done: acknowledge, and write out id and value
1768 m
.d
.sync
+= [self
.add_ack
.eq(1),
1775 if self
.in_mid
is not None:
1776 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1779 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1781 # move to output state on detecting z ack
1782 with m
.If(self
.out_z
.trigger
):
1783 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1786 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1790 def __init__(self
, width
, id_wid
):
1792 self
.id_wid
= id_wid
1794 for i
in range(rs_sz
):
1796 out_z
.name
= "out_z_%d" % i
1798 self
.res
= Array(res
)
1799 self
.in_z
= FPOp(width
)
1800 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1802 def setup(self
, m
, in_z
, in_mid
):
1803 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1804 self
.in_mid
.eq(in_mid
)]
1806 def get_fragment(self
, platform
=None):
1807 """ creates the HDL code-fragment for FPAdd
1810 m
.submodules
.res_in_z
= self
.in_z
1811 m
.submodules
+= self
.res
1823 """ FPADD: stages as follows:
1829 FPAddBase---> FPAddBaseMod
1831 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1833 FPAddBase is tricky: it is both a stage and *has* stages.
1834 Connection to FPAddBaseMod therefore requires an in stb/ack
1835 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1836 needs to be the thing that raises the incoming stb.
1839 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1842 * width: bit-width of IEEE754. supported: 16, 32, 64
1843 * id_wid: an identifier that is sync-connected to the input
1844 * single_cycle: True indicates each stage to complete in 1 clock
1847 self
.id_wid
= id_wid
1848 self
.single_cycle
= single_cycle
1850 #self.out_z = FPOp(width)
1851 self
.ids
= FPID(id_wid
)
1854 for i
in range(rs_sz
):
1857 in_a
.name
= "in_a_%d" % i
1858 in_b
.name
= "in_b_%d" % i
1859 rs
.append((in_a
, in_b
))
1863 for i
in range(rs_sz
):
1865 out_z
.name
= "out_z_%d" % i
1867 self
.res
= Array(res
)
1871 def add_state(self
, state
):
1872 self
.states
.append(state
)
1875 def get_fragment(self
, platform
=None):
1876 """ creates the HDL code-fragment for FPAdd
1879 m
.submodules
+= self
.rs
1881 in_a
= self
.rs
[0][0]
1882 in_b
= self
.rs
[0][1]
1884 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1889 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1894 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1895 ab
= self
.add_state(ab
)
1896 abd
= ab
.ispec() # create an input spec object for FPADDBase
1897 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1898 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1901 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1904 with m
.FSM() as fsm
:
1906 for state
in self
.states
:
1907 with m
.State(state
.state_from
):
1913 if __name__
== "__main__":
1915 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1916 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1917 alu
.rs
[0][1].ports() + \
1918 alu
.res
[0].ports() + \
1919 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1921 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1922 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1923 alu
.in_t
.ports() + \
1924 alu
.out_z
.ports() + \
1925 [alu
.in_mid
, alu
.out_mid
])
1928 # works... but don't use, just do "python fname.py convert -t v"
1929 #print (verilog.convert(alu, ports=[
1930 # ports=alu.in_a.ports() + \
1931 # alu.in_b.ports() + \
1932 # alu.out_z.ports())