1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
class FPState(FPBase):
    """Base class for one named state of the adder's state machine.

    Stores the name of the state and offers two helpers that publish a
    dictionary of signals as attributes on the instance.
    """

    def __init__(self, state_from):
        # name of the FSM state that this object implements
        self.state_from = state_from

    def set_inputs(self, inputs):
        """Record ``inputs`` (a dict) and expose each entry as an attribute.

        Mirrors :meth:`set_outputs`.
        """
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """Record ``outputs`` (a dict) and expose each entry as an attribute.
        """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
class FPGetSyncOpsMod:
    """Gathers ``num_ops`` operands and releases them together.

    ``ready`` is raised once every bit of ``stb`` is set; when ``ack`` is
    also set (``out_decode``) the inputs are copied to the outputs.
    """

    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        # one input and one output signal per operand
        self.in_op = [Signal(width, reset_less=True) for _ in range(num_ops)]
        self.out_op = [Signal(width, reset_less=True) for _ in range(num_ops)]
        self.stb = Signal(num_ops)  # one strobe bit per operand
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # Const(-1, (num_ops, False)) is the all-ones pattern of that width,
        # so "ready" means every strobe bit is set
        all_strobed = Const(-1, (self.num_ops, False))
        m.d.comb += self.ready.eq(self.stb == all_strobed)
        m.d.comb += self.out_decode.eq(self.ack & self.ready)
        with m.If(self.out_decode):
            for i in range(self.num_ops):
                m.d.comb += [
                    self.out_op[i].eq(self.in_op[i]),
                ]
        return m

    def ports(self):
        """Return the signals that make up the module's interface."""
        return self.in_op + self.out_op + [self.stb, self.ack]
class FPOps(Trigger):
    """A group of ``num_ops`` operand signals with a Trigger handshake.

    The operands are held in ``self.v`` (an nmigen ``Array``) so that they
    can be indexed by position.
    """

    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops

        res = []
        for i in range(num_ops):
            res.append(Signal(width))
        self.v = Array(res)

    def ports(self):
        """Return the operand signals followed by the handshake signals."""
        res = []
        for i in range(self.num_ops):
            res.append(self.v[i])
        # NOTE(review): ack/stb are assumed to come from Trigger (other
        # Trigger subclasses in this file expose .ack and .stb) - confirm
        res.append(self.ack)
        res.append(self.stb)
        return res
def __init__(self, width, num_ops=2, num_rows=4):
    """Create ``num_rows`` operand-gathering rows feeding one output.

    width:    bit-width of each operand
    num_ops:  number of operands per row
    num_rows: number of rows multiplexed onto the single output
    """
    self.width = width
    self.num_ops = num_ops
    self.num_rows = num_rows
    # floor(log2(num_rows)), done with integers: needs no math import and
    # cannot be thrown off by floating-point rounding of log(n)/log(2)
    self.mmax = self.num_rows.bit_length() - 1
    self.mid = Signal(self.mmax, reset_less=True) # multiplex id
    rows = []
    for i in range(num_rows):
        rows.append(FPGetSyncOpsMod(width, num_ops))
    # an Array, so that a row can be selected by a Signal at run time
    self.rs = Array(rows)

    self.out_op = FPOps(width, num_ops)
def elaborate(self, platform):
    """Select one ready row with a priority encoder and pass it on."""
    m = Module()

    pe = PriorityEncoder(self.num_rows)
    m.submodules.selector = pe
    m.submodules.out_op = self.out_op
    m.submodules += self.rs

    # connect priority encoder
    in_ready = []
    for i in range(self.num_rows):
        in_ready.append(self.rs[i].ready)
    m.d.comb += pe.i.eq(Cat(*in_ready))

    active = Signal(reset_less=True)
    out_en = Signal(reset_less=True)
    m.d.comb += active.eq(~pe.n) # encoder active
    m.d.comb += out_en.eq(active & self.out_op.trigger)

    # encoder active: ack relevant input, record MID, pass output
    # NOTE(review): the condition and the row selection were lost from
    # this copy; out_en and self.rs[pe.o] are the only candidates that
    # the surrounding code defines - confirm
    with m.If(out_en):
        rs = self.rs[pe.o]
        m.d.sync += self.mid.eq(pe.o)
        m.d.sync += rs.ack.eq(0)
        m.d.sync += self.out_op.stb.eq(0)
        for j in range(self.num_ops):
            m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
    with m.Else():
        m.d.sync += self.out_op.stb.eq(1)
        # acks all default to zero
        for i in range(self.num_rows):
            m.d.sync += self.rs[i].ack.eq(1)

    return m
def ports(self):
    """Return output ports, then each row's operands and strobe, then mid.

    NOTE(review): the method header was lost from this copy; the name
    "ports" follows the other classes in this file - confirm.
    """
    res = []
    for i in range(self.num_rows):
        inop = self.rs[i]
        res += inop.in_op + [inop.stb]
    return self.out_op.ports() + res + [self.mid]
class FPGetOpMod:
    """Combinatorial part of fetching one operand.

    ``out_decode`` is ack AND stb of the input operand; while it is set,
    the operand value is copied to ``out_op``.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        handshake = (self.in_op.ack) & (self.in_op.stb)
        m.d.comb += self.out_decode.eq(handshake)
        m.submodules.get_op_in = self.in_op
        #m.submodules.get_op_out = self.out_op
        with m.If(self.out_decode):
            m.d.comb += [
                self.out_op.eq(self.in_op.v),
            ]
        return m
class FPGetOp(FPState):
    """FSM state that fetches a single operand through FPGetOpMod.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        # kept because action() drives in_op.ack
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs
        """
        # the submodule is registered under this state's name
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += self.mod.in_op.eq(in_op)
        m.d.comb += self.out_decode.eq(self.mod.out_decode)

    def action(self, m):
        """On a completed handshake latch the operand and move on;
        otherwise keep ack raised.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.in_op.ack.eq(0),
                self.out_op.eq(self.mod.out_op)
            ]
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
class FPGet2OpMod(Trigger):
    """Decodes two raw operands into FP number form when triggered.
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        # both are read by ispec()/ospec(), so they are set first
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Input data format."""
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """Output data format."""
        return FPNumBase2Ops(self.width, self.id_wid)

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.o.a
        m.submodules.get_op2_out = self.o.b
        out_op1 = FPNumIn(None, self.width)
        out_op2 = FPNumIn(None, self.width)
        with m.If(self.trigger):
            m.d.comb += [
                out_op1.decode(self.i.a),
                out_op2.decode(self.i.b),
                self.o.a.eq(out_op1),
                self.o.b.eq(out_op2),
                self.o.mid.eq(self.i.mid)
            ]
        return m
class FPGet2Op(FPState):
    """FSM state that fetches and decodes both operands via FPGet2OpMod.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ links module to inputs and outputs
        """
        m.submodules.get_ops = self.mod
        m.d.comb += self.mod.i.eq(i)
        m.d.comb += self.mod.stb.eq(in_stb)
        m.d.comb += self.out_ack.eq(self.mod.ack)
        m.d.comb += self.out_decode.eq(self.mod.trigger)
        m.d.comb += in_ack.eq(self.mod.ack)

    def action(self, m):
        """On trigger latch the decoded operands and move on; otherwise
        keep the module's ack raised.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                # NOTE(review): one list entry was lost from this copy;
                # clearing ack mirrors FPGetOp.action - confirm
                self.mod.ack.eq(0),
                self.o.eq(self.mod.o),
            ]
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
class FPNumBase2Ops:
    """Data bundle: two FP operands (a, b) plus a multiplexer id."""

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments that copy every field of ``i`` here."""
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
class FPSCData:
    """Data bundle produced by the special-cases stage.

    Carries both operands, a candidate result ``z``, the ``out_do_z``
    flag and the multiplexer id.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments that copy every field of ``i`` here."""
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z),
                self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        # both are read by ispec()/ospec(), so they are set first
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Input data format."""
        return FPNumBase2Ops(self.width, self.id_wid)

    def ospec(self):
        """Output data format."""
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """Set ``o.out_do_z`` and ``o.z`` when the result is already
        decided by a special case; otherwise pass a and b through.
        """
        m = Module()

        m.submodules.sc_in_a = self.i.a
        m.submodules.sc_in_b = self.i.b
        m.submodules.sc_out_z = self.o.z

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(self.i.a.s != self.i.b.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(self.i.a.m == self.i.b.m)

        # if a is NaN or b is NaN return NaN
        with m.If(self.i.a.is_nan | self.i.b.is_nan):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(self.i.a.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(self.i.a.s)
            # if a is inf and signs don't match return NaN
            with m.If(self.i.b.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(self.i.b.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(self.i.b.s)

        # NOTE(review): in the three create() calls below the trailing
        # arguments were lost from this copy.  The exponent/mantissa
        # operands and the [3:-1] slice (same width as the commented-out
        # Cat(x.m[3:-2], bit) forms above) are reconstructed - confirm.

        # if a is zero and b zero return signed-a/b
        with m.Elif(self.i.a.is_zero & self.i.b.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(self.i.a.s & self.i.b.s,
                                        self.i.b.e,
                                        self.i.b.m[3:-1])

        # if a is zero return b
        with m.Elif(self.i.a.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(self.i.b.s, self.i.b.e,
                                        self.i.b.m[3:-1])

        # if b is zero return a
        with m.Elif(self.i.b.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(self.i.a.s, self.i.a.e,
                                        self.i.a.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (self.i.a.e == self.i.b.e)):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)
            m.d.comb += self.o.a.eq(self.i.a)
            m.d.comb += self.o.b.eq(self.i.b)

        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
391 def __init__(self
, id_wid
):
394 self
.in_mid
= Signal(id_wid
, reset_less
=True)
395 self
.out_mid
= Signal(id_wid
, reset_less
=True)
401 if self
.id_wid
is not None:
402 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod takes (width, id_wid)
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup takes (m, i) only; out_do_z is read back from
        # its output bundle, as FPAddSpecialCasesDeNorm does
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)

        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
426 with m
.If(self
.out_do_z
):
429 m
.next
= "denormalise"
class FPAddSpecialCasesDeNorm(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Chains the special-cases module into the de-normalise module so
        that both are evaluated in the same state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.smod.ospec()
        self.out_do_z = Signal(reset_less=True)

        self.dmod = FPAddDeNormMod(width, id_wid)
        self.o = self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.smod.setup(m, i)
        # the de-normalise module consumes the special-cases output
        self.dmod.setup(m, self.smod.o)
        m.d.comb += self.out_do_z.eq(self.smod.o.out_do_z)

        # result already decided by a special case
        m.d.sync += self.out_z.z.v.eq(self.smod.o.z.v) # only take output
        m.d.sync += self.out_z.mid.eq(self.smod.o.mid)  # (and mid)
        # normal path: registered de-normalised operands
        m.d.sync += self.o.eq(self.dmod.o)
461 with m
.If(self
.out_do_z
):
class FPAddDeNormMod(FPState):
    """De-normalisation stage: adjusts exponent / top mantissa bit of a
    and b unless the result has already been decided (``out_do_z``).
    """

    def __init__(self, width, id_wid):
        # both are read by ispec()/ospec(), so they are set first
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Input data format."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Output data format (same bundle type as the input)."""
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # XXX hmmm, don't like repeating identical code
            m.d.comb += self.o.a.eq(self.i.a)
            with m.If(self.i.a.exp_n127):
                m.d.comb += self.o.a.e.eq(self.i.a.N126) # limit a exponent
            with m.Else():
                m.d.comb += self.o.a.m[-1].eq(1) # set top mantissa bit

            m.d.comb += self.o.b.eq(self.i.b)
            with m.If(self.i.b.exp_n127):
                m.d.comb += self.o.b.e.eq(self.i.b.N126) # limit b exponent
            with m.Else():
                m.d.comb += self.o.b.m[-1].eq(1) # set top mantissa bit

        # always passed through unchanged
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)

        return m
class FPAddDeNorm(FPState):
    """FSM state wrapping FPAddDeNormMod with registered a/b outputs.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod takes (width, id_wid)
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its results through its output bundle "o"
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)
532 # Denormalised Number checks
class FPAddAlignMultiMod(FPState):
    """Alignment step: shifts whichever operand has the smaller exponent
    and flags ``exp_eq`` once the exponents match.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        # This one however (single-cycle) will do the shift

        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += agtb.eq(self.in_a.e > self.in_b.e)
        m.d.comb += altb.eq(self.in_a.e < self.in_b.e)
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
class FPAddAlignMulti(FPState):
    """FSM state wrapping FPAddAlignMultiMod with registered outputs.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += self.mod.in_a.eq(in_a)
        m.d.comb += self.mod.in_b.eq(in_b)
        #m.d.comb += self.out_a.eq(self.mod.out_a)
        #m.d.comb += self.out_b.eq(self.mod.out_b)
        m.d.comb += self.exp_eq.eq(self.mod.exp_eq)
        # outputs are registered
        m.d.sync += self.out_a.eq(self.mod.out_a)
        m.d.sync += self.out_b.eq(self.mod.out_b)
600 with m
.If(self
.exp_eq
):
class FPNumIn2Ops:
    """Data bundle: aligned operands a and b, candidate result z, the
    ``out_do_z`` flag and the multiplexer id.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments that copy every field of ``i`` here."""
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z),
                self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
class FPAddAlignSingleMod:
    """Single-cycle exponent alignment of the two operands.
    """

    def __init__(self, width, id_wid):
        # both are read by ispec()/ospec(), so they are set first
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Input data format."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Output data format."""
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body lost from this copy; returning the output
        # bundle is assumed to be what StageChain expects - confirm
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)   # a.e - b.e
        m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)  # b.e - a.e
        m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
        m.d.comb += egz.eq(self.i.a.e > self.i.b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.o.a.eq(self.i.a)
        m.d.comb += self.o.b.eq(self.i.b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # NOTE(review): the branch conditions and the tdiff / t_out wiring
        # below were lost from this copy and are reconstructed from the
        # signals declared above and the surviving comments - confirm
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(self.i.b),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(self.i.b.s), # whoops forgot sign
                            ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(self.i.a),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(self.i.a.s), # whoops forgot sign
                            ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)

        return m
class FPAddAlignSingle(FPState):
    """FSM state wrapping FPAddAlignSingleMod with registered a/b outputs.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module exposes its results through its output bundle "o"
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)
class FPAddAlignSingleAdd(FPState):
    """FSM state that chains align, add stage 0 and add stage 1
    combinatorially and registers the final result in ``a1o``.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        # both are read by ispec()/ospec(), so they are set first
        self.width = width
        self.id_wid = id_wid
        self.a1o = self.ospec()

    def ispec(self):
        return FPNumBase2Ops(self.width, self.id_wid) # AlignSingle ispec

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        mod = FPAddAlignSingleMod(self.width, self.id_wid)
        a0mod = FPAddStage0Mod(self.width, self.id_wid)
        a1mod = FPAddStage1Mod(self.width, self.id_wid)

        chain = StageChain([mod, a0mod, a1mod])
        # NOTE(review): this call was lost from this copy; StageChain is
        # assumed to offer setup(m, i) like the stages it wraps - confirm
        chain.setup(m, i)

        m.d.sync += self.a1o.eq(a1mod.o)

    def action(self, m):
        m.next = "normalise_1"
class FPAddStage0Data:
    """Data bundle produced by add stage 0: partial result z, the
    ``out_do_z`` flag, the mantissa total and the multiplexer id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        # four bits wider than z's mantissa
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments that copy every field of ``i`` here."""
        return [self.z.eq(i.z), self.out_do_z.eq(i.out_do_z),
                self.tot.eq(i.tot), self.mid.eq(i.mid)]
class FPAddStage0Mod:
    """First stage of add: adds or subtracts the aligned mantissas.
    """

    def __init__(self, width, id_wid):
        # both are read by ispec()/ospec(), so they are set first
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Input data format."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Output data format."""
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body lost from this copy; returning the output
        # bundle is assumed to be what StageChain expects - confirm
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs equal
        mge = Signal(reset_less=True)  # a mantissa >= b mantissa
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += [seq.eq(self.i.a.s == self.i.b.s),
                     mge.eq(self.i.a.m >= self.i.b.m),
                     am0.eq(Cat(self.i.a.m, 0)),
                     bm0.eq(Cat(self.i.b.m, 0))
                    ]
        # NOTE(review): the inner branch heads and the outer Else were
        # lost from this copy; reconstructed from the surviving comments
        # and from seq/mge being otherwise unused - confirm
        with m.If(~self.i.out_do_z):
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [
                    self.o.tot.eq(am0 + bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [
                    self.o.tot.eq(am0 - bm0),
                    self.o.z.s.eq(self.i.a.s)
                ]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [
                    self.o.tot.eq(bm0 - am0),
                    self.o.z.s.eq(self.i.b.s)
                ]
        with m.Else():
            # result already decided: pass it through untouched
            m.d.comb += self.o.z.eq(self.i.z)

        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        return m
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod takes (width, id_wid)
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)
class FPAddStage1Data:
    """Data bundle produced by add stage 1: result z, the overflow
    (guard/round/sticky) record and the multiplexer id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        # eq() below copies "of", so it has to exist on every instance
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments that copy every field of ``i`` here."""
        return [self.z.eq(i.z), self.of.eq(i.of), self.mid.eq(i.mid)]
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width, id_wid):
        # both are read by ispec()/ospec(), so they are set first
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Input data format."""
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """Output data format."""
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): body lost from this copy; returning the output
        # bundle is assumed to be what StageChain expects - confirm
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        # tot[-1] (MSB) gets set when the sum overflows. shift result down
        with m.If(self.i.tot[-1]):
            m.d.comb += [
                self.o.z.m.eq(self.i.tot[4:]),
                self.o.of.m0.eq(self.i.tot[4]),
                self.o.of.guard.eq(self.i.tot[3]),
                self.o.of.round_bit.eq(self.i.tot[2]),
                # the two lowest bits are merged into sticky
                self.o.of.sticky.eq(self.i.tot[1] | self.i.tot[0]),
                self.o.z.e.eq(self.i.z.e + 1)
            ]
        # tot[-1] (MSB) zero case
        with m.Else():
            m.d.comb += [
                self.o.z.m.eq(self.i.tot[3:]),
                self.o.of.m0.eq(self.i.tot[3]),
                self.o.of.guard.eq(self.i.tot[2]),
                self.o.of.round_bit.eq(self.i.tot[1]),
                self.o.of.sticky.eq(self.i.tot[0])
            ]
        return m
class FPAddStage1(FPState):
    """FSM state wrapping FPAddStage1Mod with registered z / overflow
    outputs and a strobe for the normalise state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod takes (width, id_wid)
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module exposes its results through its output bundle "o"
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
class FPNormaliseModSingle:
    """Single-cycle normalisation that only handles the "decrease
    exponent / shift mantissa up" direction.
    """

    def __init__(self, width):
        # read by ispec()/ospec() and elaborate(), so it is set first
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # elaborate() reads in_of and drives out_of
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """Input data format."""
        return FPNumBase(self.width, False)

    def ospec(self):
        """Output data format."""
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input number of this module is held in in_z
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
class FPNorm1Data:
    """Data bundle produced by normalisation: result z, the ``roundz``
    flag and the multiplexer id.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments that copy every field of ``i`` here."""
        return [self.z.eq(i.z), self.roundz.eq(i.roundz), self.mid.eq(i.mid)]
class FPNorm1ModSingle:
    """Single-cycle normalisation: shifts the mantissa up (decreasing the
    exponent) or down (increasing it) in one step.
    """

    def __init__(self, width, id_wid):
        # both are read by ispec()/ospec(), so they are set first
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Input data format."""
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """Output data format."""
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        # NOTE(review): body lost from this copy; returning the output
        # bundle is assumed to be what StageChain expects - confirm
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        # working copy of the overflow record; supplies roundz
        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input bundle
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(i.z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            # NOTE(review): the third Mux operand was lost from this copy;
            # exp_sub_n126 makes this min(pe.o, exp_sub_n126) - confirm
            limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                         i.z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                of.guard.eq(temp_s[1]),         # guard
                of.round_bit.eq(temp_s[0]),     # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                              i.z.m)),
                ediff_n126.eq(i.z.N126 - i.z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.o.z.m.eq(msr.m[3:]),
                # The low bits come from the shifter output.  temp_s is
                # only driven in the decrease branch, so reading it here
                # would always give zero and lose the rounding bits.
                of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                of.guard.eq(msr.m[2]),          # guard
                of.round_bit.eq(msr.m[1]),      # round
                of.sticky.eq(msr.m[0]),         # sticky
                self.o.z.e.eq(i.z.e + ediff_n126),
            ]

        return m
class FPNorm1ModMulti:
    """Multi-cycle normalisation: each evaluation moves the mantissa by
    one bit; ``out_norm`` stays set while another step is needed.
    """

    def __init__(self, width, single_cycle=True):
        # read by elaborate()
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven by elaborate(), so it has to exist on every instance
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
class FPNorm1Single(FPState):
    """ FSM state "normalise_1": wraps FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # pass id_wid through: FPNormToPack constructs the same module with
        # (width, id_wid), whereas this class used to drop the argument.
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        # NOTE(review): next state reconstructed from the state chain
        # (FPRound registers itself as "round") - confirm.
        m.next = "round"
1235 class FPNorm1Multi(FPState
):
1237 def __init__(self
, width
, id_wid
):
1238 FPState
.__init
__(self
, "normalise_1")
1239 self
.mod
= FPNorm1ModMulti(width
)
1240 self
.stb
= Signal(reset_less
=True)
1241 self
.ack
= Signal(reset
=0, reset_less
=True)
1242 self
.out_norm
= Signal(reset_less
=True)
1243 self
.in_accept
= Signal(reset_less
=True)
1244 self
.temp_z
= FPNumBase(width
)
1245 self
.temp_of
= Overflow()
1246 self
.out_z
= FPNumBase(width
)
1247 self
.out_roundz
= Signal(reset_less
=True)
1249 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1250 """ links module to inputs and outputs
1252 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1253 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1254 self
.out_z
, self
.out_norm
)
1256 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1257 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1259 def action(self
, m
):
1260 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1261 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1262 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1263 with m
.If(self
.out_norm
):
1264 with m
.If(self
.in_accept
):
1269 m
.d
.sync
+= self
.ack
.eq(0)
1271 # normalisation not required (or done).
1273 m
.d
.sync
+= self
.ack
.eq(1)
1274 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact form): normalisation, rounding,
        corrections and packing chained together, with the packed result
        registered into out_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        # same as the Norm1ModSingle ispec
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        # same as the FPPackMod ospec
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_types = (FPNorm1ModSingle, FPRoundMod,
                       FPCorrectionsMod, FPPackMod)
        stages = [stage(self.width, self.id_wid) for stage in stage_types]
        chain = StageChain(stages)
        # NOTE(review): chain hook-up reconstructed (chain and i are
        # otherwise unused) - confirm against StageChain's API.
        chain.setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()

        m.d.sync += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.v.eq(packer.o.z.v),  # outputs packed result
        ]

    def action(self, m):
        m.next = "pack_put_z"
1312 def __init__(self
, width
, id_wid
):
1313 self
.z
= FPNumBase(width
, False)
1314 self
.mid
= Signal(id_wid
, reset_less
=True)
1317 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1322 def __init__(self
, width
, id_wid
):
1324 self
.id_wid
= id_wid
1325 self
.i
= self
.ispec()
1326 self
.out_z
= self
.ospec()
1329 return FPNorm1Data(self
.width
, self
.id_wid
)
1332 return FPRoundData(self
.width
, self
.id_wid
)
1334 def process(self
, i
):
1337 def setup(self
, m
, i
):
1338 m
.submodules
.roundz
= self
1339 m
.d
.comb
+= self
.i
.eq(i
)
1341 def elaborate(self
, platform
):
1343 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1344 with m
.If(self
.i
.roundz
):
1345 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa rounds up
1346 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1347 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires id_wid as well (see FPNormToPack, which
        # constructs it with both arguments).
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the rounding module exposes its result as "out_z" (it has no
        # "o" attribute), so take the id from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1377 class FPCorrectionsMod
:
1379 def __init__(self
, width
, id_wid
):
1381 self
.id_wid
= id_wid
1382 self
.i
= self
.ispec()
1383 self
.out_z
= self
.ospec()
1386 return FPRoundData(self
.width
, self
.id_wid
)
1389 return FPRoundData(self
.width
, self
.id_wid
)
1391 def process(self
, i
):
1394 def setup(self
, m
, i
):
1395 """ links module to inputs and outputs
1397 m
.submodules
.corrections
= self
1398 m
.d
.comb
+= self
.i
.eq(i
)
1400 def elaborate(self
, platform
):
1402 m
.submodules
.corr_in_z
= self
.i
.z
1403 m
.submodules
.corr_out_z
= self
.out_z
.z
1404 m
.d
.comb
+= self
.out_z
.eq(self
.i
)
1405 with m
.If(self
.i
.z
.is_denormalised
):
1406 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ takes (width, id_wid): omitting id_wid
        # raised a TypeError.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod only has "i" and "out_z" (no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # NOTE(review): next state reconstructed from the state chain
        # (FPPack registers itself as "pack") - confirm.
        m.next = "pack"
1437 def __init__(self
, width
, id_wid
):
1438 self
.z
= FPNumOut(width
, False)
1439 self
.mid
= Signal(id_wid
, reset_less
=True)
1442 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1447 def __init__(self
, width
, id_wid
):
1449 self
.id_wid
= id_wid
1450 self
.i
= self
.ispec()
1451 self
.o
= self
.ospec()
1454 return FPRoundData(self
.width
, self
.id_wid
)
1457 return FPPackData(self
.width
, self
.id_wid
)
1459 def process(self
, i
):
1462 def setup(self
, m
, in_z
):
1463 """ links module to inputs and outputs
1465 m
.submodules
.pack
= self
1466 m
.d
.comb
+= self
.i
.eq(in_z
)
1468 def elaborate(self
, platform
):
1470 m
.submodules
.pack_in_z
= self
.i
.z
1471 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1472 with m
.If(self
.i
.z
.is_overflowed
):
1473 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1475 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1480 def __init__(self
, width
, id_wid
):
1481 self
.z
= FPNumOut(width
, False)
1482 self
.mid
= Signal(id_wid
, reset_less
=True)
1485 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers the packed value
        and id.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod is constructed with (width, id_wid) elsewhere in this
        # file (see FPNormToPack); id_wid was being dropped here.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the pack module's output record is "o" (fields: z, mid), and
        # out_z here is the same kind of record: the packed value is
        # therefore at .z.v on both sides (same form as FPNormToPack).
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM state that presents in_z on the out_z.z port and waits for the
        stb/ack handshake before moving to to_state ("get_ops" by default).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        port = self.out_z.z  # output port: v / stb / ack

        # the id is only forwarded when one was supplied
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        m.d.sync += [
            port.v.eq(self.in_z.v)
        ]

        handshake_done = port.stb & port.ack
        with m.If(handshake_done):
            # receiver acknowledged: drop strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep strobe raised until acknowledged
            m.d.sync += port.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on the output port selected from
        out_zs by in_mid, and waits for that port's stb/ack handshake
        before moving to to_state ("get_ops" by default).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        # the output port addressed by the (run-time) id
        port = self.out_zs[self.in_mid]

        # local copies of the selected port's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [
            outz_stb.eq(port.stb),
            outz_ack.eq(port.ack),
        ]

        m.d.sync += [
            port.v.eq(self.in_z.v)
        ]

        with m.If(outz_stb & outz_ack):
            # receiver acknowledged: drop strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep strobe raised until acknowledged
            m.d.sync += port.stb.eq(1)
class FPADDBaseData:
    """ Input record for the adder: two operands (a, b), each "width" bits
        wide, plus an "id_wid"-bit identifier (mid).
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        assignments = []
        for field in ("a", "b", "mid"):
            target = getattr(self, field)
            assignments.append(target.eq(getattr(i, field)))
        return assignments
1578 def __init__(self
, width
, id_wid
):
1579 self
.z
= FPOp(width
)
1580 self
.mid
= Signal(id_wid
, reset_less
=True)
1583 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1588 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1591 * width: bit-width of IEEE754. supported: 16, 32, 64
1592 * id_wid: an identifier that is sync-connected to the input
1593 * single_cycle: True indicates each stage to complete in 1 clock
1594 * compact: True indicates a reduced number of stages
1597 self
.id_wid
= id_wid
1598 self
.single_cycle
= single_cycle
1599 self
.compact
= compact
1601 self
.in_t
= Trigger()
1602 self
.i
= self
.ispec()
1603 self
.o
= self
.ospec()
1608 return FPADDBaseData(self
.width
, self
.id_wid
)
1611 return FPOpData(self
.width
, self
.id_wid
)
1613 def add_state(self
, state
):
1614 self
.states
.append(state
)
1617 def get_fragment(self
, platform
=None):
1618 """ creates the HDL code-fragment for FPAdd
1621 m
.submodules
.out_z
= self
.o
.z
1622 m
.submodules
.in_t
= self
.in_t
1624 self
.get_compact_fragment(m
, platform
)
1626 self
.get_longer_fragment(m
, platform
)
1628 with m
.FSM() as fsm
:
1630 for state
in self
.states
:
1631 with m
.State(state
.state_from
):
1636 def get_longer_fragment(self
, m
, platform
=None):
1638 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1640 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1644 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1645 sc
.setup(m
, a
, b
, self
.in_mid
)
1647 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1648 dn
.setup(m
, a
, b
, sc
.in_mid
)
1650 if self
.single_cycle
:
1651 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1652 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1654 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1655 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1657 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1658 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1660 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1661 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1663 if self
.single_cycle
:
1664 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1665 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1667 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1668 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1670 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1671 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1673 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1674 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1676 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1677 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1679 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1680 pa
.in_mid
, self
.out_mid
))
1682 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1683 pa
.in_mid
, self
.out_mid
))
1685 def get_compact_fragment(self
, m
, platform
=None):
1687 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1688 self
.width
, self
.id_wid
))
1689 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1691 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1694 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1697 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1698 n1
.setup(m
, alm
.a1o
)
1700 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1701 n1
.out_z
.mid
, self
.o
.mid
))
1703 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1704 sc
.o
.mid
, self
.o
.mid
))
1707 class FPADDBase(FPState
):
1709 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1712 * width: bit-width of IEEE754. supported: 16, 32, 64
1713 * id_wid: an identifier that is sync-connected to the input
1714 * single_cycle: True indicates each stage to complete in 1 clock
1716 FPState
.__init
__(self
, "fpadd")
1718 self
.single_cycle
= single_cycle
1719 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1720 self
.o
= self
.ospec()
1722 self
.in_t
= Trigger()
1723 self
.i
= self
.ispec()
1725 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1726 self
.in_accept
= Signal(reset_less
=True)
1727 self
.add_stb
= Signal(reset_less
=True)
1728 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1731 return self
.mod
.ispec()
1734 return self
.mod
.ospec()
1736 def setup(self
, m
, i
, add_stb
, in_mid
):
1737 m
.d
.comb
+= [self
.i
.eq(i
),
1738 self
.mod
.i
.eq(self
.i
),
1739 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1740 #self.add_stb.eq(add_stb),
1741 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1742 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1743 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1744 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1745 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1746 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1749 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1750 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1751 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1752 #m.d.sync += self.in_t.stb.eq(0)
1754 m
.submodules
.fpadd
= self
.mod
1756 def action(self
, m
):
1758 # in_accept is set on incoming strobe HIGH and ack LOW.
1759 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1761 #with m.If(self.in_t.ack):
1762 # m.d.sync += self.in_t.stb.eq(0)
1763 with m
.If(~self
.z_done
):
1764 # not done: test for accepting an incoming operand pair
1765 with m
.If(self
.in_accept
):
1767 self
.add_ack
.eq(1), # acknowledge receipt...
1768 self
.in_t
.stb
.eq(1), # initiate add
1771 m
.d
.sync
+= [self
.add_ack
.eq(0),
1772 self
.in_t
.stb
.eq(0),
1776 # done: acknowledge, and write out id and value
1777 m
.d
.sync
+= [self
.add_ack
.eq(1),
1784 if self
.in_mid
is not None:
1785 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1788 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1790 # move to output state on detecting z ack
1791 with m
.If(self
.out_z
.trigger
):
1792 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1795 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1799 def __init__(self
, width
, id_wid
):
1801 self
.id_wid
= id_wid
1803 for i
in range(rs_sz
):
1805 out_z
.name
= "out_z_%d" % i
1807 self
.res
= Array(res
)
1808 self
.in_z
= FPOp(width
)
1809 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1811 def setup(self
, m
, in_z
, in_mid
):
1812 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1813 self
.in_mid
.eq(in_mid
)]
1815 def get_fragment(self
, platform
=None):
1816 """ creates the HDL code-fragment for FPAdd
1819 m
.submodules
.res_in_z
= self
.in_z
1820 m
.submodules
+= self
.res
1832 """ FPADD: stages as follows:
1838 FPAddBase---> FPAddBaseMod
1840 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1842 FPAddBase is tricky: it is both a stage and *has* stages.
1843 Connection to FPAddBaseMod therefore requires an in stb/ack
1844 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1845 needs to be the thing that raises the incoming stb.
1848 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1851 * width: bit-width of IEEE754. supported: 16, 32, 64
1852 * id_wid: an identifier that is sync-connected to the input
1853 * single_cycle: True indicates each stage to complete in 1 clock
1856 self
.id_wid
= id_wid
1857 self
.single_cycle
= single_cycle
1859 #self.out_z = FPOp(width)
1860 self
.ids
= FPID(id_wid
)
1863 for i
in range(rs_sz
):
1866 in_a
.name
= "in_a_%d" % i
1867 in_b
.name
= "in_b_%d" % i
1868 rs
.append((in_a
, in_b
))
1872 for i
in range(rs_sz
):
1874 out_z
.name
= "out_z_%d" % i
1876 self
.res
= Array(res
)
1880 def add_state(self
, state
):
1881 self
.states
.append(state
)
1884 def get_fragment(self
, platform
=None):
1885 """ creates the HDL code-fragment for FPAdd
1888 m
.submodules
+= self
.rs
1890 in_a
= self
.rs
[0][0]
1891 in_b
= self
.rs
[0][1]
1893 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1898 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1903 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1904 ab
= self
.add_state(ab
)
1905 abd
= ab
.ispec() # create an input spec object for FPADDBase
1906 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1907 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1910 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1913 with m
.FSM() as fsm
:
1915 for state
in self
.states
:
1916 with m
.State(state
.state_from
):
1922 if __name__
== "__main__":
1924 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1925 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1926 alu
.rs
[0][1].ports() + \
1927 alu
.res
[0].ports() + \
1928 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1930 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1931 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1932 alu
.in_t
.ports() + \
1933 alu
.out_z
.ports() + \
1934 [alu
.in_mid
, alu
.out_mid
])
1937 # works... but don't use, just do "python fname.py convert -t v"
1938 #print (verilog.convert(alu, ports=[
1939 # ports=alu.in_a.ports() + \
1940 # alu.in_b.ports() + \
1941 # alu.out_z.ports())