1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember the name of the FSM state this object represents.

    state_from: stored unchanged on the instance; other code in this
                file passes it as the submodule/state name.
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """Create the input operand port and the latched-output signals.

    width: forwarded to FPOp for the input port, and used as the shape
           of the out_op Signal.
    """
    # input operand (FPOp comes from fpbase; it carries v/stb/ack,
    # as used by elaborate() in this class)
    self.in_op = FPOp(width)
    # captured copy of the operand value
    self.out_op = Signal(width)
    # set combinatorially when the input handshake completes
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Build the two-operand fetch state.

    in_state:  name of this state (handed to FPState.__init__)
    out_state: name of the state to move to once the operands are taken
    width:     operand width, forwarded to FPGet2OpMod
    id_wid:    multiplexer-id width, forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # combinatorial module doing the actual decode, plus a local
    # output record of the same spec
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # handshake / status signals
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Create a record holding two FP operands and a multiplexer id.

    width:   forwarded to FPNumBase for both operands
    id_wid:  shape of the mid Signal
    m_extra: forwarded as FPNumBase's second argument (its meaning is
             defined in fpbase, not in this file)
    """
    # the two operands
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # multiplexer id travelling alongside the operands
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """Create a record of operands a/b, result z, bypass flag and mid.

    width:  forwarded to FPNumBase (a, b) and FPNumOut (z)
    id_wid: shape of the mid Signal
    """
    # operands, both constructed with second argument True
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    # result number, constructed with second argument False
    self.z = FPNumOut(width, False)
    # flag: when set, later stages in this file pass z through untouched
    self.out_do_z = Signal(reset_less=True)
    # multiplexer id
    self.mid = Signal(id_wid, reset_less=True)
273 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
274 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
277 class FPAddSpecialCasesMod
:
278 """ special cases: NaNs, infs, zeros, denormalised
279 NOTE: some of these are unique to add. see "Special Operations"
280 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
283 def __init__(self
, width
, id_wid
):
286 self
.i
= self
.ispec()
287 self
.o
= self
.ospec()
290 return FPNumBase2Ops(self
.width
, self
.id_wid
)
293 return FPSCData(self
.width
, self
.id_wid
)
295 def setup(self
, m
, i
):
296 """ links module to inputs and outputs
298 m
.submodules
.specialcases
= self
299 m
.d
.comb
+= self
.i
.eq(i
)
301 def elaborate(self
, platform
):
304 m
.submodules
.sc_in_a
= self
.i
.a
305 m
.submodules
.sc_in_b
= self
.i
.b
306 m
.submodules
.sc_out_z
= self
.o
.z
309 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
312 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
314 # if a is NaN or b is NaN return NaN
315 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
316 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
317 m
.d
.comb
+= self
.o
.z
.nan(0)
319 # XXX WEIRDNESS for FP16 non-canonical NaN handling
322 ## if a is zero and b is NaN return -b
323 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
324 # m.d.comb += self.o.out_do_z.eq(1)
325 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
327 ## if b is zero and a is NaN return -a
328 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
329 # m.d.comb += self.o.out_do_z.eq(1)
330 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
332 ## if a is -zero and b is NaN return -b
333 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
334 # m.d.comb += self.o.out_do_z.eq(1)
335 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
337 ## if b is -zero and a is NaN return -a
338 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
339 # m.d.comb += self.o.out_do_z.eq(1)
340 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
342 # if a is inf return inf (or NaN)
343 with m
.Elif(self
.i
.a
.is_inf
):
344 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
346 # if a is inf and signs don't match return NaN
347 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
348 m
.d
.comb
+= self
.o
.z
.nan(0)
350 # if b is inf return inf
351 with m
.Elif(self
.i
.b
.is_inf
):
352 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
355 # if a is zero and b zero return signed-a/b
356 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
362 # if a is zero return b
363 with m
.Elif(self
.i
.a
.is_zero
):
364 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
368 # if b is zero return a
369 with m
.Elif(self
.i
.b
.is_zero
):
370 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
371 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
374 # if a equal to -b return zero (+ve zero)
375 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
376 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
377 m
.d
.comb
+= self
.o
.z
.zero(0)
379 # Denormalised Number checks next, so pass a/b data through
381 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
382 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
383 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
385 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
391 def __init__(self
, id_wid
):
394 self
.in_mid
= Signal(id_wid
, reset_less
=True)
395 self
.out_mid
= Signal(id_wid
, reset_less
=True)
401 if self
.id_wid
is not None:
402 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
405 class FPAddSpecialCases(FPState
):
406 """ special cases: NaNs, infs, zeros, denormalised
407 NOTE: some of these are unique to add. see "Special Operations"
408 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the "special_cases" state around FPAddSpecialCasesMod.

    width:  operand width, forwarded to the module
    id_wid: multiplexer-id width, forwarded to the module
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod's constructor requires both width and id_wid;
    # passing width alone raises TypeError at construction time.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # local output record with the same spec as the module's output
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
417 def setup(self
, m
, i
):
418 """ links module to inputs and outputs
420 self
.mod
.setup(m
, i
, self
.out_do_z
)
421 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
422 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
426 with m
.If(self
.out_do_z
):
429 m
.next
= "denormalise"
432 class FPAddSpecialCasesDeNorm(FPState
):
433 """ special cases: NaNs, infs, zeros, denormalised
434 NOTE: some of these are unique to add. see "Special Operations"
435 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
438 def __init__(self
, width
, id_wid
):
439 FPState
.__init
__(self
, "special_cases")
440 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
441 self
.out_z
= self
.smod
.ospec()
442 self
.out_do_z
= Signal(reset_less
=True)
444 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
445 self
.o
= self
.dmod
.ospec()
447 def setup(self
, m
, i
):
448 """ links module to inputs and outputs
450 self
.smod
.setup(m
, i
)
451 self
.dmod
.setup(m
, self
.smod
.o
)
452 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
455 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
456 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
458 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
461 with m
.If(self
.out_do_z
):
467 class FPAddDeNormMod(FPState
):
469 def __init__(self
, width
, id_wid
):
472 self
.i
= self
.ispec()
473 self
.o
= self
.ospec()
476 return FPSCData(self
.width
, self
.id_wid
)
479 return FPSCData(self
.width
, self
.id_wid
)
481 def setup(self
, m
, i
):
482 """ links module to inputs and outputs
484 m
.submodules
.denormalise
= self
485 m
.d
.comb
+= self
.i
.eq(i
)
487 def elaborate(self
, platform
):
489 m
.submodules
.denorm_in_a
= self
.i
.a
490 m
.submodules
.denorm_in_b
= self
.i
.b
491 m
.submodules
.denorm_out_a
= self
.o
.a
492 m
.submodules
.denorm_out_b
= self
.o
.b
494 with m
.If(~self
.i
.out_do_z
):
495 # XXX hmmm, don't like repeating identical code
496 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
497 with m
.If(self
.i
.a
.exp_n127
):
498 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
500 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
502 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
503 with m
.If(self
.i
.b
.exp_n127
):
504 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
506 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
508 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
509 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
510 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
515 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Build the "denormalise" state around FPAddDeNormMod.

    width:  operand width, forwarded to the module and to FPNumBase
    id_wid: multiplexer-id width, forwarded to the module
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod's constructor requires both width and id_wid;
    # passing width alone raises TypeError at construction time.
    self.mod = FPAddDeNormMod(width, id_wid)
    # registered copies of the module's operands
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
523 def setup(self
, m
, i
):
524 """ links module to inputs and outputs
528 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
529 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
532 # Denormalised Number checks
536 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the ports of the multi-cycle alignment module.

    width: forwarded to FPNumBase (inputs) and FPNumIn (outputs).

    Note: FPState.__init__ is not called here, so state_from is not
    set by this constructor.
    """
    # operands coming in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # operands going out
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # driven to 1 by elaborate() once the exponents are equal
    self.exp_eq = Signal(reset_less=True)
545 def elaborate(self
, platform
):
546 # This one however (single-cycle) will do the shift
551 m
.submodules
.align_in_a
= self
.in_a
552 m
.submodules
.align_in_b
= self
.in_b
553 m
.submodules
.align_out_a
= self
.out_a
554 m
.submodules
.align_out_b
= self
.out_b
556 # NOTE: this does *not* do single-cycle multi-shifting,
557 # it *STAYS* in the align state until exponents match
559 # exponent of a greater than b: shift b down
560 m
.d
.comb
+= self
.exp_eq
.eq(0)
561 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
562 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
563 agtb
= Signal(reset_less
=True)
564 altb
= Signal(reset_less
=True)
565 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
566 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
568 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
569 # exponent of b greater than a: shift a down
571 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
572 # exponents equal: move to next stage.
574 m
.d
.comb
+= self
.exp_eq
.eq(1)
578 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Build the "align" state around FPAddAlignMultiMod.

    width:  operand width, forwarded to the module and to FPNumIn
    id_wid: accepted for signature parity with the other states; it is
            not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)

    # registered copies of the module's outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors the module's exp_eq flag
    self.exp_eq = Signal(reset_less=True)
587 def setup(self
, m
, in_a
, in_b
):
588 """ links module to inputs and outputs
590 m
.submodules
.align
= self
.mod
591 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
592 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
593 #m.d.comb += self.out_a.eq(self.mod.out_a)
594 #m.d.comb += self.out_b.eq(self.mod.out_b)
595 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
596 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
597 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
600 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Create a record of FPNumIn operands a/b, result z, bypass flag, mid.

    width:  forwarded to FPNumIn (a, b) and FPNumOut (z)
    id_wid: shape of the mid Signal
    """
    # operands (FPNumIn is constructed with None as its first argument)
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # result number
    self.z = FPNumOut(width, False)
    # flag: when set, later stages in this file pass z through untouched
    self.out_do_z = Signal(reset_less=True)
    # multiplexer id
    self.mid = Signal(id_wid, reset_less=True)
614 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
615 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
618 class FPAddAlignSingleMod
:
620 def __init__(self
, width
, id_wid
):
623 self
.i
= self
.ispec()
624 self
.o
= self
.ospec()
627 return FPSCData(self
.width
, self
.id_wid
)
630 return FPNumIn2Ops(self
.width
, self
.id_wid
)
632 def process(self
, i
):
635 def setup(self
, m
, i
):
636 """ links module to inputs and outputs
638 m
.submodules
.align
= self
639 m
.d
.comb
+= self
.i
.eq(i
)
641 def elaborate(self
, platform
):
642 """ Aligns A against B or B against A, depending on which has the
643 greater exponent. This is done in a *single* cycle using
644 variable-width bit-shift
646 the shifter used here is quite expensive in terms of gates.
647 Mux A or B in (and out) into temporaries, as only one of them
648 needs to be aligned against the other
652 m
.submodules
.align_in_a
= self
.i
.a
653 m
.submodules
.align_in_b
= self
.i
.b
654 m
.submodules
.align_out_a
= self
.o
.a
655 m
.submodules
.align_out_b
= self
.o
.b
657 # temporary (muxed) input and output to be shifted
658 t_inp
= FPNumBase(self
.width
)
659 t_out
= FPNumIn(None, self
.width
)
660 espec
= (len(self
.i
.a
.e
), True)
661 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
662 m
.submodules
.align_t_in
= t_inp
663 m
.submodules
.align_t_out
= t_out
664 m
.submodules
.multishift_r
= msr
666 ediff
= Signal(espec
, reset_less
=True)
667 ediffr
= Signal(espec
, reset_less
=True)
668 tdiff
= Signal(espec
, reset_less
=True)
669 elz
= Signal(reset_less
=True)
670 egz
= Signal(reset_less
=True)
672 # connect multi-shifter to t_inp/out mantissa (and tdiff)
673 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
674 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
675 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
676 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
677 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
679 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
680 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
681 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
682 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
684 # default: A-exp == B-exp, A and B untouched (fall through)
685 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
686 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
687 # only one shifter (muxed)
688 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
689 # exponent of a greater than b: shift b down
690 with m
.If(~self
.i
.out_do_z
):
692 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
695 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
697 # exponent of b greater than a: shift a down
699 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
702 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
705 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
706 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
707 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
712 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Build the "align" state around FPAddAlignSingleMod.

    width:  operand width, forwarded to the module and to FPNumIn
    id_wid: multiplexer-id width, forwarded to the module
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)

    # registered copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
720 def setup(self
, m
, i
):
721 """ links module to inputs and outputs
725 # NOTE: could be done as comb
726 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
727 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
733 class FPAddAlignSingleAdd(FPState
):
735 def __init__(self
, width
, id_wid
):
736 FPState
.__init
__(self
, "align")
739 self
.a1o
= self
.ospec()
742 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
745 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
747 def setup(self
, m
, i
):
748 """ links module to inputs and outputs
751 # chain AddAlignSingle, AddStage0 and AddStage1
752 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
753 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
754 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
756 chain
= StageChain([mod
, a0mod
, a1mod
])
759 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
762 m
.next
= "normalise_1"
765 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the output record of the first add stage.

    width:  forwarded to FPNumBase for z
    id_wid: shape of the mid Signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)

    # the mantissa total is four bits wider than z's mantissa
    tot_width = self.z.m_width + 4
    self.tot = Signal(tot_width, reset_less=True)

    # multiplexer id
    self.mid = Signal(id_wid, reset_less=True)
774 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
775 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
778 class FPAddStage0Mod
:
780 def __init__(self
, width
, id_wid
):
783 self
.i
= self
.ispec()
784 self
.o
= self
.ospec()
787 return FPSCData(self
.width
, self
.id_wid
)
790 return FPAddStage0Data(self
.width
, self
.id_wid
)
792 def process(self
, i
):
795 def setup(self
, m
, i
):
796 """ links module to inputs and outputs
798 m
.submodules
.add0
= self
799 m
.d
.comb
+= self
.i
.eq(i
)
801 def elaborate(self
, platform
):
803 m
.submodules
.add0_in_a
= self
.i
.a
804 m
.submodules
.add0_in_b
= self
.i
.b
805 m
.submodules
.add0_out_z
= self
.o
.z
807 # store intermediate tests (and zero-extended mantissas)
808 seq
= Signal(reset_less
=True)
809 mge
= Signal(reset_less
=True)
810 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
811 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
812 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
813 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
814 am0
.eq(Cat(self
.i
.a
.m
, 0)),
815 bm0
.eq(Cat(self
.i
.b
.m
, 0))
817 # same-sign (both negative or both positive) add mantissas
818 with m
.If(~self
.i
.out_do_z
):
819 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
822 self
.o
.tot
.eq(am0
+ bm0
),
823 self
.o
.z
.s
.eq(self
.i
.a
.s
)
825 # a mantissa greater than b, use a
828 self
.o
.tot
.eq(am0
- bm0
),
829 self
.o
.z
.s
.eq(self
.i
.a
.s
)
831 # b mantissa greater than a, use b
834 self
.o
.tot
.eq(bm0
- am0
),
835 self
.o
.z
.s
.eq(self
.i
.b
.s
)
838 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
840 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
841 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
845 class FPAddStage0(FPState
):
846 """ First stage of add. covers same-sign (add) and subtract
847 special-casing when mantissas are greater or equal, to
848 give greatest accuracy.
def __init__(self, width, id_wid):
    """Build the "add_0" state around FPAddStage0Mod.

    width:  operand width, forwarded to the module
    id_wid: multiplexer-id width, forwarded to the module
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod's constructor requires both width and id_wid;
    # passing width alone raises TypeError at construction time.
    self.mod = FPAddStage0Mod(width, id_wid)
    # local output record with the same spec as the module's output
    self.o = self.mod.ospec()
856 def setup(self
, m
, i
):
857 """ links module to inputs and outputs
861 # NOTE: these could be done as combinatorial (merge add0+add1)
862 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
868 class FPAddStage1Data
:
870 def __init__(self
, width
, id_wid
):
871 self
.z
= FPNumBase(width
, False)
872 self
.out_do_z
= Signal(reset_less
=True)
874 self
.mid
= Signal(id_wid
, reset_less
=True)
877 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
878 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
882 class FPAddStage1Mod(FPState
):
883 """ Second stage of add: preparation for normalisation.
884 detects when tot sum is too big (tot[27] is kinda a carry bit)
887 def __init__(self
, width
, id_wid
):
890 self
.i
= self
.ispec()
891 self
.o
= self
.ospec()
894 return FPAddStage0Data(self
.width
, self
.id_wid
)
897 return FPAddStage1Data(self
.width
, self
.id_wid
)
899 def process(self
, i
):
902 def setup(self
, m
, i
):
903 """ links module to inputs and outputs
905 m
.submodules
.add1
= self
906 m
.submodules
.add1_out_overflow
= self
.o
.of
908 m
.d
.comb
+= self
.i
.eq(i
)
910 def elaborate(self
, platform
):
912 #m.submodules.norm1_in_overflow = self.in_of
913 #m.submodules.norm1_out_overflow = self.out_of
914 #m.submodules.norm1_in_z = self.in_z
915 #m.submodules.norm1_out_z = self.out_z
916 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
917 # tot[-1] (MSB) gets set when the sum overflows. shift result down
918 with m
.If(~self
.i
.out_do_z
):
919 with m
.If(self
.i
.tot
[-1]):
921 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
922 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
923 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
924 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
925 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
926 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
928 # tot[-1] (MSB) zero case
931 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
932 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
933 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
934 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
935 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
938 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
939 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
944 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Build the "add_1" state around FPAddStage1Mod.

    width:  operand width, forwarded to the module and to FPNumBase
    id_wid: multiplexer-id width, forwarded to the module
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod's constructor requires both width and id_wid;
    # passing width alone raises TypeError at construction time.
    self.mod = FPAddStage1Mod(width, id_wid)
    # registered result and overflow (guard/round/sticky) bits
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe; setup() drives it to 0 and then to 1 in sync statements
    self.norm_stb = Signal()
953 def setup(self
, m
, i
):
954 """ links module to inputs and outputs
958 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
960 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
961 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
962 m
.d
.sync
+= self
.norm_stb
.eq(1)
965 m
.next
= "normalise_1"
968 class FPNormaliseModSingle
:
970 def __init__(self
, width
):
972 self
.in_z
= self
.ispec()
973 self
.out_z
= self
.ospec()
976 return FPNumBase(self
.width
, False)
979 return FPNumBase(self
.width
, False)
981 def setup(self
, m
, i
):
982 """ links module to inputs and outputs
984 m
.submodules
.normalise
= self
985 m
.d
.comb
+= self
.i
.eq(i
)
987 def elaborate(self
, platform
):
990 mwid
= self
.out_z
.m_width
+2
991 pe
= PriorityEncoder(mwid
)
992 m
.submodules
.norm_pe
= pe
994 m
.submodules
.norm1_out_z
= self
.out_z
995 m
.submodules
.norm1_in_z
= self
.in_z
997 in_z
= FPNumBase(self
.width
, False)
999 m
.submodules
.norm1_insel_z
= in_z
1000 m
.submodules
.norm1_insel_overflow
= in_of
1002 espec
= (len(in_z
.e
), True)
1003 ediff_n126
= Signal(espec
, reset_less
=True)
1004 msr
= MultiShiftRMerge(mwid
, espec
)
1005 m
.submodules
.multishift_r
= msr
1007 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1008 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1009 # initialise out from in (overridden below)
1010 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1011 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1012 # normalisation decrease condition
1013 decrease
= Signal(reset_less
=True)
1014 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1016 with m
.If(decrease
):
1017 # *sigh* not entirely obvious: count leading zeros (clz)
1018 # with a PriorityEncoder: to find from the MSB
1019 # we reverse the order of the bits.
1020 temp_m
= Signal(mwid
, reset_less
=True)
1021 temp_s
= Signal(mwid
+1, reset_less
=True)
1022 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1024 # cat round and guard bits back into the mantissa
1025 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1026 pe
.i
.eq(temp_m
[::-1]), # inverted
1027 clz
.eq(pe
.o
), # count zeros from MSB down
1028 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1029 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1030 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Create a record of roundz flag, result z, bypass flag and mid.

    width:  forwarded to FPNumBase for z
    id_wid: shape of the mid Signal
    """
    # rounding flag, driven from the overflow bits by the normalise stage
    self.roundz = Signal(reset_less=True)
    # result number
    self.z = FPNumBase(width, False)
    # bypass flag carried through from earlier stages
    self.out_do_z = Signal(reset_less=True)
    # multiplexer id
    self.mid = Signal(id_wid, reset_less=True)
1044 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
1045 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1048 class FPNorm1ModSingle
:
1050 def __init__(self
, width
, id_wid
):
1052 self
.id_wid
= id_wid
1053 self
.i
= self
.ispec()
1054 self
.o
= self
.ospec()
1057 return FPAddStage1Data(self
.width
, self
.id_wid
)
1060 return FPNorm1Data(self
.width
, self
.id_wid
)
1062 def setup(self
, m
, i
):
1063 """ links module to inputs and outputs
1065 m
.submodules
.normalise_1
= self
1066 m
.d
.comb
+= self
.i
.eq(i
)
1068 def process(self
, i
):
1071 def elaborate(self
, platform
):
1074 mwid
= self
.o
.z
.m_width
+2
1075 pe
= PriorityEncoder(mwid
)
1076 m
.submodules
.norm_pe
= pe
1079 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1081 m
.submodules
.norm1_out_z
= self
.o
.z
1082 m
.submodules
.norm1_out_overflow
= of
1083 m
.submodules
.norm1_in_z
= self
.i
.z
1084 m
.submodules
.norm1_in_overflow
= self
.i
.of
1087 m
.submodules
.norm1_insel_z
= i
.z
1088 m
.submodules
.norm1_insel_overflow
= i
.of
1090 espec
= (len(i
.z
.e
), True)
1091 ediff_n126
= Signal(espec
, reset_less
=True)
1092 msr
= MultiShiftRMerge(mwid
, espec
)
1093 m
.submodules
.multishift_r
= msr
1095 m
.d
.comb
+= i
.eq(self
.i
)
1096 # initialise out from in (overridden below)
1097 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1098 m
.d
.comb
+= of
.eq(i
.of
)
1099 # normalisation increase/decrease conditions
1100 decrease
= Signal(reset_less
=True)
1101 increase
= Signal(reset_less
=True)
1102 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1103 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1105 with m
.If(~self
.i
.out_do_z
):
1106 with m
.If(decrease
):
1107 # *sigh* not entirely obvious: count leading zeros (clz)
1108 # with a PriorityEncoder: to find from the MSB
1109 # we reverse the order of the bits.
1110 temp_m
= Signal(mwid
, reset_less
=True)
1111 temp_s
= Signal(mwid
+1, reset_less
=True)
1112 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1113 # make sure that the amount to decrease by does NOT
1114 # go below the minimum non-INF/NaN exponent
1115 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1118 # cat round and guard bits back into the mantissa
1119 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1120 pe
.i
.eq(temp_m
[::-1]), # inverted
1121 clz
.eq(limclz
), # count zeros from MSB down
1122 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1123 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1124 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1125 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1126 # overflow in bits 0..1: got shifted too (leave sticky)
1127 of
.guard
.eq(temp_s
[1]), # guard
1128 of
.round_bit
.eq(temp_s
[0]), # round
1131 with m
.Elif(increase
):
1132 temp_m
= Signal(mwid
+1, reset_less
=True)
1134 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1136 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1137 # connect multi-shifter to inp/out mantissa (and ediff)
1139 msr
.diff
.eq(ediff_n126
),
1140 self
.o
.z
.m
.eq(msr
.m
[3:]),
1141 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1142 # overflow in bits 0..1: got shifted too (leave sticky)
1143 of
.guard
.eq(temp_s
[2]), # guard
1144 of
.round_bit
.eq(temp_s
[1]), # round
1145 of
.sticky
.eq(temp_s
[0]), # sticky
1146 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1149 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1150 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
class FPNorm1ModMulti:
    """ One normalisation pass: moves the mantissa by at most one bit.

        Either (in_z, in_of) or (temp_z, temp_of) is chosen by in_select,
        copied to (out_z, out_of), and then adjusted by one bit position
        if the exponent/mantissa flags of the chosen value ask for it.
        out_norm is high whenever an adjustment was made on this pass.
    """

    def __init__(self, width, single_cycle=True):
        # single_cycle is accepted but not read anywhere in this class
        self.width = width  # elaborate() sizes its local FPNumBase from it
        self.in_select = Signal(reset_less=True)  # 1: in_z/in_of, 0: temp_*
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # elaborate() drives this, so it has to exist on the instance
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies holding whichever source was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            # without this arm the temp_* assignment would always win
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end

        # overrides are combinatorial, matching the defaults set above
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1222 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ Creates the single-cycle "normalise_1" state.

        * width: passed to the normalisation module and to out_z
        * id_wid: passed to the normalisation module
        * single_cycle: accepted, not read by this constructor
    """
    FPState.__init__(self, "normalise_1")
    # FPNorm1ModSingle is built with (width, id_wid) in FPNormToPack.setup;
    # passing width alone would leave id_wid unfilled.
    self.mod = FPNorm1ModSingle(width, id_wid)
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1232 return self
.mod
.ispec()
1235 return self
.mod
.ospec()
def setup(self, m, i):
    """ links module to inputs and outputs

        Delegates entirely to the wrapped module's own setup().
    """
    wrapped = self.mod
    wrapped.setup(m, i)
1242 def action(self
, m
):
1246 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ Creates the multi-cycle "normalise_1" state.

        * width: passed to FPNorm1ModMulti and to the FPNumBase records
        * id_wid: accepted, not read by this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # strobe / acknowledge pair and the accept flag derived from them
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)

    # tested by action() to decide whether to acknowledge
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # registered copies written by action()
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # results handed to the following state
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
def setup(self, m, in_z, in_of, norm_stb):
    """ links module to inputs and outputs

        NOTE(review): this forwards nine arguments to self.mod.setup();
        confirm the wrapped module really provides a setup() with that
        signature.
    """
    own_signals = (self.in_accept, self.temp_z, self.temp_of,
                   self.out_z, self.out_norm)
    self.mod.setup(m, in_z, in_of, norm_stb, *own_signals)

    m.d.comb += self.stb.eq(norm_stb)
    # sets to zero when not in normalise_1 state
    m.d.sync += self.ack.eq(0)
1270 def action(self
, m
):
1271 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1272 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1273 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1274 with m
.If(self
.out_norm
):
1275 with m
.If(self
.in_accept
):
1280 m
.d
.sync
+= self
.ack
.eq(0)
1282 # normalisation not required (or done).
1284 m
.d
.sync
+= self
.ack
.eq(1)
1285 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState):
    """ Normalise, round, correct and pack in one "normalise_1" state.

        The four modules are built in setup() and the packed value and
        multiplex id of the last one are registered into self.out_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width  # ispec(), ospec() and setup() all read this

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # NOTE(review): assumes StageChain offers setup(m, i) like the
        # stage modules in this file do; without it neither `chain` nor
        # `i` is used and nothing feeds the four modules - confirm
        # against example_buf_pipe.
        chain.setup(m, i)
        self.out_z = pmod.ospec()

        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(pmod.o.z.v) # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ Record of one FP number (z) plus a multiplex id (mid).

        * width: passed to FPNumBase
        * id_wid: bit-width of the mid signal
    """
    number = FPNumBase(width, False)
    self.z = number
    self.mid = Signal(id_wid, reset_less=True)
1328 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ Creates the rounding module's input (i) and output (out_z) records.

        * width: kept on the instance for the spec constructors
        * id_wid: kept on the instance for the spec constructors
    """
    # both spec constructors build their record from self.width and
    # self.id_wid, so both must be stored before they are called
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1340 return FPNorm1Data(self
.width
, self
.id_wid
)
1343 return FPRoundData(self
.width
, self
.id_wid
)
1345 def process(self
, i
):
def setup(self, m, i):
    """ registers this module as "roundz" and feeds its input from i
    """
    m.submodules.roundz = self
    feed = self.i.eq(i)
    m.d.comb += feed
def elaborate(self, platform):
    """ Copies i to out_z, adding one to the mantissa when i.roundz is set.

        Returns the Module holding the combinatorial logic.
    """
    m = Module()
    src = self.i
    m.d.comb += self.out_z.eq(src)
    with m.If(src.roundz):
        m.d.comb += self.out_z.z.m.eq(src.z.m + 1) # mantissa rounds up
        # the exponent only moves when the increment above carries out
        with m.If(src.z.m == src.z.m1s): # all 1s
            m.d.comb += self.out_z.z.e.eq(src.z.e + 1) # exponent up
    return m
class FPRound(FPState):
    """ "round" state: wraps FPRoundMod and registers its result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod is built with (width, id_wid) in FPNormToPack.setup;
        # the same pair is needed here.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the id is read from out_z too: the line above already relies on
        # self.mod.out_z, and this code never gives self.mod an `o`
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1388 class FPCorrectionsMod
:
def __init__(self, width, id_wid):
    """ Creates the corrections module's input (i) and output (out_z).

        * width: kept on the instance for the spec constructors
        * id_wid: kept on the instance for the spec constructors
    """
    # both specs return FPRoundData(self.width, self.id_wid)
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1397 return FPRoundData(self
.width
, self
.id_wid
)
1400 return FPRoundData(self
.width
, self
.id_wid
)
1402 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        Registers this module as "corrections" and feeds its input from i.
    """
    m.submodules.corrections = self
    feed = self.i.eq(i)
    m.d.comb += feed
def elaborate(self, platform):
    """ Copies i to out_z, replacing the exponent with i.z.N127 when
        i.z.is_denormalised is set.

        Returns the Module holding the combinatorial logic.
    """
    m = Module()
    m.submodules.corr_in_z = self.i.z
    m.submodules.corr_out_z = self.out_z.z
    m.d.comb += self.out_z.eq(self.i)
    with m.If(self.i.z.is_denormalised):
        m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
    return m
1421 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """ Creates the "corrections" state around an FPCorrectionsMod.

        * width: passed to FPCorrectionsMod
        * id_wid: passed to FPCorrectionsMod
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod.__init__ takes (width, id_wid) with no default
    self.mod = FPCorrectionsMod(width, id_wid)
    self.out_z = self.ospec()
1429 return self
.mod
.ispec()
1432 return self
.mod
.ospec()
1434 def setup(self
, m
, in_z
):
1435 """ links module to inputs and outputs
1437 self
.mod
.setup(m
, in_z
)
1439 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1440 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1442 def action(self
, m
):
def __init__(self, width, id_wid):
    """ Record of one output FP number (z) plus a multiplex id (mid).

        * width: passed to FPNumOut
        * id_wid: bit-width of the mid signal
    """
    number = FPNumOut(width, False)
    self.z = number
    self.mid = Signal(id_wid, reset_less=True)
1453 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ Creates the pack module's input (i) and output (o) records.

        * width: kept on the instance for the spec constructors
        * id_wid: kept on the instance for the spec constructors
    """
    # the specs build FPRoundData / FPPackData from self.width, self.id_wid
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1465 return FPRoundData(self
.width
, self
.id_wid
)
1468 return FPPackData(self
.width
, self
.id_wid
)
1470 def process(self
, i
):
def setup(self, m, in_z):
    """ links module to inputs and outputs

        Registers this module as "pack" and feeds its input from in_z.
    """
    m.submodules.pack = self
    feed = self.i.eq(in_z)
    m.d.comb += feed
def elaborate(self, platform):
    """ Forwards the id and builds the output number from i.z.

        When i.z.is_overflowed is set the output is o.z.inf(sign);
        otherwise it is o.z.create(sign, exponent, mantissa).
        Returns the Module holding the combinatorial logic.
    """
    m = Module()
    m.submodules.pack_in_z = self.i.z
    m.d.comb += self.o.mid.eq(self.i.mid)
    num = self.i.z
    with m.If(num.is_overflowed):
        m.d.comb += self.o.z.inf(num.s)
    with m.Else():
        # must be the alternative arm: unconditionally it would always
        # override the overflow result above
        m.d.comb += self.o.z.create(num.s, num.e, num.m)
    return m
def __init__(self, width, id_wid):
    """ Record of one output FP number (z) plus a multiplex id (mid).

        * width: passed to FPNumOut
        * id_wid: bit-width of the mid signal
    """
    number = FPNumOut(width, False)
    self.z = number
    self.mid = Signal(id_wid, reset_less=True)
1496 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
class FPPack(FPState):
    """ "pack" state: wraps FPPackMod and registers its packed result.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod is built with (width, id_wid) in FPNormToPack.setup;
        # the same pair is needed here.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        return self.mod.ispec()

    def ospec(self):
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the pack module's output record is `o` (the next line already
        # reads self.mod.o.mid); the value is taken from o.z.v, the same
        # path FPNormToPack.setup uses.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ Output state: writes in_z.v (and optionally the id) to out_z and
        waits on out_z.z's stb/ack pair before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these, so they must be kept
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): sync domain chosen to match the id and strobe
        # updates around it - confirm
        m.d.sync += [
          self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # both high: drop the strobe and leave this state
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ Output state for an indexed set of outputs: writes in_z.v to
        out_zs[in_mid] and waits on that entry's stb/ack pair before
        moving to to_state.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        self.in_z = in_z  # action() reads self.in_z.v
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        slot = self.out_zs[self.in_mid]  # the entry selected by the id
        m.d.comb += [outz_stb.eq(slot.stb),
                     outz_ack.eq(slot.ack),
                    ]
        # NOTE(review): sync domain chosen to match the strobe updates
        # below - confirm
        m.d.sync += [
          slot.v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # both high: drop the strobe and leave this state
            m.d.sync += slot.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += slot.stb.eq(1)
class FPADDBaseData:
    """ Input record for the adder: operands a and b plus a multiplex id.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a  = Signal(width)
        self.b  = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ Returns the list of assignments copying a, b and mid from i.

            Kept as its own method: a list returned from __init__ would
            raise TypeError on construction.
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ Record of one FPOp (z) plus a multiplex id (mid).

        * width: passed to FPOp
        * id_wid: bit-width of the mid signal
    """
    operand = FPOp(width)
    self.z = operand
    self.mid = Signal(id_wid, reset_less=True)
1594 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    # width and id_wid are read by the ispec/ospec constructors, so they
    # have to be stored before self.i / self.o are created
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    # filled by add_state(), walked when the state machine is built
    self.states = []
1619 return FPADDBaseData(self
.width
, self
.id_wid
)
1622 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ Appends state to self.states and hands it back.

        The fragment builders write `x = self.add_state(...)` and then
        call `x.setup(...)`, so the state object must be returned.
    """
    self.states.append(state)
    return state
1628 def get_fragment(self
, platform
=None):
1629 """ creates the HDL code-fragment for FPAdd
1632 m
.submodules
.out_z
= self
.o
.z
1633 m
.submodules
.in_t
= self
.in_t
1635 self
.get_compact_fragment(m
, platform
)
1637 self
.get_longer_fragment(m
, platform
)
1639 with m
.FSM() as fsm
:
1641 for state
in self
.states
:
1642 with m
.State(state
.state_from
):
1647 def get_longer_fragment(self
, m
, platform
=None):
1649 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1651 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1655 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1656 sc
.setup(m
, a
, b
, self
.in_mid
)
1658 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1659 dn
.setup(m
, a
, b
, sc
.in_mid
)
1661 if self
.single_cycle
:
1662 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1663 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1665 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1666 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1668 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1669 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1671 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1672 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1674 if self
.single_cycle
:
1675 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1676 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1678 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1679 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1681 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1682 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1684 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1685 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1687 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1688 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1690 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1691 pa
.in_mid
, self
.out_mid
))
1693 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1694 pa
.in_mid
, self
.out_mid
))
1696 def get_compact_fragment(self
, m
, platform
=None):
1698 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1699 self
.width
, self
.id_wid
))
1700 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1702 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1705 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1708 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1709 n1
.setup(m
, alm
.a1o
)
1711 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1712 n1
.out_z
.mid
, self
.o
.mid
))
1714 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1715 sc
.o
.mid
, self
.o
.mid
))
1718 class FPADDBase(FPState
):
def __init__(self, width, id_wid=None, single_cycle=False):
    """ IEEE754 FP Add, wrapped as the "fpadd" state

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPState.__init__(self, "fpadd")
    self.single_cycle = single_cycle

    # the adder itself; the output spec is taken from it
    self.mod = FPADDBaseMod(width, id_wid, single_cycle)
    self.o = self.ospec()

    # input trigger and input record
    self.in_t = Trigger()
    self.i = self.ispec()

    self.z_done = Signal(reset_less=True) # connects to out_z Strobe
    self.in_accept = Signal(reset_less=True)
    self.add_stb = Signal(reset_less=True)
    self.add_ack = Signal(reset=0, reset_less=True)
1742 return self
.mod
.ispec()
1745 return self
.mod
.ospec()
def setup(self, m, i, add_stb, in_mid):
    """ Wires this state to the wrapped adder and registers the adder
        as the "fpadd" submodule.  in_mid is accepted but not read.
    """
    adder = self.mod

    # towards the adder: operands, input strobe, output acknowledge
    m.d.comb += [self.i.eq(i),
                 adder.i.eq(self.i),
                 adder.in_t.stb.eq(self.in_t.stb),
                 adder.o.z.ack.eq(self.o.z.ack),
                ]
    # back from the adder: done flag, input acknowledge, id and result
    m.d.comb += [self.z_done.eq(adder.o.z.trigger),
                 self.in_t.ack.eq(adder.in_t.ack),
                 self.o.mid.eq(adder.o.mid),
                 self.o.z.v.eq(adder.o.z.v),
                 self.o.z.stb.eq(adder.o.z.stb),
                ]

    m.d.sync += self.add_stb.eq(add_stb)
    m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
    m.d.sync += self.o.z.ack.eq(0) # likewise

    m.submodules.fpadd = adder
1767 def action(self
, m
):
1769 # in_accept is set on incoming strobe HIGH and ack LOW.
1770 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1772 #with m.If(self.in_t.ack):
1773 # m.d.sync += self.in_t.stb.eq(0)
1774 with m
.If(~self
.z_done
):
1775 # not done: test for accepting an incoming operand pair
1776 with m
.If(self
.in_accept
):
1778 self
.add_ack
.eq(1), # acknowledge receipt...
1779 self
.in_t
.stb
.eq(1), # initiate add
1782 m
.d
.sync
+= [self
.add_ack
.eq(0),
1783 self
.in_t
.stb
.eq(0),
1787 # done: acknowledge, and write out id and value
1788 m
.d
.sync
+= [self
.add_ack
.eq(1),
1795 if self
.in_mid
is not None:
1796 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1799 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1801 # move to output state on detecting z ack
1802 with m
.If(self
.out_z
.trigger
):
1803 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1806 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1810 def __init__(self
, width
, id_wid
):
1812 self
.id_wid
= id_wid
1814 for i
in range(rs_sz
):
1816 out_z
.name
= "out_z_%d" % i
1818 self
.res
= Array(res
)
1819 self
.in_z
= FPOp(width
)
1820 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
def setup(self, m, in_z, in_mid):
    """ Combinatorially copies in_z and in_mid into this object's own
        in_z / in_mid.
    """
    m.d.comb += self.in_z.eq(in_z)
    m.d.comb += self.in_mid.eq(in_mid)
1826 def get_fragment(self
, platform
=None):
1827 """ creates the HDL code-fragment for FPAdd
1830 m
.submodules
.res_in_z
= self
.in_z
1831 m
.submodules
+= self
.res
1843 """ FPADD: stages as follows:
1849 FPAddBase---> FPAddBaseMod
1851 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1853 FPAddBase is tricky: it is both a stage and *has* stages.
1854 Connection to FPAddBaseMod therefore requires an in stb/ack
1855 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1856 needs to be the thing that raises the incoming stb.
1859 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1862 * width: bit-width of IEEE754. supported: 16, 32, 64
1863 * id_wid: an identifier that is sync-connected to the input
1864 * single_cycle: True indicates each stage to complete in 1 clock
1867 self
.id_wid
= id_wid
1868 self
.single_cycle
= single_cycle
1870 #self.out_z = FPOp(width)
1871 self
.ids
= FPID(id_wid
)
1874 for i
in range(rs_sz
):
1877 in_a
.name
= "in_a_%d" % i
1878 in_b
.name
= "in_b_%d" % i
1879 rs
.append((in_a
, in_b
))
1883 for i
in range(rs_sz
):
1885 out_z
.name
= "out_z_%d" % i
1887 self
.res
= Array(res
)
def add_state(self, state):
    """ Appends state to self.states and hands it back.

        get_fragment writes `x = self.add_state(...)` and goes on to use
        `x`, so the state object must be returned.
    """
    self.states.append(state)
    return state
1895 def get_fragment(self
, platform
=None):
1896 """ creates the HDL code-fragment for FPAdd
1899 m
.submodules
+= self
.rs
1901 in_a
= self
.rs
[0][0]
1902 in_b
= self
.rs
[0][1]
1904 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1909 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1914 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1915 ab
= self
.add_state(ab
)
1916 abd
= ab
.ispec() # create an input spec object for FPADDBase
1917 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1918 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1921 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1924 with m
.FSM() as fsm
:
1926 for state
in self
.states
:
1927 with m
.State(state
.state_from
):
1933 if __name__
== "__main__":
1935 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1936 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1937 alu
.rs
[0][1].ports() + \
1938 alu
.res
[0].ports() + \
1939 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1941 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1942 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1943 alu
.in_t
.ports() + \
1944 alu
.out_z
.ports() + \
1945 [alu
.in_mid
, alu
.out_mid
])
1948 # works... but don't use, just do "python fname.py convert -t v"
1949 #print (verilog.convert(alu, ports=[
1950 # ports=alu.in_a.ports() + \
1951 # alu.in_b.ports() + \
1952 # alu.out_z.ports())