1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """ Record which state this object stands for.

        state_from: label kept on the instance unchanged; subclasses in
        this file pass string names such as "align" or "add_0".
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ Create the ports of the single-operand fetch module.

        width: handed to FPOp() for the incoming operand and used as
        the shape of the plain output Signal.
    """
    # incoming operand; elaborate() reads its .ack, .stb and .v
    self.in_op = FPOp(width)
    # value captured from in_op.v
    self.out_op = Signal(width)
    # driven combinatorially from in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ State wrapper around FPGet2OpMod.

        in_state:      name of this state (given to FPState.__init__)
        out_state:     name of the state selected through m.next
        width, id_wid: forwarded unchanged to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # output record has the same layout as the module's output
    self.o = self.mod.ospec()
    # single-bit handshake/decode signals, created without a reset
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ Bundle of two FP operands plus an id.

        width, m_extra: forwarded to FPNumBase() for both operands
        id_wid:         shape of the `mid` Signal
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # presumably the multiplexer id carried alongside the data -- confirm
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ Bundle of operands a/b, result z, the out_do_z flag and an id.

        width:  forwarded to FPNumBase() (a, b) and FPNumOut() (z)
        id_wid: shape of the `mid` Signal
    """
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    self.z = FPNumOut(width, False)
    # single-bit flag travelling with z; created without a reset
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
273 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
274 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
277 class FPAddSpecialCasesMod
:
278 """ special cases: NaNs, infs, zeros, denormalised
279 NOTE: some of these are unique to add. see "Special Operations"
280 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
283 def __init__(self
, width
, id_wid
):
286 self
.i
= self
.ispec()
287 self
.o
= self
.ospec()
290 return FPNumBase2Ops(self
.width
, self
.id_wid
)
293 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered on it
           as the submodule named "specialcases"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.specialcases = self
    m.d.comb += self.i.eq(i)
301 def elaborate(self
, platform
):
304 m
.submodules
.sc_in_a
= self
.i
.a
305 m
.submodules
.sc_in_b
= self
.i
.b
306 m
.submodules
.sc_out_z
= self
.o
.z
309 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
312 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
314 # if a is NaN or b is NaN return NaN
315 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
316 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
317 m
.d
.comb
+= self
.o
.z
.nan(0)
319 # XXX WEIRDNESS for FP16 non-canonical NaN handling
322 ## if a is zero and b is NaN return -b
323 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
324 # m.d.comb += self.o.out_do_z.eq(1)
325 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
327 ## if b is zero and a is NaN return -a
328 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
329 # m.d.comb += self.o.out_do_z.eq(1)
330 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
332 ## if a is -zero and b is NaN return -b
333 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
334 # m.d.comb += self.o.out_do_z.eq(1)
335 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
337 ## if b is -zero and a is NaN return -a
338 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
339 # m.d.comb += self.o.out_do_z.eq(1)
340 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
342 # if a is inf return inf (or NaN)
343 with m
.Elif(self
.i
.a
.is_inf
):
344 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
346 # if a is inf and signs don't match return NaN
347 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
348 m
.d
.comb
+= self
.o
.z
.nan(0)
350 # if b is inf return inf
351 with m
.Elif(self
.i
.b
.is_inf
):
352 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
355 # if a is zero and b zero return signed-a/b
356 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
362 # if a is zero return b
363 with m
.Elif(self
.i
.a
.is_zero
):
364 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
368 # if b is zero return a
369 with m
.Elif(self
.i
.b
.is_zero
):
370 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
371 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
374 # if a equal to -b return zero (+ve zero)
375 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
376 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
377 m
.d
.comb
+= self
.o
.z
.zero(0)
379 # Denormalised Number checks next, so pass a/b data through
381 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
382 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
383 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
385 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
391 def __init__(self
, id_wid
):
394 self
.in_mid
= Signal(id_wid
, reset_less
=True)
395 self
.out_mid
= Signal(id_wid
, reset_less
=True)
401 if self
.id_wid
is not None:
402 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
405 class FPAddSpecialCases(FPState
):
406 """ special cases: NaNs, infs, zeros, denormalised
407 NOTE: some of these are unique to add. see "Special Operations"
408 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ State wrapper for the special-cases module.

        width, id_wid: both forwarded to FPAddSpecialCasesMod, whose
        constructor takes the two of them with no defaults.
    """
    FPState.__init__(self, "special_cases")
    # id_wid has to be passed as well: the previous one-argument call
    # could not satisfy FPAddSpecialCasesMod.__init__(width, id_wid)
    # and raised TypeError on construction.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
417 def setup(self
, m
, i
):
418 """ links module to inputs and outputs
420 self
.mod
.setup(m
, i
, self
.out_do_z
)
421 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
422 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
426 with m
.If(self
.out_do_z
):
429 m
.next
= "denormalise"
432 class FPAddSpecialCasesDeNorm(FPState
):
433 """ special cases: NaNs, infs, zeros, denormalised
434 NOTE: some of these are unique to add. see "Special Operations"
435 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ Combined special-cases + denormalise state.

        Registers itself under the state name "special_cases" and
        builds both worker modules with the same (width, id_wid).
    """
    FPState.__init__(self, "special_cases")

    # special-cases part
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise part; self.o mirrors its output layout
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
447 def setup(self
, m
, i
):
448 """ links module to inputs and outputs
450 self
.smod
.setup(m
, i
)
451 self
.dmod
.setup(m
, self
.smod
.o
)
452 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
455 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
456 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
458 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
461 with m
.If(self
.out_do_z
):
467 class FPAddDeNormMod(FPState
):
469 def __init__(self
, width
, id_wid
):
472 self
.i
= self
.ispec()
473 self
.o
= self
.ospec()
476 return FPSCData(self
.width
, self
.id_wid
)
479 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered on it
           as the submodule named "denormalise"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.denormalise = self
    m.d.comb += self.i.eq(i)
487 def elaborate(self
, platform
):
489 m
.submodules
.denorm_in_a
= self
.i
.a
490 m
.submodules
.denorm_in_b
= self
.i
.b
491 m
.submodules
.denorm_out_a
= self
.o
.a
492 m
.submodules
.denorm_out_b
= self
.o
.b
494 with m
.If(~self
.i
.out_do_z
):
495 # XXX hmmm, don't like repeating identical code
496 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
497 with m
.If(self
.i
.a
.exp_n127
):
498 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
500 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
502 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
503 with m
.If(self
.i
.b
.exp_n127
):
504 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
506 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
508 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
509 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
510 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
515 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ State wrapper for the denormalise module.

        width:  forwarded to FPAddDeNormMod and to the FPNumBase outputs
        id_wid: forwarded to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid) with no default for
    # id_wid, so the previous one-argument call raised TypeError.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
523 def setup(self
, m
, i
):
524 """ links module to inputs and outputs
528 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
529 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
532 # Denormalised Number checks
536 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ Create the ports of the multi-cycle align module.

        width: forwarded to FPNumBase() for the inputs and to
        FPNumIn(None, ...) for the outputs.
    """
    # operands coming in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # operands going out
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # elaborate() defaults this to 0 and sets it to 1 in the
    # "exponents equal" case
    self.exp_eq = Signal(reset_less=True)
545 def elaborate(self
, platform
):
546 # This one however (single-cycle) will do the shift
551 m
.submodules
.align_in_a
= self
.in_a
552 m
.submodules
.align_in_b
= self
.in_b
553 m
.submodules
.align_out_a
= self
.out_a
554 m
.submodules
.align_out_b
= self
.out_b
556 # NOTE: this does *not* do single-cycle multi-shifting,
557 # it *STAYS* in the align state until exponents match
559 # exponent of a greater than b: shift b down
560 m
.d
.comb
+= self
.exp_eq
.eq(0)
561 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
562 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
563 agtb
= Signal(reset_less
=True)
564 altb
= Signal(reset_less
=True)
565 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
566 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
568 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
569 # exponent of b greater than a: shift a down
571 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
572 # exponents equal: move to next stage.
574 m
.d
.comb
+= self
.exp_eq
.eq(1)
578 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ State wrapper (state name "align") for FPAddAlignMultiMod.

        width:  forwarded to the module and to the FPNumIn outputs
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # local copy of the module's exp_eq flag
    self.exp_eq = Signal(reset_less=True)
587 def setup(self
, m
, in_a
, in_b
):
588 """ links module to inputs and outputs
590 m
.submodules
.align
= self
.mod
591 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
592 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
593 #m.d.comb += self.out_a.eq(self.mod.out_a)
594 #m.d.comb += self.out_b.eq(self.mod.out_b)
595 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
596 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
597 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
600 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ Bundle of FPNumIn operands a/b, result z, out_do_z and an id.

        width:  forwarded to FPNumIn(None, ...) (a, b) and FPNumOut() (z)
        id_wid: shape of the `mid` Signal
    """
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    self.z = FPNumOut(width, False)
    # single-bit flag travelling with z; created without a reset
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
614 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
615 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
618 class FPAddAlignSingleMod
:
620 def __init__(self
, width
, id_wid
):
623 self
.i
= self
.ispec()
624 self
.o
= self
.ospec()
627 return FPSCData(self
.width
, self
.id_wid
)
630 return FPNumIn2Ops(self
.width
, self
.id_wid
)
632 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered on it
           as the submodule named "align"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.align = self
    m.d.comb += self.i.eq(i)
641 def elaborate(self
, platform
):
642 """ Aligns A against B or B against A, depending on which has the
643 greater exponent. This is done in a *single* cycle using
644 variable-width bit-shift
646 the shifter used here is quite expensive in terms of gates.
647 Mux A or B in (and out) into temporaries, as only one of them
648 needs to be aligned against the other
652 m
.submodules
.align_in_a
= self
.i
.a
653 m
.submodules
.align_in_b
= self
.i
.b
654 m
.submodules
.align_out_a
= self
.o
.a
655 m
.submodules
.align_out_b
= self
.o
.b
657 # temporary (muxed) input and output to be shifted
658 t_inp
= FPNumBase(self
.width
)
659 t_out
= FPNumIn(None, self
.width
)
660 espec
= (len(self
.i
.a
.e
), True)
661 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
662 m
.submodules
.align_t_in
= t_inp
663 m
.submodules
.align_t_out
= t_out
664 m
.submodules
.multishift_r
= msr
666 ediff
= Signal(espec
, reset_less
=True)
667 ediffr
= Signal(espec
, reset_less
=True)
668 tdiff
= Signal(espec
, reset_less
=True)
669 elz
= Signal(reset_less
=True)
670 egz
= Signal(reset_less
=True)
672 # connect multi-shifter to t_inp/out mantissa (and tdiff)
673 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
674 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
675 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
676 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
677 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
679 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
680 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
681 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
682 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
684 # default: A-exp == B-exp, A and B untouched (fall through)
685 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
686 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
687 # only one shifter (muxed)
688 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
689 # exponent of a greater than b: shift b down
690 with m
.If(~self
.i
.out_do_z
):
692 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
695 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
697 # exponent of b greater than a: shift a down
699 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
702 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
705 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
706 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
707 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
712 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ State wrapper (state name "align") for FPAddAlignSingleMod.

        width:  forwarded to the module and to the FPNumIn outputs
        id_wid: forwarded to the module
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
720 def setup(self
, m
, i
):
721 """ links module to inputs and outputs
725 # NOTE: could be done as comb
726 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
727 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
733 class FPAddAlignSingleAdd(FPState
):
735 def __init__(self
, width
, id_wid
):
736 FPState
.__init
__(self
, "align")
739 self
.a1o
= self
.ospec()
742 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
745 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
747 def setup(self
, m
, i
):
748 """ links module to inputs and outputs
751 # chain AddAlignSingle, AddStage0 and AddStage1
752 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
753 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
754 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
756 chain
= StageChain([mod
, a0mod
, a1mod
])
759 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
762 m
.next
= "normalise_1"
765 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ Output record of the first add stage.

        width:  forwarded to FPNumBase() for z
        id_wid: shape of the `mid` Signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    # mantissa total: four bits wider than z's mantissa width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
774 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
775 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
778 class FPAddStage0Mod
:
780 def __init__(self
, width
, id_wid
):
783 self
.i
= self
.ispec()
784 self
.o
= self
.ospec()
787 return FPSCData(self
.width
, self
.id_wid
)
790 return FPAddStage0Data(self
.width
, self
.id_wid
)
792 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered on it
           as the submodule named "add0"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.add0 = self
    m.d.comb += self.i.eq(i)
801 def elaborate(self
, platform
):
803 m
.submodules
.add0_in_a
= self
.i
.a
804 m
.submodules
.add0_in_b
= self
.i
.b
805 m
.submodules
.add0_out_z
= self
.o
.z
807 # store intermediate tests (and zero-extended mantissas)
808 seq
= Signal(reset_less
=True)
809 mge
= Signal(reset_less
=True)
810 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
811 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
812 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
813 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
814 am0
.eq(Cat(self
.i
.a
.m
, 0)),
815 bm0
.eq(Cat(self
.i
.b
.m
, 0))
817 # same-sign (both negative or both positive) add mantissas
818 with m
.If(~self
.i
.out_do_z
):
819 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
822 self
.o
.tot
.eq(am0
+ bm0
),
823 self
.o
.z
.s
.eq(self
.i
.a
.s
)
825 # a mantissa greater than b, use a
828 self
.o
.tot
.eq(am0
- bm0
),
829 self
.o
.z
.s
.eq(self
.i
.a
.s
)
831 # b mantissa greater than a, use b
834 self
.o
.tot
.eq(bm0
- am0
),
835 self
.o
.z
.s
.eq(self
.i
.b
.s
)
838 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
840 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
841 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
845 class FPAddStage0(FPState
):
846 """ First stage of add. covers same-sign (add) and subtract
847 special-casing when mantissas are greater or equal, to
848 give greatest accuracy.
def __init__(self, width, id_wid):
    """ State wrapper (state name "add_0") for FPAddStage0Mod.

        width, id_wid: both forwarded to FPAddStage0Mod, whose
        constructor takes the two of them with no defaults.
    """
    FPState.__init__(self, "add_0")
    # pass id_wid too: FPAddStage0Mod.__init__(width, id_wid) cannot be
    # called with width alone (TypeError on construction).
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
856 def setup(self
, m
, i
):
857 """ links module to inputs and outputs
861 # NOTE: these could be done as combinatorial (merge add0+add1)
862 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
868 class FPAddStage1Data
:
870 def __init__(self
, width
, id_wid
):
871 self
.z
= FPNumBase(width
, False)
872 self
.out_do_z
= Signal(reset_less
=True)
874 self
.mid
= Signal(id_wid
, reset_less
=True)
877 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
878 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
882 class FPAddStage1Mod(FPState
):
883 """ Second stage of add: preparation for normalisation.
884 detects when tot sum is too big (tot[27] is kinda a carry bit)
887 def __init__(self
, width
, id_wid
):
890 self
.i
= self
.ispec()
891 self
.o
= self
.ospec()
894 return FPAddStage0Data(self
.width
, self
.id_wid
)
897 return FPAddStage1Data(self
.width
, self
.id_wid
)
899 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; receives this object as the
           submodule "add1" and self.o.of as "add1_out_overflow"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.add1 = self
    m.submodules.add1_out_overflow = self.o.of

    m.d.comb += self.i.eq(i)
910 def elaborate(self
, platform
):
912 #m.submodules.norm1_in_overflow = self.in_of
913 #m.submodules.norm1_out_overflow = self.out_of
914 #m.submodules.norm1_in_z = self.in_z
915 #m.submodules.norm1_out_z = self.out_z
916 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
917 # tot[-1] (MSB) gets set when the sum overflows. shift result down
918 with m
.If(~self
.i
.out_do_z
):
919 with m
.If(self
.i
.tot
[-1]):
921 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
922 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
923 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
924 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
925 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
926 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
928 # tot[-1] (MSB) zero case
931 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
932 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
933 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
934 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
935 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
938 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
939 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
944 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ State wrapper (state name "add_1") for FPAddStage1Mod.

        width:  forwarded to the module and to FPNumBase() for out_z
        id_wid: forwarded to the module
    """
    FPState.__init__(self, "add_1")
    # pass id_wid too: FPAddStage1Mod.__init__(width, id_wid) cannot be
    # called with width alone (TypeError on construction).
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe for the following normalise state (has a reset, unlike
    # most other signals in this file)
    self.norm_stb = Signal()
953 def setup(self
, m
, i
):
954 """ links module to inputs and outputs
958 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
960 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
961 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
962 m
.d
.sync
+= self
.norm_stb
.eq(1)
965 m
.next
= "normalise_1"
968 class FPNormaliseModSingle
:
970 def __init__(self
, width
):
972 self
.in_z
= self
.ispec()
973 self
.out_z
= self
.ospec()
976 return FPNumBase(self
.width
, False)
979 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered on it
           as the submodule named "normalise"
        i: input, copied combinatorially into self.i
    """
    m.submodules.normalise = self
    # NOTE(review): the constructor lines visible in this file assign
    # self.in_z / self.out_z rather than self.i -- confirm self.i exists.
    m.d.comb += self.i.eq(i)
987 def elaborate(self
, platform
):
990 mwid
= self
.out_z
.m_width
+2
991 pe
= PriorityEncoder(mwid
)
992 m
.submodules
.norm_pe
= pe
994 m
.submodules
.norm1_out_z
= self
.out_z
995 m
.submodules
.norm1_in_z
= self
.in_z
997 in_z
= FPNumBase(self
.width
, False)
999 m
.submodules
.norm1_insel_z
= in_z
1000 m
.submodules
.norm1_insel_overflow
= in_of
1002 espec
= (len(in_z
.e
), True)
1003 ediff_n126
= Signal(espec
, reset_less
=True)
1004 msr
= MultiShiftRMerge(mwid
, espec
)
1005 m
.submodules
.multishift_r
= msr
1007 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1008 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1009 # initialise out from in (overridden below)
1010 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1011 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1012 # normalisation decrease condition
1013 decrease
= Signal(reset_less
=True)
1014 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1016 with m
.If(decrease
):
1017 # *sigh* not entirely obvious: count leading zeros (clz)
1018 # with a PriorityEncoder: to find from the MSB
1019 # we reverse the order of the bits.
1020 temp_m
= Signal(mwid
, reset_less
=True)
1021 temp_s
= Signal(mwid
+1, reset_less
=True)
1022 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1024 # cat round and guard bits back into the mantissa
1025 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1026 pe
.i
.eq(temp_m
[::-1]), # inverted
1027 clz
.eq(pe
.o
), # count zeros from MSB down
1028 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1029 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1030 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ Bundle of z, the roundz and out_do_z flags, and an id.

        width:  forwarded to FPNumBase() for z
        id_wid: shape of the `mid` Signal
    """
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1044 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
1045 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1048 class FPNorm1ModSingle
:
1050 def __init__(self
, width
, id_wid
):
1052 self
.id_wid
= id_wid
1053 self
.i
= self
.ispec()
1054 self
.o
= self
.ospec()
1057 return FPAddStage1Data(self
.width
, self
.id_wid
)
1060 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered on it
           as the submodule named "normalise_1"
        i: input record, copied combinatorially into self.i
    """
    m.submodules.normalise_1 = self
    m.d.comb += self.i.eq(i)
1068 def process(self
, i
):
1071 def elaborate(self
, platform
):
1074 mwid
= self
.o
.z
.m_width
+2
1075 pe
= PriorityEncoder(mwid
)
1076 m
.submodules
.norm_pe
= pe
1079 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1081 m
.submodules
.norm1_out_z
= self
.o
.z
1082 m
.submodules
.norm1_out_overflow
= of
1083 m
.submodules
.norm1_in_z
= self
.i
.z
1084 m
.submodules
.norm1_in_overflow
= self
.i
.of
1087 m
.submodules
.norm1_insel_z
= i
.z
1088 m
.submodules
.norm1_insel_overflow
= i
.of
1090 espec
= (len(i
.z
.e
), True)
1091 ediff_n126
= Signal(espec
, reset_less
=True)
1092 msr
= MultiShiftRMerge(mwid
, espec
)
1093 m
.submodules
.multishift_r
= msr
1095 m
.d
.comb
+= i
.eq(self
.i
)
1096 # initialise out from in (overridden below)
1097 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1098 m
.d
.comb
+= of
.eq(i
.of
)
1099 # normalisation increase/decrease conditions
1100 decrease
= Signal(reset_less
=True)
1101 increase
= Signal(reset_less
=True)
1102 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1103 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1105 with m
.If(~self
.i
.out_do_z
):
1106 with m
.If(decrease
):
1107 # *sigh* not entirely obvious: count leading zeros (clz)
1108 # with a PriorityEncoder: to find from the MSB
1109 # we reverse the order of the bits.
1110 temp_m
= Signal(mwid
, reset_less
=True)
1111 temp_s
= Signal(mwid
+1, reset_less
=True)
1112 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1113 # make sure that the amount to decrease by does NOT
1114 # go below the minimum non-INF/NaN exponent
1115 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1118 # cat round and guard bits back into the mantissa
1119 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1120 pe
.i
.eq(temp_m
[::-1]), # inverted
1121 clz
.eq(limclz
), # count zeros from MSB down
1122 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1123 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1124 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1125 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1126 # overflow in bits 0..1: got shifted too (leave sticky)
1127 of
.guard
.eq(temp_s
[1]), # guard
1128 of
.round_bit
.eq(temp_s
[0]), # round
1131 with m
.Elif(increase
):
1132 temp_m
= Signal(mwid
+1, reset_less
=True)
1134 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1136 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1137 # connect multi-shifter to inp/out mantissa (and ediff)
1139 msr
.diff
.eq(ediff_n126
),
1140 self
.o
.z
.m
.eq(msr
.m
[3:]),
1141 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1142 # overflow in bits 0..1: got shifted too (leave sticky)
1143 of
.guard
.eq(temp_s
[2]), # guard
1144 of
.round_bit
.eq(temp_s
[1]), # round
1145 of
.sticky
.eq(temp_s
[0]), # sticky
1146 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1149 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1150 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
class FPNorm1ModMulti:
    """Combinatorial single-step normaliser with a selectable source.

    Each evaluation applies at most one normalisation step to the selected
    number/overflow pair:

    * ``in_select`` high: operate on ``in_z`` / ``in_of``
    * ``in_select`` low:  operate on ``temp_z`` / ``temp_of``

    The result is presented on ``out_z`` / ``out_of``.  ``out_norm`` is high
    whenever a step (exponent decrease or increase) was applied.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: bit-width passed to FPNumBase
            * single_cycle: accepted for interface compatibility; not used
              by this module
        """
        self.width = width  # read back by elaborate()
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven by elaborate(): high when a normalisation step was taken
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies holding whichever source was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)

        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)

        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end

        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                # must follow the shift: overrides bit 0 of the result
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit),
            ]

        return m
1222 class FPNorm1Single(FPState
):
def __init__(self, width, id_wid, single_cycle=True):
    """ * width: bit-width of the number being normalised
        * id_wid: width of the multiplex id, forwarded to the module
        * single_cycle: accepted for interface compatibility; not used here
    """
    FPState.__init__(self, "normalise_1")
    # forward id_wid: the (width, id_wid) form is how FPNorm1ModSingle is
    # constructed elsewhere in this file
    self.mod = FPNorm1ModSingle(width, id_wid)
    self.o = self.ospec()
    self.out_z = FPNumBase(width, False)
    self.out_roundz = Signal(reset_less=True)
1232 return self
.mod
.ispec()
1235 return self
.mod
.ospec()
1237 def setup(self
, m
, i
):
1238 """ links module to inputs and outputs
1240 self
.mod
.setup(m
, i
)
1242 def action(self
, m
):
1246 class FPNorm1Multi(FPState
):
def __init__(self, width, id_wid):
    """ * width: bit-width handed to the module and to FPNumBase
        * id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "normalise_1")
    self.mod = FPNorm1ModMulti(width)

    # handshake / control
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.in_accept = Signal(reset_less=True)
    self.out_norm = Signal(reset_less=True)

    # intermediate (looped-back) value and its overflow bits
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # results
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
def setup(self, m, in_z, in_of, norm_stb):
    """ links module to inputs and outputs

        The module's setup receives the incoming number/overflow/strobe
        followed by this state's own accept, temp and output signals.
    """
    own_signals = (self.in_accept, self.temp_z, self.temp_of,
                   self.out_z, self.out_norm)
    self.mod.setup(m, in_z, in_of, norm_stb, *own_signals)

    m.d.comb += self.stb.eq(norm_stb)
    # sets to zero when not in normalise_1 state
    m.d.sync += self.ack.eq(0)
1270 def action(self
, m
):
1271 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1272 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1273 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1274 with m
.If(self
.out_norm
):
1275 with m
.If(self
.in_accept
):
1280 m
.d
.sync
+= self
.ack
.eq(0)
1282 # normalisation not required (or done).
1284 m
.d
.sync
+= self
.ack
.eq(1)
1285 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1288 class FPNormToPack(FPState
):
def __init__(self, width, id_wid):
    """ * width: bit-width used to build the chained modules
        * id_wid: width of the multiplex id carried through the chain
    """
    FPState.__init__(self, "normalise_1")
    self.id_wid = id_wid
    # setup() and the spec helpers read self.width
    self.width = width
1296 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1299 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1301 def setup(self
, m
, i
):
1302 """ links module to inputs and outputs
1305 # Normalisation, Rounding Corrections, Pack - in a chain
1306 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1307 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1308 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1309 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1310 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1312 self
.out_z
= pmod
.ospec()
1314 m
.d
.sync
+= self
.out_z
.mid
.eq(pmod
.o
.mid
)
1315 m
.d
.sync
+= self
.out_z
.z
.v
.eq(pmod
.o
.z
.v
) # outputs packed result
def action(self, m):
    """ FSM transition: always proceed to the "pack_put_z" state """
    next_state = "pack_put_z"
    m.next = next_state
1323 def __init__(self
, width
, id_wid
):
1324 self
.z
= FPNumBase(width
, False)
1325 self
.out_do_z
= Signal(reset_less
=True)
1326 self
.mid
= Signal(id_wid
, reset_less
=True)
1329 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
def __init__(self, width, id_wid):
    """ * width: bit-width used when building the input/output specs
        * id_wid: width of the multiplex id carried alongside the data
    """
    # both must be stored before the spec helpers run: they read
    # self.width and self.id_wid
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1342 return FPNorm1Data(self
.width
, self
.id_wid
)
1345 return FPRoundData(self
.width
, self
.id_wid
)
1347 def process(self
, i
):
1350 def setup(self
, m
, i
):
1351 m
.submodules
.roundz
= self
1352 m
.d
.comb
+= self
.i
.eq(i
)
def elaborate(self, platform):
    """ Builds the rounding logic.

        The output defaults to a copy of the input.  When ``out_do_z`` is
        low and ``roundz`` is set, the mantissa is incremented; if the
        input mantissa equalled ``m1s`` (all 1s) the exponent is
        incremented as well.
    """
    m = Module()
    src = self.i
    dst = self.out_z

    m.d.comb += dst.eq(src)  # copies mid, z, out_do_z
    with m.If(~src.out_do_z):
        with m.If(src.roundz):
            m.d.comb += dst.z.m.eq(src.z.m + 1)  # mantissa up
            with m.If(src.z.m == src.z.m1s):  # all 1s
                m.d.comb += dst.z.e.eq(src.z.e + 1)  # exponent up

    return m
1366 class FPRound(FPState
):
def __init__(self, width, id_wid):
    """ * width: bit-width handed to the rounding module
        * id_wid: width of the multiplex id, forwarded to the module
    """
    FPState.__init__(self, "round")
    # forward id_wid, matching how FPRoundMod is built in FPNormToPack
    self.mod = FPRoundMod(width, id_wid)
    self.out_z = self.ospec()
1374 return self
.mod
.ispec()
1377 return self
.mod
.ospec()
1379 def setup(self
, m
, i
):
1380 """ links module to inputs and outputs
1382 self
.mod
.setup(m
, i
)
1385 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1386 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
def action(self, m):
    """ FSM transition: always proceed to the "corrections" state """
    next_state = "corrections"
    m.next = next_state
1392 class FPCorrectionsMod
:
def __init__(self, width, id_wid):
    """ * width: bit-width used when building the input/output specs
        * id_wid: width of the multiplex id carried alongside the data
    """
    # both must be stored before the spec helpers run: they read
    # self.width and self.id_wid
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1401 return FPRoundData(self
.width
, self
.id_wid
)
1404 return FPRoundData(self
.width
, self
.id_wid
)
1406 def process(self
, i
):
1409 def setup(self
, m
, i
):
1410 """ links module to inputs and outputs
1412 m
.submodules
.corrections
= self
1413 m
.d
.comb
+= self
.i
.eq(i
)
def elaborate(self, platform):
    """ Builds the corrections logic.

        The output defaults to a copy of the input.  When ``out_do_z`` is
        low and the input number reports ``is_denormalised``, the output
        exponent is replaced by the input's ``N127`` value.
    """
    m = Module()
    src = self.i
    dst = self.out_z

    m.submodules.corr_in_z = src.z
    m.submodules.corr_out_z = dst.z
    m.d.comb += dst.eq(src)  # copies mid, z, out_do_z
    with m.If(~src.out_do_z):
        with m.If(src.z.is_denormalised):
            m.d.comb += dst.z.e.eq(src.z.N127)

    return m
1426 class FPCorrections(FPState
):
def __init__(self, width, id_wid):
    """ * width: bit-width handed to the corrections module
        * id_wid: width of the multiplex id, forwarded to the module
    """
    FPState.__init__(self, "corrections")
    # FPCorrectionsMod requires both width and id_wid
    self.mod = FPCorrectionsMod(width, id_wid)
    self.out_z = self.ospec()
1434 return self
.mod
.ispec()
1437 return self
.mod
.ospec()
def setup(self, m, in_z):
    """ links module to inputs and outputs

        Sets up the corrections module on ``in_z`` and registers (sync)
        the module's result into this state's ``out_z``.
    """
    self.mod.setup(m, in_z)

    result = self.mod.out_z  # the module exposes its result as out_z
    m.d.sync += self.out_z.eq(result)
    m.d.sync += self.out_z.mid.eq(result.mid)
1447 def action(self
, m
):
1453 def __init__(self
, width
, id_wid
):
1454 self
.z
= FPNumOut(width
, False)
1455 self
.mid
= Signal(id_wid
, reset_less
=True)
1458 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ * width: bit-width used when building the input/output specs
        * id_wid: width of the multiplex id carried alongside the data
    """
    # both must be stored before the spec helpers run: they read
    # self.width and self.id_wid
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1470 return FPRoundData(self
.width
, self
.id_wid
)
1473 return FPPackData(self
.width
, self
.id_wid
)
1475 def process(self
, i
):
1478 def setup(self
, m
, in_z
):
1479 """ links module to inputs and outputs
1481 m
.submodules
.pack
= self
1482 m
.d
.comb
+= self
.i
.eq(in_z
)
def elaborate(self, platform):
    """ Builds the packing logic.

        The id is passed straight through.  If the input number reports
        ``is_overflowed`` the output is set via ``inf`` with the input's
        sign; otherwise it is built via ``create`` from the input's sign,
        exponent and mantissa.
    """
    m = Module()
    num = self.i.z

    m.submodules.pack_in_z = num
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(num.is_overflowed):
        m.d.comb += self.o.z.inf(num.s)
    with m.Else():
        m.d.comb += self.o.z.create(num.s, num.e, num.m)

    return m
1496 def __init__(self
, width
, id_wid
):
1497 self
.z
= FPNumOut(width
, False)
1498 self
.mid
= Signal(id_wid
, reset_less
=True)
1501 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1504 class FPPack(FPState
):
def __init__(self, width, id_wid):
    """ * width: bit-width handed to the pack module
        * id_wid: width of the multiplex id, forwarded to the module
    """
    FPState.__init__(self, "pack")
    # forward id_wid, matching how FPPackMod is built in FPNormToPack
    self.mod = FPPackMod(width, id_wid)
    self.out_z = self.ospec()
1512 return self
.mod
.ispec()
1515 return self
.mod
.ospec()
def setup(self, m, in_z):
    """ links module to inputs and outputs

        Sets up the pack module on ``in_z`` and registers (sync) the
        packed value and id from the module's output ``o``.
    """
    self.mod.setup(m, in_z)

    packed = self.mod.o  # the pack module exposes its result as "o"
    # the packed bits live on the data object's number: .z.v
    m.d.sync += self.out_z.z.v.eq(packed.z.v)
    m.d.sync += self.out_z.mid.eq(packed.mid)
def action(self, m):
    """ FSM transition: always proceed to the "pack_put_z" state """
    next_state = "pack_put_z"
    m.next = next_state
1529 class FPPutZ(FPState
):
def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
    """ * state: name of this FSM state
        * in_z: value to be written out (its ``.v`` is read by action)
        * out_z: destination; action drives its ``.z.v`` and ``.z.stb``
        * in_mid / out_mid: id source and destination (in_mid may be None)
        * to_state: state to move to afterwards; defaults to "get_ops"
    """
    FPState.__init__(self, state)
    self.to_state = "get_ops" if to_state is None else to_state
    # action() reads these, so they must be kept on the instance
    self.in_z = in_z
    self.out_z = out_z
    self.in_mid = in_mid
    self.out_mid = out_mid
1541 def action(self
, m
):
1542 if self
.in_mid
is not None:
1543 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1545 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1547 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1548 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1549 m
.next
= self
.to_state
1551 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1554 class FPPutZIdx(FPState
):
def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
    """ * state: name of this FSM state
        * in_z: value to be written out (its ``.v`` is read by action)
        * out_zs: indexable collection of destinations, selected by in_mid
        * in_mid: index into out_zs
        * to_state: state to move to afterwards; defaults to "get_ops"
    """
    FPState.__init__(self, state)
    self.to_state = "get_ops" if to_state is None else to_state
    # action() reads self.in_z, so it must be kept on the instance
    self.in_z = in_z
    self.out_zs = out_zs
    self.in_mid = in_mid
1565 def action(self
, m
):
1566 outz_stb
= Signal(reset_less
=True)
1567 outz_ack
= Signal(reset_less
=True)
1568 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1569 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1572 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1574 with m
.If(outz_stb
& outz_ack
):
1575 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1576 m
.next
= self
.to_state
1578 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
class FPADDBaseData:
    """Input record: two ``width``-bit operands plus a multiplex id."""

    def __init__(self, width, id_wid):
        """ * width: bit-width of each operand signal
            * id_wid: bit-width of the id signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
1594 def __init__(self
, width
, id_wid
):
1595 self
.z
= FPOp(width
)
1596 self
.mid
= Signal(id_wid
, reset_less
=True)
1599 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    # width/id_wid must be stored before the spec helpers run
    self.width = width
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()

    # filled by add_state(), iterated when the FSM is built
    self.states = []
1624 return FPADDBaseData(self
.width
, self
.id_wid
)
1627 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ records ``state`` in self.states and returns it, so callers can
        write ``st = self.add_state(...)`` and then ``st.setup(...)``
    """
    self.states.append(state)
    return state
1633 def get_fragment(self
, platform
=None):
1634 """ creates the HDL code-fragment for FPAdd
1637 m
.submodules
.out_z
= self
.o
.z
1638 m
.submodules
.in_t
= self
.in_t
1640 self
.get_compact_fragment(m
, platform
)
1642 self
.get_longer_fragment(m
, platform
)
1644 with m
.FSM() as fsm
:
1646 for state
in self
.states
:
1647 with m
.State(state
.state_from
):
1652 def get_longer_fragment(self
, m
, platform
=None):
1654 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1656 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1660 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1661 sc
.setup(m
, a
, b
, self
.in_mid
)
1663 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1664 dn
.setup(m
, a
, b
, sc
.in_mid
)
1666 if self
.single_cycle
:
1667 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1668 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1670 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1671 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1673 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1674 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1676 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1677 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1679 if self
.single_cycle
:
1680 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1681 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1683 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1684 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1686 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1687 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1689 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1690 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1692 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1693 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1695 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1696 pa
.in_mid
, self
.out_mid
))
1698 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1699 pa
.in_mid
, self
.out_mid
))
1701 def get_compact_fragment(self
, m
, platform
=None):
1703 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1704 self
.width
, self
.id_wid
))
1705 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1707 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1710 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1713 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1714 n1
.setup(m
, alm
.a1o
)
1716 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1717 n1
.out_z
.mid
, self
.o
.mid
))
1719 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1720 sc
.o
.mid
, self
.o
.mid
))
1723 class FPADDBase(FPState
):
def __init__(self, width, id_wid=None, single_cycle=False):
    """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
    """
    FPState.__init__(self, "fpadd")
    self.single_cycle = single_cycle

    # the wrapped adder; its specs define this state's i and o
    self.mod = FPADDBaseMod(width, id_wid, single_cycle)
    self.o = self.ospec()

    self.in_t = Trigger()
    self.i = self.ispec()

    self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
    self.in_accept = Signal(reset_less=True)
    self.add_stb = Signal(reset_less=True)
    self.add_ack = Signal(reset=0, reset_less=True)
1747 return self
.mod
.ispec()
1750 return self
.mod
.ospec()
def setup(self, m, i, add_stb, in_mid):
    """ wires this state to the wrapped adder module

        * m: module being built; the adder becomes submodule "fpadd"
        * i: input record copied (comb) into self.i and on into the adder
        * add_stb: incoming strobe, registered (sync) into self.add_stb
        * in_mid: not used by this method
    """
    adder = self.mod
    m.submodules.fpadd = adder

    m.d.comb += [self.i.eq(i),
                 adder.i.eq(self.i),
                 self.z_done.eq(adder.o.z.trigger),
                 # input trigger handshake
                 adder.in_t.stb.eq(self.in_t.stb),
                 self.in_t.ack.eq(adder.in_t.ack),
                 # result, id and output handshake
                 self.o.mid.eq(adder.o.mid),
                 self.o.z.v.eq(adder.o.z.v),
                 self.o.z.stb.eq(adder.o.z.stb),
                 adder.o.z.ack.eq(self.o.z.ack),
                ]

    m.d.sync += self.add_stb.eq(add_stb)
    m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
    m.d.sync += self.o.z.ack.eq(0)  # likewise
1772 def action(self
, m
):
1774 # in_accept is set on incoming strobe HIGH and ack LOW.
1775 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1777 #with m.If(self.in_t.ack):
1778 # m.d.sync += self.in_t.stb.eq(0)
1779 with m
.If(~self
.z_done
):
1780 # not done: test for accepting an incoming operand pair
1781 with m
.If(self
.in_accept
):
1783 self
.add_ack
.eq(1), # acknowledge receipt...
1784 self
.in_t
.stb
.eq(1), # initiate add
1787 m
.d
.sync
+= [self
.add_ack
.eq(0),
1788 self
.in_t
.stb
.eq(0),
1792 # done: acknowledge, and write out id and value
1793 m
.d
.sync
+= [self
.add_ack
.eq(1),
1800 if self
.in_mid
is not None:
1801 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1804 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1806 # move to output state on detecting z ack
1807 with m
.If(self
.out_z
.trigger
):
1808 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1811 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1815 def __init__(self
, width
, id_wid
):
1817 self
.id_wid
= id_wid
1819 for i
in range(rs_sz
):
1821 out_z
.name
= "out_z_%d" % i
1823 self
.res
= Array(res
)
1824 self
.in_z
= FPOp(width
)
1825 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1827 def setup(self
, m
, in_z
, in_mid
):
1828 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1829 self
.in_mid
.eq(in_mid
)]
1831 def get_fragment(self
, platform
=None):
1832 """ creates the HDL code-fragment for FPAdd
1835 m
.submodules
.res_in_z
= self
.in_z
1836 m
.submodules
+= self
.res
1848 """ FPADD: stages as follows:
1854 FPAddBase---> FPAddBaseMod
1856 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1858 FPAddBase is tricky: it is both a stage and *has* stages.
1859 Connection to FPAddBaseMod therefore requires an in stb/ack
1860 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1861 needs to be the thing that raises the incoming stb.
1864 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1867 * width: bit-width of IEEE754. supported: 16, 32, 64
1868 * id_wid: an identifier that is sync-connected to the input
1869 * single_cycle: True indicates each stage to complete in 1 clock
1872 self
.id_wid
= id_wid
1873 self
.single_cycle
= single_cycle
1875 #self.out_z = FPOp(width)
1876 self
.ids
= FPID(id_wid
)
1879 for i
in range(rs_sz
):
1882 in_a
.name
= "in_a_%d" % i
1883 in_b
.name
= "in_b_%d" % i
1884 rs
.append((in_a
, in_b
))
1888 for i
in range(rs_sz
):
1890 out_z
.name
= "out_z_%d" % i
1892 self
.res
= Array(res
)
def add_state(self, state):
    """ records ``state`` in self.states and returns it, so callers can
        write ``st = self.add_state(...)`` and keep using the state
    """
    self.states.append(state)
    return state
1900 def get_fragment(self
, platform
=None):
1901 """ creates the HDL code-fragment for FPAdd
1904 m
.submodules
+= self
.rs
1906 in_a
= self
.rs
[0][0]
1907 in_b
= self
.rs
[0][1]
1909 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1914 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1919 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1920 ab
= self
.add_state(ab
)
1921 abd
= ab
.ispec() # create an input spec object for FPADDBase
1922 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1923 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1926 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1929 with m
.FSM() as fsm
:
1931 for state
in self
.states
:
1932 with m
.State(state
.state_from
):
1938 if __name__
== "__main__":
1940 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1941 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1942 alu
.rs
[0][1].ports() + \
1943 alu
.res
[0].ports() + \
1944 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1946 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1947 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1948 alu
.in_t
.ports() + \
1949 alu
.out_z
.ports() + \
1950 [alu
.in_mid
, alu
.out_mid
])
1953 # works... but don't use, just do "python fname.py convert -t v"
1954 #print (verilog.convert(alu, ports=[
1955 # ports=alu.in_a.ports() + \
1956 # alu.in_b.ports() + \
1957 # alu.out_z.ports())