1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember the identifier this state object was created with.

    state_from -- stored verbatim as ``self.state_from``.  Subclasses in
    this file pass string literals such as "align" or "special_cases";
    presumably this is the FSM state name (confirm against the caller).
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """Create the operand port, the output value and the decode flag.

    width -- forwarded to FPOp for the incoming operand and used as the
    bit-width of the ``out_op`` signal.
    """
    # incoming operand (FPOp exposes stb/ack/v, as used by elaborate)
    self.in_op = FPOp(width)
    # raw operand value copied out when the handshake fires
    self.out_op = Signal(width)
    # high when both ack and stb of in_op are set
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op):
    """Attach the worker module to *m* and wire its input and decode flag.

    m     -- the nMigen Module being built
    in_op -- operand source copied combinatorially into the worker
    """
    # the submodule is registered under this state's own name
    setattr(m.submodules, self.state_from, self.mod)
    m.d.comb += [
        self.mod.in_op.eq(in_op),
        self.out_decode.eq(self.mod.out_decode),
    ]
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Build the two-operand fetch state.

    in_state  -- handed to FPState.__init__ (stored as ``state_from``)
    out_state -- value assigned to ``m.next`` once the operands decode
    width     -- forwarded to FPGet2OpMod
    id_wid    -- forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # worker module, plus a separate copy of its output record
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # handshake / status flags
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
def setup(self, m, i, in_stb, in_ack):
    """Attach the fetch module to *m* and wire up its handshake.

    m      -- the nMigen Module being built
    i      -- input record copied into the worker's ``i``
    in_stb -- strobe driving the worker's ``stb``
    in_ack -- signal driven from the worker's ``ack``
    """
    m.submodules.get_ops = self.mod
    m.d.comb += [
        self.mod.i.eq(i),
        self.mod.stb.eq(in_stb),
        # the worker's ack is mirrored both locally and to the caller
        self.out_ack.eq(self.mod.ack),
        self.out_decode.eq(self.mod.trigger),
        in_ack.eq(self.mod.ack),
    ]
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Create a record of two FPNumBase operands plus a multiplex id.

    width   -- forwarded to FPNumBase for both operands
    id_wid  -- bit-width of the ``mid`` signal
    m_extra -- forwarded to FPNumBase as its second argument
    """
    # the two operands share identical construction parameters
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """Create a record of operands a/b, result z, a bypass flag and an id.

    width  -- forwarded to FPNumBase / FPNumOut
    id_wid -- bit-width of the ``mid`` signal
    """
    # operands
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    # result number
    self.z = FPNumOut(width, False)
    # NOTE(review): the special-cases stage in this file sets this flag
    # when z already holds the final answer -- confirm this record is the
    # one that stage uses.
    self.out_do_z = Signal(reset_less=True)
    # id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
273 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
274 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
277 class FPAddSpecialCasesMod
:
278 """ special cases: NaNs, infs, zeros, denormalised
279 NOTE: some of these are unique to add. see "Special Operations"
280 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
283 def __init__(self
, width
, id_wid
):
286 self
.i
= self
.ispec()
287 self
.o
= self
.ospec()
290 return FPNumBase2Ops(self
.width
, self
.id_wid
)
293 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """Register this module on *m* as ``specialcases`` and feed it *i*.

    m -- the nMigen Module being built
    i -- input record copied combinatorially into ``self.i``
    """
    m.submodules.specialcases = self
    copy_input = self.i.eq(i)
    m.d.comb += copy_input
301 def elaborate(self
, platform
):
304 m
.submodules
.sc_in_a
= self
.i
.a
305 m
.submodules
.sc_in_b
= self
.i
.b
306 m
.submodules
.sc_out_z
= self
.o
.z
309 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
312 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
314 # if a is NaN or b is NaN return NaN
315 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
316 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
317 m
.d
.comb
+= self
.o
.z
.nan(0)
319 # XXX WEIRDNESS for FP16 non-canonical NaN handling
322 ## if a is zero and b is NaN return -b
323 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
324 # m.d.comb += self.o.out_do_z.eq(1)
325 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
327 ## if b is zero and a is NaN return -a
328 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
329 # m.d.comb += self.o.out_do_z.eq(1)
330 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
332 ## if a is -zero and b is NaN return -b
333 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
334 # m.d.comb += self.o.out_do_z.eq(1)
335 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
337 ## if b is -zero and a is NaN return -a
338 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
339 # m.d.comb += self.o.out_do_z.eq(1)
340 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
342 # if a is inf return inf (or NaN)
343 with m
.Elif(self
.i
.a
.is_inf
):
344 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
346 # if a is inf and signs don't match return NaN
347 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
348 m
.d
.comb
+= self
.o
.z
.nan(0)
350 # if b is inf return inf
351 with m
.Elif(self
.i
.b
.is_inf
):
352 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
355 # if a is zero and b zero return signed-a/b
356 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
362 # if a is zero return b
363 with m
.Elif(self
.i
.a
.is_zero
):
364 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
368 # if b is zero return a
369 with m
.Elif(self
.i
.b
.is_zero
):
370 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
371 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
374 # if a equal to -b return zero (+ve zero)
375 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
376 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
377 m
.d
.comb
+= self
.o
.z
.zero(0)
379 # Denormalised Number checks next, so pass a/b data through
381 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
382 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
383 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
385 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
391 def __init__(self
, id_wid
):
394 self
.in_mid
= Signal(id_wid
, reset_less
=True)
395 self
.out_mid
= Signal(id_wid
, reset_less
=True)
401 if self
.id_wid
is not None:
402 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
405 class FPAddSpecialCases(FPState
):
406 """ special cases: NaNs, infs, zeros, denormalised
407 NOTE: some of these are unique to add. see "Special Operations"
408 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the "special_cases" state around FPAddSpecialCasesMod.

    width  -- forwarded to FPAddSpecialCasesMod
    id_wid -- forwarded to FPAddSpecialCasesMod
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid): id_wid must be
    # passed through, otherwise construction raises TypeError.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, i):
    """Attach the special-cases module to *m* and latch its results.

    m -- the nMigen Module being built
    i -- input record handed to the special-cases module

    The wiring mirrors FPAddSpecialCasesDeNorm.setup: the module's setup
    takes only (m, i), and its results live on its ``o`` record.
    """
    self.mod.setup(m, i)
    # the bypass flag is read from the module's output record
    m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
    m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
    m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
426 with m
.If(self
.out_do_z
):
429 m
.next
= "denormalise"
432 class FPAddSpecialCasesDeNorm(FPState
):
433 """ special cases: NaNs, infs, zeros, denormalised
434 NOTE: some of these are unique to add. see "Special Operations"
435 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the combined special-cases + denormalise state.

    width  -- forwarded to both worker modules
    id_wid -- forwarded to both worker modules
    """
    FPState.__init__(self, "special_cases")

    # special-cases worker and a record shaped like its output
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise worker and a record shaped like its output
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
447 def setup(self
, m
, i
):
448 """ links module to inputs and outputs
450 self
.smod
.setup(m
, i
)
451 self
.dmod
.setup(m
, self
.smod
.o
)
452 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
455 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
456 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
458 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
461 with m
.If(self
.out_do_z
):
467 class FPAddDeNormMod(FPState
):
469 def __init__(self
, width
, id_wid
):
472 self
.i
= self
.ispec()
473 self
.o
= self
.ospec()
476 return FPSCData(self
.width
, self
.id_wid
)
479 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """Register this module on *m* as ``denormalise`` and feed it *i*.

    m -- the nMigen Module being built
    i -- input record copied combinatorially into ``self.i``
    """
    m.submodules.denormalise = self
    copy_input = self.i.eq(i)
    m.d.comb += copy_input
487 def elaborate(self
, platform
):
489 m
.submodules
.denorm_in_a
= self
.i
.a
490 m
.submodules
.denorm_in_b
= self
.i
.b
491 m
.submodules
.denorm_out_a
= self
.o
.a
492 m
.submodules
.denorm_out_b
= self
.o
.b
494 with m
.If(~self
.i
.out_do_z
):
495 # XXX hmmm, don't like repeating identical code
496 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
497 with m
.If(self
.i
.a
.exp_n127
):
498 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
500 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
502 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
503 with m
.If(self
.i
.b
.exp_n127
):
504 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
506 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
508 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
509 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
510 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
515 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Build the "denormalise" state around FPAddDeNormMod.

    width  -- forwarded to FPAddDeNormMod and to the output numbers
    id_wid -- forwarded to FPAddDeNormMod
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid): id_wid must be
    # passed through, otherwise construction raises TypeError.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
523 def setup(self
, m
, i
):
524 """ links module to inputs and outputs
528 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
529 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
532 # Denormalised Number checks
536 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the in/out operand pairs and the exponents-equal flag.

    width -- forwarded to FPNumBase (inputs) and FPNumIn (outputs)
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # set by elaborate when neither exponent comparison is true
    self.exp_eq = Signal(reset_less=True)
545 def elaborate(self
, platform
):
546 # This one however (single-cycle) will do the shift
551 m
.submodules
.align_in_a
= self
.in_a
552 m
.submodules
.align_in_b
= self
.in_b
553 m
.submodules
.align_out_a
= self
.out_a
554 m
.submodules
.align_out_b
= self
.out_b
556 # NOTE: this does *not* do single-cycle multi-shifting,
557 # it *STAYS* in the align state until exponents match
559 # exponent of a greater than b: shift b down
560 m
.d
.comb
+= self
.exp_eq
.eq(0)
561 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
562 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
563 agtb
= Signal(reset_less
=True)
564 altb
= Signal(reset_less
=True)
565 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
566 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
568 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
569 # exponent of b greater than a: shift a down
571 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
572 # exponents equal: move to next stage.
574 m
.d
.comb
+= self
.exp_eq
.eq(1)
578 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Build the multi-cycle "align" state.

    width  -- forwarded to FPAddAlignMultiMod and the output numbers
    id_wid -- accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # registered copies of the worker's outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b):
    """Attach the align module to *m* and wire its inputs and outputs.

    m    -- the nMigen Module being built
    in_a -- operand copied into the worker's ``in_a``
    in_b -- operand copied into the worker's ``in_b``
    """
    m.submodules.align = self.mod
    m.d.comb += [
        self.mod.in_a.eq(in_a),
        self.mod.in_b.eq(in_b),
        self.exp_eq.eq(self.mod.exp_eq),
    ]
    # out_a / out_b are latched in the sync domain, not driven
    # combinatorially
    m.d.sync += [
        self.out_a.eq(self.mod.out_a),
        self.out_b.eq(self.mod.out_b),
    ]
600 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Create a record of FPNumIn operands a/b, result z, flag and id.

    width  -- forwarded to FPNumIn / FPNumOut
    id_wid -- bit-width of the ``mid`` signal
    """
    # operands
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # result number and its bypass flag
    self.z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
    # id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
614 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
615 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
618 class FPAddAlignSingleMod
:
620 def __init__(self
, width
, id_wid
):
623 self
.i
= self
.ispec()
624 self
.o
= self
.ospec()
627 return FPSCData(self
.width
, self
.id_wid
)
630 return FPNumIn2Ops(self
.width
, self
.id_wid
)
632 def process(self
, i
):
def setup(self, m, i):
    """Register this module on *m* as ``align`` and feed it *i*.

    m -- the nMigen Module being built
    i -- input record copied combinatorially into ``self.i``
    """
    m.submodules.align = self
    copy_input = self.i.eq(i)
    m.d.comb += copy_input
641 def elaborate(self
, platform
):
642 """ Aligns A against B or B against A, depending on which has the
643 greater exponent. This is done in a *single* cycle using
644 variable-width bit-shift
646 the shifter used here is quite expensive in terms of gates.
647 Mux A or B in (and out) into temporaries, as only one of them
648 needs to be aligned against the other
652 m
.submodules
.align_in_a
= self
.i
.a
653 m
.submodules
.align_in_b
= self
.i
.b
654 m
.submodules
.align_out_a
= self
.o
.a
655 m
.submodules
.align_out_b
= self
.o
.b
657 # temporary (muxed) input and output to be shifted
658 t_inp
= FPNumBase(self
.width
)
659 t_out
= FPNumIn(None, self
.width
)
660 espec
= (len(self
.i
.a
.e
), True)
661 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
662 m
.submodules
.align_t_in
= t_inp
663 m
.submodules
.align_t_out
= t_out
664 m
.submodules
.multishift_r
= msr
666 ediff
= Signal(espec
, reset_less
=True)
667 ediffr
= Signal(espec
, reset_less
=True)
668 tdiff
= Signal(espec
, reset_less
=True)
669 elz
= Signal(reset_less
=True)
670 egz
= Signal(reset_less
=True)
672 # connect multi-shifter to t_inp/out mantissa (and tdiff)
673 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
674 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
675 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
676 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
677 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
679 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
680 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
681 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
682 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
684 # default: A-exp == B-exp, A and B untouched (fall through)
685 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
686 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
687 # only one shifter (muxed)
688 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
689 # exponent of a greater than b: shift b down
690 with m
.If(~self
.i
.out_do_z
):
692 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
695 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
697 # exponent of b greater than a: shift a down
699 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
702 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
705 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
706 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
707 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
712 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Build the single-cycle "align" state.

    width  -- forwarded to FPAddAlignSingleMod and the output numbers
    id_wid -- forwarded to FPAddAlignSingleMod
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # registered copies of the aligned operands
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
720 def setup(self
, m
, i
):
721 """ links module to inputs and outputs
725 # NOTE: could be done as comb
726 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
727 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
733 class FPAddAlignSingleAdd(FPState
):
735 def __init__(self
, width
, id_wid
):
736 FPState
.__init
__(self
, "align")
739 self
.a1o
= self
.ospec()
742 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
745 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
747 def setup(self
, m
, i
):
748 """ links module to inputs and outputs
751 # chain AddAlignSingle, AddStage0 and AddStage1
752 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
753 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
754 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
756 chain
= StageChain([mod
, a0mod
, a1mod
])
759 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
762 m
.next
= "normalise_1"
765 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the add-stage-0 output record.

    width  -- forwarded to FPNumBase for ``z``
    id_wid -- bit-width of the ``mid`` signal
    """
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    # the mantissa total is 4 bits wider than z's mantissa
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
774 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
775 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
778 class FPAddStage0Mod
:
780 def __init__(self
, width
, id_wid
):
783 self
.i
= self
.ispec()
784 self
.o
= self
.ospec()
787 return FPSCData(self
.width
, self
.id_wid
)
790 return FPAddStage0Data(self
.width
, self
.id_wid
)
792 def process(self
, i
):
def setup(self, m, i):
    """Register this module on *m* as ``add0`` and feed it *i*.

    m -- the nMigen Module being built
    i -- input record copied combinatorially into ``self.i``
    """
    m.submodules.add0 = self
    copy_input = self.i.eq(i)
    m.d.comb += copy_input
801 def elaborate(self
, platform
):
803 m
.submodules
.add0_in_a
= self
.i
.a
804 m
.submodules
.add0_in_b
= self
.i
.b
805 m
.submodules
.add0_out_z
= self
.o
.z
807 # store intermediate tests (and zero-extended mantissas)
808 seq
= Signal(reset_less
=True)
809 mge
= Signal(reset_less
=True)
810 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
811 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
812 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
813 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
814 am0
.eq(Cat(self
.i
.a
.m
, 0)),
815 bm0
.eq(Cat(self
.i
.b
.m
, 0))
817 # same-sign (both negative or both positive) add mantissas
818 with m
.If(~self
.i
.out_do_z
):
819 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
822 self
.o
.tot
.eq(am0
+ bm0
),
823 self
.o
.z
.s
.eq(self
.i
.a
.s
)
825 # a mantissa greater than b, use a
828 self
.o
.tot
.eq(am0
- bm0
),
829 self
.o
.z
.s
.eq(self
.i
.a
.s
)
831 # b mantissa greater than a, use b
834 self
.o
.tot
.eq(bm0
- am0
),
835 self
.o
.z
.s
.eq(self
.i
.b
.s
)
838 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
840 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
841 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
845 class FPAddStage0(FPState
):
846 """ First stage of add. covers same-sign (add) and subtract
847 special-casing when mantissas are greater or equal, to
848 give greatest accuracy.
def __init__(self, width, id_wid):
    """Build the "add_0" state around FPAddStage0Mod.

    width  -- forwarded to FPAddStage0Mod
    id_wid -- forwarded to FPAddStage0Mod
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid): id_wid must be
    # passed through, otherwise construction raises TypeError.
    self.mod = FPAddStage0Mod(width, id_wid)
    self.o = self.mod.ospec()
856 def setup(self
, m
, i
):
857 """ links module to inputs and outputs
861 # NOTE: these could be done as combinatorial (merge add0+add1)
862 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
868 class FPAddStage1Data
:
870 def __init__(self
, width
, id_wid
):
871 self
.z
= FPNumBase(width
, False)
872 self
.out_do_z
= Signal(reset_less
=True)
874 self
.mid
= Signal(id_wid
, reset_less
=True)
877 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
878 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
882 class FPAddStage1Mod(FPState
):
883 """ Second stage of add: preparation for normalisation.
884 detects when tot sum is too big (tot[27] is kinda a carry bit)
887 def __init__(self
, width
, id_wid
):
890 self
.i
= self
.ispec()
891 self
.o
= self
.ospec()
894 return FPAddStage0Data(self
.width
, self
.id_wid
)
897 return FPAddStage1Data(self
.width
, self
.id_wid
)
899 def process(self
, i
):
902 def setup(self
, m
, i
):
903 """ links module to inputs and outputs
905 m
.submodules
.add1
= self
906 m
.submodules
.add1_out_overflow
= self
.o
.of
908 m
.d
.comb
+= self
.i
.eq(i
)
910 def elaborate(self
, platform
):
912 #m.submodules.norm1_in_overflow = self.in_of
913 #m.submodules.norm1_out_overflow = self.out_of
914 #m.submodules.norm1_in_z = self.in_z
915 #m.submodules.norm1_out_z = self.out_z
916 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
917 # tot[-1] (MSB) gets set when the sum overflows. shift result down
918 with m
.If(~self
.i
.out_do_z
):
919 with m
.If(self
.i
.tot
[-1]):
921 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
922 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
923 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
924 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
925 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
926 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
928 # tot[-1] (MSB) zero case
931 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
932 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
933 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
934 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
935 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
938 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
939 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
944 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Build the "add_1" state around FPAddStage1Mod.

    width  -- forwarded to FPAddStage1Mod and to the output number
    id_wid -- forwarded to FPAddStage1Mod
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid): id_wid must be
    # passed through, otherwise construction raises TypeError.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
953 def setup(self
, m
, i
):
954 """ links module to inputs and outputs
958 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
960 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
961 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
962 m
.d
.sync
+= self
.norm_stb
.eq(1)
965 m
.next
= "normalise_1"
968 class FPNormaliseModSingle
:
970 def __init__(self
, width
):
972 self
.in_z
= self
.ispec()
973 self
.out_z
= self
.ospec()
976 return FPNumBase(self
.width
, False)
979 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """Register this module on *m* as ``normalise`` and feed it *i*.

    m -- the nMigen Module being built
    i -- input number copied combinatorially into ``self.in_z``
    """
    m.submodules.normalise = self
    # This class names its input ``in_z`` (see __init__ and elaborate);
    # it has no ``i`` attribute, so the input must be driven via in_z.
    m.d.comb += self.in_z.eq(i)
987 def elaborate(self
, platform
):
990 mwid
= self
.out_z
.m_width
+2
991 pe
= PriorityEncoder(mwid
)
992 m
.submodules
.norm_pe
= pe
994 m
.submodules
.norm1_out_z
= self
.out_z
995 m
.submodules
.norm1_in_z
= self
.in_z
997 in_z
= FPNumBase(self
.width
, False)
999 m
.submodules
.norm1_insel_z
= in_z
1000 m
.submodules
.norm1_insel_overflow
= in_of
1002 espec
= (len(in_z
.e
), True)
1003 ediff_n126
= Signal(espec
, reset_less
=True)
1004 msr
= MultiShiftRMerge(mwid
, espec
)
1005 m
.submodules
.multishift_r
= msr
1007 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1008 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1009 # initialise out from in (overridden below)
1010 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1011 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1012 # normalisation decrease condition
1013 decrease
= Signal(reset_less
=True)
1014 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1016 with m
.If(decrease
):
1017 # *sigh* not entirely obvious: count leading zeros (clz)
1018 # with a PriorityEncoder: to find from the MSB
1019 # we reverse the order of the bits.
1020 temp_m
= Signal(mwid
, reset_less
=True)
1021 temp_s
= Signal(mwid
+1, reset_less
=True)
1022 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1024 # cat round and guard bits back into the mantissa
1025 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1026 pe
.i
.eq(temp_m
[::-1]), # inverted
1027 clz
.eq(pe
.o
), # count zeros from MSB down
1028 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1029 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1030 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Create a record of rounding flag, result z, bypass flag and id.

    width  -- forwarded to FPNumBase for ``z``
    id_wid -- bit-width of the ``mid`` signal
    """
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    # id carried alongside the data
    self.mid = Signal(id_wid, reset_less=True)
1044 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
1045 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1048 class FPNorm1ModSingle
:
1050 def __init__(self
, width
, id_wid
):
1052 self
.id_wid
= id_wid
1053 self
.i
= self
.ispec()
1054 self
.o
= self
.ospec()
1057 return FPAddStage1Data(self
.width
, self
.id_wid
)
1060 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """Register this module on *m* as ``normalise_1`` and feed it *i*.

    m -- the nMigen Module being built
    i -- input record copied combinatorially into ``self.i``
    """
    m.submodules.normalise_1 = self
    copy_input = self.i.eq(i)
    m.d.comb += copy_input
1068 def process(self
, i
):
1071 def elaborate(self
, platform
):
1074 mwid
= self
.o
.z
.m_width
+2
1075 pe
= PriorityEncoder(mwid
)
1076 m
.submodules
.norm_pe
= pe
1079 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1081 m
.submodules
.norm1_out_z
= self
.o
.z
1082 m
.submodules
.norm1_out_overflow
= of
1083 m
.submodules
.norm1_in_z
= self
.i
.z
1084 m
.submodules
.norm1_in_overflow
= self
.i
.of
1087 m
.submodules
.norm1_insel_z
= i
.z
1088 m
.submodules
.norm1_insel_overflow
= i
.of
1090 espec
= (len(i
.z
.e
), True)
1091 ediff_n126
= Signal(espec
, reset_less
=True)
1092 msr
= MultiShiftRMerge(mwid
, espec
)
1093 m
.submodules
.multishift_r
= msr
1095 m
.d
.comb
+= i
.eq(self
.i
)
1096 # initialise out from in (overridden below)
1097 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1098 m
.d
.comb
+= of
.eq(i
.of
)
1099 # normalisation increase/decrease conditions
1100 decrease
= Signal(reset_less
=True)
1101 increase
= Signal(reset_less
=True)
1102 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1103 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1105 with m
.If(~self
.i
.out_do_z
):
1106 with m
.If(decrease
):
1107 # *sigh* not entirely obvious: count leading zeros (clz)
1108 # with a PriorityEncoder: to find from the MSB
1109 # we reverse the order of the bits.
1110 temp_m
= Signal(mwid
, reset_less
=True)
1111 temp_s
= Signal(mwid
+1, reset_less
=True)
1112 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1113 # make sure that the amount to decrease by does NOT
1114 # go below the minimum non-INF/NaN exponent
1115 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1118 # cat round and guard bits back into the mantissa
1119 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1120 pe
.i
.eq(temp_m
[::-1]), # inverted
1121 clz
.eq(limclz
), # count zeros from MSB down
1122 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1123 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1124 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1125 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1126 # overflow in bits 0..1: got shifted too (leave sticky)
1127 of
.guard
.eq(temp_s
[1]), # guard
1128 of
.round_bit
.eq(temp_s
[0]), # round
1131 with m
.Elif(increase
):
1132 temp_m
= Signal(mwid
+1, reset_less
=True)
1134 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1136 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1137 # connect multi-shifter to inp/out mantissa (and ediff)
1139 msr
.diff
.eq(ediff_n126
),
1140 self
.o
.z
.m
.eq(msr
.m
[3:]),
1141 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1142 # overflow in bits 0..1: got shifted too (leave sticky)
1143 of
.guard
.eq(temp_s
[2]), # guard
1144 of
.round_bit
.eq(temp_s
[1]), # round
1145 of
.sticky
.eq(temp_s
[0]), # sticky
1146 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1149 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1150 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
class FPNorm1ModMulti:
    """ Combinatorial one-step normaliser with a selectable input.

        Each evaluation applies at most one adjustment to the selected
        number: exponent down / mantissa up ("decrease"), or exponent up /
        mantissa down ("increase").  out_norm is high whenever one of the
        two adjustments was applied on this evaluation.
    """

    def __init__(self, width, single_cycle=True):
        """ creates the input, feedback (temp) and output records

            * width: passed to FPNumBase for every number record
            * single_cycle: kept for signature compatibility; not used here
        """
        # elaborate() builds its internal copy of the input from self.width
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven in elaborate(): high while an adjustment is being applied
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """ builds and returns the Module containing the normalise step
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # internal copies that hold whichever source was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)

        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)

        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end

        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                # must follow the shift: the later assignment wins for bit 0
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit),
            ]

        return m
class FPNorm1Single(FPState):
    """ FSM state "normalise_1": wraps FPNorm1ModSingle as a single stage.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width handed to the module and to FPNumBase
            * id_wid: width of the id that accompanies the data
            * single_cycle: kept for signature compatibility; not used here
        """
        FPState.__init__(self, "normalise_1")
        # id_wid is forwarded so that construction matches FPNormToPack,
        # which builds FPNorm1ModSingle(width, id_wid)
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input spec: whatever the wrapped module expects
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: move on to the rounding state
        """
        # NOTE(review): "round" is the state name registered by FPRound,
        # the stage added directly after this one - confirm.
        m.next = "round"
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1": multi-cycle wrapper around FPNorm1ModMulti.

        The state stays active while out_norm is high, feeding the module
        output back in through temp_z / temp_of on every clock.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the module and to FPNumBase
            * id_wid: accepted for signature compatibility; not used here
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): relies on the wrapped module providing a setup()
        # that accepts these nine arguments - confirm.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: iterate while out_norm is high, then go to "round"
        """
        # accept a new value on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed the module output back for the next iteration
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            # NOTE(review): "round" is the state registered by FPRound.
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact form): normalise, round, correct
        and pack chained together as one stage.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to every module in the chain
            * id_wid: width of the id that accompanies the data
        """
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        # ispec(), ospec() and setup() all read self.width
        self.width = width

    def ispec(self):
        """ input spec of the first module in the chain
        """
        return FPAddStage1Data(self.width, self.id_wid)  # Norm1ModSingle ispec

    def ospec(self):
        """ output spec of the last module in the chain
        """
        return FPPackData(self.width, self.id_wid)  # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # NOTE(review): assumes StageChain offers setup(m, i) like the
        # stage modules it wraps - confirm against example_buf_pipe.
        chain.setup(m, i)

        self.out_z = pmod.ospec()

        # register the id and the packed result from the end of the chain
        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(pmod.o.z.v)  # outputs packed result

    def action(self, m):
        """ FSM action: hand over to the output state
        """
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ creates the three fields of this data record

        * width: passed to FPNumBase for the number field
        * id_wid: bit-width of the mid signal
    """
    # the number carried by this record
    self.z = FPNumBase(width, False)
    # single-bit flag; later stages leave z untouched while it is set
    self.out_do_z = Signal(reset_less=True)
    # id that travels alongside the number
    self.mid = Signal(id_wid, reset_less=True)
1329 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
def __init__(self, width, id_wid):
    """ creates the input and output records of the rounding module

        * width: bit-width forwarded to the input/output specs
        * id_wid: width of the id that accompanies the data
    """
    # both attributes must exist before ispec()/ospec() run: they read them
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1342 return FPNorm1Data(self
.width
, self
.id_wid
)
1345 return FPRoundData(self
.width
, self
.id_wid
)
1347 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        * m: the Module this stage is added to
        * i: the source record copied into self.i
    """
    # register this stage as the submodule named "roundz"
    m.submodules.roundz = self
    # combinatorially copy the incoming record into the stage input
    m.d.comb += self.i.eq(i)
def elaborate(self, platform):
    """ builds and returns the Module that performs rounding

        The output starts as a copy of the input.  Unless out_do_z is
        set, a high roundz adds one to the mantissa, and the exponent is
        also incremented when the mantissa was all 1s.
    """
    m = Module()
    m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
    with m.If(~self.i.out_do_z):
        with m.If(self.i.roundz):
            m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1)  # mantissa up
            with m.If(self.i.z.m == self.i.z.m1s):  # all 1s
                m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1)  # exponent up
    return m
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its output.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the rounding module
            * id_wid: width of the id that accompanies the data
        """
        FPState.__init__(self, "round")
        # FPRoundMod takes (width, id_wid); id_wid has no default
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: whatever the wrapped module expects
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: move on to the corrections state
        """
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial stage that rewrites the exponent of a denormalised
        number; every other field passes through unchanged.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width forwarded to the input/output specs
            * id_wid: width of the id that accompanies the data
        """
        # both attributes must exist before ispec()/ospec() run
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output spec (same record type as the input)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the record that carries this stage's result
        """
        # NOTE(review): assumes the chaining code expects process() to
        # hand back the stage output - confirm against example_buf_pipe.
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds and returns the Module that applies the correction
        """
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i)  # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            with m.If(self.i.z.is_denormalised):
                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its output.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the corrections module
            * id_wid: width of the id that accompanies the data
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod takes (width, id_wid); id_wid has no default
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: whatever the wrapped module expects
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: move on to the pack state
        """
        # NOTE(review): "pack" is the state name registered by FPPack,
        # the stage added directly after this one - confirm.
        m.next = "pack"
def __init__(self, width, id_wid):
    """ creates the two fields of this data record

        * width: passed to FPNumOut for the number field
        * id_wid: bit-width of the mid signal
    """
    # the output number carried by this record
    self.z = FPNumOut(width, False)
    # id that travels alongside the number
    self.mid = Signal(id_wid, reset_less=True)
1458 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ creates the input and output records of the pack module

        * width: bit-width forwarded to the input/output specs
        * id_wid: width of the id that accompanies the data
    """
    # both attributes must exist before ispec()/ospec() run: they read them
    self.width = width
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1470 return FPRoundData(self
.width
, self
.id_wid
)
1473 return FPPackData(self
.width
, self
.id_wid
)
1475 def process(self
, i
):
def setup(self, m, in_z):
    """ links module to inputs and outputs

        * m: the Module this stage is added to
        * in_z: the source record copied into self.i
    """
    # register this stage as the submodule named "pack"
    m.submodules.pack = self
    # combinatorially copy the incoming record into the stage input
    m.d.comb += self.i.eq(in_z)
def elaborate(self, platform):
    """ builds and returns the Module that packs the result

        The id is always copied through.  Unless out_do_z is set, the
        output is inf (with the input's sign) when the input overflowed,
        otherwise it is created from the input's sign, exponent and
        mantissa.  With out_do_z set the input number is copied as-is.
    """
    m = Module()
    m.submodules.pack_in_z = self.i.z
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(~self.i.out_do_z):
        with m.If(self.i.z.is_overflowed):
            m.d.comb += self.o.z.inf(self.i.z.s)
        with m.Else():
            m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    with m.Else():
        m.d.comb += self.o.z.eq(self.i.z)
    return m
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers its output.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the pack module
            * id_wid: width of the id that accompanies the data
        """
        FPState.__init__(self, "pack")
        # FPPackMod takes (width, id_wid); id_wid has no default
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: whatever the wrapped module expects
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module's result lives in mod.o; the packed value is its z.v
        # (same access path as used by FPNormToPack)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: hand over to the output state
        """
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ FSM output state: presents in_z on out_z and raises the strobe
        until strobe and ack are both high, then moves to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source whose .v is written out
            * out_z: destination; its .z provides v, stb and ack
            * in_mid / out_mid: id source and destination (in_mid may be
              None, in which case no id is copied)
            * to_state: state entered after the handshake; defaults to
              "get_ops"
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy id and value, then run the stb/ack handshake
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): sync domain chosen to match the neighbouring
        # assignments in this method - confirm.
        m.d.sync += [
            self.out_z.z.v.eq(self.in_z.v)
        ]
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            # receiver has taken the value: drop the strobe and move on
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ FSM output state: like FPPutZ, but the destination is chosen out
        of out_zs by indexing with in_mid.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source whose .v is written out
            * out_zs: indexable collection of destinations (v, stb, ack)
            * in_mid: index selecting the destination
            * to_state: state entered after the handshake; defaults to
              "get_ops"
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads in_z
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy the value, then run the stb/ack handshake
        """
        # local copies of the selected destination's handshake signals
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
                    ]
        # NOTE(review): sync domain chosen to match the strobe
        # assignments below - confirm.
        m.d.sync += [
            self.out_zs[self.in_mid].v.eq(self.in_z.v)
        ]
        with m.If(outz_stb & outz_ack):
            # receiver has taken the value: drop the strobe and move on
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ Input record for the adder: two operands plus an id.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of each operand signal
            * id_wid: bit-width of the mid signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments that copy record i into this
            record, field by field (a, b, mid)
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ creates the two fields of this data record

        * width: passed to FPOp for the result field
        * id_wid: bit-width of the mid signal
    """
    # the result operand carried by this record
    self.z = FPOp(width)
    # id that travels alongside the result
    self.mid = Signal(id_wid, reset_less=True)
1593 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1598 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1601 * width: bit-width of IEEE754. supported: 16, 32, 64
1602 * id_wid: an identifier that is sync-connected to the input
1603 * single_cycle: True indicates each stage to complete in 1 clock
1604 * compact: True indicates a reduced number of stages
1607 self
.id_wid
= id_wid
1608 self
.single_cycle
= single_cycle
1609 self
.compact
= compact
1611 self
.in_t
= Trigger()
1612 self
.i
= self
.ispec()
1613 self
.o
= self
.ospec()
1618 return FPADDBaseData(self
.width
, self
.id_wid
)
1621 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ registers a state and hands it back

        Appends state to self.states and returns the same object, so
        callers can write ``x = self.add_state(...)`` and then go on to
        call ``x.setup(...)``.
    """
    self.states.append(state)
    return state
1627 def get_fragment(self
, platform
=None):
1628 """ creates the HDL code-fragment for FPAdd
1631 m
.submodules
.out_z
= self
.o
.z
1632 m
.submodules
.in_t
= self
.in_t
1634 self
.get_compact_fragment(m
, platform
)
1636 self
.get_longer_fragment(m
, platform
)
1638 with m
.FSM() as fsm
:
1640 for state
in self
.states
:
1641 with m
.State(state
.state_from
):
1646 def get_longer_fragment(self
, m
, platform
=None):
1648 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1650 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1654 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1655 sc
.setup(m
, a
, b
, self
.in_mid
)
1657 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1658 dn
.setup(m
, a
, b
, sc
.in_mid
)
1660 if self
.single_cycle
:
1661 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1662 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1664 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1665 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1667 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1668 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1670 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1671 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1673 if self
.single_cycle
:
1674 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1675 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1677 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1678 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1680 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1681 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1683 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1684 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1686 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1687 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1689 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1690 pa
.in_mid
, self
.out_mid
))
1692 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1693 pa
.in_mid
, self
.out_mid
))
1695 def get_compact_fragment(self
, m
, platform
=None):
1697 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1698 self
.width
, self
.id_wid
))
1699 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1701 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1704 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1707 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1708 n1
.setup(m
, alm
.a1o
)
1710 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1711 n1
.out_z
.mid
, self
.o
.mid
))
1713 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1714 sc
.o
.mid
, self
.o
.mid
))
1717 class FPADDBase(FPState
):
1719 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1722 * width: bit-width of IEEE754. supported: 16, 32, 64
1723 * id_wid: an identifier that is sync-connected to the input
1724 * single_cycle: True indicates each stage to complete in 1 clock
1726 FPState
.__init
__(self
, "fpadd")
1728 self
.single_cycle
= single_cycle
1729 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1730 self
.o
= self
.ospec()
1732 self
.in_t
= Trigger()
1733 self
.i
= self
.ispec()
1735 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1736 self
.in_accept
= Signal(reset_less
=True)
1737 self
.add_stb
= Signal(reset_less
=True)
1738 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1741 return self
.mod
.ispec()
1744 return self
.mod
.ospec()
1746 def setup(self
, m
, i
, add_stb
, in_mid
):
1747 m
.d
.comb
+= [self
.i
.eq(i
),
1748 self
.mod
.i
.eq(self
.i
),
1749 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1750 #self.add_stb.eq(add_stb),
1751 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1752 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1753 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1754 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1755 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1756 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1759 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1760 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1761 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1762 #m.d.sync += self.in_t.stb.eq(0)
1764 m
.submodules
.fpadd
= self
.mod
1766 def action(self
, m
):
1768 # in_accept is set on incoming strobe HIGH and ack LOW.
1769 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1771 #with m.If(self.in_t.ack):
1772 # m.d.sync += self.in_t.stb.eq(0)
1773 with m
.If(~self
.z_done
):
1774 # not done: test for accepting an incoming operand pair
1775 with m
.If(self
.in_accept
):
1777 self
.add_ack
.eq(1), # acknowledge receipt...
1778 self
.in_t
.stb
.eq(1), # initiate add
1781 m
.d
.sync
+= [self
.add_ack
.eq(0),
1782 self
.in_t
.stb
.eq(0),
1786 # done: acknowledge, and write out id and value
1787 m
.d
.sync
+= [self
.add_ack
.eq(1),
1794 if self
.in_mid
is not None:
1795 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1798 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1800 # move to output state on detecting z ack
1801 with m
.If(self
.out_z
.trigger
):
1802 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1805 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1809 def __init__(self
, width
, id_wid
):
1811 self
.id_wid
= id_wid
1813 for i
in range(rs_sz
):
1815 out_z
.name
= "out_z_%d" % i
1817 self
.res
= Array(res
)
1818 self
.in_z
= FPOp(width
)
1819 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1821 def setup(self
, m
, in_z
, in_mid
):
1822 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1823 self
.in_mid
.eq(in_mid
)]
1825 def get_fragment(self
, platform
=None):
1826 """ creates the HDL code-fragment for FPAdd
1829 m
.submodules
.res_in_z
= self
.in_z
1830 m
.submodules
+= self
.res
1842 """ FPADD: stages as follows:
1848 FPAddBase---> FPAddBaseMod
1850 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1852 FPAddBase is tricky: it is both a stage and *has* stages.
1853 Connection to FPAddBaseMod therefore requires an in stb/ack
1854 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1855 needs to be the thing that raises the incoming stb.
1858 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1861 * width: bit-width of IEEE754. supported: 16, 32, 64
1862 * id_wid: an identifier that is sync-connected to the input
1863 * single_cycle: True indicates each stage to complete in 1 clock
1866 self
.id_wid
= id_wid
1867 self
.single_cycle
= single_cycle
1869 #self.out_z = FPOp(width)
1870 self
.ids
= FPID(id_wid
)
1873 for i
in range(rs_sz
):
1876 in_a
.name
= "in_a_%d" % i
1877 in_b
.name
= "in_b_%d" % i
1878 rs
.append((in_a
, in_b
))
1882 for i
in range(rs_sz
):
1884 out_z
.name
= "out_z_%d" % i
1886 self
.res
= Array(res
)
def add_state(self, state):
    """ registers a state and hands it back

        Appends state to self.states and returns the same object, so
        callers can write ``x = self.add_state(...)`` and then go on to
        use ``x`` (for example ``x.ispec()`` or ``x.setup(...)``).
    """
    self.states.append(state)
    return state
1894 def get_fragment(self
, platform
=None):
1895 """ creates the HDL code-fragment for FPAdd
1898 m
.submodules
+= self
.rs
1900 in_a
= self
.rs
[0][0]
1901 in_b
= self
.rs
[0][1]
1903 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1908 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1913 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1914 ab
= self
.add_state(ab
)
1915 abd
= ab
.ispec() # create an input spec object for FPADDBase
1916 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1917 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1920 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1923 with m
.FSM() as fsm
:
1925 for state
in self
.states
:
1926 with m
.State(state
.state_from
):
1932 if __name__
== "__main__":
1934 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1935 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1936 alu
.rs
[0][1].ports() + \
1937 alu
.res
[0].ports() + \
1938 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1940 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1941 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1942 alu
.in_t
.ports() + \
1943 alu
.out_z
.ports() + \
1944 [alu
.in_mid
, alu
.out_mid
])
1947 # works... but don't use, just do "python fname.py convert -t v"
1948 #print (verilog.convert(alu, ports=[
1949 # ports=alu.in_a.ports() + \
1950 # alu.in_b.ports() + \
1951 # alu.out_z.ports())