1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """ stores the identifier of this state

        state_from: state identifier, kept unchanged on the instance
                    (subclasses in this file pass strings such as "align")
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """ creates the operand input, the operand output and the decode flag

        width: bit-width passed to FPOp and used for the out_op Signal
    """
    # operand input (FPOp is imported from fpbase)
    self.in_op = FPOp(width)
    # operand output, same width as the input
    self.out_op = Signal(width)
    # single-bit flag; created without a reset value
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op):
    """ links module to inputs and outputs

        m:     Module under construction; self.mod is attached to it as a
               submodule named after self.state_from
        in_op: operand source, copied combinatorially into self.mod.in_op
    """
    # submodule name is dynamic, hence setattr rather than attribute syntax
    setattr(m.submodules, self.state_from, self.mod)
    m.d.comb += [
        self.mod.in_op.eq(in_op),
        # mirror the module's decode flag onto this state
        self.out_decode.eq(self.mod.out_decode),
    ]
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """ state wrapper around FPGet2OpMod

        in_state:  identifier of this state (forwarded to FPState)
        out_state: identifier stored in self.out_state
        width:     operand bit-width, forwarded to FPGet2OpMod
        id_wid:    multiplex-id width, forwarded to FPGet2OpMod
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state

    # the module, plus an output record of the same spec as the module's
    self.mod = FPGet2OpMod(width, id_wid)
    self.o = self.mod.ospec()

    # single-bit handshake / decode flags, all without reset values
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
def setup(self, m, i, in_stb, in_ack):
    """ links module to inputs and outputs

        m:      Module under construction; self.mod becomes its
                "get_ops" submodule
        i:      input data, copied into self.mod.i
        in_stb: incoming strobe, copied into self.mod.stb
        in_ack: signal driven from self.mod.ack
    """
    m.submodules.get_ops = self.mod
    m.d.comb += [
        # feed the module
        self.mod.i.eq(i),
        self.mod.stb.eq(in_stb),
        # expose the module's ack and trigger on this state
        self.out_ack.eq(self.mod.ack),
        self.out_decode.eq(self.mod.trigger),
        # and pass the ack back to the caller-supplied signal
        in_ack.eq(self.mod.ack),
    ]
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """ data record holding two operands and a multiplex id

        width:   bit-width passed to both FPNumBase operands
        id_wid:  width of the mid Signal
        m_extra: passed through as FPNumBase's second argument
    """
    # the two operands are built with identical parameters
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    # multiplex id travelling alongside the operands
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ data record: operands a and b, result z, the out_do_z flag and
        the multiplex id

        width:  bit-width passed to FPNumBase / FPNumOut
        id_wid: width of the mid Signal
    """
    # operands (second argument True - presumably m_extra; confirm in fpbase)
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    # result number
    self.z = FPNumOut(width, False)
    # single-bit flag, no reset value
    self.out_do_z = Signal(reset_less=True)
    # multiplex id
    self.mid = Signal(id_wid, reset_less=True)
273 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
274 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
277 class FPAddSpecialCasesMod
:
278 """ special cases: NaNs, infs, zeros, denormalised
279 NOTE: some of these are unique to add. see "Special Operations"
280 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
283 def __init__(self
, width
, id_wid
):
286 self
.i
= self
.ispec()
287 self
.o
= self
.ospec()
290 return FPNumBase2Ops(self
.width
, self
.id_wid
)
293 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered as its
           "specialcases" submodule
        i: input data, copied combinatorially into self.i
    """
    m.submodules.specialcases = self
    m.d.comb += self.i.eq(i)
301 def elaborate(self
, platform
):
304 m
.submodules
.sc_in_a
= self
.i
.a
305 m
.submodules
.sc_in_b
= self
.i
.b
306 m
.submodules
.sc_out_z
= self
.o
.z
309 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
312 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
314 # if a is NaN or b is NaN return NaN
315 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
316 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
317 m
.d
.comb
+= self
.o
.z
.nan(0)
319 # XXX WEIRDNESS for FP16 non-canonical NaN handling
322 ## if a is zero and b is NaN return -b
323 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
324 # m.d.comb += self.o.out_do_z.eq(1)
325 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
327 ## if b is zero and a is NaN return -a
328 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
329 # m.d.comb += self.o.out_do_z.eq(1)
330 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
332 ## if a is -zero and b is NaN return -b
333 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
334 # m.d.comb += self.o.out_do_z.eq(1)
335 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
337 ## if b is -zero and a is NaN return -a
338 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
339 # m.d.comb += self.o.out_do_z.eq(1)
340 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
342 # if a is inf return inf (or NaN)
343 with m
.Elif(self
.i
.a
.is_inf
):
344 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
346 # if a is inf and signs don't match return NaN
347 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
348 m
.d
.comb
+= self
.o
.z
.nan(0)
350 # if b is inf return inf
351 with m
.Elif(self
.i
.b
.is_inf
):
352 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
355 # if a is zero and b zero return signed-a/b
356 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
362 # if a is zero return b
363 with m
.Elif(self
.i
.a
.is_zero
):
364 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
368 # if b is zero return a
369 with m
.Elif(self
.i
.b
.is_zero
):
370 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
371 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
374 # if a equal to -b return zero (+ve zero)
375 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
376 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
377 m
.d
.comb
+= self
.o
.z
.zero(0)
379 # Denormalised Number checks next, so pass a/b data through
381 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
382 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
383 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
385 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
391 def __init__(self
, id_wid
):
394 self
.in_mid
= Signal(id_wid
, reset_less
=True)
395 self
.out_mid
= Signal(id_wid
, reset_less
=True)
401 if self
.id_wid
is not None:
402 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
405 class FPAddSpecialCases(FPState
):
406 """ special cases: NaNs, infs, zeros, denormalised
407 NOTE: some of these are unique to add. see "Special Operations"
408 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ "special_cases" state wrapping an FPAddSpecialCasesMod

        width:  floating-point bit-width, forwarded to the module
        id_wid: multiplex-id width, forwarded to the module
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid): both are required
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # registered copy of the module output (same spec as the module's o)
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)

def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction (the module registers itself on it)
        i: input data handed to the module

        Follows the same wiring as FPAddSpecialCasesDeNorm.setup: the
        module's setup takes (m, i) only, and its results live in
        self.mod.o (fields z, out_do_z, mid).
    """
    self.mod.setup(m, i)
    # the out_do_z flag is taken combinatorially from the module output
    m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
    m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
    m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)
426 with m
.If(self
.out_do_z
):
429 m
.next
= "denormalise"
432 class FPAddSpecialCasesDeNorm(FPState
):
433 """ special cases: NaNs, infs, zeros, denormalised
434 NOTE: some of these are unique to add. see "Special Operations"
435 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """ "special_cases" state combining the special-cases module and the
        denormalise module

        width:  floating-point bit-width, forwarded to both modules
        id_wid: multiplex-id width, forwarded to both modules
    """
    FPState.__init__(self, "special_cases")

    # special-cases module and a record matching its output spec
    self.smod = FPAddSpecialCasesMod(width, id_wid)
    self.out_z = self.smod.ospec()
    self.out_do_z = Signal(reset_less=True)

    # denormalise module and a record matching its output spec
    self.dmod = FPAddDeNormMod(width, id_wid)
    self.o = self.dmod.ospec()
447 def setup(self
, m
, i
):
448 """ links module to inputs and outputs
450 self
.smod
.setup(m
, i
)
451 self
.dmod
.setup(m
, self
.smod
.o
)
452 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
455 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
456 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
458 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
461 with m
.If(self
.out_do_z
):
467 class FPAddDeNormMod(FPState
):
469 def __init__(self
, width
, id_wid
):
472 self
.i
= self
.ispec()
473 self
.o
= self
.ospec()
476 return FPSCData(self
.width
, self
.id_wid
)
479 return FPSCData(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered as its
           "denormalise" submodule
        i: input data, copied combinatorially into self.i
    """
    m.submodules.denormalise = self
    m.d.comb += self.i.eq(i)
487 def elaborate(self
, platform
):
489 m
.submodules
.denorm_in_a
= self
.i
.a
490 m
.submodules
.denorm_in_b
= self
.i
.b
491 m
.submodules
.denorm_out_a
= self
.o
.a
492 m
.submodules
.denorm_out_b
= self
.o
.b
494 with m
.If(~self
.i
.out_do_z
):
495 # XXX hmmm, don't like repeating identical code
496 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
497 with m
.If(self
.i
.a
.exp_n127
):
498 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
500 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
502 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
503 with m
.If(self
.i
.b
.exp_n127
):
504 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit b exponent
506 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
508 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
509 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
510 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
515 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """ "denormalise" state wrapping an FPAddDeNormMod

        width:  floating-point bit-width, forwarded to the module and to
                the two FPNumBase outputs
        id_wid: multiplex-id width, forwarded to the module
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid): both are required,
    # constructing it from width alone raises TypeError
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
523 def setup(self
, m
, i
):
524 """ links module to inputs and outputs
528 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
529 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
532 # Denormalised Number checks
536 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """ creates the a/b inputs, the a/b outputs and the exp_eq flag

        width: bit-width passed to every FPNumBase / FPNumIn created here
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs (FPNumIn built with None as its first argument)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # single-bit flag, no reset value
    self.exp_eq = Signal(reset_less=True)
545 def elaborate(self
, platform
):
546 # This one however (single-cycle) will do the shift
551 m
.submodules
.align_in_a
= self
.in_a
552 m
.submodules
.align_in_b
= self
.in_b
553 m
.submodules
.align_out_a
= self
.out_a
554 m
.submodules
.align_out_b
= self
.out_b
556 # NOTE: this does *not* do single-cycle multi-shifting,
557 # it *STAYS* in the align state until exponents match
559 # exponent of a greater than b: shift b down
560 m
.d
.comb
+= self
.exp_eq
.eq(0)
561 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
562 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
563 agtb
= Signal(reset_less
=True)
564 altb
= Signal(reset_less
=True)
565 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
566 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
568 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
569 # exponent of b greater than a: shift a down
571 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
572 # exponents equal: move to next stage.
574 m
.d
.comb
+= self
.exp_eq
.eq(1)
578 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """ "align" state wrapping an FPAddAlignMultiMod

        width:  bit-width forwarded to the module and to both outputs
        id_wid: accepted but not used by this constructor
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    # this state's own copies of the module's a/b outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # mirrors the module's exp_eq flag
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b):
    """ links module to inputs and outputs

        m:    Module under construction; self.mod becomes its "align"
              submodule
        in_a: operand copied into self.mod.in_a
        in_b: operand copied into self.mod.in_b
    """
    m.submodules.align = self.mod
    m.d.comb += [
        self.mod.in_a.eq(in_a),
        self.mod.in_b.eq(in_b),
        #self.out_a.eq(self.mod.out_a)
        #self.out_b.eq(self.mod.out_b)
        self.exp_eq.eq(self.mod.exp_eq),
    ]
    # the a/b outputs are taken in the sync domain, not combinatorially
    m.d.sync += [
        self.out_a.eq(self.mod.out_a),
        self.out_b.eq(self.mod.out_b),
    ]
600 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """ data record: operands a and b, result z, the out_do_z flag, the
        oz value and the multiplex id

        width:  bit-width passed to FPNumIn / FPNumOut and used for oz
        id_wid: width of the mid Signal
    """
    # operands
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # result number
    self.z = FPNumOut(width, False)
    # single-bit flag, no reset value
    self.out_do_z = Signal(reset_less=True)
    # width-bit value carried next to z
    self.oz = Signal(width, reset_less=True)
    # multiplex id
    self.mid = Signal(id_wid, reset_less=True)
615 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
616 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
619 class FPAddAlignSingleMod
:
621 def __init__(self
, width
, id_wid
):
624 self
.i
= self
.ispec()
625 self
.o
= self
.ospec()
628 return FPSCData(self
.width
, self
.id_wid
)
631 return FPNumIn2Ops(self
.width
, self
.id_wid
)
633 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered as its
           "align" submodule
        i: input data, copied combinatorially into self.i
    """
    m.submodules.align = self
    m.d.comb += self.i.eq(i)
642 def elaborate(self
, platform
):
643 """ Aligns A against B or B against A, depending on which has the
644 greater exponent. This is done in a *single* cycle using
645 variable-width bit-shift
647 the shifter used here is quite expensive in terms of gates.
648 Mux A or B in (and out) into temporaries, as only one of them
649 needs to be aligned against the other
653 m
.submodules
.align_in_a
= self
.i
.a
654 m
.submodules
.align_in_b
= self
.i
.b
655 m
.submodules
.align_out_a
= self
.o
.a
656 m
.submodules
.align_out_b
= self
.o
.b
658 # temporary (muxed) input and output to be shifted
659 t_inp
= FPNumBase(self
.width
)
660 t_out
= FPNumIn(None, self
.width
)
661 espec
= (len(self
.i
.a
.e
), True)
662 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
663 m
.submodules
.align_t_in
= t_inp
664 m
.submodules
.align_t_out
= t_out
665 m
.submodules
.multishift_r
= msr
667 ediff
= Signal(espec
, reset_less
=True)
668 ediffr
= Signal(espec
, reset_less
=True)
669 tdiff
= Signal(espec
, reset_less
=True)
670 elz
= Signal(reset_less
=True)
671 egz
= Signal(reset_less
=True)
673 # connect multi-shifter to t_inp/out mantissa (and tdiff)
674 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
675 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
676 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
677 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
678 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
680 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
681 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
682 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
683 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
685 # default: A-exp == B-exp, A and B untouched (fall through)
686 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
687 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
688 # only one shifter (muxed)
689 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
690 # exponent of a greater than b: shift b down
691 with m
.If(~self
.i
.out_do_z
):
693 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
696 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
698 # exponent of b greater than a: shift a down
700 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
703 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
706 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
707 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
708 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
709 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.z
.v
)
714 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """ "align" state wrapping an FPAddAlignSingleMod

        width:  bit-width forwarded to the module and to both outputs
        id_wid: multiplex-id width, forwarded to the module
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    # this state's own a/b outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
722 def setup(self
, m
, i
):
723 """ links module to inputs and outputs
727 # NOTE: could be done as comb
728 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
729 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
735 class FPAddAlignSingleAdd(FPState
):
737 def __init__(self
, width
, id_wid
):
738 FPState
.__init
__(self
, "align")
741 self
.a1o
= self
.ospec()
744 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
747 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
749 def setup(self
, m
, i
):
750 """ links module to inputs and outputs
753 # chain AddAlignSingle, AddStage0 and AddStage1
754 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
755 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
756 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
758 chain
= StageChain([mod
, a0mod
, a1mod
])
761 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
764 m
.next
= "normalise_1"
767 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """ data record: result z, the out_do_z flag, the oz value, the
        mantissa total and the multiplex id

        width:  bit-width passed to FPNumBase and used for oz
        id_wid: width of the mid Signal
    """
    self.z = FPNumBase(width, False)
    # single-bit flag, no reset value
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # total is four bits wider than z's mantissa width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    # multiplex id
    self.mid = Signal(id_wid, reset_less=True)
777 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
778 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
781 class FPAddStage0Mod
:
783 def __init__(self
, width
, id_wid
):
786 self
.i
= self
.ispec()
787 self
.o
= self
.ospec()
790 return FPSCData(self
.width
, self
.id_wid
)
793 return FPAddStage0Data(self
.width
, self
.id_wid
)
795 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered as its
           "add0" submodule
        i: input data, copied combinatorially into self.i
    """
    m.submodules.add0 = self
    m.d.comb += self.i.eq(i)
804 def elaborate(self
, platform
):
806 m
.submodules
.add0_in_a
= self
.i
.a
807 m
.submodules
.add0_in_b
= self
.i
.b
808 m
.submodules
.add0_out_z
= self
.o
.z
810 # store intermediate tests (and zero-extended mantissas)
811 seq
= Signal(reset_less
=True)
812 mge
= Signal(reset_less
=True)
813 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
814 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
815 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
816 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
817 am0
.eq(Cat(self
.i
.a
.m
, 0)),
818 bm0
.eq(Cat(self
.i
.b
.m
, 0))
820 # same-sign (both negative or both positive) add mantissas
821 with m
.If(~self
.i
.out_do_z
):
822 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
825 self
.o
.tot
.eq(am0
+ bm0
),
826 self
.o
.z
.s
.eq(self
.i
.a
.s
)
828 # a mantissa greater than b, use a
831 self
.o
.tot
.eq(am0
- bm0
),
832 self
.o
.z
.s
.eq(self
.i
.a
.s
)
834 # b mantissa greater than a, use b
837 self
.o
.tot
.eq(bm0
- am0
),
838 self
.o
.z
.s
.eq(self
.i
.b
.s
)
841 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.z
.v
)
843 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
844 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
848 class FPAddStage0(FPState
):
849 """ First stage of add. covers same-sign (add) and subtract
850 special-casing when mantissas are greater or equal, to
851 give greatest accuracy.
def __init__(self, width, id_wid):
    """ "add_0" state wrapping an FPAddStage0Mod

        width:  floating-point bit-width, forwarded to the module
        id_wid: multiplex-id width, forwarded to the module
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid): both are required,
    # constructing it from width alone raises TypeError
    self.mod = FPAddStage0Mod(width, id_wid)
    # record matching the module's output spec
    self.o = self.mod.ospec()
859 def setup(self
, m
, i
):
860 """ links module to inputs and outputs
864 # NOTE: these could be done as combinatorial (merge add0+add1)
865 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
871 class FPAddStage1Data
:
873 def __init__(self
, width
, id_wid
):
874 self
.z
= FPNumBase(width
, False)
875 self
.out_do_z
= Signal(reset_less
=True)
876 self
.oz
= Signal(width
, reset_less
=True)
878 self
.mid
= Signal(id_wid
, reset_less
=True)
881 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
882 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
886 class FPAddStage1Mod(FPState
):
887 """ Second stage of add: preparation for normalisation.
888 detects when tot sum is too big (tot[27] is kinda a carry bit)
891 def __init__(self
, width
, id_wid
):
894 self
.i
= self
.ispec()
895 self
.o
= self
.ospec()
898 return FPAddStage0Data(self
.width
, self
.id_wid
)
901 return FPAddStage1Data(self
.width
, self
.id_wid
)
903 def process(self
, i
):
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object becomes its "add1"
           submodule and self.o.of its "add1_out_overflow" submodule
        i: input data, copied combinatorially into self.i
    """
    m.submodules.add1 = self
    m.submodules.add1_out_overflow = self.o.of

    m.d.comb += self.i.eq(i)
914 def elaborate(self
, platform
):
916 #m.submodules.norm1_in_overflow = self.in_of
917 #m.submodules.norm1_out_overflow = self.out_of
918 #m.submodules.norm1_in_z = self.in_z
919 #m.submodules.norm1_out_z = self.out_z
920 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
921 # tot[-1] (MSB) gets set when the sum overflows. shift result down
922 with m
.If(~self
.i
.out_do_z
):
923 with m
.If(self
.i
.tot
[-1]):
925 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
926 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
927 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
928 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
929 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
930 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
932 # tot[-1] (MSB) zero case
935 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
936 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
937 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
938 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
939 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
942 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
943 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
944 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
949 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """ "add_1" state wrapping an FPAddStage1Mod

        width:  floating-point bit-width, forwarded to the module and to
                the out_z number
        id_wid: multiplex-id width, forwarded to the module
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid): both are required,
    # constructing it from width alone raises TypeError
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # strobe signal (default Signal, so it keeps its reset value)
    self.norm_stb = Signal()
958 def setup(self
, m
, i
):
959 """ links module to inputs and outputs
963 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
965 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
966 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
967 m
.d
.sync
+= self
.norm_stb
.eq(1)
970 m
.next
= "normalise_1"
973 class FPNormaliseModSingle
:
975 def __init__(self
, width
):
977 self
.in_z
= self
.ispec()
978 self
.out_z
= self
.ospec()
981 return FPNumBase(self
.width
, False)
984 return FPNumBase(self
.width
, False)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered as its
           "normalise" submodule
        i: input data, copied combinatorially into self.i
    """
    m.submodules.normalise = self
    # NOTE(review): the visible part of __init__ creates in_z / out_z but
    # no attribute named "i" - confirm self.i exists before relying on this
    m.d.comb += self.i.eq(i)
992 def elaborate(self
, platform
):
995 mwid
= self
.out_z
.m_width
+2
996 pe
= PriorityEncoder(mwid
)
997 m
.submodules
.norm_pe
= pe
999 m
.submodules
.norm1_out_z
= self
.out_z
1000 m
.submodules
.norm1_in_z
= self
.in_z
1002 in_z
= FPNumBase(self
.width
, False)
1004 m
.submodules
.norm1_insel_z
= in_z
1005 m
.submodules
.norm1_insel_overflow
= in_of
1007 espec
= (len(in_z
.e
), True)
1008 ediff_n126
= Signal(espec
, reset_less
=True)
1009 msr
= MultiShiftRMerge(mwid
, espec
)
1010 m
.submodules
.multishift_r
= msr
1012 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1013 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1014 # initialise out from in (overridden below)
1015 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1016 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1017 # normalisation decrease condition
1018 decrease
= Signal(reset_less
=True)
1019 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1021 with m
.If(decrease
):
1022 # *sigh* not entirely obvious: count leading zeros (clz)
1023 # with a PriorityEncoder: to find from the MSB
1024 # we reverse the order of the bits.
1025 temp_m
= Signal(mwid
, reset_less
=True)
1026 temp_s
= Signal(mwid
+1, reset_less
=True)
1027 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1029 # cat round and guard bits back into the mantissa
1030 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1031 pe
.i
.eq(temp_m
[::-1]), # inverted
1032 clz
.eq(pe
.o
), # count zeros from MSB down
1033 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1034 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1035 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """ data record: the roundz flag, result z, the out_do_z flag, the
        oz value and the multiplex id

        width:  bit-width passed to FPNumBase and used for oz
        id_wid: width of the mid Signal
    """
    # single-bit flags are created without reset values
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # multiplex id
    self.mid = Signal(id_wid, reset_less=True)
1050 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1051 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1054 class FPNorm1ModSingle
:
1056 def __init__(self
, width
, id_wid
):
1058 self
.id_wid
= id_wid
1059 self
.i
= self
.ispec()
1060 self
.o
= self
.ospec()
1063 return FPAddStage1Data(self
.width
, self
.id_wid
)
1066 return FPNorm1Data(self
.width
, self
.id_wid
)
def setup(self, m, i):
    """ links module to inputs and outputs

        m: Module under construction; this object is registered as its
           "normalise_1" submodule
        i: input data, copied combinatorially into self.i
    """
    m.submodules.normalise_1 = self
    m.d.comb += self.i.eq(i)
1074 def process(self
, i
):
1077 def elaborate(self
, platform
):
1080 mwid
= self
.o
.z
.m_width
+2
1081 pe
= PriorityEncoder(mwid
)
1082 m
.submodules
.norm_pe
= pe
1085 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1087 m
.submodules
.norm1_out_z
= self
.o
.z
1088 m
.submodules
.norm1_out_overflow
= of
1089 m
.submodules
.norm1_in_z
= self
.i
.z
1090 m
.submodules
.norm1_in_overflow
= self
.i
.of
1093 m
.submodules
.norm1_insel_z
= i
.z
1094 m
.submodules
.norm1_insel_overflow
= i
.of
1096 espec
= (len(i
.z
.e
), True)
1097 ediff_n126
= Signal(espec
, reset_less
=True)
1098 msr
= MultiShiftRMerge(mwid
, espec
)
1099 m
.submodules
.multishift_r
= msr
1101 m
.d
.comb
+= i
.eq(self
.i
)
1102 # initialise out from in (overridden below)
1103 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1104 m
.d
.comb
+= of
.eq(i
.of
)
1105 # normalisation increase/decrease conditions
1106 decrease
= Signal(reset_less
=True)
1107 increase
= Signal(reset_less
=True)
1108 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1109 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1111 with m
.If(~self
.i
.out_do_z
):
1112 with m
.If(decrease
):
1113 # *sigh* not entirely obvious: count leading zeros (clz)
1114 # with a PriorityEncoder: to find from the MSB
1115 # we reverse the order of the bits.
1116 temp_m
= Signal(mwid
, reset_less
=True)
1117 temp_s
= Signal(mwid
+1, reset_less
=True)
1118 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1119 # make sure that the amount to decrease by does NOT
1120 # go below the minimum non-INF/NaN exponent
1121 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1124 # cat round and guard bits back into the mantissa
1125 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1126 pe
.i
.eq(temp_m
[::-1]), # inverted
1127 clz
.eq(limclz
), # count zeros from MSB down
1128 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1129 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1130 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1131 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1132 # overflow in bits 0..1: got shifted too (leave sticky)
1133 of
.guard
.eq(temp_s
[1]), # guard
1134 of
.round_bit
.eq(temp_s
[0]), # round
1137 with m
.Elif(increase
):
1138 temp_m
= Signal(mwid
+1, reset_less
=True)
1140 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1142 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1143 # connect multi-shifter to inp/out mantissa (and ediff)
1145 msr
.diff
.eq(ediff_n126
),
1146 self
.o
.z
.m
.eq(msr
.m
[3:]),
1147 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1148 # overflow in bits 0..1: got shifted too (leave sticky)
1149 of
.guard
.eq(temp_s
[2]), # guard
1150 of
.round_bit
.eq(temp_s
[1]), # round
1151 of
.sticky
.eq(temp_s
[0]), # sticky
1152 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1155 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1156 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1157 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
class FPNorm1ModMulti:
    """ Combinatorial one-bit-per-pass normalisation step.

        Each evaluation selects either the fresh operand (in_z / in_of) or
        the looped-back intermediate (temp_z / temp_of), then moves the
        mantissa by at most one bit and adjusts the exponent to match.
        out_norm is high whenever a shift was applied on this pass.
    """

    def __init__(self, width, single_cycle=True):
        """
            * width: bit-width handed to every FPNumBase created here
            * single_cycle: accepted but not used by this class
        """
        self.width = width  # read back by elaborate()
        self.in_select = Signal(reset_less=True)  # 1: use in_*, 0: use temp_*
        # elaborate() drives out_norm, so it has to exist before then
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ builds and returns the combinatorial normalisation logic
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies that hold whichever source was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)

        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)

        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end

        # mantissa MSB clear and exponent still above the minimum: shift up
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                # must follow the whole-mantissa assignment: it overrides bit 0
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # exponent below the minimum: shift down, folding round into sticky
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit),
            ]

        return m
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping a FPNorm1ModSingle module.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """
            * width: bit-width of the floating-point number
            * id_wid: width of the multiplex id, forwarded to the module
            * single_cycle: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # pass id_wid as well, matching how FPNormToPack builds this module
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data format: whatever the wrapped module expects
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        # NOTE(review): the body of this method was not visible in the
        # extract this class was restored from.  "round" is the state
        # FPRound registers under - confirm against the repository.
        m.next = "round"
1253 class FPNorm1Multi(FPState
):
1255 def __init__(self
, width
, id_wid
):
1256 FPState
.__init
__(self
, "normalise_1")
1257 self
.mod
= FPNorm1ModMulti(width
)
1258 self
.stb
= Signal(reset_less
=True)
1259 self
.ack
= Signal(reset
=0, reset_less
=True)
1260 self
.out_norm
= Signal(reset_less
=True)
1261 self
.in_accept
= Signal(reset_less
=True)
1262 self
.temp_z
= FPNumBase(width
)
1263 self
.temp_of
= Overflow()
1264 self
.out_z
= FPNumBase(width
)
1265 self
.out_roundz
= Signal(reset_less
=True)
1267 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1268 """ links module to inputs and outputs
1270 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1271 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1272 self
.out_z
, self
.out_norm
)
1274 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1275 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1277 def action(self
, m
):
1278 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1279 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1280 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1281 with m
.If(self
.out_norm
):
1282 with m
.If(self
.in_accept
):
1287 m
.d
.sync
+= self
.ack
.eq(0)
1289 # normalisation not required (or done).
1291 m
.d
.sync
+= self
.ack
.eq(1)
1292 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
class FPNormToPack(FPState):
    """ FSM state "normalise_1" that runs normalise, round, corrections
        and pack as a single chain, registering the packed result.
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width of the floating-point number
            * id_wid: width of the multiplex id carried alongside the data
        """
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width  # read by ispec(), ospec() and setup()

    def ispec(self):
        return FPAddStage1Data(self.width, self.id_wid)  # Norm1ModSingle ispec

    def ospec(self):
        return FPPackData(self.width, self.id_wid)  # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # NOTE(review): without this call neither `chain` nor `i` is used
        # and the four modules are never attached to `m`.  Assumes
        # StageChain offers setup(m, i) like the modules it wraps - confirm.
        chain.setup(m, i)

        self.out_z = pmod.ospec()

        # register the last stage's outputs for the following state
        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(pmod.o.z.v)  # outputs packed result

    def action(self, m):
        m.next = "pack_put_z"
1330 def __init__(self
, width
, id_wid
):
1331 self
.z
= FPNumBase(width
, False)
1332 self
.out_do_z
= Signal(reset_less
=True)
1333 self
.oz
= Signal(width
, reset_less
=True)
1334 self
.mid
= Signal(id_wid
, reset_less
=True)
1337 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1343 def __init__(self
, width
, id_wid
):
1345 self
.id_wid
= id_wid
1346 self
.i
= self
.ispec()
1347 self
.out_z
= self
.ospec()
1350 return FPNorm1Data(self
.width
, self
.id_wid
)
1353 return FPRoundData(self
.width
, self
.id_wid
)
1355 def process(self
, i
):
1358 def setup(self
, m
, i
):
1359 m
.submodules
.roundz
= self
1360 m
.d
.comb
+= self
.i
.eq(i
)
1362 def elaborate(self
, platform
):
1364 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1365 with m
.If(~self
.i
.out_do_z
):
1366 with m
.If(self
.i
.roundz
):
1367 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
1368 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1369 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
class FPRound(FPState):
    """ FSM state "round": wraps FPRoundMod and registers its result.
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width of the floating-point number
            * id_wid: width of the multiplex id, forwarded to the module
        """
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: whatever the wrapped module expects
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): two lines at this point were not visible in the
        # extract this class was restored from (possibly blank) - confirm
        # against the repository that no statement is missing here.
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the module exposes its result as out_z; it has no `o` attribute
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1400 class FPCorrectionsMod
:
1402 def __init__(self
, width
, id_wid
):
1404 self
.id_wid
= id_wid
1405 self
.i
= self
.ispec()
1406 self
.out_z
= self
.ospec()
1409 return FPRoundData(self
.width
, self
.id_wid
)
1412 return FPRoundData(self
.width
, self
.id_wid
)
1414 def process(self
, i
):
1417 def setup(self
, m
, i
):
1418 """ links module to inputs and outputs
1420 m
.submodules
.corrections
= self
1421 m
.d
.comb
+= self
.i
.eq(i
)
1423 def elaborate(self
, platform
):
1425 m
.submodules
.corr_in_z
= self
.i
.z
1426 m
.submodules
.corr_out_z
= self
.out_z
.z
1427 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1428 with m
.If(~self
.i
.out_do_z
):
1429 with m
.If(self
.i
.z
.is_denormalised
):
1430 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
class FPCorrections(FPState):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its result.
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width of the floating-point number
            * id_wid: width of the multiplex id, forwarded to the module
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: whatever the wrapped module expects
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the module exposes its result as out_z; it has no `o` attribute
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        # NOTE(review): the body of this method was not visible in the
        # extract this class was restored from.  "pack" is the state
        # FPPack registers under - confirm against the repository.
        m.next = "pack"
1461 def __init__(self
, width
, id_wid
):
1462 self
.z
= FPNumOut(width
, False)
1463 self
.mid
= Signal(id_wid
, reset_less
=True)
1466 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1471 def __init__(self
, width
, id_wid
):
1473 self
.id_wid
= id_wid
1474 self
.i
= self
.ispec()
1475 self
.o
= self
.ospec()
1478 return FPRoundData(self
.width
, self
.id_wid
)
1481 return FPPackData(self
.width
, self
.id_wid
)
1483 def process(self
, i
):
1486 def setup(self
, m
, in_z
):
1487 """ links module to inputs and outputs
1489 m
.submodules
.pack
= self
1490 m
.d
.comb
+= self
.i
.eq(in_z
)
1492 def elaborate(self
, platform
):
1494 m
.submodules
.pack_in_z
= self
.i
.z
1495 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1496 with m
.If(~self
.i
.out_do_z
):
1497 with m
.If(self
.i
.z
.is_overflowed
):
1498 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1500 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1502 m
.d
.comb
+= self
.o
.z
.v
.eq(self
.i
.oz
)
class FPPack(FPState):
    """ FSM state "pack": wraps FPPackMod and registers the packed value
        and its multiplex id.
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width of the floating-point number
            * id_wid: width of the multiplex id, forwarded to the module
        """
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: whatever the wrapped module expects
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module's result lives in `o`, and the packed bits sit at
        # .z.v on both sides (same access path FPNormToPack uses)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
1531 class FPPutZ(FPState
):
1533 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1534 FPState
.__init
__(self
, state
)
1535 if to_state
is None:
1536 to_state
= "get_ops"
1537 self
.to_state
= to_state
1540 self
.in_mid
= in_mid
1541 self
.out_mid
= out_mid
1543 def action(self
, m
):
1544 if self
.in_mid
is not None:
1545 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1547 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1549 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1550 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1551 m
.next
= self
.to_state
1553 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1556 class FPPutZIdx(FPState
):
1558 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1559 FPState
.__init
__(self
, state
)
1560 if to_state
is None:
1561 to_state
= "get_ops"
1562 self
.to_state
= to_state
1564 self
.out_zs
= out_zs
1565 self
.in_mid
= in_mid
1567 def action(self
, m
):
1568 outz_stb
= Signal(reset_less
=True)
1569 outz_ack
= Signal(reset_less
=True)
1570 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1571 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1574 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1576 with m
.If(outz_stb
& outz_ack
):
1577 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1578 m
.next
= self
.to_state
1580 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
class FPADDBaseData:
    """ Input record for the adder: two operands plus a multiplex id.
    """

    def __init__(self, width, id_wid):
        """
            * width: bit-width of each operand signal
            * id_wid: bit-width of the multiplex id signal
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments that copy a, b and mid from i
        """
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
1596 def __init__(self
, width
, id_wid
):
1597 self
.z
= FPOp(width
)
1598 self
.mid
= Signal(id_wid
, reset_less
=True)
1601 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1606 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1609 * width: bit-width of IEEE754. supported: 16, 32, 64
1610 * id_wid: an identifier that is sync-connected to the input
1611 * single_cycle: True indicates each stage to complete in 1 clock
1612 * compact: True indicates a reduced number of stages
1615 self
.id_wid
= id_wid
1616 self
.single_cycle
= single_cycle
1617 self
.compact
= compact
1619 self
.in_t
= Trigger()
1620 self
.i
= self
.ispec()
1621 self
.o
= self
.ospec()
1626 return FPADDBaseData(self
.width
, self
.id_wid
)
1629 return FPOpData(self
.width
, self
.id_wid
)
1631 def add_state(self
, state
):
1632 self
.states
.append(state
)
1635 def get_fragment(self
, platform
=None):
1636 """ creates the HDL code-fragment for FPAdd
1639 m
.submodules
.out_z
= self
.o
.z
1640 m
.submodules
.in_t
= self
.in_t
1642 self
.get_compact_fragment(m
, platform
)
1644 self
.get_longer_fragment(m
, platform
)
1646 with m
.FSM() as fsm
:
1648 for state
in self
.states
:
1649 with m
.State(state
.state_from
):
1654 def get_longer_fragment(self
, m
, platform
=None):
1656 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1658 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1662 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1663 sc
.setup(m
, a
, b
, self
.in_mid
)
1665 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1666 dn
.setup(m
, a
, b
, sc
.in_mid
)
1668 if self
.single_cycle
:
1669 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1670 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1672 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1673 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1675 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1676 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1678 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1679 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1681 if self
.single_cycle
:
1682 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1683 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1685 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1686 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1688 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1689 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1691 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1692 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1694 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1695 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1697 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1698 pa
.in_mid
, self
.out_mid
))
1700 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1701 pa
.in_mid
, self
.out_mid
))
1703 def get_compact_fragment(self
, m
, platform
=None):
1705 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1706 self
.width
, self
.id_wid
))
1707 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1709 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1712 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1715 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1716 n1
.setup(m
, alm
.a1o
)
1718 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1719 n1
.out_z
.mid
, self
.o
.mid
))
1721 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
.z
, self
.o
,
1722 sc
.o
.mid
, self
.o
.mid
))
1725 class FPADDBase(FPState
):
1727 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1730 * width: bit-width of IEEE754. supported: 16, 32, 64
1731 * id_wid: an identifier that is sync-connected to the input
1732 * single_cycle: True indicates each stage to complete in 1 clock
1734 FPState
.__init
__(self
, "fpadd")
1736 self
.single_cycle
= single_cycle
1737 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1738 self
.o
= self
.ospec()
1740 self
.in_t
= Trigger()
1741 self
.i
= self
.ispec()
1743 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1744 self
.in_accept
= Signal(reset_less
=True)
1745 self
.add_stb
= Signal(reset_less
=True)
1746 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1749 return self
.mod
.ispec()
1752 return self
.mod
.ospec()
1754 def setup(self
, m
, i
, add_stb
, in_mid
):
1755 m
.d
.comb
+= [self
.i
.eq(i
),
1756 self
.mod
.i
.eq(self
.i
),
1757 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1758 #self.add_stb.eq(add_stb),
1759 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1760 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1761 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1762 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1763 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1764 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1767 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1768 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1769 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1770 #m.d.sync += self.in_t.stb.eq(0)
1772 m
.submodules
.fpadd
= self
.mod
1774 def action(self
, m
):
1776 # in_accept is set on incoming strobe HIGH and ack LOW.
1777 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1779 #with m.If(self.in_t.ack):
1780 # m.d.sync += self.in_t.stb.eq(0)
1781 with m
.If(~self
.z_done
):
1782 # not done: test for accepting an incoming operand pair
1783 with m
.If(self
.in_accept
):
1785 self
.add_ack
.eq(1), # acknowledge receipt...
1786 self
.in_t
.stb
.eq(1), # initiate add
1789 m
.d
.sync
+= [self
.add_ack
.eq(0),
1790 self
.in_t
.stb
.eq(0),
1794 # done: acknowledge, and write out id and value
1795 m
.d
.sync
+= [self
.add_ack
.eq(1),
1802 if self
.in_mid
is not None:
1803 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1806 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1808 # move to output state on detecting z ack
1809 with m
.If(self
.out_z
.trigger
):
1810 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1813 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1817 def __init__(self
, width
, id_wid
):
1819 self
.id_wid
= id_wid
1821 for i
in range(rs_sz
):
1823 out_z
.name
= "out_z_%d" % i
1825 self
.res
= Array(res
)
1826 self
.in_z
= FPOp(width
)
1827 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1829 def setup(self
, m
, in_z
, in_mid
):
1830 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1831 self
.in_mid
.eq(in_mid
)]
1833 def get_fragment(self
, platform
=None):
1834 """ creates the HDL code-fragment for FPAdd
1837 m
.submodules
.res_in_z
= self
.in_z
1838 m
.submodules
+= self
.res
1850 """ FPADD: stages as follows:
1856 FPAddBase---> FPAddBaseMod
1858 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1860 FPAddBase is tricky: it is both a stage and *has* stages.
1861 Connection to FPAddBaseMod therefore requires an in stb/ack
1862 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1863 needs to be the thing that raises the incoming stb.
1866 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1869 * width: bit-width of IEEE754. supported: 16, 32, 64
1870 * id_wid: an identifier that is sync-connected to the input
1871 * single_cycle: True indicates each stage to complete in 1 clock
1874 self
.id_wid
= id_wid
1875 self
.single_cycle
= single_cycle
1877 #self.out_z = FPOp(width)
1878 self
.ids
= FPID(id_wid
)
1881 for i
in range(rs_sz
):
1884 in_a
.name
= "in_a_%d" % i
1885 in_b
.name
= "in_b_%d" % i
1886 rs
.append((in_a
, in_b
))
1890 for i
in range(rs_sz
):
1892 out_z
.name
= "out_z_%d" % i
1894 self
.res
= Array(res
)
1898 def add_state(self
, state
):
1899 self
.states
.append(state
)
1902 def get_fragment(self
, platform
=None):
1903 """ creates the HDL code-fragment for FPAdd
1906 m
.submodules
+= self
.rs
1908 in_a
= self
.rs
[0][0]
1909 in_b
= self
.rs
[0][1]
1911 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1916 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1921 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1922 ab
= self
.add_state(ab
)
1923 abd
= ab
.ispec() # create an input spec object for FPADDBase
1924 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1925 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1928 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1931 with m
.FSM() as fsm
:
1933 for state
in self
.states
:
1934 with m
.State(state
.state_from
):
1940 if __name__
== "__main__":
1942 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
1943 main(alu
, ports
=alu
.rs
[0][0].ports() + \
1944 alu
.rs
[0][1].ports() + \
1945 alu
.res
[0].ports() + \
1946 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
1948 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
1949 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
1950 alu
.in_t
.ports() + \
1951 alu
.out_z
.ports() + \
1952 [alu
.in_mid
, alu
.out_mid
])
1955 # works... but don't use, just do "python fname.py convert -t v"
1956 #print (verilog.convert(alu, ports=[
1957 # ports=alu.in_a.ports() + \
1958 # alu.in_b.ports() + \
1959 # alu.out_z.ports())