1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
17 def __init__(self
, state_from
):
18 self
.state_from
= state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
140 def __init__(self
, width
):
141 self
.in_op
= FPOp(width
)
142 self
.out_op
= Signal(width
)
143 self
.out_decode
= Signal(reset_less
=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
222 def __init__(self
, in_state
, out_state
, width
, id_wid
):
223 FPState
.__init
__(self
, in_state
)
224 self
.out_state
= out_state
225 self
.mod
= FPGet2OpMod(width
, id_wid
)
226 self
.o
= self
.mod
.ospec()
227 self
.in_stb
= Signal(reset_less
=True)
228 self
.out_ack
= Signal(reset_less
=True)
229 self
.out_decode
= Signal(reset_less
=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
254 def __init__(self
, width
, id_wid
, m_extra
=True):
255 self
.a
= FPNumBase(width
, m_extra
)
256 self
.b
= FPNumBase(width
, m_extra
)
257 self
.mid
= Signal(id_wid
, reset_less
=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
265 def __init__(self
, width
, id_wid
):
266 self
.a
= FPNumBase(width
, True)
267 self
.b
= FPNumBase(width
, True)
268 self
.z
= FPNumOut(width
, False)
269 self
.out_do_z
= Signal(reset_less
=True)
270 self
.mid
= Signal(id_wid
, reset_less
=True)
273 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
274 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
277 class FPAddSpecialCasesMod
:
278 """ special cases: NaNs, infs, zeros, denormalised
279 NOTE: some of these are unique to add. see "Special Operations"
280 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
283 def __init__(self
, width
, id_wid
):
286 self
.i
= self
.ispec()
287 self
.o
= self
.ospec()
290 return FPNumBase2Ops(self
.width
, self
.id_wid
)
293 return FPSCData(self
.width
, self
.id_wid
)
295 def setup(self
, m
, i
):
296 """ links module to inputs and outputs
298 m
.submodules
.specialcases
= self
299 m
.d
.comb
+= self
.i
.eq(i
)
301 def elaborate(self
, platform
):
304 m
.submodules
.sc_in_a
= self
.i
.a
305 m
.submodules
.sc_in_b
= self
.i
.b
306 m
.submodules
.sc_out_z
= self
.o
.z
309 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
312 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
314 # if a is NaN or b is NaN return NaN
315 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
316 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
317 m
.d
.comb
+= self
.o
.z
.nan(0)
319 # XXX WEIRDNESS for FP16 non-canonical NaN handling
322 ## if a is zero and b is NaN return -b
323 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
324 # m.d.comb += self.o.out_do_z.eq(1)
325 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
327 ## if b is zero and a is NaN return -a
328 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
329 # m.d.comb += self.o.out_do_z.eq(1)
330 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
332 ## if a is -zero and b is NaN return -b
333 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
334 # m.d.comb += self.o.out_do_z.eq(1)
335 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
337 ## if b is -zero and a is NaN return -a
338 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
339 # m.d.comb += self.o.out_do_z.eq(1)
340 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
342 # if a is inf return inf (or NaN)
343 with m
.Elif(self
.i
.a
.is_inf
):
344 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
345 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
346 # if a is inf and signs don't match return NaN
347 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
348 m
.d
.comb
+= self
.o
.z
.nan(0)
350 # if b is inf return inf
351 with m
.Elif(self
.i
.b
.is_inf
):
352 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
353 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
355 # if a is zero and b zero return signed-a/b
356 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
357 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
358 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
362 # if a is zero return b
363 with m
.Elif(self
.i
.a
.is_zero
):
364 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
365 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
368 # if b is zero return a
369 with m
.Elif(self
.i
.b
.is_zero
):
370 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
371 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
374 # if a equal to -b return zero (+ve zero)
375 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
376 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
377 m
.d
.comb
+= self
.o
.z
.zero(0)
379 # Denormalised Number checks next, so pass a/b data through
381 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
382 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
383 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
385 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
391 def __init__(self
, id_wid
):
394 self
.in_mid
= Signal(id_wid
, reset_less
=True)
395 self
.out_mid
= Signal(id_wid
, reset_less
=True)
401 if self
.id_wid
is not None:
402 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
405 class FPAddSpecialCases(FPState
):
406 """ special cases: NaNs, infs, zeros, denormalised
407 NOTE: some of these are unique to add. see "Special Operations"
408 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
411 def __init__(self
, width
, id_wid
):
412 FPState
.__init
__(self
, "special_cases")
413 self
.mod
= FPAddSpecialCasesMod(width
)
414 self
.out_z
= self
.mod
.ospec()
415 self
.out_do_z
= Signal(reset_less
=True)
417 def setup(self
, m
, i
):
418 """ links module to inputs and outputs
420 self
.mod
.setup(m
, i
, self
.out_do_z
)
421 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
422 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
426 with m
.If(self
.out_do_z
):
429 m
.next
= "denormalise"
432 class FPAddSpecialCasesDeNorm(FPState
):
433 """ special cases: NaNs, infs, zeros, denormalised
434 NOTE: some of these are unique to add. see "Special Operations"
435 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
438 def __init__(self
, width
, id_wid
):
439 FPState
.__init
__(self
, "special_cases")
440 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
441 self
.out_z
= self
.smod
.ospec()
442 self
.out_do_z
= Signal(reset_less
=True)
444 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
445 self
.o
= self
.dmod
.ospec()
447 def setup(self
, m
, i
):
448 """ links module to inputs and outputs
450 self
.smod
.setup(m
, i
)
451 self
.dmod
.setup(m
, self
.smod
.o
)
452 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
455 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
456 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
458 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
461 with m
.If(self
.out_do_z
):
467 class FPAddDeNormMod(FPState
):
469 def __init__(self
, width
, id_wid
):
472 self
.i
= self
.ispec()
473 self
.o
= self
.ospec()
476 return FPSCData(self
.width
, self
.id_wid
)
479 return FPSCData(self
.width
, self
.id_wid
)
481 def setup(self
, m
, i
):
482 """ links module to inputs and outputs
484 m
.submodules
.denormalise
= self
485 m
.d
.comb
+= self
.i
.eq(i
)
487 def elaborate(self
, platform
):
489 m
.submodules
.denorm_in_a
= self
.i
.a
490 m
.submodules
.denorm_in_b
= self
.i
.b
491 m
.submodules
.denorm_out_a
= self
.o
.a
492 m
.submodules
.denorm_out_b
= self
.o
.b
494 with m
.If(~self
.i
.out_do_z
):
495 # XXX hmmm, don't like repeating identical code
496 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
497 with m
.If(self
.i
.a
.exp_n127
):
498 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
500 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
502 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
503 with m
.If(self
.i
.b
.exp_n127
):
504 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
506 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
508 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
509 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
510 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
515 class FPAddDeNorm(FPState
):
517 def __init__(self
, width
, id_wid
):
518 FPState
.__init
__(self
, "denormalise")
519 self
.mod
= FPAddDeNormMod(width
)
520 self
.out_a
= FPNumBase(width
)
521 self
.out_b
= FPNumBase(width
)
523 def setup(self
, m
, i
):
524 """ links module to inputs and outputs
528 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
529 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
532 # Denormalised Number checks
536 class FPAddAlignMultiMod(FPState
):
538 def __init__(self
, width
):
539 self
.in_a
= FPNumBase(width
)
540 self
.in_b
= FPNumBase(width
)
541 self
.out_a
= FPNumIn(None, width
)
542 self
.out_b
= FPNumIn(None, width
)
543 self
.exp_eq
= Signal(reset_less
=True)
545 def elaborate(self
, platform
):
546 # This one however (single-cycle) will do the shift
551 m
.submodules
.align_in_a
= self
.in_a
552 m
.submodules
.align_in_b
= self
.in_b
553 m
.submodules
.align_out_a
= self
.out_a
554 m
.submodules
.align_out_b
= self
.out_b
556 # NOTE: this does *not* do single-cycle multi-shifting,
557 # it *STAYS* in the align state until exponents match
559 # exponent of a greater than b: shift b down
560 m
.d
.comb
+= self
.exp_eq
.eq(0)
561 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
562 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
563 agtb
= Signal(reset_less
=True)
564 altb
= Signal(reset_less
=True)
565 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
566 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
568 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
569 # exponent of b greater than a: shift a down
571 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
572 # exponents equal: move to next stage.
574 m
.d
.comb
+= self
.exp_eq
.eq(1)
578 class FPAddAlignMulti(FPState
):
580 def __init__(self
, width
, id_wid
):
581 FPState
.__init
__(self
, "align")
582 self
.mod
= FPAddAlignMultiMod(width
)
583 self
.out_a
= FPNumIn(None, width
)
584 self
.out_b
= FPNumIn(None, width
)
585 self
.exp_eq
= Signal(reset_less
=True)
587 def setup(self
, m
, in_a
, in_b
):
588 """ links module to inputs and outputs
590 m
.submodules
.align
= self
.mod
591 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
592 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
593 #m.d.comb += self.out_a.eq(self.mod.out_a)
594 #m.d.comb += self.out_b.eq(self.mod.out_b)
595 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
596 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
597 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
600 with m
.If(self
.exp_eq
):
606 def __init__(self
, width
, id_wid
):
607 self
.a
= FPNumIn(None, width
)
608 self
.b
= FPNumIn(None, width
)
609 self
.z
= FPNumOut(width
, False)
610 self
.out_do_z
= Signal(reset_less
=True)
611 self
.oz
= Signal(width
, reset_less
=True)
612 self
.mid
= Signal(id_wid
, reset_less
=True)
615 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
616 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
619 class FPAddAlignSingleMod
:
621 def __init__(self
, width
, id_wid
):
624 self
.i
= self
.ispec()
625 self
.o
= self
.ospec()
628 return FPSCData(self
.width
, self
.id_wid
)
631 return FPNumIn2Ops(self
.width
, self
.id_wid
)
633 def process(self
, i
):
636 def setup(self
, m
, i
):
637 """ links module to inputs and outputs
639 m
.submodules
.align
= self
640 m
.d
.comb
+= self
.i
.eq(i
)
642 def elaborate(self
, platform
):
643 """ Aligns A against B or B against A, depending on which has the
644 greater exponent. This is done in a *single* cycle using
645 variable-width bit-shift
647 the shifter used here is quite expensive in terms of gates.
648 Mux A or B in (and out) into temporaries, as only one of them
649 needs to be aligned against the other
653 m
.submodules
.align_in_a
= self
.i
.a
654 m
.submodules
.align_in_b
= self
.i
.b
655 m
.submodules
.align_out_a
= self
.o
.a
656 m
.submodules
.align_out_b
= self
.o
.b
658 # temporary (muxed) input and output to be shifted
659 t_inp
= FPNumBase(self
.width
)
660 t_out
= FPNumIn(None, self
.width
)
661 espec
= (len(self
.i
.a
.e
), True)
662 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
663 m
.submodules
.align_t_in
= t_inp
664 m
.submodules
.align_t_out
= t_out
665 m
.submodules
.multishift_r
= msr
667 ediff
= Signal(espec
, reset_less
=True)
668 ediffr
= Signal(espec
, reset_less
=True)
669 tdiff
= Signal(espec
, reset_less
=True)
670 elz
= Signal(reset_less
=True)
671 egz
= Signal(reset_less
=True)
673 # connect multi-shifter to t_inp/out mantissa (and tdiff)
674 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
675 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
676 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
677 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
678 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
680 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
681 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
682 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
683 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
685 # default: A-exp == B-exp, A and B untouched (fall through)
686 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
687 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
688 # only one shifter (muxed)
689 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
690 # exponent of a greater than b: shift b down
691 with m
.If(~self
.i
.out_do_z
):
693 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
696 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
698 # exponent of b greater than a: shift a down
700 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
703 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
706 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
707 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
708 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
709 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.z
.v
)
714 class FPAddAlignSingle(FPState
):
716 def __init__(self
, width
, id_wid
):
717 FPState
.__init
__(self
, "align")
718 self
.mod
= FPAddAlignSingleMod(width
, id_wid
)
719 self
.out_a
= FPNumIn(None, width
)
720 self
.out_b
= FPNumIn(None, width
)
722 def setup(self
, m
, i
):
723 """ links module to inputs and outputs
727 # NOTE: could be done as comb
728 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
729 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
735 class FPAddAlignSingleAdd(FPState
):
737 def __init__(self
, width
, id_wid
):
738 FPState
.__init
__(self
, "align")
741 self
.a1o
= self
.ospec()
744 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
747 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
749 def setup(self
, m
, i
):
750 """ links module to inputs and outputs
753 # chain AddAlignSingle, AddStage0 and AddStage1
754 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
755 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
756 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
758 chain
= StageChain([mod
, a0mod
, a1mod
])
761 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
764 m
.next
= "normalise_1"
767 class FPAddStage0Data
:
769 def __init__(self
, width
, id_wid
):
770 self
.z
= FPNumBase(width
, False)
771 self
.out_do_z
= Signal(reset_less
=True)
772 self
.oz
= Signal(width
, reset_less
=True)
773 self
.tot
= Signal(self
.z
.m_width
+ 4, reset_less
=True)
774 self
.mid
= Signal(id_wid
, reset_less
=True)
777 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
778 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
781 class FPAddStage0Mod
:
783 def __init__(self
, width
, id_wid
):
786 self
.i
= self
.ispec()
787 self
.o
= self
.ospec()
790 return FPSCData(self
.width
, self
.id_wid
)
793 return FPAddStage0Data(self
.width
, self
.id_wid
)
795 def process(self
, i
):
798 def setup(self
, m
, i
):
799 """ links module to inputs and outputs
801 m
.submodules
.add0
= self
802 m
.d
.comb
+= self
.i
.eq(i
)
804 def elaborate(self
, platform
):
806 m
.submodules
.add0_in_a
= self
.i
.a
807 m
.submodules
.add0_in_b
= self
.i
.b
808 m
.submodules
.add0_out_z
= self
.o
.z
810 # store intermediate tests (and zero-extended mantissas)
811 seq
= Signal(reset_less
=True)
812 mge
= Signal(reset_less
=True)
813 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
814 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
815 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
816 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
817 am0
.eq(Cat(self
.i
.a
.m
, 0)),
818 bm0
.eq(Cat(self
.i
.b
.m
, 0))
820 # same-sign (both negative or both positive) add mantissas
821 with m
.If(~self
.i
.out_do_z
):
822 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
825 self
.o
.tot
.eq(am0
+ bm0
),
826 self
.o
.z
.s
.eq(self
.i
.a
.s
)
828 # a mantissa greater than b, use a
831 self
.o
.tot
.eq(am0
- bm0
),
832 self
.o
.z
.s
.eq(self
.i
.a
.s
)
834 # b mantissa greater than a, use b
837 self
.o
.tot
.eq(bm0
- am0
),
838 self
.o
.z
.s
.eq(self
.i
.b
.s
)
841 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.z
.v
)
843 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
844 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
848 class FPAddStage0(FPState
):
849 """ First stage of add. covers same-sign (add) and subtract
850 special-casing when mantissas are greater or equal, to
851 give greatest accuracy.
854 def __init__(self
, width
, id_wid
):
855 FPState
.__init
__(self
, "add_0")
856 self
.mod
= FPAddStage0Mod(width
)
857 self
.o
= self
.mod
.ospec()
859 def setup(self
, m
, i
):
860 """ links module to inputs and outputs
864 # NOTE: these could be done as combinatorial (merge add0+add1)
865 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
871 class FPAddStage1Data
:
873 def __init__(self
, width
, id_wid
):
874 self
.z
= FPNumBase(width
, False)
875 self
.out_do_z
= Signal(reset_less
=True)
876 self
.oz
= Signal(width
, reset_less
=True)
878 self
.mid
= Signal(id_wid
, reset_less
=True)
881 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
882 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
886 class FPAddStage1Mod(FPState
):
887 """ Second stage of add: preparation for normalisation.
888 detects when tot sum is too big (tot[27] is kinda a carry bit)
891 def __init__(self
, width
, id_wid
):
894 self
.i
= self
.ispec()
895 self
.o
= self
.ospec()
898 return FPAddStage0Data(self
.width
, self
.id_wid
)
901 return FPAddStage1Data(self
.width
, self
.id_wid
)
903 def process(self
, i
):
906 def setup(self
, m
, i
):
907 """ links module to inputs and outputs
909 m
.submodules
.add1
= self
910 m
.submodules
.add1_out_overflow
= self
.o
.of
912 m
.d
.comb
+= self
.i
.eq(i
)
914 def elaborate(self
, platform
):
916 #m.submodules.norm1_in_overflow = self.in_of
917 #m.submodules.norm1_out_overflow = self.out_of
918 #m.submodules.norm1_in_z = self.in_z
919 #m.submodules.norm1_out_z = self.out_z
920 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
921 # tot[-1] (MSB) gets set when the sum overflows. shift result down
922 with m
.If(~self
.i
.out_do_z
):
923 with m
.If(self
.i
.tot
[-1]):
925 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
926 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
927 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
928 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
929 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
930 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
932 # tot[-1] (MSB) zero case
935 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
936 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
937 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
938 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
939 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
942 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
943 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
944 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
949 class FPAddStage1(FPState
):
951 def __init__(self
, width
, id_wid
):
952 FPState
.__init
__(self
, "add_1")
953 self
.mod
= FPAddStage1Mod(width
)
954 self
.out_z
= FPNumBase(width
, False)
955 self
.out_of
= Overflow()
956 self
.norm_stb
= Signal()
958 def setup(self
, m
, i
):
959 """ links module to inputs and outputs
963 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
965 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
966 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
967 m
.d
.sync
+= self
.norm_stb
.eq(1)
970 m
.next
= "normalise_1"
973 class FPNormaliseModSingle
:
975 def __init__(self
, width
):
977 self
.in_z
= self
.ispec()
978 self
.out_z
= self
.ospec()
981 return FPNumBase(self
.width
, False)
984 return FPNumBase(self
.width
, False)
986 def setup(self
, m
, i
):
987 """ links module to inputs and outputs
989 m
.submodules
.normalise
= self
990 m
.d
.comb
+= self
.i
.eq(i
)
992 def elaborate(self
, platform
):
995 mwid
= self
.out_z
.m_width
+2
996 pe
= PriorityEncoder(mwid
)
997 m
.submodules
.norm_pe
= pe
999 m
.submodules
.norm1_out_z
= self
.out_z
1000 m
.submodules
.norm1_in_z
= self
.in_z
1002 in_z
= FPNumBase(self
.width
, False)
1004 m
.submodules
.norm1_insel_z
= in_z
1005 m
.submodules
.norm1_insel_overflow
= in_of
1007 espec
= (len(in_z
.e
), True)
1008 ediff_n126
= Signal(espec
, reset_less
=True)
1009 msr
= MultiShiftRMerge(mwid
, espec
)
1010 m
.submodules
.multishift_r
= msr
1012 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1013 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1014 # initialise out from in (overridden below)
1015 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1016 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1017 # normalisation decrease condition
1018 decrease
= Signal(reset_less
=True)
1019 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1021 with m
.If(decrease
):
1022 # *sigh* not entirely obvious: count leading zeros (clz)
1023 # with a PriorityEncoder: to find from the MSB
1024 # we reverse the order of the bits.
1025 temp_m
= Signal(mwid
, reset_less
=True)
1026 temp_s
= Signal(mwid
+1, reset_less
=True)
1027 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1029 # cat round and guard bits back into the mantissa
1030 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1031 pe
.i
.eq(temp_m
[::-1]), # inverted
1032 clz
.eq(pe
.o
), # count zeros from MSB down
1033 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1034 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1035 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1042 def __init__(self
, width
, id_wid
):
1043 self
.roundz
= Signal(reset_less
=True)
1044 self
.z
= FPNumBase(width
, False)
1045 self
.out_do_z
= Signal(reset_less
=True)
1046 self
.oz
= Signal(width
, reset_less
=True)
1047 self
.mid
= Signal(id_wid
, reset_less
=True)
1050 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1051 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1054 class FPNorm1ModSingle
:
1056 def __init__(self
, width
, id_wid
):
1058 self
.id_wid
= id_wid
1059 self
.i
= self
.ispec()
1060 self
.o
= self
.ospec()
1063 return FPAddStage1Data(self
.width
, self
.id_wid
)
1066 return FPNorm1Data(self
.width
, self
.id_wid
)
1068 def setup(self
, m
, i
):
1069 """ links module to inputs and outputs
1071 m
.submodules
.normalise_1
= self
1072 m
.d
.comb
+= self
.i
.eq(i
)
1074 def process(self
, i
):
1077 def elaborate(self
, platform
):
1080 mwid
= self
.o
.z
.m_width
+2
1081 pe
= PriorityEncoder(mwid
)
1082 m
.submodules
.norm_pe
= pe
1085 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1087 m
.submodules
.norm1_out_z
= self
.o
.z
1088 m
.submodules
.norm1_out_overflow
= of
1089 m
.submodules
.norm1_in_z
= self
.i
.z
1090 m
.submodules
.norm1_in_overflow
= self
.i
.of
1093 m
.submodules
.norm1_insel_z
= i
.z
1094 m
.submodules
.norm1_insel_overflow
= i
.of
1096 espec
= (len(i
.z
.e
), True)
1097 ediff_n126
= Signal(espec
, reset_less
=True)
1098 msr
= MultiShiftRMerge(mwid
, espec
)
1099 m
.submodules
.multishift_r
= msr
1101 m
.d
.comb
+= i
.eq(self
.i
)
1102 # initialise out from in (overridden below)
1103 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1104 m
.d
.comb
+= of
.eq(i
.of
)
1105 # normalisation increase/decrease conditions
1106 decrease
= Signal(reset_less
=True)
1107 increase
= Signal(reset_less
=True)
1108 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1109 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1111 with m
.If(~self
.i
.out_do_z
):
1112 with m
.If(decrease
):
1113 # *sigh* not entirely obvious: count leading zeros (clz)
1114 # with a PriorityEncoder: to find from the MSB
1115 # we reverse the order of the bits.
1116 temp_m
= Signal(mwid
, reset_less
=True)
1117 temp_s
= Signal(mwid
+1, reset_less
=True)
1118 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1119 # make sure that the amount to decrease by does NOT
1120 # go below the minimum non-INF/NaN exponent
1121 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1124 # cat round and guard bits back into the mantissa
1125 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1126 pe
.i
.eq(temp_m
[::-1]), # inverted
1127 clz
.eq(limclz
), # count zeros from MSB down
1128 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1129 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1130 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1131 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1132 # overflow in bits 0..1: got shifted too (leave sticky)
1133 of
.guard
.eq(temp_s
[1]), # guard
1134 of
.round_bit
.eq(temp_s
[0]), # round
1137 with m
.Elif(increase
):
1138 temp_m
= Signal(mwid
+1, reset_less
=True)
1140 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1142 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1143 # connect multi-shifter to inp/out mantissa (and ediff)
1145 msr
.diff
.eq(ediff_n126
),
1146 self
.o
.z
.m
.eq(msr
.m
[3:]),
1147 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1148 # overflow in bits 0..1: got shifted too (leave sticky)
1149 of
.guard
.eq(temp_s
[2]), # guard
1150 of
.round_bit
.eq(temp_s
[1]), # round
1151 of
.sticky
.eq(temp_s
[0]), # sticky
1152 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1155 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1156 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1157 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
class FPNorm1ModMulti:
    """ One step of an iterative normaliser.

        elaborate() takes either (in_z, in_of) or (temp_z, temp_of) as its
        working value and, when an adjustment applies, moves the exponent by
        one and the mantissa by one bit, writing the result to out_z/out_of.

        NOTE(review): the numbering embedded in this listing skips in several
        places; statements that are not visible are flagged inline rather
        than guessed at.
    """

    def __init__(self, width, single_cycle=True):
        """ creates the selector, the candidate inputs and the outputs

            * width: passed to every FPNumBase created here
            * single_cycle: not read by any visible line of this class
        """
        # NOTE(review): elaborate() reads self.width and drives self.out_norm,
        # but neither is assigned on a visible line (listing lines 1165 and
        # 1173 are not shown) -- confirm they are set here.
        self.in_select = Signal(reset_less=True)  # when set, in_z/in_of are used
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ builds the combinatorial logic for one normalisation step
        """
        # NOTE(review): listing lines 1175-1176 are not shown; `m` is used
        # below without a visible assignment (presumably a Module).
        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local working copy that receives whichever input is selected
        in_z = FPNumBase(self.width, False)
        # NOTE(review): `in_of` is used from here on, but the line creating it
        # (listing line 1185) is not shown.
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        # NOTE(review): listing line 1193 (presumably the header of the
        # alternative branch) is not shown; the two statements below copy
        # temp_z/temp_of instead.
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        # decrease needs both in_z.m_msbzero and in_z.exp_gt_n126;
        # increase follows in_z.exp_lt_n126 alone.
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        with m.If(decrease):
            # NOTE(review): listing line 1207, which opens the statement group
            # holding the assignments below, is not shown.
                self.out_z.e.eq(in_z.e - 1), # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0), # reset round bit
                self.out_of.m0.eq(in_of.guard),
        # NOTE(review): listing lines 1214-1215 are not shown.
        with m.Elif(increase):
            # NOTE(review): listing line 1217 (group opener) is not shown.
                self.out_z.e.eq(in_z.e + 1), # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                # the bit shifted out of the mantissa becomes the new guard;
                # old guard becomes round, and old round is merged into sticky
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
        # NOTE(review): the remainder of this method (listing lines 1224-1228)
        # is not shown.
class FPNorm1Single(FPState):
    """ State "normalise_1" that delegates its work to an FPNorm1ModSingle.

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ creates the wrapped module and the output holders

            * width: passed to FPNorm1ModSingle and FPNumBase
            * id_wid, single_cycle: not read by any visible line
        """
        FPState.__init__(self, "normalise_1")
        # NOTE(review): FPNormToPack.setup builds
        # FPNorm1ModSingle(self.width, self.id_wid); only width is passed
        # here -- confirm against FPNorm1ModSingle.__init__.
        self.mod = FPNorm1ModSingle(width)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    # NOTE(review): the def line owning the next return (listing line 1238)
    # is not shown; the return forwards the wrapped module's ispec().
        return self.mod.ispec()

    # NOTE(review): likewise for listing line 1241 and ospec().
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs """
        self.mod.setup(m, i)

    def action(self, m):
        # NOTE(review): the body of action() (listing line 1250 onwards) is
        # not shown.
class FPNorm1Multi(FPState):
    """ State "normalise_1" built around an FPNorm1ModMulti, with a stb/ack
        pair and temp_z/temp_of registers that are reloaded from the module's
        outputs on each action().

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, width, id_wid):
        """ creates the wrapped module, handshake signals and registers

            * width: passed to FPNorm1ModMulti and FPNumBase
            * id_wid: not read by any visible line
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs """
        # NOTE(review): no setup() method is visible on FPNorm1ModMulti in
        # this listing -- confirm it exists with this nine-argument form.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ per-state logic: accept on stb-without-ack, reload the temp
            registers, then update ack according to out_norm
        """
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                # NOTE(review): listing lines 1283-1286 (the body of this
                # branch and, presumably, the header of its alternative) are
                # not shown.
                m.d.sync += self.ack.eq(0)
        # NOTE(review): listing lines 1288 and 1290 are not shown; the
        # statements below belong to the "normalisation not required" case.
            # normalisation not required (or done).
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
class FPNormToPack(FPState):
    """ State "normalise_1" that runs normalise, round, corrections and pack
        as one StageChain and registers the packed result in out_z.

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, width, id_wid):
        """ records id_wid for the ispec/ospec/setup calls below """
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        # NOTE(review): self.width is read below, but the line assigning it
        # (listing line 1300) is not shown.

    # NOTE(review): the def line owning the next return (listing line 1302)
    # is not shown.
        return FPAddStage1Data(self.width, self.id_wid) # Norm1ModSingle ispec

    # NOTE(review): likewise for listing line 1305.
        return FPPackData(self.width, self.id_wid) # FPPackMod ospec

    def setup(self, m, i):
        """ links module to inputs and outputs """
        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        chain = StageChain([nmod, rmod, cmod, pmod])
        # NOTE(review): neither `chain` nor the parameter `i` is used on a
        # visible line; listing line 1318 is not shown.
        self.out_z = pmod.ospec()

        # register the chain's final (pack) outputs
        m.d.sync += self.out_z.mid.eq(pmod.o.mid)
        m.d.sync += self.out_z.z.v.eq(pmod.o.z.v) # outputs packed result

    def action(self, m):
        """ unconditionally moves the FSM on to "pack_put_z" """
        m.next = "pack_put_z"
def __init__(self, width, id_wid):
    """ creates the data fields: a number, a bypass flag and an id

        * width: passed to FPNumBase
        * id_wid: bit-width of the mid signal
    """
    # NOTE(review): the class header this method belongs to is not shown in
    # this listing.
    self.z = FPNumBase(width, False)
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1336 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
),
def __init__(self, width, id_wid):
    """ records id_wid and creates the input/output data from the specs """
    # NOTE(review): the class header is not shown, and neither is listing
    # line 1343; the ispec/ospec returns that follow read self.width, so it
    # is presumably assigned there -- confirm.
    self.id_wid = id_wid
    self.i = self.ispec()
    self.out_z = self.ospec()
1349 return FPNorm1Data(self
.width
, self
.id_wid
)
1352 return FPRoundData(self
.width
, self
.id_wid
)
1354 def process(self
, i
):
def setup(self, m, i):
    """ registers this module on m as "roundz" and drives self.i from i """
    input_link = self.i.eq(i)
    m.submodules.roundz = self
    m.d.comb += input_link
def elaborate(self, platform):
    """ output is a copy of the input; when out_do_z is clear and roundz is
        set the mantissa is incremented, and the exponent too if the
        mantissa equalled m1s
    """
    # NOTE(review): listing line 1362 is not shown; `m` is used below without
    # a visible assignment, and no return statement is visible either.
    m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
    with m.If(~self.i.out_do_z):
        with m.If(self.i.roundz):
            m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa up
            with m.If(self.i.z.m == self.i.z.m1s): # all 1s
                m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up
class FPRound(FPState):
    """ State "round": wraps an FPRoundMod and registers its output.

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, width, id_wid):
        """ creates the wrapped module and the registered output

            * width: passed to FPRoundMod
            * id_wid: not read by any visible line
        """
        FPState.__init__(self, "round")
        # NOTE(review): FPNormToPack.setup builds
        # FPRoundMod(self.width, self.id_wid); only width is passed here --
        # confirm against FPRoundMod.__init__.
        self.mod = FPRoundMod(width)
        self.out_z = self.ospec()

    # NOTE(review): the def line owning the next return (listing line 1380)
    # is not shown.
        return self.mod.ispec()

    # NOTE(review): likewise for listing line 1383.
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs """
        self.mod.setup(m, i)

        # NOTE(review): both self.mod.out_z and self.mod.o are read below --
        # confirm the wrapped module provides both attributes.
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ unconditionally moves the FSM on to "corrections" """
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial stage: copies its input to out_z and, when out_do_z is
        clear and the number is flagged is_denormalised, replaces the
        exponent with z.N127.

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, width, id_wid):
        """ records id_wid and creates the input/output data from the specs """
        # NOTE(review): listing line 1402 is not shown; the ispec/ospec
        # returns below read self.width, so it is presumably assigned there.
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    # NOTE(review): the def line owning the next return (listing line 1407)
    # is not shown.
        return FPRoundData(self.width, self.id_wid)

    # NOTE(review): likewise for listing line 1410; both returns build the
    # same FPRoundData type.
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        # NOTE(review): the body of process() (listing line 1414) is not
        # shown.

    def setup(self, m, i):
        """ links module to inputs and outputs """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the copy-through logic and the exponent override """
        # NOTE(review): listing line 1423 is not shown; `m` is used below
        # without a visible assignment, and no return statement is visible.
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            with m.If(self.i.z.is_denormalised):
                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
class FPCorrections(FPState):
    """ State "corrections": wraps an FPCorrectionsMod and registers its
        output.

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, width, id_wid):
        """ creates the wrapped module and the registered output

            * width: passed to FPCorrectionsMod
            * id_wid: not read by any visible line
        """
        FPState.__init__(self, "corrections")
        # NOTE(review): FPCorrectionsMod.__init__ as shown in this file takes
        # (width, id_wid) with no default for id_wid, so this one-argument
        # call looks like it would raise TypeError -- confirm and pass id_wid.
        self.mod = FPCorrectionsMod(width)
        self.out_z = self.ospec()

    # NOTE(review): the def line owning the next return (listing line 1440)
    # is not shown.
        return self.mod.ispec()

    # NOTE(review): likewise for listing line 1443.
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs """
        self.mod.setup(m, in_z)

        # NOTE(review): FPCorrectionsMod as shown assigns out_z but no `o`
        # attribute; the second statement reads self.mod.o.mid -- confirm.
        m.d.sync += self.out_z.eq(self.mod.out_z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        # NOTE(review): the body of action() (listing line 1455 onwards) is
        # not shown.
def __init__(self, width, id_wid):
    """ creates the data fields: an output number and an id

        * width: passed to FPNumOut
        * id_wid: bit-width of the mid signal
    """
    # NOTE(review): the class header this method belongs to is not shown in
    # this listing.
    self.z = FPNumOut(width, False)
    self.mid = Signal(id_wid, reset_less=True)
1465 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """ records id_wid and creates the input/output data from the specs """
    # NOTE(review): the class header is not shown, and neither is listing
    # line 1471; the ispec/ospec returns that follow read self.width, so it
    # is presumably assigned there -- confirm.
    self.id_wid = id_wid
    self.i = self.ispec()
    self.o = self.ospec()
1477 return FPRoundData(self
.width
, self
.id_wid
)
1480 return FPPackData(self
.width
, self
.id_wid
)
1482 def process(self
, i
):
def setup(self, m, in_z):
    """ links module to inputs and outputs """
    # registers this object on m under the name "pack", then drives its
    # input data from in_z
    m.submodules.pack = self
    m.d.comb += self.i.eq(in_z)
def elaborate(self, platform):
    """ forwards mid and produces the packed output number from the input,
        using inf() when the input is flagged is_overflowed
    """
    # NOTE(review): listing line 1492 is not shown; `m` is used below without
    # a visible assignment, and no return statement is visible either.
    m.submodules.pack_in_z = self.i.z
    m.d.comb += self.o.mid.eq(self.i.mid)
    with m.If(~self.i.out_do_z):
        with m.If(self.i.z.is_overflowed):
            m.d.comb += self.o.z.inf(self.i.z.s)
        # NOTE(review): listing line 1498 (presumably the header of the
        # alternative branch) is not shown.
            m.d.comb += self.o.z.create(self.i.z.s, self.i.z.e, self.i.z.m)
    # NOTE(review): listing line 1500 (presumably the header of the
    # out_do_z-set branch) is not shown; the statement below copies the
    # input number through unchanged.
        m.d.comb += self.o.z.eq(self.i.z)
class FPPack(FPState):
    """ State "pack": wraps an FPPackMod and registers its output.

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, width, id_wid):
        """ creates the wrapped module and the registered output

            * width: passed to FPPackMod
            * id_wid: not read by any visible line
        """
        FPState.__init__(self, "pack")
        # NOTE(review): FPNormToPack.setup builds
        # FPPackMod(self.width, self.id_wid); only width is passed here --
        # confirm against FPPackMod.__init__.
        self.mod = FPPackMod(width)
        self.out_z = self.ospec()

    # NOTE(review): the def line owning the next return (listing line 1512)
    # is not shown.
        return self.mod.ispec()

    # NOTE(review): likewise for listing line 1515.
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs """
        self.mod.setup(m, in_z)

        # NOTE(review): both self.mod.out_z and self.mod.o are read below --
        # confirm the wrapped module provides both attributes.
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ unconditionally moves the FSM on to "pack_put_z" """
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ Output state: presents in_z on out_z with a stb/ack handshake and
        moves to to_state once both stb and ack are seen.

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name passed to FPState.__init__
            * in_mid / out_mid: optional id source and destination
            * to_state: state entered after the handshake; "get_ops" if None
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # NOTE(review): action() reads self.in_z and self.out_z, but the
        # lines storing them (listing lines 1537-1538) are not shown.
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ copies the id (when one was given) and the value, then runs the
            stb/ack handshake on out_z.z
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # NOTE(review): listing lines 1545 and 1547 (the opener and closer of
        # the statement group around the next line) are not shown.
          self.out_z.z.v.eq(self.in_z.v)
        with m.If(self.out_z.z.stb & self.out_z.z.ack):
            m.d.sync += self.out_z.z.stb.eq(0)
            m.next = self.to_state
        # NOTE(review): listing line 1551 (presumably the header of the
        # alternative branch) is not shown; the statement below raises stb.
            m.d.sync += self.out_z.z.stb.eq(1)
class FPPutZIdx(FPState):
    """ Output state like FPPutZ, except that the destination is
        out_zs[in_mid], i.e. it is selected by indexing with in_mid.

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name passed to FPState.__init__
            * out_zs: indexable collection of outputs, indexed by in_mid
            * to_state: state entered after the handshake; "get_ops" if None
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # NOTE(review): action() reads self.in_z, but the line storing it
        # (listing line 1562) is not shown.
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ mirrors the selected output's stb/ack into local signals, writes
            the value, then runs the handshake on the selected output
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(self.out_zs[self.in_mid].stb),
                     outz_ack.eq(self.out_zs[self.in_mid].ack),
        # NOTE(review): listing lines 1571-1572 and 1574 (the end of the list
        # above and the opener/closer around the next line) are not shown.
          self.out_zs[self.in_mid].v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            m.d.sync += self.out_zs[self.in_mid].stb.eq(0)
            m.next = self.to_state
        # NOTE(review): listing line 1578 (presumably the header of the
        # alternative branch) is not shown; the statement below raises stb.
            m.d.sync += self.out_zs[self.in_mid].stb.eq(1)
class FPADDBaseData:
    """ Input record for the adder: two operands (a, b) and an id (mid).

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the a and b signals
            * id_wid: bit-width of the mid signal
        """
        # NOTE(review): listing line 1584 is not shown.
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    # NOTE(review): the def line owning the next return (listing line 1590)
    # is not shown; the return yields one assignment per field, copying
    # from `i`.
        return [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
def __init__(self, width, id_wid):
    """ creates the data fields: an FPOp and an id

        * width: passed to FPOp
        * id_wid: bit-width of the mid signal
    """
    # NOTE(review): the class header this method belongs to is not shown in
    # this listing.
    self.z = FPOp(width)
    self.mid = Signal(id_wid, reset_less=True)
1600 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
    """ stores the configuration and creates the trigger and i/o data

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
    """
    # NOTE(review): the class header is not shown.  Other methods read
    # self.width and self.states, but no visible line assigns them (listing
    # lines 1613 and 1621-1623 are not shown) -- confirm.
    self.id_wid = id_wid
    self.single_cycle = single_cycle
    self.compact = compact

    self.in_t = Trigger()
    self.i = self.ispec()
    self.o = self.ospec()
1625 return FPADDBaseData(self
.width
, self
.id_wid
)
1628 return FPOpData(self
.width
, self
.id_wid
)
def add_state(self, state):
    """ appends state to self.states """
    self.states.append(state)
    # NOTE(review): callers in this file use the result of add_state() (e.g.
    # `get = self.add_state(...)` followed by `get.setup(...)`), so a return
    # of `state` is presumably on listing line 1632, which is not shown.
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd """
    # NOTE(review): listing lines 1636-1637 are not shown; `m` is used below
    # without a visible assignment.
    m.submodules.out_z = self.o.z
    m.submodules.in_t = self.in_t
    # NOTE(review): the lines choosing between the two builders (listing
    # lines 1640 and 1642) are not shown -- presumably keyed on self.compact.
        self.get_compact_fragment(m, platform)
        self.get_longer_fragment(m, platform)

    # one FSM state per registered state object, named by its state_from
    with m.FSM() as fsm:
        for state in self.states:
            with m.State(state.state_from):
                # NOTE(review): the body of each state (listing line 1649
                # onwards) and the method's return are not shown.
def get_longer_fragment(self, m, platform=None):
    """ registers the long pipeline, one state object per stage: get_ops,
        special cases, denormalise, align, add0, add1, normalise, round,
        corrections, pack, and the two put_z output states
    """
    # NOTE(review): several setup() calls below pass more arguments than the
    # matching setup() signatures shown elsewhere in this file accept:
    # FPNorm1Single.setup(m, i), FPNorm1Multi.setup(m, in_z, in_of, norm_stb),
    # FPRound.setup(m, i), FPCorrections.setup(m, in_z) and
    # FPPack.setup(m, in_z).  The method also reads self.in_mid, self.out_z
    # and self.out_mid, none of which is assigned by the visible __init__.
    # Confirm whether this path is still maintained.
    get = self.add_state(FPGet2Op("get_ops", "special_cases",
    # NOTE(review): the continuation of the call above (listing line 1656)
    # is not shown.
    get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    # NOTE(review): `a` and `b` are used below, but the lines binding them
    # (listing lines 1658-1660) are not shown.
    sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
    sc.setup(m, a, b, self.in_mid)

    dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
    dn.setup(m, a, b, sc.in_mid)

    # alignment: single-step or multi-step variant
    if self.single_cycle:
        alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
    # NOTE(review): listing line 1670 (presumably `else:`) is not shown.
        alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

    add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
    add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

    add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
    add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

    # normalisation: single-step or multi-step variant
    if self.single_cycle:
        n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
        n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
    # NOTE(review): listing line 1683 (presumably `else:`) is not shown.
        n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
        n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

    rn = self.add_state(FPRound(self.width, self.id_wid))
    rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

    cor = self.add_state(FPCorrections(self.width, self.id_wid))
    cor.setup(m, rn.out_z, rn.in_mid)

    pa = self.add_state(FPPack(self.width, self.id_wid))
    pa.setup(m, cor.out_z, rn.in_mid)

    # two output states: the normal result, and the special-case result
    ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                pa.in_mid, self.out_mid))

    pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                pa.in_mid, self.out_mid))
def get_compact_fragment(self, m, platform=None):
    """ registers the reduced pipeline: get_ops, combined special-cases and
        denormalise, combined align and add, FPNormToPack, and the two
        put_z output states
    """
    get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                  self.width, self.id_wid))
    get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

    sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
    # NOTE(review): listing lines 1709-1710 (presumably sc.setup) are not
    # shown.
    alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
    # NOTE(review): listing lines 1712-1713 (presumably alm.setup) are not
    # shown.
    n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
    n1.setup(m, alm.a1o)

    # two output states: the normal result, and the special-case result
    ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                n1.out_z.mid, self.o.mid))

    pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
                                sc.o.mid, self.o.mid))
class FPADDBase(FPState):
    """ State "fpadd": wraps an FPADDBaseMod and bridges its input trigger
        and output stb/ack to this object's own i / o / in_t.

        NOTE(review): the numbering embedded in this listing skips in several
        places; code that is not visible is flagged, not guessed at.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ creates the wrapped module, i/o data and handshake signals

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        # NOTE(review): listing line 1734 is not shown.
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    # NOTE(review): the def line owning the next return (listing line 1747)
    # is not shown.
        return self.mod.ispec()

    # NOTE(review): likewise for listing line 1750.
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires i into the wrapped module, mirrors its trigger and output
            handshake, and registers add_stb; in_mid is not used on any
            visible line
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
        # NOTE(review): the closing of the list above (listing line 1764) is
        # not shown.
        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ per-state logic: accept an operand pair while z_done is low,
            otherwise acknowledge
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                # NOTE(review): listing line 1783 (group opener) is not shown.
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
            # NOTE(review): listing lines 1786-1787 are not shown (presumably
            # the group closer and the header of the alternative branch).
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
        # NOTE(review): listing lines 1790-1792 are not shown (the end of the
        # list above and, presumably, the header of the "done" branch).
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
            # NOTE(review): listing lines 1795-1800 are not shown.  The
            # statements below read self.in_mid, self.out_mid and self.out_z,
            # none of which is assigned by the visible __init__ -- confirm
            # whether this tail is reachable.
            if self.in_mid is not None:
                m.d.sync += self.out_mid.eq(self.mod.out_mid)
            # NOTE(review): listing lines 1803-1804 and 1806 are not shown.
              self.out_z.v.eq(self.mod.out_z.v)
            # move to output state on detecting z ack
            with m.If(self.out_z.trigger):
                m.d.sync += self.out_z.stb.eq(0)
            # NOTE(review): listing lines 1810-1811 are not shown.
                m.d.sync += self.out_z.stb.eq(1)
def __init__(self, width, id_wid):
    """ creates an Array of results plus an FPOp input and an id input

        * width: passed to FPOp
        * id_wid: bit-width of the in_mid signal
    """
    # NOTE(review): the class header is not shown.  `rs_sz` is read by the
    # loop below but is not a parameter of this method and is not bound on
    # any visible line -- confirm where it comes from.  Listing lines 1817,
    # 1819, 1821 and 1823 (presumably creating `res` and each `out_z`) are
    # not shown.
    self.id_wid = id_wid
    for i in range(rs_sz):
        out_z.name = "out_z_%d" % i
    self.res = Array(res)
    self.in_z = FPOp(width)
    self.in_mid = Signal(self.id_wid, reset_less=True)
def setup(self, m, in_z, in_mid):
    """ combinatorially copies in_z and in_mid onto this object's inputs """
    m.d.comb += self.in_z.eq(in_z)
    m.d.comb += self.in_mid.eq(in_mid)
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd """
    # NOTE(review): listing lines 1834-1835 are not shown; `m` is used below
    # without a visible assignment, and the rest of the method (listing line
    # 1838 onwards) is not shown either.
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res
1849 """ FPADD: stages as follows:
1855 FPAddBase---> FPAddBaseMod
1857 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1859 FPAddBase is tricky: it is both a stage and *has* stages.
1860 Connection to FPAddBaseMod therefore requires an in stb/ack
1861 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1862 needs to be the thing that raises the incoming stb.
def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
    """ stores the configuration and creates rs_sz operand pairs and rs_sz
        results

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * rs_sz: iteration count of both loops below
    """
    # NOTE(review): the class header is not shown.  Listing lines 1872,
    # 1878-1879, 1881-1882, 1886-1888, 1890 and 1892 are not shown either;
    # they presumably create `rs`, `res`, each in_a/in_b/out_z, and assign
    # self.width, self.rs and self.states, which other methods read.
    self.id_wid = id_wid
    self.single_cycle = single_cycle

    #self.out_z = FPOp(width)
    self.ids = FPID(id_wid)

    # one (in_a, in_b) pair per slot, named by index
    for i in range(rs_sz):
        in_a.name = "in_a_%d" % i
        in_b.name = "in_b_%d" % i
        rs.append((in_a, in_b))

    # one result per slot, named by index
    for i in range(rs_sz):
        out_z.name = "out_z_%d" % i
    self.res = Array(res)
def add_state(self, state):
    """ appends state to self.states """
    self.states.append(state)
    # NOTE(review): callers in this file use the result of add_state() (e.g.
    # `ab = self.add_state(ab)`), so a return of `state` is presumably on
    # listing line 1899, which is not shown.
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment for FPAdd """
    # NOTE(review): listing lines 1903-1904 are not shown; `m` is used below
    # without a visible assignment.
    m.submodules += self.rs

    # only the first operand pair is picked out here
    in_a = self.rs[0][0]
    in_b = self.rs[0][1]

    geta = self.add_state(FPGetOp("get_a", "get_b",
    # NOTE(review): the continuation of the call above (listing lines
    # 1911-1914) is not shown.
    getb = self.add_state(FPGetOp("get_b", "fpadd",
    # NOTE(review): likewise for listing lines 1916-1919.  `a` and `b` are
    # used below without a visible binding.
    ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
    ab = self.add_state(ab)
    abd = ab.ispec() # create an input spec object for FPADDBase
    m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
    ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
    # NOTE(review): `o` is used below without a visible binding (listing
    # lines 1925-1926 are not shown), and the continuation of this call
    # (listing lines 1928-1929) is not shown.
    pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,

    # one FSM state per registered state object, named by its state_from
    with m.FSM() as fsm:
        for state in self.states:
            with m.State(state.state_from):
                # NOTE(review): the body of each state (listing line 1934
                # onwards) and the method's return are not shown.
# Script entry point: builds an adder and hands it, with its port list, to
# nmigen's command-line main().
if __name__ == "__main__":
    # NOTE(review): `alu` is built and passed to main() twice below; the
    # lines that choose between the two variants (listing lines 1940 and
    # 1946) are not shown.
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() + \
                         alu.rs[0][1].ports() + \
                         alu.res[0].ports() + \
                         [alu.ids.in_mid, alu.ids.out_mid])
        # NOTE(review): the FPADDBase shown in this file assigns none of
        # in_a, in_b, out_z, in_mid or out_mid in its visible __init__ --
        # confirm this variant still runs.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                         alu.in_t.ports() + \
                         alu.out_z.ports() + \
                         [alu.in_mid, alu.out_mid])

    # works... but don't use, just do "python fname.py convert -t v"
    #print (verilog.convert(alu, ports=[
    #                        ports=alu.in_a.ports() + \
    #                              alu.in_b.ports() + \
    #                              alu.out_z.ports())