8bcccf4cbad108b9cd4a74a604b88af1e87c4948
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from example_buf_pipe
import StageChain
, UnbufferedPipeline
13 #from fpbase import FPNumShiftMultiRight
16 class FPState(FPBase
):
def __init__(self, state_from):
    """Record the identifier of the state this object stands for.

    state_from: stored unchanged on the instance; subclasses in this
    file pass string literals such as "align" or "special_cases".
    """
    self.state_from = state_from
20 def set_inputs(self
, inputs
):
22 for k
,v
in inputs
.items():
25 def set_outputs(self
, outputs
):
26 self
.outputs
= outputs
27 for k
,v
in outputs
.items():
31 class FPGetSyncOpsMod
:
32 def __init__(self
, width
, num_ops
=2):
34 self
.num_ops
= num_ops
37 for i
in range(num_ops
):
38 inops
.append(Signal(width
, reset_less
=True))
39 outops
.append(Signal(width
, reset_less
=True))
42 self
.stb
= Signal(num_ops
)
44 self
.ready
= Signal(reset_less
=True)
45 self
.out_decode
= Signal(reset_less
=True)
47 def elaborate(self
, platform
):
49 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
50 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
51 with m
.If(self
.out_decode
):
52 for i
in range(self
.num_ops
):
54 self
.out_op
[i
].eq(self
.in_op
[i
]),
59 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
63 def __init__(self
, width
, num_ops
):
64 Trigger
.__init
__(self
)
66 self
.num_ops
= num_ops
69 for i
in range(num_ops
):
70 res
.append(Signal(width
))
75 for i
in range(self
.num_ops
):
83 def __init__(self
, width
, num_ops
=2, num_rows
=4):
85 self
.num_ops
= num_ops
86 self
.num_rows
= num_rows
87 self
.mmax
= int(log(self
.num_rows
) / log(2))
89 self
.mid
= Signal(self
.mmax
, reset_less
=True) # multiplex id
90 for i
in range(num_rows
):
91 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
92 self
.rs
= Array(self
.rs
)
94 self
.out_op
= FPOps(width
, num_ops
)
96 def elaborate(self
, platform
):
99 pe
= PriorityEncoder(self
.num_rows
)
100 m
.submodules
.selector
= pe
101 m
.submodules
.out_op
= self
.out_op
102 m
.submodules
+= self
.rs
104 # connect priority encoder
106 for i
in range(self
.num_rows
):
107 in_ready
.append(self
.rs
[i
].ready
)
108 m
.d
.comb
+= pe
.i
.eq(Cat(*in_ready
))
110 active
= Signal(reset_less
=True)
111 out_en
= Signal(reset_less
=True)
112 m
.d
.comb
+= active
.eq(~pe
.n
) # encoder active
113 m
.d
.comb
+= out_en
.eq(active
& self
.out_op
.trigger
)
115 # encoder active: ack relevant input, record MID, pass output
118 m
.d
.sync
+= self
.mid
.eq(pe
.o
)
119 m
.d
.sync
+= rs
.ack
.eq(0)
120 m
.d
.sync
+= self
.out_op
.stb
.eq(0)
121 for j
in range(self
.num_ops
):
122 m
.d
.sync
+= self
.out_op
.v
[j
].eq(rs
.out_op
[j
])
124 m
.d
.sync
+= self
.out_op
.stb
.eq(1)
125 # acks all default to zero
126 for i
in range(self
.num_rows
):
127 m
.d
.sync
+= self
.rs
[i
].ack
.eq(1)
133 for i
in range(self
.num_rows
):
135 res
+= inop
.in_op
+ [inop
.stb
]
136 return self
.out_op
.ports() + res
+ [self
.mid
]
def __init__(self, width):
    """Create the operand port, its captured value and the decode flag.

    width: passed to both the FPOp input port and the out_op Signal.
    """
    # incoming operand (provides .v, .stb and .ack, as used by elaborate)
    self.in_op = FPOp(width)
    # assigned from in_op.v when out_decode is set
    self.out_op = Signal(width)
    # driven by elaborate as in_op.ack & in_op.stb
    self.out_decode = Signal(reset_less=True)
145 def elaborate(self
, platform
):
147 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
148 m
.submodules
.get_op_in
= self
.in_op
149 #m.submodules.get_op_out = self.out_op
150 with m
.If(self
.out_decode
):
152 self
.out_op
.eq(self
.in_op
.v
),
157 class FPGetOp(FPState
):
161 def __init__(self
, in_state
, out_state
, in_op
, width
):
162 FPState
.__init
__(self
, in_state
)
163 self
.out_state
= out_state
164 self
.mod
= FPGetOpMod(width
)
166 self
.out_op
= Signal(width
)
167 self
.out_decode
= Signal(reset_less
=True)
169 def setup(self
, m
, in_op
):
170 """ links module to inputs and outputs
172 setattr(m
.submodules
, self
.state_from
, self
.mod
)
173 m
.d
.comb
+= self
.mod
.in_op
.eq(in_op
)
174 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.out_decode
)
177 with m
.If(self
.out_decode
):
178 m
.next
= self
.out_state
180 self
.in_op
.ack
.eq(0),
181 self
.out_op
.eq(self
.mod
.out_op
)
184 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
187 class FPGet2OpMod(Trigger
):
188 def __init__(self
, width
, id_wid
):
189 Trigger
.__init
__(self
)
192 self
.i
= self
.ispec()
193 self
.o
= self
.ospec()
196 return FPADDBaseData(self
.width
, self
.id_wid
)
199 return FPNumBase2Ops(self
.width
, self
.id_wid
)
201 def elaborate(self
, platform
):
202 m
= Trigger
.elaborate(self
, platform
)
203 m
.submodules
.get_op1_out
= self
.o
.a
204 m
.submodules
.get_op2_out
= self
.o
.b
205 out_op1
= FPNumIn(None, self
.width
)
206 out_op2
= FPNumIn(None, self
.width
)
207 with m
.If(self
.trigger
):
209 out_op1
.decode(self
.i
.a
),
210 out_op2
.decode(self
.i
.b
),
211 self
.o
.a
.eq(out_op1
),
212 self
.o
.b
.eq(out_op2
),
213 self
.o
.mid
.eq(self
.i
.mid
)
218 class FPGet2Op(FPState
):
def __init__(self, in_state, out_state, width, id_wid):
    """Build the two-operand fetch state.

    in_state:  identifier handed to FPState.__init__ for this state.
    out_state: stored as self.out_state; setup assigns it to m.next
               once out_decode is set.
    width:     forwarded to FPGet2OpMod.
    id_wid:    forwarded to FPGet2OpMod.
    """
    FPState.__init__(self, in_state)
    self.out_state = out_state
    self.mod = FPGet2OpMod(width, id_wid)
    # output record of this state, shaped by the module's ospec()
    self.o = self.mod.ospec()
    # single-bit handshake/status signals
    self.in_stb = Signal(reset_less=True)
    self.out_ack = Signal(reset_less=True)
    self.out_decode = Signal(reset_less=True)
231 def setup(self
, m
, i
, in_stb
, in_ack
):
232 """ links module to inputs and outputs
234 m
.submodules
.get_ops
= self
.mod
235 m
.d
.comb
+= self
.mod
.i
.eq(i
)
236 m
.d
.comb
+= self
.mod
.stb
.eq(in_stb
)
237 m
.d
.comb
+= self
.out_ack
.eq(self
.mod
.ack
)
238 m
.d
.comb
+= self
.out_decode
.eq(self
.mod
.trigger
)
239 m
.d
.comb
+= in_ack
.eq(self
.mod
.ack
)
242 with m
.If(self
.out_decode
):
243 m
.next
= self
.out_state
246 self
.o
.eq(self
.mod
.o
),
249 m
.d
.sync
+= self
.mod
.ack
.eq(1)
def __init__(self, width, id_wid, m_extra=True):
    """Create a record holding two FP numbers and an id.

    width:   passed to FPNumBase for both numbers.
    id_wid:  bit-width of the mid Signal.
    m_extra: passed as FPNumBase's second argument for both numbers
             (defaults to True).
    """
    self.a = FPNumBase(width, m_extra)
    self.b = FPNumBase(width, m_extra)
    self.mid = Signal(id_wid, reset_less=True)
260 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
def __init__(self, width, id_wid):
    """Create the record fields: a, b, z, oz, out_do_z and mid.

    width:  passed to FPNumBase/FPNumOut and used as the width of oz.
    id_wid: bit-width of the mid Signal.
    """
    # operands (second FPNumBase argument is True here)
    self.a = FPNumBase(width, True)
    self.b = FPNumBase(width, True)
    # result number (second FPNumOut argument is False here)
    self.z = FPNumOut(width, False)
    # width-bit value carried next to z
    self.oz = Signal(width, reset_less=True)
    # single-bit flag
    self.out_do_z = Signal(reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
274 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
275 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
278 class FPAddSpecialCasesMod
:
279 """ special cases: NaNs, infs, zeros, denormalised
280 NOTE: some of these are unique to add. see "Special Operations"
281 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
284 def __init__(self
, width
, id_wid
):
287 self
.i
= self
.ispec()
288 self
.o
= self
.ospec()
291 return FPNumBase2Ops(self
.width
, self
.id_wid
)
294 return FPSCData(self
.width
, self
.id_wid
)
296 def setup(self
, m
, i
):
297 """ links module to inputs and outputs
299 m
.submodules
.specialcases
= self
300 m
.d
.comb
+= self
.i
.eq(i
)
302 def elaborate(self
, platform
):
305 m
.submodules
.sc_in_a
= self
.i
.a
306 m
.submodules
.sc_in_b
= self
.i
.b
307 m
.submodules
.sc_out_z
= self
.o
.z
310 m
.d
.comb
+= s_nomatch
.eq(self
.i
.a
.s
!= self
.i
.b
.s
)
313 m
.d
.comb
+= m_match
.eq(self
.i
.a
.m
== self
.i
.b
.m
)
315 # if a is NaN or b is NaN return NaN
316 with m
.If(self
.i
.a
.is_nan | self
.i
.b
.is_nan
):
317 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
318 m
.d
.comb
+= self
.o
.z
.nan(0)
320 # XXX WEIRDNESS for FP16 non-canonical NaN handling
323 ## if a is zero and b is NaN return -b
324 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
325 # m.d.comb += self.o.out_do_z.eq(1)
326 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
328 ## if b is zero and a is NaN return -a
329 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
330 # m.d.comb += self.o.out_do_z.eq(1)
331 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
333 ## if a is -zero and b is NaN return -b
334 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
335 # m.d.comb += self.o.out_do_z.eq(1)
336 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
338 ## if b is -zero and a is NaN return -a
339 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
340 # m.d.comb += self.o.out_do_z.eq(1)
341 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
343 # if a is inf return inf (or NaN)
344 with m
.Elif(self
.i
.a
.is_inf
):
345 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
346 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.a
.s
)
347 # if a is inf and signs don't match return NaN
348 with m
.If(self
.i
.b
.exp_128
& s_nomatch
):
349 m
.d
.comb
+= self
.o
.z
.nan(0)
351 # if b is inf return inf
352 with m
.Elif(self
.i
.b
.is_inf
):
353 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
354 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.b
.s
)
356 # if a is zero and b zero return signed-a/b
357 with m
.Elif(self
.i
.a
.is_zero
& self
.i
.b
.is_zero
):
358 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
359 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
& self
.i
.b
.s
,
363 # if a is zero return b
364 with m
.Elif(self
.i
.a
.is_zero
):
365 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
366 m
.d
.comb
+= self
.o
.z
.create(self
.i
.b
.s
, self
.i
.b
.e
,
369 # if b is zero return a
370 with m
.Elif(self
.i
.b
.is_zero
):
371 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
372 m
.d
.comb
+= self
.o
.z
.create(self
.i
.a
.s
, self
.i
.a
.e
,
375 # if a equal to -b return zero (+ve zero)
376 with m
.Elif(s_nomatch
& m_match
& (self
.i
.a
.e
== self
.i
.b
.e
)):
377 m
.d
.comb
+= self
.o
.out_do_z
.eq(1)
378 m
.d
.comb
+= self
.o
.z
.zero(0)
380 # Denormalised Number checks next, so pass a/b data through
382 m
.d
.comb
+= self
.o
.out_do_z
.eq(0)
383 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
384 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
386 m
.d
.comb
+= self
.o
.oz
.eq(self
.o
.z
.v
)
387 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
393 def __init__(self
, id_wid
):
396 self
.in_mid
= Signal(id_wid
, reset_less
=True)
397 self
.out_mid
= Signal(id_wid
, reset_less
=True)
403 if self
.id_wid
is not None:
404 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
407 class FPAddSpecialCases(FPState
):
408 """ special cases: NaNs, infs, zeros, denormalised
409 NOTE: some of these are unique to add. see "Special Operations"
410 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Build the "special_cases" state around FPAddSpecialCasesMod.

    width:  forwarded to FPAddSpecialCasesMod.
    id_wid: forwarded to FPAddSpecialCasesMod.
    """
    FPState.__init__(self, "special_cases")
    # FPAddSpecialCasesMod.__init__ takes (width, id_wid) with no
    # defaults; passing only width raised TypeError at construction.
    self.mod = FPAddSpecialCasesMod(width, id_wid)
    # output record of this state, shaped by the module's ospec()
    self.out_z = self.mod.ospec()
    self.out_do_z = Signal(reset_less=True)
419 def setup(self
, m
, i
):
420 """ links module to inputs and outputs
422 self
.mod
.setup(m
, i
, self
.out_do_z
)
423 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
424 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
) # (and mid)
428 with m
.If(self
.out_do_z
):
431 m
.next
= "denormalise"
434 class FPAddSpecialCasesDeNorm(FPState
):
435 """ special cases: NaNs, infs, zeros, denormalised
436 NOTE: some of these are unique to add. see "Special Operations"
437 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
440 def __init__(self
, width
, id_wid
):
441 FPState
.__init
__(self
, "special_cases")
442 self
.smod
= FPAddSpecialCasesMod(width
, id_wid
)
443 self
.out_z
= self
.smod
.ospec()
444 self
.out_do_z
= Signal(reset_less
=True)
446 self
.dmod
= FPAddDeNormMod(width
, id_wid
)
447 self
.o
= self
.dmod
.ospec()
449 def setup(self
, m
, i
):
450 """ links module to inputs and outputs
452 self
.smod
.setup(m
, i
)
453 self
.dmod
.setup(m
, self
.smod
.o
)
454 m
.d
.comb
+= self
.out_do_z
.eq(self
.smod
.o
.out_do_z
)
457 m
.d
.sync
+= self
.out_z
.z
.v
.eq(self
.smod
.o
.z
.v
) # only take output
458 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.smod
.o
.mid
) # (and mid)
460 m
.d
.sync
+= self
.o
.eq(self
.dmod
.o
)
462 def process(self
, i
):
466 #with m.If(self.out_do_z):
472 class FPAddDeNormMod(FPState
):
474 def __init__(self
, width
, id_wid
):
477 self
.i
= self
.ispec()
478 self
.o
= self
.ospec()
481 return FPSCData(self
.width
, self
.id_wid
)
484 return FPSCData(self
.width
, self
.id_wid
)
486 def setup(self
, m
, i
):
487 """ links module to inputs and outputs
489 m
.submodules
.denormalise
= self
490 m
.d
.comb
+= self
.i
.eq(i
)
492 def elaborate(self
, platform
):
494 m
.submodules
.denorm_in_a
= self
.i
.a
495 m
.submodules
.denorm_in_b
= self
.i
.b
496 m
.submodules
.denorm_out_a
= self
.o
.a
497 m
.submodules
.denorm_out_b
= self
.o
.b
499 with m
.If(~self
.i
.out_do_z
):
500 # XXX hmmm, don't like repeating identical code
501 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
502 with m
.If(self
.i
.a
.exp_n127
):
503 m
.d
.comb
+= self
.o
.a
.e
.eq(self
.i
.a
.N126
) # limit a exponent
505 m
.d
.comb
+= self
.o
.a
.m
[-1].eq(1) # set top mantissa bit
507 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
508 with m
.If(self
.i
.b
.exp_n127
):
509 m
.d
.comb
+= self
.o
.b
.e
.eq(self
.i
.b
.N126
) # limit a exponent
511 m
.d
.comb
+= self
.o
.b
.m
[-1].eq(1) # set top mantissa bit
513 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
514 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
515 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
516 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
521 class FPAddDeNorm(FPState
):
def __init__(self, width, id_wid):
    """Build the "denormalise" state around FPAddDeNormMod.

    width:  forwarded to FPAddDeNormMod and to both FPNumBase outputs.
    id_wid: forwarded to FPAddDeNormMod.
    """
    FPState.__init__(self, "denormalise")
    # FPAddDeNormMod.__init__ takes (width, id_wid) with no defaults;
    # passing only width raised TypeError at construction.
    self.mod = FPAddDeNormMod(width, id_wid)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
529 def setup(self
, m
, i
):
530 """ links module to inputs and outputs
534 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
535 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
538 # Denormalised Number checks
542 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Create the align module's operand ports and the exp_eq flag.

    width: passed to every FPNumBase/FPNumIn created here.
    """
    # inputs
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)
    # outputs (FPNumIn is created with None as its first argument)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # elaborate drives this to 0 by default and to 1 in its final branch
    self.exp_eq = Signal(reset_less=True)
551 def elaborate(self
, platform
):
552 # This one however (single-cycle) will do the shift
557 m
.submodules
.align_in_a
= self
.in_a
558 m
.submodules
.align_in_b
= self
.in_b
559 m
.submodules
.align_out_a
= self
.out_a
560 m
.submodules
.align_out_b
= self
.out_b
562 # NOTE: this does *not* do single-cycle multi-shifting,
563 # it *STAYS* in the align state until exponents match
565 # exponent of a greater than b: shift b down
566 m
.d
.comb
+= self
.exp_eq
.eq(0)
567 m
.d
.comb
+= self
.out_a
.eq(self
.in_a
)
568 m
.d
.comb
+= self
.out_b
.eq(self
.in_b
)
569 agtb
= Signal(reset_less
=True)
570 altb
= Signal(reset_less
=True)
571 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
572 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
574 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
575 # exponent of b greater than a: shift a down
577 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
578 # exponents equal: move to next stage.
580 m
.d
.comb
+= self
.exp_eq
.eq(1)
584 class FPAddAlignMulti(FPState
):
def __init__(self, width, id_wid):
    """Build the "align" state around FPAddAlignMultiMod.

    width:  forwarded to FPAddAlignMultiMod and to both FPNumIn outputs.
    id_wid: accepted but not used by this constructor.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignMultiMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    # setup drives this from self.mod.exp_eq
    self.exp_eq = Signal(reset_less=True)
593 def setup(self
, m
, in_a
, in_b
):
594 """ links module to inputs and outputs
596 m
.submodules
.align
= self
.mod
597 m
.d
.comb
+= self
.mod
.in_a
.eq(in_a
)
598 m
.d
.comb
+= self
.mod
.in_b
.eq(in_b
)
599 #m.d.comb += self.out_a.eq(self.mod.out_a)
600 #m.d.comb += self.out_b.eq(self.mod.out_b)
601 m
.d
.comb
+= self
.exp_eq
.eq(self
.mod
.exp_eq
)
602 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
603 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
606 with m
.If(self
.exp_eq
):
def __init__(self, width, id_wid):
    """Create the record fields: a, b, z, out_do_z, oz and mid.

    width:  passed to FPNumIn/FPNumOut and used as the width of oz.
    id_wid: bit-width of the mid Signal.
    """
    # operands (FPNumIn is created with None as its first argument)
    self.a = FPNumIn(None, width)
    self.b = FPNumIn(None, width)
    # result number (second FPNumOut argument is False here)
    self.z = FPNumOut(width, False)
    # single-bit flag
    self.out_do_z = Signal(reset_less=True)
    # width-bit value carried next to z
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
621 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
622 self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
625 class FPAddAlignSingleMod
:
627 def __init__(self
, width
, id_wid
):
630 self
.i
= self
.ispec()
631 self
.o
= self
.ospec()
634 return FPSCData(self
.width
, self
.id_wid
)
637 return FPNumIn2Ops(self
.width
, self
.id_wid
)
639 def process(self
, i
):
642 def setup(self
, m
, i
):
643 """ links module to inputs and outputs
645 m
.submodules
.align
= self
646 m
.d
.comb
+= self
.i
.eq(i
)
648 def elaborate(self
, platform
):
649 """ Aligns A against B or B against A, depending on which has the
650 greater exponent. This is done in a *single* cycle using
651 variable-width bit-shift
653 the shifter used here is quite expensive in terms of gates.
654 Mux A or B in (and out) into temporaries, as only one of them
655 needs to be aligned against the other
659 m
.submodules
.align_in_a
= self
.i
.a
660 m
.submodules
.align_in_b
= self
.i
.b
661 m
.submodules
.align_out_a
= self
.o
.a
662 m
.submodules
.align_out_b
= self
.o
.b
664 # temporary (muxed) input and output to be shifted
665 t_inp
= FPNumBase(self
.width
)
666 t_out
= FPNumIn(None, self
.width
)
667 espec
= (len(self
.i
.a
.e
), True)
668 msr
= MultiShiftRMerge(self
.i
.a
.m_width
, espec
)
669 m
.submodules
.align_t_in
= t_inp
670 m
.submodules
.align_t_out
= t_out
671 m
.submodules
.multishift_r
= msr
673 ediff
= Signal(espec
, reset_less
=True)
674 ediffr
= Signal(espec
, reset_less
=True)
675 tdiff
= Signal(espec
, reset_less
=True)
676 elz
= Signal(reset_less
=True)
677 egz
= Signal(reset_less
=True)
679 # connect multi-shifter to t_inp/out mantissa (and tdiff)
680 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
681 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
682 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
683 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
684 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
686 m
.d
.comb
+= ediff
.eq(self
.i
.a
.e
- self
.i
.b
.e
)
687 m
.d
.comb
+= ediffr
.eq(self
.i
.b
.e
- self
.i
.a
.e
)
688 m
.d
.comb
+= elz
.eq(self
.i
.a
.e
< self
.i
.b
.e
)
689 m
.d
.comb
+= egz
.eq(self
.i
.a
.e
> self
.i
.b
.e
)
691 # default: A-exp == B-exp, A and B untouched (fall through)
692 m
.d
.comb
+= self
.o
.a
.eq(self
.i
.a
)
693 m
.d
.comb
+= self
.o
.b
.eq(self
.i
.b
)
694 # only one shifter (muxed)
695 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
696 # exponent of a greater than b: shift b down
697 with m
.If(~self
.i
.out_do_z
):
699 m
.d
.comb
+= [t_inp
.eq(self
.i
.b
),
702 self
.o
.b
.s
.eq(self
.i
.b
.s
), # whoops forgot sign
704 # exponent of b greater than a: shift a down
706 m
.d
.comb
+= [t_inp
.eq(self
.i
.a
),
709 self
.o
.a
.s
.eq(self
.i
.a
.s
), # whoops forgot sign
712 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
713 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
714 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
715 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
720 class FPAddAlignSingle(FPState
):
def __init__(self, width, id_wid):
    """Build the "align" state around FPAddAlignSingleMod.

    width:  forwarded to FPAddAlignSingleMod and to both FPNumIn outputs.
    id_wid: forwarded to FPAddAlignSingleMod.
    """
    FPState.__init__(self, "align")
    self.mod = FPAddAlignSingleMod(width, id_wid)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
728 def setup(self
, m
, i
):
729 """ links module to inputs and outputs
733 # NOTE: could be done as comb
734 m
.d
.sync
+= self
.out_a
.eq(self
.mod
.out_a
)
735 m
.d
.sync
+= self
.out_b
.eq(self
.mod
.out_b
)
741 class FPAddAlignSingleAdd(FPState
):
743 def __init__(self
, width
, id_wid
):
744 FPState
.__init
__(self
, "align")
747 self
.a1o
= self
.ospec()
750 return FPNumBase2Ops(self
.width
, self
.id_wid
) # AlignSingle ispec
753 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
755 def setup(self
, m
, i
):
756 """ links module to inputs and outputs
759 # chain AddAlignSingle, AddStage0 and AddStage1
760 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
761 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
762 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
764 chain
= StageChain([mod
, a0mod
, a1mod
])
767 m
.d
.sync
+= self
.a1o
.eq(a1mod
.o
)
769 def process(self
, i
):
773 m
.next
= "normalise_1"
776 class FPAddStage0Data
:
def __init__(self, width, id_wid):
    """Create the record fields: z, out_do_z, oz, tot and mid.

    width:  passed to FPNumBase and used as the width of oz.
    id_wid: bit-width of the mid Signal.
    """
    self.z = FPNumBase(width, False)
    # single-bit flag
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    # total is four bits wider than z's m_width
    self.tot = Signal(self.z.m_width + 4, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
786 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
787 self
.tot
.eq(i
.tot
), self
.mid
.eq(i
.mid
)]
790 class FPAddStage0Mod
:
792 def __init__(self
, width
, id_wid
):
795 self
.i
= self
.ispec()
796 self
.o
= self
.ospec()
799 return FPSCData(self
.width
, self
.id_wid
)
802 return FPAddStage0Data(self
.width
, self
.id_wid
)
804 def process(self
, i
):
807 def setup(self
, m
, i
):
808 """ links module to inputs and outputs
810 m
.submodules
.add0
= self
811 m
.d
.comb
+= self
.i
.eq(i
)
813 def elaborate(self
, platform
):
815 m
.submodules
.add0_in_a
= self
.i
.a
816 m
.submodules
.add0_in_b
= self
.i
.b
817 m
.submodules
.add0_out_z
= self
.o
.z
819 # store intermediate tests (and zero-extended mantissas)
820 seq
= Signal(reset_less
=True)
821 mge
= Signal(reset_less
=True)
822 am0
= Signal(len(self
.i
.a
.m
)+1, reset_less
=True)
823 bm0
= Signal(len(self
.i
.b
.m
)+1, reset_less
=True)
824 m
.d
.comb
+= [seq
.eq(self
.i
.a
.s
== self
.i
.b
.s
),
825 mge
.eq(self
.i
.a
.m
>= self
.i
.b
.m
),
826 am0
.eq(Cat(self
.i
.a
.m
, 0)),
827 bm0
.eq(Cat(self
.i
.b
.m
, 0))
829 # same-sign (both negative or both positive) add mantissas
830 with m
.If(~self
.i
.out_do_z
):
831 m
.d
.comb
+= self
.o
.z
.e
.eq(self
.i
.a
.e
)
834 self
.o
.tot
.eq(am0
+ bm0
),
835 self
.o
.z
.s
.eq(self
.i
.a
.s
)
837 # a mantissa greater than b, use a
840 self
.o
.tot
.eq(am0
- bm0
),
841 self
.o
.z
.s
.eq(self
.i
.a
.s
)
843 # b mantissa greater than a, use b
846 self
.o
.tot
.eq(bm0
- am0
),
847 self
.o
.z
.s
.eq(self
.i
.b
.s
)
850 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
851 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
852 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
856 class FPAddStage0(FPState
):
857 """ First stage of add. covers same-sign (add) and subtract
858 special-casing when mantissas are greater or equal, to
859 give greatest accuracy.
def __init__(self, width, id_wid):
    """Build the "add_0" state around FPAddStage0Mod.

    width:  forwarded to FPAddStage0Mod.
    id_wid: forwarded to FPAddStage0Mod.
    """
    FPState.__init__(self, "add_0")
    # FPAddStage0Mod.__init__ takes (width, id_wid) with no defaults;
    # passing only width raised TypeError at construction.
    self.mod = FPAddStage0Mod(width, id_wid)
    # output record of this state, shaped by the module's ospec()
    self.o = self.mod.ospec()
867 def setup(self
, m
, i
):
868 """ links module to inputs and outputs
872 # NOTE: these could be done as combinatorial (merge add0+add1)
873 m
.d
.sync
+= self
.o
.eq(self
.mod
.o
)
879 class FPAddStage1Data
:
881 def __init__(self
, width
, id_wid
):
882 self
.z
= FPNumBase(width
, False)
883 self
.out_do_z
= Signal(reset_less
=True)
884 self
.oz
= Signal(width
, reset_less
=True)
886 self
.mid
= Signal(id_wid
, reset_less
=True)
889 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
890 self
.of
.eq(i
.of
), self
.mid
.eq(i
.mid
)]
894 class FPAddStage1Mod(FPState
):
895 """ Second stage of add: preparation for normalisation.
896 detects when tot sum is too big (tot[27] is kinda a carry bit)
899 def __init__(self
, width
, id_wid
):
902 self
.i
= self
.ispec()
903 self
.o
= self
.ospec()
906 return FPAddStage0Data(self
.width
, self
.id_wid
)
909 return FPAddStage1Data(self
.width
, self
.id_wid
)
911 def process(self
, i
):
914 def setup(self
, m
, i
):
915 """ links module to inputs and outputs
917 m
.submodules
.add1
= self
918 m
.submodules
.add1_out_overflow
= self
.o
.of
920 m
.d
.comb
+= self
.i
.eq(i
)
922 def elaborate(self
, platform
):
924 #m.submodules.norm1_in_overflow = self.in_of
925 #m.submodules.norm1_out_overflow = self.out_of
926 #m.submodules.norm1_in_z = self.in_z
927 #m.submodules.norm1_out_z = self.out_z
928 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
929 # tot[-1] (MSB) gets set when the sum overflows. shift result down
930 with m
.If(~self
.i
.out_do_z
):
931 with m
.If(self
.i
.tot
[-1]):
933 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
934 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
935 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
936 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
937 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
938 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
940 # tot[-1] (MSB) zero case
943 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
944 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
945 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
946 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
947 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
950 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
951 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
952 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
957 class FPAddStage1(FPState
):
def __init__(self, width, id_wid):
    """Build the "add_1" state around FPAddStage1Mod.

    width:  forwarded to FPAddStage1Mod and to the out_z FPNumBase.
    id_wid: forwarded to FPAddStage1Mod.
    """
    FPState.__init__(self, "add_1")
    # FPAddStage1Mod.__init__ takes (width, id_wid) with no defaults;
    # passing only width raised TypeError at construction.
    self.mod = FPAddStage1Mod(width, id_wid)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # setup clears this by default and sets it to 1 alongside the
    # out_of/out_z updates
    self.norm_stb = Signal()
966 def setup(self
, m
, i
):
967 """ links module to inputs and outputs
971 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
973 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
974 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
975 m
.d
.sync
+= self
.norm_stb
.eq(1)
978 m
.next
= "normalise_1"
981 class FPNormaliseModSingle
:
983 def __init__(self
, width
):
985 self
.in_z
= self
.ispec()
986 self
.out_z
= self
.ospec()
989 return FPNumBase(self
.width
, False)
992 return FPNumBase(self
.width
, False)
994 def setup(self
, m
, i
):
995 """ links module to inputs and outputs
997 m
.submodules
.normalise
= self
998 m
.d
.comb
+= self
.i
.eq(i
)
1000 def elaborate(self
, platform
):
1003 mwid
= self
.out_z
.m_width
+2
1004 pe
= PriorityEncoder(mwid
)
1005 m
.submodules
.norm_pe
= pe
1007 m
.submodules
.norm1_out_z
= self
.out_z
1008 m
.submodules
.norm1_in_z
= self
.in_z
1010 in_z
= FPNumBase(self
.width
, False)
1012 m
.submodules
.norm1_insel_z
= in_z
1013 m
.submodules
.norm1_insel_overflow
= in_of
1015 espec
= (len(in_z
.e
), True)
1016 ediff_n126
= Signal(espec
, reset_less
=True)
1017 msr
= MultiShiftRMerge(mwid
, espec
)
1018 m
.submodules
.multishift_r
= msr
1020 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1021 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1022 # initialise out from in (overridden below)
1023 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1024 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1025 # normalisation decrease condition
1026 decrease
= Signal(reset_less
=True)
1027 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
)
1029 with m
.If(decrease
):
1030 # *sigh* not entirely obvious: count leading zeros (clz)
1031 # with a PriorityEncoder: to find from the MSB
1032 # we reverse the order of the bits.
1033 temp_m
= Signal(mwid
, reset_less
=True)
1034 temp_s
= Signal(mwid
+1, reset_less
=True)
1035 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
1037 # cat round and guard bits back into the mantissa
1038 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
1039 pe
.i
.eq(temp_m
[::-1]), # inverted
1040 clz
.eq(pe
.o
), # count zeros from MSB down
1041 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1042 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
1043 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
def __init__(self, width, id_wid):
    """Create the record fields: roundz, z, out_do_z, oz and mid.

    width:  passed to FPNumBase and used as the width of oz.
    id_wid: bit-width of the mid Signal.
    """
    # single-bit flag
    self.roundz = Signal(reset_less=True)
    self.z = FPNumBase(width, False)
    # single-bit flag
    self.out_do_z = Signal(reset_less=True)
    self.oz = Signal(width, reset_less=True)
    self.mid = Signal(id_wid, reset_less=True)
1058 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1059 self
.roundz
.eq(i
.roundz
), self
.mid
.eq(i
.mid
)]
1062 class FPNorm1ModSingle
:
1064 def __init__(self
, width
, id_wid
):
1066 self
.id_wid
= id_wid
1067 self
.i
= self
.ispec()
1068 self
.o
= self
.ospec()
1071 return FPAddStage1Data(self
.width
, self
.id_wid
)
1074 return FPNorm1Data(self
.width
, self
.id_wid
)
1076 def setup(self
, m
, i
):
1077 """ links module to inputs and outputs
1079 m
.submodules
.normalise_1
= self
1080 m
.d
.comb
+= self
.i
.eq(i
)
1082 def process(self
, i
):
1085 def elaborate(self
, platform
):
1088 mwid
= self
.o
.z
.m_width
+2
1089 pe
= PriorityEncoder(mwid
)
1090 m
.submodules
.norm_pe
= pe
1093 m
.d
.comb
+= self
.o
.roundz
.eq(of
.roundz
)
1095 m
.submodules
.norm1_out_z
= self
.o
.z
1096 m
.submodules
.norm1_out_overflow
= of
1097 m
.submodules
.norm1_in_z
= self
.i
.z
1098 m
.submodules
.norm1_in_overflow
= self
.i
.of
1101 m
.submodules
.norm1_insel_z
= i
.z
1102 m
.submodules
.norm1_insel_overflow
= i
.of
1104 espec
= (len(i
.z
.e
), True)
1105 ediff_n126
= Signal(espec
, reset_less
=True)
1106 msr
= MultiShiftRMerge(mwid
, espec
)
1107 m
.submodules
.multishift_r
= msr
1109 m
.d
.comb
+= i
.eq(self
.i
)
1110 # initialise out from in (overridden below)
1111 m
.d
.comb
+= self
.o
.z
.eq(i
.z
)
1112 m
.d
.comb
+= of
.eq(i
.of
)
1113 # normalisation increase/decrease conditions
1114 decrease
= Signal(reset_less
=True)
1115 increase
= Signal(reset_less
=True)
1116 m
.d
.comb
+= decrease
.eq(i
.z
.m_msbzero
& i
.z
.exp_gt_n126
)
1117 m
.d
.comb
+= increase
.eq(i
.z
.exp_lt_n126
)
1119 with m
.If(~self
.i
.out_do_z
):
1120 with m
.If(decrease
):
1121 # *sigh* not entirely obvious: count leading zeros (clz)
1122 # with a PriorityEncoder: to find from the MSB
1123 # we reverse the order of the bits.
1124 temp_m
= Signal(mwid
, reset_less
=True)
1125 temp_s
= Signal(mwid
+1, reset_less
=True)
1126 clz
= Signal((len(i
.z
.e
), True), reset_less
=True)
1127 # make sure that the amount to decrease by does NOT
1128 # go below the minimum non-INF/NaN exponent
1129 limclz
= Mux(i
.z
.exp_sub_n126
> pe
.o
, pe
.o
,
1132 # cat round and guard bits back into the mantissa
1133 temp_m
.eq(Cat(i
.of
.round_bit
, i
.of
.guard
, i
.z
.m
)),
1134 pe
.i
.eq(temp_m
[::-1]), # inverted
1135 clz
.eq(limclz
), # count zeros from MSB down
1136 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
1137 self
.o
.z
.e
.eq(i
.z
.e
- clz
), # DECREASE exponent
1138 self
.o
.z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
1139 of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
1140 # overflow in bits 0..1: got shifted too (leave sticky)
1141 of
.guard
.eq(temp_s
[1]), # guard
1142 of
.round_bit
.eq(temp_s
[0]), # round
1145 with m
.Elif(increase
):
1146 temp_m
= Signal(mwid
+1, reset_less
=True)
1148 temp_m
.eq(Cat(i
.of
.sticky
, i
.of
.round_bit
, i
.of
.guard
,
1150 ediff_n126
.eq(i
.z
.N126
- i
.z
.e
),
1151 # connect multi-shifter to inp/out mantissa (and ediff)
1153 msr
.diff
.eq(ediff_n126
),
1154 self
.o
.z
.m
.eq(msr
.m
[3:]),
1155 of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
1156 # overflow in bits 0..1: got shifted too (leave sticky)
1157 of
.guard
.eq(temp_s
[2]), # guard
1158 of
.round_bit
.eq(temp_s
[1]), # round
1159 of
.sticky
.eq(temp_s
[0]), # sticky
1160 self
.o
.z
.e
.eq(i
.z
.e
+ ediff_n126
),
1163 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1164 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
1165 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
1170 class FPNorm1ModMulti
:
1172 def __init__(self
, width
, single_cycle
=True):
1174 self
.in_select
= Signal(reset_less
=True)
1175 self
.in_z
= FPNumBase(width
, False)
1176 self
.in_of
= Overflow()
1177 self
.temp_z
= FPNumBase(width
, False)
1178 self
.temp_of
= Overflow()
1179 self
.out_z
= FPNumBase(width
, False)
1180 self
.out_of
= Overflow()
1182 def elaborate(self
, platform
):
1185 m
.submodules
.norm1_out_z
= self
.out_z
1186 m
.submodules
.norm1_out_overflow
= self
.out_of
1187 m
.submodules
.norm1_temp_z
= self
.temp_z
1188 m
.submodules
.norm1_temp_of
= self
.temp_of
1189 m
.submodules
.norm1_in_z
= self
.in_z
1190 m
.submodules
.norm1_in_overflow
= self
.in_of
1192 in_z
= FPNumBase(self
.width
, False)
1194 m
.submodules
.norm1_insel_z
= in_z
1195 m
.submodules
.norm1_insel_overflow
= in_of
1197 # select which of temp or in z/of to use
1198 with m
.If(self
.in_select
):
1199 m
.d
.comb
+= in_z
.eq(self
.in_z
)
1200 m
.d
.comb
+= in_of
.eq(self
.in_of
)
1202 m
.d
.comb
+= in_z
.eq(self
.temp_z
)
1203 m
.d
.comb
+= in_of
.eq(self
.temp_of
)
1204 # initialise out from in (overridden below)
1205 m
.d
.comb
+= self
.out_z
.eq(in_z
)
1206 m
.d
.comb
+= self
.out_of
.eq(in_of
)
1207 # normalisation increase/decrease conditions
1208 decrease
= Signal(reset_less
=True)
1209 increase
= Signal(reset_less
=True)
1210 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
1211 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
1212 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
1214 with m
.If(decrease
):
1216 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
1217 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
1218 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
1219 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
1220 self
.out_of
.round_bit
.eq(0), # reset round bit
1221 self
.out_of
.m0
.eq(in_of
.guard
),
1224 with m
.Elif(increase
):
1226 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
1227 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
1228 self
.out_of
.guard
.eq(in_z
.m
[0]),
1229 self
.out_of
.m0
.eq(in_z
.m
[1]),
1230 self
.out_of
.round_bit
.eq(in_of
.guard
),
1231 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
1237 class FPNorm1Single(FPState
):
1239 def __init__(self
, width
, id_wid
, single_cycle
=True):
1240 FPState
.__init
__(self
, "normalise_1")
1241 self
.mod
= FPNorm1ModSingle(width
)
1242 self
.o
= self
.ospec()
1243 self
.out_z
= FPNumBase(width
, False)
1244 self
.out_roundz
= Signal(reset_less
=True)
1247 return self
.mod
.ispec()
1250 return self
.mod
.ospec()
1252 def setup(self
, m
, i
):
1253 """ links module to inputs and outputs
1255 self
.mod
.setup(m
, i
)
1257 def action(self
, m
):
1261 class FPNorm1Multi(FPState
):
1263 def __init__(self
, width
, id_wid
):
1264 FPState
.__init
__(self
, "normalise_1")
1265 self
.mod
= FPNorm1ModMulti(width
)
1266 self
.stb
= Signal(reset_less
=True)
1267 self
.ack
= Signal(reset
=0, reset_less
=True)
1268 self
.out_norm
= Signal(reset_less
=True)
1269 self
.in_accept
= Signal(reset_less
=True)
1270 self
.temp_z
= FPNumBase(width
)
1271 self
.temp_of
= Overflow()
1272 self
.out_z
= FPNumBase(width
)
1273 self
.out_roundz
= Signal(reset_less
=True)
1275 def setup(self
, m
, in_z
, in_of
, norm_stb
):
1276 """ links module to inputs and outputs
1278 self
.mod
.setup(m
, in_z
, in_of
, norm_stb
,
1279 self
.in_accept
, self
.temp_z
, self
.temp_of
,
1280 self
.out_z
, self
.out_norm
)
1282 m
.d
.comb
+= self
.stb
.eq(norm_stb
)
1283 m
.d
.sync
+= self
.ack
.eq(0) # sets to zero when not in normalise_1 state
1285 def action(self
, m
):
1286 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1287 m
.d
.sync
+= self
.temp_of
.eq(self
.mod
.out_of
)
1288 m
.d
.sync
+= self
.temp_z
.eq(self
.out_z
)
1289 with m
.If(self
.out_norm
):
1290 with m
.If(self
.in_accept
):
1295 m
.d
.sync
+= self
.ack
.eq(0)
1297 # normalisation not required (or done).
1299 m
.d
.sync
+= self
.ack
.eq(1)
1300 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1303 class FPNormToPack(FPState
):
1305 def __init__(self
, width
, id_wid
):
1306 FPState
.__init
__(self
, "normalise_1")
1307 self
.id_wid
= id_wid
1311 return FPAddStage1Data(self
.width
, self
.id_wid
) # Norm1ModSingle ispec
1314 return FPPackData(self
.width
, self
.id_wid
) # FPPackMod ospec
1316 def setup(self
, m
, i
):
1317 """ links module to inputs and outputs
1320 # Normalisation, Rounding Corrections, Pack - in a chain
1321 nmod
= FPNorm1ModSingle(self
.width
, self
.id_wid
)
1322 rmod
= FPRoundMod(self
.width
, self
.id_wid
)
1323 cmod
= FPCorrectionsMod(self
.width
, self
.id_wid
)
1324 pmod
= FPPackMod(self
.width
, self
.id_wid
)
1325 chain
= StageChain([nmod
, rmod
, cmod
, pmod
])
1327 self
.out_z
= pmod
.ospec()
1329 m
.d
.sync
+= self
.out_z
.mid
.eq(pmod
.o
.mid
)
1330 m
.d
.sync
+= self
.out_z
.z
.v
.eq(pmod
.o
.z
.v
) # outputs packed result
1332 def process(self
, i
):
1335 def action(self
, m
):
1336 m
.next
= "pack_put_z"
1341 def __init__(self
, width
, id_wid
):
1342 self
.z
= FPNumBase(width
, False)
1343 self
.out_do_z
= Signal(reset_less
=True)
1344 self
.oz
= Signal(width
, reset_less
=True)
1345 self
.mid
= Signal(id_wid
, reset_less
=True)
1348 return [self
.z
.eq(i
.z
), self
.out_do_z
.eq(i
.out_do_z
), self
.oz
.eq(i
.oz
),
1354 def __init__(self
, width
, id_wid
):
1356 self
.id_wid
= id_wid
1357 self
.i
= self
.ispec()
1358 self
.out_z
= self
.ospec()
1361 return FPNorm1Data(self
.width
, self
.id_wid
)
1364 return FPRoundData(self
.width
, self
.id_wid
)
1366 def process(self
, i
):
1369 def setup(self
, m
, i
):
1370 m
.submodules
.roundz
= self
1371 m
.d
.comb
+= self
.i
.eq(i
)
1373 def elaborate(self
, platform
):
1375 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1376 with m
.If(~self
.i
.out_do_z
):
1377 with m
.If(self
.i
.roundz
):
1378 m
.d
.comb
+= self
.out_z
.z
.m
.eq(self
.i
.z
.m
+ 1) # mantissa up
1379 with m
.If(self
.i
.z
.m
== self
.i
.z
.m1s
): # all 1s
1380 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.e
+ 1) # exponent up
1385 class FPRound(FPState
):
1387 def __init__(self
, width
, id_wid
):
1388 FPState
.__init
__(self
, "round")
1389 self
.mod
= FPRoundMod(width
)
1390 self
.out_z
= self
.ospec()
1393 return self
.mod
.ispec()
1396 return self
.mod
.ospec()
1398 def setup(self
, m
, i
):
1399 """ links module to inputs and outputs
1401 self
.mod
.setup(m
, i
)
1404 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1405 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1407 def action(self
, m
):
1408 m
.next
= "corrections"
1411 class FPCorrectionsMod
:
1413 def __init__(self
, width
, id_wid
):
1415 self
.id_wid
= id_wid
1416 self
.i
= self
.ispec()
1417 self
.out_z
= self
.ospec()
1420 return FPRoundData(self
.width
, self
.id_wid
)
1423 return FPRoundData(self
.width
, self
.id_wid
)
1425 def process(self
, i
):
1428 def setup(self
, m
, i
):
1429 """ links module to inputs and outputs
1431 m
.submodules
.corrections
= self
1432 m
.d
.comb
+= self
.i
.eq(i
)
1434 def elaborate(self
, platform
):
1436 m
.submodules
.corr_in_z
= self
.i
.z
1437 m
.submodules
.corr_out_z
= self
.out_z
.z
1438 m
.d
.comb
+= self
.out_z
.eq(self
.i
) # copies mid, z, out_do_z
1439 with m
.If(~self
.i
.out_do_z
):
1440 with m
.If(self
.i
.z
.is_denormalised
):
1441 m
.d
.comb
+= self
.out_z
.z
.e
.eq(self
.i
.z
.N127
)
1445 class FPCorrections(FPState
):
1447 def __init__(self
, width
, id_wid
):
1448 FPState
.__init
__(self
, "corrections")
1449 self
.mod
= FPCorrectionsMod(width
)
1450 self
.out_z
= self
.ospec()
1453 return self
.mod
.ispec()
1456 return self
.mod
.ospec()
1458 def setup(self
, m
, in_z
):
1459 """ links module to inputs and outputs
1461 self
.mod
.setup(m
, in_z
)
1463 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
1464 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1466 def action(self
, m
):
1472 def __init__(self
, width
, id_wid
):
1473 self
.z
= FPNumOut(width
, False)
1474 self
.mid
= Signal(id_wid
, reset_less
=True)
1477 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1482 def __init__(self
, width
, id_wid
):
1484 self
.id_wid
= id_wid
1485 self
.i
= self
.ispec()
1486 self
.o
= self
.ospec()
1489 return FPRoundData(self
.width
, self
.id_wid
)
1492 return FPPackData(self
.width
, self
.id_wid
)
1494 def process(self
, i
):
1497 def setup(self
, m
, in_z
):
1498 """ links module to inputs and outputs
1500 m
.submodules
.pack
= self
1501 m
.d
.comb
+= self
.i
.eq(in_z
)
1503 def elaborate(self
, platform
):
1505 m
.submodules
.pack_in_z
= self
.i
.z
1506 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
1507 with m
.If(~self
.i
.out_do_z
):
1508 with m
.If(self
.i
.z
.is_overflowed
):
1509 m
.d
.comb
+= self
.o
.z
.inf(self
.i
.z
.s
)
1511 m
.d
.comb
+= self
.o
.z
.create(self
.i
.z
.s
, self
.i
.z
.e
, self
.i
.z
.m
)
1513 m
.d
.comb
+= self
.o
.z
.v
.eq(self
.i
.oz
)
1517 class FPPack(FPState
):
1519 def __init__(self
, width
, id_wid
):
1520 FPState
.__init
__(self
, "pack")
1521 self
.mod
= FPPackMod(width
)
1522 self
.out_z
= self
.ospec()
1525 return self
.mod
.ispec()
1528 return self
.mod
.ospec()
1530 def setup(self
, m
, in_z
):
1531 """ links module to inputs and outputs
1533 self
.mod
.setup(m
, in_z
)
1535 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1536 m
.d
.sync
+= self
.out_z
.mid
.eq(self
.mod
.o
.mid
)
1538 def action(self
, m
):
1539 m
.next
= "pack_put_z"
1542 class FPPutZ(FPState
):
1544 def __init__(self
, state
, in_z
, out_z
, in_mid
, out_mid
, to_state
=None):
1545 FPState
.__init
__(self
, state
)
1546 if to_state
is None:
1547 to_state
= "get_ops"
1548 self
.to_state
= to_state
1551 self
.in_mid
= in_mid
1552 self
.out_mid
= out_mid
1554 def action(self
, m
):
1555 if self
.in_mid
is not None:
1556 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
1558 self
.out_z
.z
.v
.eq(self
.in_z
.v
)
1560 with m
.If(self
.out_z
.z
.stb
& self
.out_z
.z
.ack
):
1561 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(0)
1562 m
.next
= self
.to_state
1564 m
.d
.sync
+= self
.out_z
.z
.stb
.eq(1)
1567 class FPPutZIdx(FPState
):
1569 def __init__(self
, state
, in_z
, out_zs
, in_mid
, to_state
=None):
1570 FPState
.__init
__(self
, state
)
1571 if to_state
is None:
1572 to_state
= "get_ops"
1573 self
.to_state
= to_state
1575 self
.out_zs
= out_zs
1576 self
.in_mid
= in_mid
1578 def action(self
, m
):
1579 outz_stb
= Signal(reset_less
=True)
1580 outz_ack
= Signal(reset_less
=True)
1581 m
.d
.comb
+= [outz_stb
.eq(self
.out_zs
[self
.in_mid
].stb
),
1582 outz_ack
.eq(self
.out_zs
[self
.in_mid
].ack
),
1585 self
.out_zs
[self
.in_mid
].v
.eq(self
.in_z
.v
)
1587 with m
.If(outz_stb
& outz_ack
):
1588 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(0)
1589 m
.next
= self
.to_state
1591 m
.d
.sync
+= self
.out_zs
[self
.in_mid
].stb
.eq(1)
1593 class FPADDBaseData
:
1595 def __init__(self
, width
, id_wid
):
1597 self
.id_wid
= id_wid
1598 self
.a
= Signal(width
)
1599 self
.b
= Signal(width
)
1600 self
.mid
= Signal(id_wid
, reset_less
=True)
1603 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
1607 def __init__(self
, width
, id_wid
):
1608 self
.z
= FPOp(width
)
1609 self
.mid
= Signal(id_wid
, reset_less
=True)
1612 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1617 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
1620 * width: bit-width of IEEE754. supported: 16, 32, 64
1621 * id_wid: an identifier that is sync-connected to the input
1622 * single_cycle: True indicates each stage to complete in 1 clock
1623 * compact: True indicates a reduced number of stages
1626 self
.id_wid
= id_wid
1627 self
.single_cycle
= single_cycle
1628 self
.compact
= compact
1630 self
.in_t
= Trigger()
1631 self
.i
= self
.ispec()
1632 self
.o
= self
.ospec()
1637 return FPADDBaseData(self
.width
, self
.id_wid
)
1640 return FPOpData(self
.width
, self
.id_wid
)
1642 def add_state(self
, state
):
1643 self
.states
.append(state
)
1646 def get_fragment(self
, platform
=None):
1647 """ creates the HDL code-fragment for FPAdd
1650 m
.submodules
.out_z
= self
.o
.z
1651 m
.submodules
.in_t
= self
.in_t
1653 self
.get_compact_fragment(m
, platform
)
1655 self
.get_longer_fragment(m
, platform
)
1657 with m
.FSM() as fsm
:
1659 for state
in self
.states
:
1660 with m
.State(state
.state_from
):
1665 def get_longer_fragment(self
, m
, platform
=None):
1667 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1669 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1673 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
1674 sc
.setup(m
, a
, b
, self
.in_mid
)
1676 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
1677 dn
.setup(m
, a
, b
, sc
.in_mid
)
1679 if self
.single_cycle
:
1680 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
1681 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1683 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
1684 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
1686 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
1687 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
1689 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
1690 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
1692 if self
.single_cycle
:
1693 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
1694 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
1696 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
1697 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
1699 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
1700 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
1702 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
1703 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
1705 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
1706 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
1708 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
1709 pa
.in_mid
, self
.out_mid
))
1711 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
1712 pa
.in_mid
, self
.out_mid
))
1714 def get_compact_fragment(self
, m
, platform
=None):
1716 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
1717 self
.width
, self
.id_wid
))
1718 get
.setup(m
, self
.i
, self
.in_t
.stb
, self
.in_t
.ack
)
1720 sc
= self
.add_state(FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
))
1723 alm
= self
.add_state(FPAddAlignSingleAdd(self
.width
, self
.id_wid
))
1726 n1
= self
.add_state(FPNormToPack(self
.width
, self
.id_wid
))
1727 n1
.setup(m
, alm
.a1o
)
1729 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
1730 n1
.out_z
.mid
, self
.o
.mid
))
1732 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
1733 # sc.o.mid, self.o.mid))
1736 class FPADDBase(FPState
):
1738 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
1741 * width: bit-width of IEEE754. supported: 16, 32, 64
1742 * id_wid: an identifier that is sync-connected to the input
1743 * single_cycle: True indicates each stage to complete in 1 clock
1745 FPState
.__init
__(self
, "fpadd")
1747 self
.single_cycle
= single_cycle
1748 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
1749 self
.o
= self
.ospec()
1751 self
.in_t
= Trigger()
1752 self
.i
= self
.ispec()
1754 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
1755 self
.in_accept
= Signal(reset_less
=True)
1756 self
.add_stb
= Signal(reset_less
=True)
1757 self
.add_ack
= Signal(reset
=0, reset_less
=True)
1760 return self
.mod
.ispec()
1763 return self
.mod
.ospec()
1765 def setup(self
, m
, i
, add_stb
, in_mid
):
1766 m
.d
.comb
+= [self
.i
.eq(i
),
1767 self
.mod
.i
.eq(self
.i
),
1768 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
1769 #self.add_stb.eq(add_stb),
1770 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
1771 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
1772 self
.o
.mid
.eq(self
.mod
.o
.mid
),
1773 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
1774 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
1775 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
1778 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
1779 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
1780 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
1781 #m.d.sync += self.in_t.stb.eq(0)
1783 m
.submodules
.fpadd
= self
.mod
1785 def action(self
, m
):
1787 # in_accept is set on incoming strobe HIGH and ack LOW.
1788 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
1790 #with m.If(self.in_t.ack):
1791 # m.d.sync += self.in_t.stb.eq(0)
1792 with m
.If(~self
.z_done
):
1793 # not done: test for accepting an incoming operand pair
1794 with m
.If(self
.in_accept
):
1796 self
.add_ack
.eq(1), # acknowledge receipt...
1797 self
.in_t
.stb
.eq(1), # initiate add
1800 m
.d
.sync
+= [self
.add_ack
.eq(0),
1801 self
.in_t
.stb
.eq(0),
1805 # done: acknowledge, and write out id and value
1806 m
.d
.sync
+= [self
.add_ack
.eq(1),
1813 if self
.in_mid
is not None:
1814 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
1817 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
1819 # move to output state on detecting z ack
1820 with m
.If(self
.out_z
.trigger
):
1821 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
1824 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
1827 def __init__(self
, width
, id_wid
):
1828 self
.a
= Signal(width
)
1829 self
.b
= Signal(width
)
1830 self
.mid
= Signal(id_wid
, reset_less
=True)
1833 return [self
.a
.eq(i
.a
), self
.b
.eq(i
.b
), self
.mid
.eq(i
.mid
)]
1836 class FPADDStageOut
:
1837 def __init__(self
, width
, id_wid
):
1838 self
.z
= Signal(width
)
1839 self
.mid
= Signal(id_wid
, reset_less
=True)
1842 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
1845 # matches the format of FPADDStageOut, allows eq function to do assignments
1846 class PlaceHolder
: pass
1849 class FPAddBaseStage
:
1850 def __init__(self
, width
, id_wid
):
1852 self
.id_wid
= id_wid
1855 return FPADDStageIn(self
.width
, self
.id_wid
)
1858 return FPADDStageOut(self
.width
, self
.id_wid
)
1860 def process(self
, i
):
1867 class FPADDBasePipe
:
1868 def __init__(self
, width
, id_wid
):
1869 stage1
= FPAddBaseStage(width
, id_wid
)
1870 self
.pipe
= UnbufferedPipeline(stage1
)
1872 def elaborate(self
, platform
):
1873 return self
.pipe
.elaborate(platform
)
1876 return self
.pipe
.ports()
1879 def __init__(self
, width
, id_wid
):
1881 self
.id_wid
= id_wid
1883 for i
in range(rs_sz
):
1885 out_z
.name
= "out_z_%d" % i
1887 self
.res
= Array(res
)
1888 self
.in_z
= FPOp(width
)
1889 self
.in_mid
= Signal(self
.id_wid
, reset_less
=True)
1891 def setup(self
, m
, in_z
, in_mid
):
1892 m
.d
.comb
+= [self
.in_z
.eq(in_z
),
1893 self
.in_mid
.eq(in_mid
)]
1895 def get_fragment(self
, platform
=None):
1896 """ creates the HDL code-fragment for FPAdd
1899 m
.submodules
.res_in_z
= self
.in_z
1900 m
.submodules
+= self
.res
1912 """ FPADD: stages as follows:
1918 FPAddBase---> FPAddBaseMod
1920 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1922 FPAddBase is tricky: it is both a stage and *has* stages.
1923 Connection to FPAddBaseMod therefore requires an in stb/ack
1924 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1925 needs to be the thing that raises the incoming stb.
1928 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1931 * width: bit-width of IEEE754. supported: 16, 32, 64
1932 * id_wid: an identifier that is sync-connected to the input
1933 * single_cycle: True indicates each stage to complete in 1 clock
1936 self
.id_wid
= id_wid
1937 self
.single_cycle
= single_cycle
1939 #self.out_z = FPOp(width)
1940 self
.ids
= FPID(id_wid
)
1943 for i
in range(rs_sz
):
1946 in_a
.name
= "in_a_%d" % i
1947 in_b
.name
= "in_b_%d" % i
1948 rs
.append((in_a
, in_b
))
1952 for i
in range(rs_sz
):
1954 out_z
.name
= "out_z_%d" % i
1956 self
.res
= Array(res
)
1960 def add_state(self
, state
):
1961 self
.states
.append(state
)
1964 def get_fragment(self
, platform
=None):
1965 """ creates the HDL code-fragment for FPAdd
1968 m
.submodules
+= self
.rs
1970 in_a
= self
.rs
[0][0]
1971 in_b
= self
.rs
[0][1]
1973 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1978 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1983 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1984 ab
= self
.add_state(ab
)
1985 abd
= ab
.ispec() # create an input spec object for FPADDBase
1986 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
1987 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
1990 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
1993 with m
.FSM() as fsm
:
1995 for state
in self
.states
:
1996 with m
.State(state
.state_from
):
2002 if __name__
== "__main__":
2004 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
2005 main(alu
, ports
=alu
.rs
[0][0].ports() + \
2006 alu
.rs
[0][1].ports() + \
2007 alu
.res
[0].ports() + \
2008 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
2010 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
2011 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
2012 alu
.in_t
.ports() + \
2013 alu
.out_z
.ports() + \
2014 [alu
.in_mid
, alu
.out_mid
])
2017 # works... but don't use, just do "python fname.py convert -t v"
2018 #print (verilog.convert(alu, ports=[
2019 # ports=alu.in_a.ports() + \
2020 # alu.in_b.ports() + \
2021 # alu.out_z.ports())