1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
9 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
10 from fpbase
import MultiShiftRMerge
, Trigger
11 #from fpbase import FPNumShiftMultiRight
14 class FPState(FPBase
):
def __init__(self, state_from):
    """Remember the identifier of the state this object stands for.

    state_from: stored unchanged as ``self.state_from``.
    """
    self.state_from = state_from
18 def set_inputs(self
, inputs
):
20 for k
,v
in inputs
.items():
23 def set_outputs(self
, outputs
):
24 self
.outputs
= outputs
25 for k
,v
in outputs
.items():
29 class FPGetSyncOpsMod
:
30 def __init__(self
, width
, num_ops
=2):
32 self
.num_ops
= num_ops
35 for i
in range(num_ops
):
36 inops
.append(Signal(width
, reset_less
=True))
37 outops
.append(Signal(width
, reset_less
=True))
40 self
.stb
= Signal(num_ops
)
42 self
.ready
= Signal(reset_less
=True)
43 self
.out_decode
= Signal(reset_less
=True)
45 def elaborate(self
, platform
):
47 m
.d
.comb
+= self
.ready
.eq(self
.stb
== Const(-1, (self
.num_ops
, False)))
48 m
.d
.comb
+= self
.out_decode
.eq(self
.ack
& self
.ready
)
49 with m
.If(self
.out_decode
):
50 for i
in range(self
.num_ops
):
52 self
.out_op
[i
].eq(self
.in_op
[i
]),
57 return self
.in_op
+ self
.out_op
+ [self
.stb
, self
.ack
]
60 class InputGroup(Trigger
):
61 def __init__(self
, width
, num_ops
=2, num_rows
=4):
62 Trigger
.__init
__(self
)
64 self
.num_ops
= num_ops
65 self
.num_rows
= num_rows
67 for i
in range(num_rows
):
68 self
.rs
.append(FPGetSyncOpsMod(width
, num_ops
))
71 for i
in range(num_ops
):
72 outops
.append(Signal(width
, reset_less
=True))
def elaborate(self, platform):
    """Build the row-selection logic and return the module.

    A PriorityEncoder is registered as the ``selector`` submodule and
    is fed one bit per row, taken from each row's ``ready`` flag.
    ``self.stb`` is raised whenever at least one row is ready.
    """
    # unbound base-class call: the instance has to be passed explicitly
    m = Trigger.elaborate(self, platform)
    pe = PriorityEncoder(self.num_rows)
    m.submodules.selector = pe

    # connect priority encoder (bit i <- row i ready)
    in_ready = [self.rs[i].ready for i in range(self.num_rows)]
    # the encoder is the local ``pe``; no ``self.pe`` attribute is ever set
    m.d.comb += pe.i.eq(Cat(*in_ready))
    # PriorityEncoder.n is asserted when NO input bit is set, so the
    # strobe-out ("encoder is active") is its inverse
    m.d.comb += self.stb.eq(~pe.n)
    return m
91 def __init__(self
, width
):
92 self
.in_op
= FPOp(width
)
93 self
.out_op
= Signal(width
)
94 self
.out_decode
= Signal(reset_less
=True)
96 def elaborate(self
, platform
):
98 m
.d
.comb
+= self
.out_decode
.eq((self
.in_op
.ack
) & (self
.in_op
.stb
))
99 m
.submodules
.get_op_in
= self
.in_op
100 #m.submodules.get_op_out = self.out_op
101 with m
.If(self
.out_decode
):
103 self
.out_op
.eq(self
.in_op
.v
),
108 class FPGetOp(FPState
):
112 def __init__(self
, in_state
, out_state
, in_op
, width
):
113 FPState
.__init
__(self
, in_state
)
114 self
.out_state
= out_state
115 self
.mod
= FPGetOpMod(width
)
117 self
.out_op
= Signal(width
)
118 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op):
    """Register the get-op module in ``m`` and connect it.

    The module is added to ``m.submodules`` under the name held in
    ``self.state_from``; its ``in_op`` is driven from ``in_op`` and its
    ``out_decode`` is mirrored onto ``self.out_decode``.
    """
    setattr(m.submodules, self.state_from, self.mod)
    #m.d.comb += self.out_op.eq(self.mod.out_op)
    m.d.comb += [
        self.mod.in_op.copy(in_op),
        self.out_decode.eq(self.mod.out_decode),
    ]
129 with m
.If(self
.out_decode
):
130 m
.next
= self
.out_state
132 self
.in_op
.ack
.eq(0),
133 self
.out_op
.eq(self
.mod
.out_op
)
136 m
.d
.sync
+= self
.in_op
.ack
.eq(1)
139 class FPGet2OpMod(Trigger
):
def __init__(self, width):
    """Create the two raw operand inputs and the two FPNumIn outputs.

    width: bit-width passed to both input Signals and both FPNumIn
           outputs.
    """
    Trigger.__init__(self)

    # raw operands in
    self.in_op1 = Signal(width, reset_less=True)
    self.in_op2 = Signal(width, reset_less=True)

    # operands out, as FPNumIn objects
    self.out_op1 = FPNumIn(None, width)
    self.out_op2 = FPNumIn(None, width)
147 def elaborate(self
, platform
):
148 m
= Trigger
.elaborate(self
, platform
)
149 #m.submodules.get_op_in = self.in_op
150 m
.submodules
.get_op1_out
= self
.out_op1
151 m
.submodules
.get_op2_out
= self
.out_op2
152 with m
.If(self
.trigger
):
154 self
.out_op1
.decode(self
.in_op1
),
155 self
.out_op2
.decode(self
.in_op2
),
160 class FPGet2Op(FPState
):
164 def __init__(self
, in_state
, out_state
, in_op1
, in_op2
, width
):
165 FPState
.__init
__(self
, in_state
)
166 self
.out_state
= out_state
167 self
.mod
= FPGet2OpMod(width
)
170 self
.out_op1
= FPNumIn(None, width
)
171 self
.out_op2
= FPNumIn(None, width
)
172 self
.in_stb
= Signal(reset_less
=True)
173 self
.out_ack
= Signal(reset_less
=True)
174 self
.out_decode
= Signal(reset_less
=True)
def setup(self, m, in_op1, in_op2, in_stb, in_ack):
    """Register the two-operand fetch module and wire its handshake.

    m:       module under construction; gains a ``get_ops`` submodule
    in_op1:  drives the module's ``in_op1``
    in_op2:  drives the module's ``in_op2``
    in_stb:  drives the module's ``stb``
    in_ack:  driven *from* the module's ``ack``
    """
    mod = self.mod
    m.submodules.get_ops = mod
    m.d.comb += [
        # operands and strobe into the module
        mod.in_op1.eq(in_op1),
        mod.in_op2.eq(in_op2),
        mod.stb.eq(in_stb),
        # module status mirrored onto this state's outputs
        self.out_ack.eq(mod.ack),
        self.out_decode.eq(mod.trigger),
        # acknowledge back to the caller
        in_ack.eq(mod.ack),
    ]
188 with m
.If(self
.out_decode
):
189 m
.next
= self
.out_state
192 #self.out_op1.v.eq(self.mod.out_op1.v),
193 #self.out_op2.v.eq(self.mod.out_op2.v),
194 self
.out_op1
.copy(self
.mod
.out_op1
),
195 self
.out_op2
.copy(self
.mod
.out_op2
)
198 m
.d
.sync
+= self
.mod
.ack
.eq(1)
201 class FPAddSpecialCasesMod
:
202 """ special cases: NaNs, infs, zeros, denormalised
203 NOTE: some of these are unique to add. see "Special Operations"
204 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width):
    """Allocate the operand inputs, the result and the 'result valid' flag.

    width: passed to the FPNumBase inputs and the FPNumOut result.
    """
    # operands
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # result, plus the flag saying the result should be used
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, in_a, in_b, out_do_z):
    """Register this module as ``specialcases`` and connect it.

    in_a / in_b: copied into ``self.in_a`` / ``self.in_b``
    out_do_z:    driven from ``self.out_do_z``
    """
    m.submodules.specialcases = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
        out_do_z.eq(self.out_do_z),
    ]
221 def elaborate(self
, platform
):
224 m
.submodules
.sc_in_a
= self
.in_a
225 m
.submodules
.sc_in_b
= self
.in_b
226 m
.submodules
.sc_out_z
= self
.out_z
229 m
.d
.comb
+= s_nomatch
.eq(self
.in_a
.s
!= self
.in_b
.s
)
232 m
.d
.comb
+= m_match
.eq(self
.in_a
.m
== self
.in_b
.m
)
234 # if a is NaN or b is NaN return NaN
235 with m
.If(self
.in_a
.is_nan | self
.in_b
.is_nan
):
236 m
.d
.comb
+= self
.out_do_z
.eq(1)
237 m
.d
.comb
+= self
.out_z
.nan(0)
239 # XXX WEIRDNESS for FP16 non-canonical NaN handling
242 ## if a is zero and b is NaN return -b
243 #with m.If(a.is_zero & (a.s==0) & b.is_nan):
244 # m.d.comb += self.out_do_z.eq(1)
245 # m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))
247 ## if b is zero and a is NaN return -a
248 #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
249 # m.d.comb += self.out_do_z.eq(1)
250 # m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))
252 ## if a is -zero and b is NaN return -b
253 #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
254 # m.d.comb += self.out_do_z.eq(1)
255 # m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))
257 ## if b is -zero and a is NaN return -a
258 #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
259 # m.d.comb += self.out_do_z.eq(1)
260 # m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))
262 # if a is inf return inf (or NaN)
263 with m
.Elif(self
.in_a
.is_inf
):
264 m
.d
.comb
+= self
.out_do_z
.eq(1)
265 m
.d
.comb
+= self
.out_z
.inf(self
.in_a
.s
)
266 # if a is inf and signs don't match return NaN
267 with m
.If(self
.in_b
.exp_128
& s_nomatch
):
268 m
.d
.comb
+= self
.out_z
.nan(0)
270 # if b is inf return inf
271 with m
.Elif(self
.in_b
.is_inf
):
272 m
.d
.comb
+= self
.out_do_z
.eq(1)
273 m
.d
.comb
+= self
.out_z
.inf(self
.in_b
.s
)
275 # if a is zero and b zero return signed-a/b
276 with m
.Elif(self
.in_a
.is_zero
& self
.in_b
.is_zero
):
277 m
.d
.comb
+= self
.out_do_z
.eq(1)
278 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
& self
.in_b
.s
,
282 # if a is zero return b
283 with m
.Elif(self
.in_a
.is_zero
):
284 m
.d
.comb
+= self
.out_do_z
.eq(1)
285 m
.d
.comb
+= self
.out_z
.create(self
.in_b
.s
, self
.in_b
.e
,
288 # if b is zero return a
289 with m
.Elif(self
.in_b
.is_zero
):
290 m
.d
.comb
+= self
.out_do_z
.eq(1)
291 m
.d
.comb
+= self
.out_z
.create(self
.in_a
.s
, self
.in_a
.e
,
294 # if a equal to -b return zero (+ve zero)
295 with m
.Elif(s_nomatch
& m_match
& (self
.in_a
.e
== self
.in_b
.e
)):
296 m
.d
.comb
+= self
.out_do_z
.eq(1)
297 m
.d
.comb
+= self
.out_z
.zero(0)
299 # Denormalised Number checks
301 m
.d
.comb
+= self
.out_do_z
.eq(0)
307 def __init__(self
, id_wid
):
310 self
.in_mid
= Signal(id_wid
, reset_less
=True)
311 self
.out_mid
= Signal(id_wid
, reset_less
=True)
317 if self
.id_wid
is not None:
318 m
.d
.sync
+= self
.out_mid
.eq(self
.in_mid
)
321 class FPAddSpecialCases(FPState
, FPID
):
322 """ special cases: NaNs, infs, zeros, denormalised
323 NOTE: some of these are unique to add. see "Special Operations"
324 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Create the "special_cases" state.

    width:  passed to the special-cases module and the FPNumOut result
    id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)

    self.mod = FPAddSpecialCasesMod(width)

    # registered copies of the module's outputs
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """Connect the special-cases module and, if present, the ID input.

    The module's 'do z' flag is routed to ``self.out_do_z``.  ``in_mid``
    is only connected when ``self.in_mid`` is not None.
    """
    self.mod.setup(m, in_a, in_b, self.out_do_z)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
343 with m
.If(self
.out_do_z
):
344 m
.d
.sync
+= self
.out_z
.v
.eq(self
.mod
.out_z
.v
) # only take the output
347 m
.next
= "denormalise"
350 class FPAddSpecialCasesDeNorm(FPState
, FPID
):
351 """ special cases: NaNs, infs, zeros, denormalised
352 NOTE: some of these are unique to add. see "Special Operations"
353 https://steve.hollasch.net/cgindex/coding/ieeefloat.html
def __init__(self, width, id_wid):
    """Create a state holding both a special-cases and a denormalise module.

    width:  passed to both modules and to every FP number created here
    id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "special_cases")
    FPID.__init__(self, id_wid)

    # special-cases part
    self.smod = FPAddSpecialCasesMod(width)
    self.out_z = FPNumOut(width, False)
    self.out_do_z = Signal(reset_less=True)

    # denormalise part
    self.dmod = FPAddDeNormMod(width)
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
def setup(self, m, in_a, in_b, in_mid):
    """Connect both sub-stages to the same pair of operands.

    The special-cases module reports through ``self.out_do_z``; the
    denormalise module receives the same ``in_a`` / ``in_b``.  ``in_mid``
    is only connected when ``self.in_mid`` is not None.
    """
    self.smod.setup(m, in_a, in_b, self.out_do_z)
    self.dmod.setup(m, in_a, in_b)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
377 with m
.If(self
.out_do_z
):
378 m
.d
.sync
+= self
.out_z
.v
.eq(self
.smod
.out_z
.v
) # only take output
382 m
.d
.sync
+= self
.out_a
.copy(self
.dmod
.out_a
)
383 m
.d
.sync
+= self
.out_b
.copy(self
.dmod
.out_b
)
386 class FPAddDeNormMod(FPState
):
def __init__(self, width):
    """Allocate input and output operand pairs.

    width: passed to each of the four FPNumBase objects.
    """
    # operands in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # operands out
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
def setup(self, m, in_a, in_b):
    """Register this module as ``denormalise`` and feed it its operands.

    in_a / in_b: copied into ``self.in_a`` / ``self.in_b``
    """
    m.submodules.denormalise = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
    ]
401 def elaborate(self
, platform
):
403 m
.submodules
.denorm_in_a
= self
.in_a
404 m
.submodules
.denorm_in_b
= self
.in_b
405 m
.submodules
.denorm_out_a
= self
.out_a
406 m
.submodules
.denorm_out_b
= self
.out_b
407 # hmmm, don't like repeating identical code
408 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
409 with m
.If(self
.in_a
.exp_n127
):
410 m
.d
.comb
+= self
.out_a
.e
.eq(self
.in_a
.N126
) # limit a exponent
412 m
.d
.comb
+= self
.out_a
.m
[-1].eq(1) # set top mantissa bit
414 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
415 with m
.If(self
.in_b
.exp_n127
):
416 m
.d
.comb
+= self
.out_b
.e
.eq(self
.in_b
.N126
) # limit a exponent
418 m
.d
.comb
+= self
.out_b
.m
[-1].eq(1) # set top mantissa bit
423 class FPAddDeNorm(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Create the "denormalise" state.

    width:  passed to the denormalise module and both output numbers
    id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "denormalise")
    FPID.__init__(self, id_wid)

    self.mod = FPAddDeNormMod(width)

    # this state's own copies of the module outputs
    self.out_a = FPNumBase(width)
    self.out_b = FPNumBase(width)
def setup(self, m, in_a, in_b, in_mid):
    """Connect the denormalise module and, if present, the ID input.

    ``in_mid`` is only connected when ``self.in_mid`` is not None.
    """
    self.mod.setup(m, in_a, in_b)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
441 # Denormalised Number checks
443 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
444 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
447 class FPAddAlignMultiMod(FPState
):
def __init__(self, width):
    """Allocate operands in/out and the 'exponents equal' flag.

    width: passed to the FPNumBase inputs and the FPNumIn outputs.
    """
    # operands in
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # operands out
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    self.exp_eq = Signal(reset_less=True)
456 def elaborate(self
, platform
):
457 # This one however (single-cycle) will do the shift
462 m
.submodules
.align_in_a
= self
.in_a
463 m
.submodules
.align_in_b
= self
.in_b
464 m
.submodules
.align_out_a
= self
.out_a
465 m
.submodules
.align_out_b
= self
.out_b
467 # NOTE: this does *not* do single-cycle multi-shifting,
468 # it *STAYS* in the align state until exponents match
470 # exponent of a greater than b: shift b down
471 m
.d
.comb
+= self
.exp_eq
.eq(0)
472 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
473 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
474 agtb
= Signal(reset_less
=True)
475 altb
= Signal(reset_less
=True)
476 m
.d
.comb
+= agtb
.eq(self
.in_a
.e
> self
.in_b
.e
)
477 m
.d
.comb
+= altb
.eq(self
.in_a
.e
< self
.in_b
.e
)
479 m
.d
.comb
+= self
.out_b
.shift_down(self
.in_b
)
480 # exponent of b greater than a: shift a down
482 m
.d
.comb
+= self
.out_a
.shift_down(self
.in_a
)
483 # exponents equal: move to next stage.
485 m
.d
.comb
+= self
.exp_eq
.eq(1)
489 class FPAddAlignMulti(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Create the "align" state around FPAddAlignMultiMod.

    width:  passed to the align module and both FPNumIn outputs
    id_wid: passed to FPID.__init__
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "align")

    self.mod = FPAddAlignMultiMod(width)

    # this state's own outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
    self.exp_eq = Signal(reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """Register the align module as ``align`` and connect it.

    The operands are copied into the module, and the module's
    ``exp_eq`` is mirrored onto ``self.exp_eq``.  ``in_mid`` is only
    connected when ``self.in_mid`` is not None.
    """
    mod = self.mod
    m.submodules.align = mod
    #m.d.comb += self.out_a.copy(self.mod.out_a)
    #m.d.comb += self.out_b.copy(self.mod.out_b)
    m.d.comb += [
        mod.in_a.copy(in_a),
        mod.in_b.copy(in_b),
        self.exp_eq.eq(mod.exp_eq),
    ]

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
513 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
514 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
515 with m
.If(self
.exp_eq
):
519 class FPAddAlignSingleMod
:
521 def __init__(self
, width
):
523 self
.in_a
= FPNumBase(width
)
524 self
.in_b
= FPNumBase(width
)
525 self
.out_a
= FPNumIn(None, width
)
526 self
.out_b
= FPNumIn(None, width
)
def setup(self, m, in_a, in_b):
    """Register this module as ``align`` and feed it its operands.

    in_a / in_b: copied into ``self.in_a`` / ``self.in_b``
    """
    m.submodules.align = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
    ]
535 def elaborate(self
, platform
):
536 """ Aligns A against B or B against A, depending on which has the
537 greater exponent. This is done in a *single* cycle using
538 variable-width bit-shift
540 the shifter used here is quite expensive in terms of gates.
541 Mux A or B in (and out) into temporaries, as only one of them
542 needs to be aligned against the other
546 m
.submodules
.align_in_a
= self
.in_a
547 m
.submodules
.align_in_b
= self
.in_b
548 m
.submodules
.align_out_a
= self
.out_a
549 m
.submodules
.align_out_b
= self
.out_b
551 # temporary (muxed) input and output to be shifted
552 t_inp
= FPNumBase(self
.width
)
553 t_out
= FPNumIn(None, self
.width
)
554 espec
= (len(self
.in_a
.e
), True)
555 msr
= MultiShiftRMerge(self
.in_a
.m_width
, espec
)
556 m
.submodules
.align_t_in
= t_inp
557 m
.submodules
.align_t_out
= t_out
558 m
.submodules
.multishift_r
= msr
560 ediff
= Signal(espec
, reset_less
=True)
561 ediffr
= Signal(espec
, reset_less
=True)
562 tdiff
= Signal(espec
, reset_less
=True)
563 elz
= Signal(reset_less
=True)
564 egz
= Signal(reset_less
=True)
566 # connect multi-shifter to t_inp/out mantissa (and tdiff)
567 m
.d
.comb
+= msr
.inp
.eq(t_inp
.m
)
568 m
.d
.comb
+= msr
.diff
.eq(tdiff
)
569 m
.d
.comb
+= t_out
.m
.eq(msr
.m
)
570 m
.d
.comb
+= t_out
.e
.eq(t_inp
.e
+ tdiff
)
571 m
.d
.comb
+= t_out
.s
.eq(t_inp
.s
)
573 m
.d
.comb
+= ediff
.eq(self
.in_a
.e
- self
.in_b
.e
)
574 m
.d
.comb
+= ediffr
.eq(self
.in_b
.e
- self
.in_a
.e
)
575 m
.d
.comb
+= elz
.eq(self
.in_a
.e
< self
.in_b
.e
)
576 m
.d
.comb
+= egz
.eq(self
.in_a
.e
> self
.in_b
.e
)
578 # default: A-exp == B-exp, A and B untouched (fall through)
579 m
.d
.comb
+= self
.out_a
.copy(self
.in_a
)
580 m
.d
.comb
+= self
.out_b
.copy(self
.in_b
)
581 # only one shifter (muxed)
582 #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
583 # exponent of a greater than b: shift b down
585 m
.d
.comb
+= [t_inp
.copy(self
.in_b
),
587 self
.out_b
.copy(t_out
),
588 self
.out_b
.s
.eq(self
.in_b
.s
), # whoops forgot sign
590 # exponent of b greater than a: shift a down
592 m
.d
.comb
+= [t_inp
.copy(self
.in_a
),
594 self
.out_a
.copy(t_out
),
595 self
.out_a
.s
.eq(self
.in_a
.s
), # whoops forgot sign
600 class FPAddAlignSingle(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Create the "align" state around FPAddAlignSingleMod.

    width:  passed to the align module and both FPNumIn outputs
    id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    self.mod = FPAddAlignSingleMod(width)

    # this state's own outputs
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)
def setup(self, m, in_a, in_b, in_mid):
    """Connect the align module and, if present, the ID input.

    ``in_mid`` is only connected when ``self.in_mid`` is not None.
    """
    self.mod.setup(m, in_a, in_b)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
618 # NOTE: could be done as comb
619 m
.d
.sync
+= self
.out_a
.copy(self
.mod
.out_a
)
620 m
.d
.sync
+= self
.out_b
.copy(self
.mod
.out_b
)
624 class FPAddAlignSingleAdd(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Create a combined align + add-stage-0 + add-stage-1 state.

    width:  passed to every module and FP number created here
    id_wid: passed to FPID.__init__

    The state is registered under the name "align".
    """
    FPState.__init__(self, "align")
    FPID.__init__(self, id_wid)

    # alignment
    self.mod = FPAddAlignSingleMod(width)
    self.out_a = FPNumIn(None, width)
    self.out_b = FPNumIn(None, width)

    # add stage 0.  a0_out_z is allocated once only: the original
    # created a second, identical FPNumBase straight after out_tot and
    # threw the first one away.
    self.a0mod = FPAddStage0Mod(width)
    self.a0_out_z = FPNumBase(width, False)
    # total is 4 bits wider than the mantissa of a0_out_z
    self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)

    # add stage 1
    self.a1mod = FPAddStage1Mod(width)
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
def setup(self, m, in_a, in_b, in_mid):
    """Chain align -> add stage 0 -> add stage 1 combinatorially.

    Each module's outputs are copied onto this state's matching
    attributes, which are then handed to the next module's setup.
    ``in_mid`` is only connected when ``self.in_mid`` is not None.
    """
    # alignment, fed from the caller's operands
    self.mod.setup(m, in_a, in_b)
    m.d.comb += [
        self.out_a.copy(self.mod.out_a),
        self.out_b.copy(self.mod.out_b),
    ]

    # add stage 0, fed from the aligned operands
    self.a0mod.setup(m, self.out_a, self.out_b)
    m.d.comb += [
        self.a0_out_z.copy(self.a0mod.out_z),
        self.out_tot.eq(self.a0mod.out_tot),
    ]

    # add stage 1, fed from stage 0's total and z
    self.a1mod.setup(m, self.out_tot, self.a0_out_z)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
660 m
.d
.sync
+= self
.out_of
.copy(self
.a1mod
.out_of
)
661 m
.d
.sync
+= self
.out_z
.copy(self
.a1mod
.out_z
)
662 m
.next
= "normalise_1"
665 class FPAddStage0Mod
:
def __init__(self, width):
    """Allocate the operands, the z numbers and the mantissa total.

    width: passed to every FPNumBase created here.
    """
    # operands
    self.in_a = FPNumBase(width)
    self.in_b = FPNumBase(width)

    # z in / out
    self.in_z = FPNumBase(width, False)
    self.out_z = FPNumBase(width, False)

    # total is 4 bits wider than out_z's mantissa
    self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
def setup(self, m, in_a, in_b):
    """Register this module as ``add0`` and feed it its operands.

    in_a / in_b: copied into ``self.in_a`` / ``self.in_b``
    """
    m.submodules.add0 = self
    m.d.comb += [
        self.in_a.copy(in_a),
        self.in_b.copy(in_b),
    ]
681 def elaborate(self
, platform
):
683 m
.submodules
.add0_in_a
= self
.in_a
684 m
.submodules
.add0_in_b
= self
.in_b
685 m
.submodules
.add0_out_z
= self
.out_z
687 m
.d
.comb
+= self
.out_z
.e
.eq(self
.in_a
.e
)
689 # store intermediate tests (and zero-extended mantissas)
690 seq
= Signal(reset_less
=True)
691 mge
= Signal(reset_less
=True)
692 am0
= Signal(len(self
.in_a
.m
)+1, reset_less
=True)
693 bm0
= Signal(len(self
.in_b
.m
)+1, reset_less
=True)
694 m
.d
.comb
+= [seq
.eq(self
.in_a
.s
== self
.in_b
.s
),
695 mge
.eq(self
.in_a
.m
>= self
.in_b
.m
),
696 am0
.eq(Cat(self
.in_a
.m
, 0)),
697 bm0
.eq(Cat(self
.in_b
.m
, 0))
699 # same-sign (both negative or both positive) add mantissas
702 self
.out_tot
.eq(am0
+ bm0
),
703 self
.out_z
.s
.eq(self
.in_a
.s
)
705 # a mantissa greater than b, use a
708 self
.out_tot
.eq(am0
- bm0
),
709 self
.out_z
.s
.eq(self
.in_a
.s
)
711 # b mantissa greater than a, use b
714 self
.out_tot
.eq(bm0
- am0
),
715 self
.out_z
.s
.eq(self
.in_b
.s
)
720 class FPAddStage0(FPState
, FPID
):
721 """ First stage of add. covers same-sign (add) and subtract
722 special-casing when mantissas are greater or equal, to
723 give greatest accuracy.
def __init__(self, width, id_wid):
    """Create the "add_0" state around FPAddStage0Mod.

    width:  passed to the module and to ``out_z``
    id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "add_0")
    FPID.__init__(self, id_wid)

    self.mod = FPAddStage0Mod(width)

    self.out_z = FPNumBase(width, False)
    # total is 4 bits wider than out_z's mantissa
    self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)
def setup(self, m, in_a, in_b, in_mid):
    """Connect the add-stage-0 module and, if present, the ID input.

    ``in_mid`` is only connected when ``self.in_mid`` is not None.
    """
    self.mod.setup(m, in_a, in_b)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
742 # NOTE: these could be done as combinatorial (merge add0+add1)
743 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
744 m
.d
.sync
+= self
.out_tot
.eq(self
.mod
.out_tot
)
748 class FPAddStage1Mod(FPState
):
749 """ Second stage of add: preparation for normalisation.
750 detects when tot sum is too big (tot[27] is kinda a carry bit)
def __init__(self, width):
    """Allocate the stage's inputs (z, total) and outputs (z, overflow).

    width: passed to both FPNumBase objects.
    """
    self.out_norm = Signal(reset_less=True)

    # inputs: z and a total 4 bits wider than z's mantissa
    self.in_z = FPNumBase(width, False)
    self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)

    # outputs
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
def setup(self, m, in_tot, in_z):
    """Register this module (and its overflow) and feed it its inputs.

    The module is added as ``add1`` and ``self.out_of`` as
    ``add1_out_overflow``.

    in_tot: drives ``self.in_tot``
    in_z:   copied into ``self.in_z``
    """
    m.submodules.add1 = self
    m.submodules.add1_out_overflow = self.out_of

    m.d.comb += [
        self.in_z.copy(in_z),
        self.in_tot.eq(in_tot),
    ]
769 def elaborate(self
, platform
):
771 #m.submodules.norm1_in_overflow = self.in_of
772 #m.submodules.norm1_out_overflow = self.out_of
773 #m.submodules.norm1_in_z = self.in_z
774 #m.submodules.norm1_out_z = self.out_z
775 m
.d
.comb
+= self
.out_z
.copy(self
.in_z
)
776 # tot[27] gets set when the sum overflows. shift result down
777 with m
.If(self
.in_tot
[-1]):
779 self
.out_z
.m
.eq(self
.in_tot
[4:]),
780 self
.out_of
.m0
.eq(self
.in_tot
[4]),
781 self
.out_of
.guard
.eq(self
.in_tot
[3]),
782 self
.out_of
.round_bit
.eq(self
.in_tot
[2]),
783 self
.out_of
.sticky
.eq(self
.in_tot
[1] | self
.in_tot
[0]),
784 self
.out_z
.e
.eq(self
.in_z
.e
+ 1)
789 self
.out_z
.m
.eq(self
.in_tot
[3:]),
790 self
.out_of
.m0
.eq(self
.in_tot
[3]),
791 self
.out_of
.guard
.eq(self
.in_tot
[2]),
792 self
.out_of
.round_bit
.eq(self
.in_tot
[1]),
793 self
.out_of
.sticky
.eq(self
.in_tot
[0])
798 class FPAddStage1(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Create the "add_1" state around FPAddStage1Mod.

    width:  passed to the module and to ``out_z``
    id_wid: passed to FPID.__init__
    """
    FPState.__init__(self, "add_1")
    FPID.__init__(self, id_wid)

    self.mod = FPAddStage1Mod(width)

    # this state's outputs, plus the strobe for the normalise stage
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    self.norm_stb = Signal()
def setup(self, m, in_tot, in_z, in_mid):
    """Connect the add-stage-1 module and default the normalise strobe.

    ``in_mid`` is only connected when ``self.in_mid`` is not None.
    """
    self.mod.setup(m, in_tot, in_z)

    # sets to zero when not in add1 state
    m.d.sync += self.norm_stb.eq(0)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
820 m
.d
.sync
+= self
.out_of
.copy(self
.mod
.out_of
)
821 m
.d
.sync
+= self
.out_z
.copy(self
.mod
.out_z
)
822 m
.d
.sync
+= self
.norm_stb
.eq(1)
823 m
.next
= "normalise_1"
826 class FPNorm1ModSingle
:
828 def __init__(self
, width
):
830 self
.out_norm
= Signal(reset_less
=True)
831 self
.in_z
= FPNumBase(width
, False)
832 self
.in_of
= Overflow()
833 self
.out_z
= FPNumBase(width
, False)
834 self
.out_of
= Overflow()
836 def setup(self
, m
, in_z
, in_of
, out_z
):
837 """ links module to inputs and outputs
839 m
.submodules
.normalise_1
= self
841 m
.d
.comb
+= self
.in_z
.copy(in_z
)
842 m
.d
.comb
+= self
.in_of
.copy(in_of
)
844 m
.d
.comb
+= out_z
.copy(self
.out_z
)
846 def elaborate(self
, platform
):
849 mwid
= self
.out_z
.m_width
+2
850 pe
= PriorityEncoder(mwid
)
851 m
.submodules
.norm_pe
= pe
853 m
.submodules
.norm1_out_z
= self
.out_z
854 m
.submodules
.norm1_out_overflow
= self
.out_of
855 m
.submodules
.norm1_in_z
= self
.in_z
856 m
.submodules
.norm1_in_overflow
= self
.in_of
858 in_z
= FPNumBase(self
.width
, False)
860 m
.submodules
.norm1_insel_z
= in_z
861 m
.submodules
.norm1_insel_overflow
= in_of
863 espec
= (len(in_z
.e
), True)
864 ediff_n126
= Signal(espec
, reset_less
=True)
865 msr
= MultiShiftRMerge(mwid
, espec
)
866 m
.submodules
.multishift_r
= msr
868 m
.d
.comb
+= in_z
.copy(self
.in_z
)
869 m
.d
.comb
+= in_of
.copy(self
.in_of
)
870 # initialise out from in (overridden below)
871 m
.d
.comb
+= self
.out_z
.copy(in_z
)
872 m
.d
.comb
+= self
.out_of
.copy(in_of
)
873 # normalisation increase/decrease conditions
874 decrease
= Signal(reset_less
=True)
875 increase
= Signal(reset_less
=True)
876 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
877 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
880 # *sigh* not entirely obvious: count leading zeros (clz)
881 # with a PriorityEncoder: to find from the MSB
882 # we reverse the order of the bits.
883 temp_m
= Signal(mwid
, reset_less
=True)
884 temp_s
= Signal(mwid
+1, reset_less
=True)
885 clz
= Signal((len(in_z
.e
), True), reset_less
=True)
886 # make sure that the amount to decrease by does NOT
887 # go below the minimum non-INF/NaN exponent
888 limclz
= Mux(in_z
.exp_sub_n126
> pe
.o
, pe
.o
,
891 # cat round and guard bits back into the mantissa
892 temp_m
.eq(Cat(in_of
.round_bit
, in_of
.guard
, in_z
.m
)),
893 pe
.i
.eq(temp_m
[::-1]), # inverted
894 clz
.eq(limclz
), # count zeros from MSB down
895 temp_s
.eq(temp_m
<< clz
), # shift mantissa UP
896 self
.out_z
.e
.eq(in_z
.e
- clz
), # DECREASE exponent
897 self
.out_z
.m
.eq(temp_s
[2:]), # exclude bits 0&1
898 self
.out_of
.m0
.eq(temp_s
[2]), # copy of mantissa[0]
899 # overflow in bits 0..1: got shifted too (leave sticky)
900 self
.out_of
.guard
.eq(temp_s
[1]), # guard
901 self
.out_of
.round_bit
.eq(temp_s
[0]), # round
904 with m
.Elif(increase
):
905 temp_m
= Signal(mwid
+1, reset_less
=True)
907 temp_m
.eq(Cat(in_of
.sticky
, in_of
.round_bit
, in_of
.guard
,
909 ediff_n126
.eq(in_z
.N126
- in_z
.e
),
910 # connect multi-shifter to inp/out mantissa (and ediff)
912 msr
.diff
.eq(ediff_n126
),
913 self
.out_z
.m
.eq(msr
.m
[3:]),
914 self
.out_of
.m0
.eq(temp_s
[3]), # copy of mantissa[0]
915 # overflow in bits 0..1: got shifted too (leave sticky)
916 self
.out_of
.guard
.eq(temp_s
[2]), # guard
917 self
.out_of
.round_bit
.eq(temp_s
[1]), # round
918 self
.out_of
.sticky
.eq(temp_s
[0]), # sticky
919 self
.out_z
.e
.eq(in_z
.e
+ ediff_n126
),
925 class FPNorm1ModMulti
:
927 def __init__(self
, width
, single_cycle
=True):
929 self
.in_select
= Signal(reset_less
=True)
930 self
.out_norm
= Signal(reset_less
=True)
931 self
.in_z
= FPNumBase(width
, False)
932 self
.in_of
= Overflow()
933 self
.temp_z
= FPNumBase(width
, False)
934 self
.temp_of
= Overflow()
935 self
.out_z
= FPNumBase(width
, False)
936 self
.out_of
= Overflow()
938 def elaborate(self
, platform
):
941 m
.submodules
.norm1_out_z
= self
.out_z
942 m
.submodules
.norm1_out_overflow
= self
.out_of
943 m
.submodules
.norm1_temp_z
= self
.temp_z
944 m
.submodules
.norm1_temp_of
= self
.temp_of
945 m
.submodules
.norm1_in_z
= self
.in_z
946 m
.submodules
.norm1_in_overflow
= self
.in_of
948 in_z
= FPNumBase(self
.width
, False)
950 m
.submodules
.norm1_insel_z
= in_z
951 m
.submodules
.norm1_insel_overflow
= in_of
953 # select which of temp or in z/of to use
954 with m
.If(self
.in_select
):
955 m
.d
.comb
+= in_z
.copy(self
.in_z
)
956 m
.d
.comb
+= in_of
.copy(self
.in_of
)
958 m
.d
.comb
+= in_z
.copy(self
.temp_z
)
959 m
.d
.comb
+= in_of
.copy(self
.temp_of
)
960 # initialise out from in (overridden below)
961 m
.d
.comb
+= self
.out_z
.copy(in_z
)
962 m
.d
.comb
+= self
.out_of
.copy(in_of
)
963 # normalisation increase/decrease conditions
964 decrease
= Signal(reset_less
=True)
965 increase
= Signal(reset_less
=True)
966 m
.d
.comb
+= decrease
.eq(in_z
.m_msbzero
& in_z
.exp_gt_n126
)
967 m
.d
.comb
+= increase
.eq(in_z
.exp_lt_n126
)
968 m
.d
.comb
+= self
.out_norm
.eq(decrease | increase
) # loop-end
972 self
.out_z
.e
.eq(in_z
.e
- 1), # DECREASE exponent
973 self
.out_z
.m
.eq(in_z
.m
<< 1), # shift mantissa UP
974 self
.out_z
.m
[0].eq(in_of
.guard
), # steal guard (was tot[2])
975 self
.out_of
.guard
.eq(in_of
.round_bit
), # round (was tot[1])
976 self
.out_of
.round_bit
.eq(0), # reset round bit
977 self
.out_of
.m0
.eq(in_of
.guard
),
980 with m
.Elif(increase
):
982 self
.out_z
.e
.eq(in_z
.e
+ 1), # INCREASE exponent
983 self
.out_z
.m
.eq(in_z
.m
>> 1), # shift mantissa DOWN
984 self
.out_of
.guard
.eq(in_z
.m
[0]),
985 self
.out_of
.m0
.eq(in_z
.m
[1]),
986 self
.out_of
.round_bit
.eq(in_of
.guard
),
987 self
.out_of
.sticky
.eq(in_of
.sticky | in_of
.round_bit
)
993 class FPNorm1Single(FPState
, FPID
):
def __init__(self, width, id_wid, single_cycle=True):
    """Create the "normalise_1" state around FPNorm1ModSingle.

    width:        passed to the module and to ``out_z``
    id_wid:       passed to FPID.__init__
    single_cycle: accepted but not read anywhere in this constructor
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")

    self.mod = FPNorm1ModSingle(width)

    # this state's outputs
    self.out_norm = Signal(reset_less=True)
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
def setup(self, m, in_z, in_of, in_mid):
    """Connect the normalise module and, if present, the ID input.

    ``self.out_z`` is handed to the module as its output target.
    ``in_mid`` is only connected when ``self.in_mid`` is not None.
    """
    self.mod.setup(m, in_z, in_of, self.out_z)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
1011 def action(self
, m
):
1013 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1017 class FPNorm1Multi(FPState
, FPID
):
def __init__(self, width, id_wid):
    """Create the "normalise_1" state around FPNorm1ModMulti.

    width:  passed to the module and to the FP numbers created here
    id_wid: passed to FPID.__init__
    """
    FPID.__init__(self, id_wid)
    FPState.__init__(self, "normalise_1")

    self.mod = FPNorm1ModMulti(width)

    # handshake
    self.stb = Signal(reset_less=True)
    self.ack = Signal(reset=0, reset_less=True)
    self.out_norm = Signal(reset_less=True)
    self.in_accept = Signal(reset_less=True)

    # intermediate z / overflow
    self.temp_z = FPNumBase(width)
    self.temp_of = Overflow()

    # outputs
    self.out_z = FPNumBase(width)
    self.out_roundz = Signal(reset_less=True)
def setup(self, m, in_z, in_of, norm_stb, in_mid):
    """Connect the normalise module, the strobe and the ID input.

    The module's setup receives the inputs together with this state's
    ``in_accept``, ``temp_z``, ``temp_of``, ``out_z`` and ``out_norm``.
    ``in_mid`` is only connected when ``self.in_mid`` is not None.
    """
    self.mod.setup(
        m, in_z, in_of, norm_stb,
        self.in_accept, self.temp_z, self.temp_of,
        self.out_z, self.out_norm,
    )

    m.d.comb += self.stb.eq(norm_stb)
    # sets to zero when not in normalise_1 state
    m.d.sync += self.ack.eq(0)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
1045 def action(self
, m
):
1047 m
.d
.comb
+= self
.in_accept
.eq((~self
.ack
) & (self
.stb
))
1048 m
.d
.sync
+= self
.temp_of
.copy(self
.mod
.out_of
)
1049 m
.d
.sync
+= self
.temp_z
.copy(self
.out_z
)
1050 with m
.If(self
.out_norm
):
1051 with m
.If(self
.in_accept
):
1056 m
.d
.sync
+= self
.ack
.eq(0)
1058 # normalisation not required (or done).
1060 m
.d
.sync
+= self
.ack
.eq(1)
1061 m
.d
.sync
+= self
.out_roundz
.eq(self
.mod
.out_of
.roundz
)
1064 class FPNormToPack(FPState
, FPID
):
1066 def __init__(self
, width
, id_wid
):
1067 FPID
.__init
__(self
, id_wid
)
1068 FPState
.__init
__(self
, "normalise_1")
def setup(self, m, in_z, in_of, in_mid):
    """Chain normalise -> round -> corrections -> pack combinatorially.

    Each stage is created here from ``self.width`` and fed from the
    previous stage's output.  Only the pack module and ``out_z`` are
    kept on ``self``.  ``in_mid`` is only connected when
    ``self.in_mid`` is not None.
    """
    width = self.width

    # Normalisation (chained to input in_z+in_of)
    nmod = FPNorm1ModSingle(width)
    n_out_z = FPNumBase(width)
    n_out_roundz = Signal(reset_less=True)
    nmod.setup(m, in_z, in_of, n_out_z)

    # Rounding (chained to normalisation)
    rmod = FPRoundMod(width)
    r_out_z = FPNumBase(width)
    rmod.setup(m, n_out_z, n_out_roundz)
    m.d.comb += [
        n_out_roundz.eq(nmod.out_of.roundz),
        r_out_z.copy(rmod.out_z),
    ]

    # Corrections (chained to rounding)
    cmod = FPCorrectionsMod(width)
    c_out_z = FPNumBase(width)
    cmod.setup(m, r_out_z)
    m.d.comb += c_out_z.copy(cmod.out_z)

    # Pack (chained to corrections)
    self.pmod = FPPackMod(width)
    self.out_z = FPNumBase(width)
    self.pmod.setup(m, c_out_z)

    has_mid = self.in_mid is not None
    if has_mid:
        m.d.comb += self.in_mid.eq(in_mid)
1103 def action(self
, m
):
1104 self
.idsync(m
) # copies incoming ID to outgoing
1105 m
.d
.sync
+= self
.out_z
.v
.eq(self
.pmod
.out_z
.v
) # outputs packed result
1106 m
.next
= "pack_put_z"
class FPRoundMod:
    """ Combinatorial rounding stage.

        out_z mirrors in_z; when in_roundz is set the mantissa is
        incremented, and if the mantissa was all 1s the exponent is
        incremented as well.
    """

    def __init__(self, width):
        """ * width: bit-width handed to FPNumBase for in_z and out_z
        """
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module as submodule "roundz" of m and
            connects its inputs combinatorially
        """
        m.submodules.roundz = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_roundz.eq(roundz)

    def elaborate(self, platform):
        """ builds and returns the rounding logic
        """
        m = Module()
        # default: pass the number through; the branches below override
        # individual fields
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_roundz):
            # mantissa rounds up
            m.d.comb += self.out_z.m.eq(self.in_z.m + 1)
            with m.If(self.in_z.m == self.in_z.m1s):  # all 1s
                # exponent up
                m.d.comb += self.out_z.e.eq(self.in_z.e + 1)
        return m
class FPRound(FPState, FPID):
    """ "round" FSM state: wraps FPRoundMod and registers its result.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPRoundMod and FPNumBase
            * id_wid: handed to FPID.__init__
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs

            * in_z, roundz: passed straight to the rounding sub-module
            * in_mid: incoming ID; only connected when self.in_mid exists
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers ID and rounded value, then moves to "corrections"
        """
        # copies incoming ID to outgoing (as FPNormToPack.action does)
        self.idsync(m)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        m.next = "corrections"
class FPCorrectionsMod:
    """ Combinatorial corrections stage.

        out_z mirrors in_z, except that the exponent is replaced by
        in_z.N127 when in_z.is_denormalised is set.
    """

    def __init__(self, width):
        """ * width: bit-width handed to FPNumOut for in_z and out_z
        """
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs

            registers this module as submodule "corrections" of m
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ builds and returns the corrections logic
        """
        m = Module()
        m.submodules.corr_in_z = self.in_z
        m.submodules.corr_out_z = self.out_z
        # default pass-through; the exponent is overridden below
        m.d.comb += self.out_z.copy(self.in_z)
        with m.If(self.in_z.is_denormalised):
            m.d.comb += self.out_z.e.eq(self.in_z.N127)
        return m
class FPCorrections(FPState, FPID):
    """ "corrections" FSM state: wraps FPCorrectionsMod and registers
        its result.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPCorrectionsMod and FPNumBase
            * id_wid: handed to FPID.__init__
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs

            * in_z: passed straight to the corrections sub-module
            * in_mid: incoming ID; only connected when self.in_mid exists
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers ID and corrected value, then moves to "pack"
        """
        # copies incoming ID to outgoing (as FPNormToPack.action does)
        self.idsync(m)
        m.d.sync += self.out_z.copy(self.mod.out_z)
        # without a transition the FSM would never leave this state;
        # "pack" is the state FPPack registers itself under
        m.next = "pack"
class FPPackMod:
    """ Combinatorial packing stage.

        Drives out_z with inf(sign) when in_z.is_overflowed is set, and
        with create(sign, exponent, mantissa) otherwise.
    """

    def __init__(self, width):
        """ * width: bit-width handed to FPNumOut for in_z and out_z
        """
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs

            registers this module as submodule "pack" of m
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ builds and returns the packing logic
        """
        m = Module()
        m.submodules.pack_in_z = self.in_z
        with m.If(self.in_z.is_overflowed):
            m.d.comb += self.out_z.inf(self.in_z.s)
        with m.Else():
            # must be an Else branch: an unconditional create() after
            # the If would take priority over the inf() assignment
            m.d.comb += self.out_z.create(self.in_z.s, self.in_z.e,
                                          self.in_z.m)
        return m
class FPPack(FPState, FPID):
    """ "pack" FSM state: wraps FPPackMod and registers the packed value.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPPackMod and FPNumOut
            * id_wid: handed to FPID.__init__
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs

            * in_z: passed straight to the packing sub-module
            * in_mid: incoming ID; only connected when self.in_mid exists
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers ID and packed value, then moves to "pack_put_z"
        """
        # copies incoming ID to outgoing (as FPNormToPack.action does)
        self.idsync(m)
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        m.next = "pack_put_z"
class FPPutZ(FPState):
    """ Output FSM state: presents in_z on out_z and waits for the
        stb/ack handshake before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name this state is registered under
            * in_z: source of the value (its .v is read)
            * out_z: destination (its .v, .stb and .ack are used)
            * in_mid, out_mid: ID in/out; in_mid may be None
            * to_state: next state, defaults to "get_ops"
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads both of these, so they must be kept
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ drives the output value/strobe and handles the acknowledge
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        # the assignment only takes effect once added to a domain
        m.d.sync += [
            self.out_z.v.eq(self.in_z.v),
        ]
        with m.If(self.out_z.stb & self.out_z.ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
class FPPutZIdx(FPState):
    """ Output FSM state like FPPutZ, but the destination is selected
        from out_zs by in_mid.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name this state is registered under
            * in_z: source of the value (its .v is read)
            * out_zs: indexable collection of destinations, each with
              .v, .stb and .ack
            * in_mid: index into out_zs
            * to_state: next state, defaults to "get_ops"
        """
        FPState.__init__(self, state)
        if to_state is None:
            to_state = "get_ops"
        self.to_state = to_state
        # action() reads in_z, so it must be kept
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ drives the selected output's value/strobe and handles the
            acknowledge
        """
        target = self.out_zs[self.in_mid]
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [outz_stb.eq(target.stb),
                     outz_ack.eq(target.ack),
                    ]
        # the assignment only takes effect once added to a domain
        m.d.sync += [
            target.v.eq(self.in_z.v),
        ]
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += target.stb.eq(1)
class FPADDBaseMod(FPID):
    """ FP adder core: a list of FPState stages driven by a single FSM.

        The stage list is built either by get_compact_fragment() or by
        get_longer_fragment(), depending on the `compact` flag.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        # read by both fragment builders
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        # filled by add_state(), consumed by get_fragment()
        self.states = []

    def add_state(self, state):
        """ appends state to the FSM stage list and returns it, so that
            callers can write `x = self.add_state(X(...))`
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        # exactly one stage list may be built: both builders register
        # states under the same names
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-stage-per-state pipeline:
            get_ops, special cases, denormalise, align, add0, add1,
            normalise, round, corrections, pack, put_z
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): attribute names assumed -- FPGet2Op is defined
        # outside this view; confirm it exposes out_op1 / out_op2.
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                   pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced pipeline:
            get_ops, special cases + denormalise, align + add,
            normalise-to-pack, put_z
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        # NOTE(review): attribute names assumed -- FPGet2Op is defined
        # outside this view; confirm it exposes out_op1 / out_op2.
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                                    n1.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                   sc.in_mid, self.out_mid))
class FPADDBase(FPState, FPID):
    """ "fpadd" FSM state wrapping a complete FPADDBaseMod.

        Forwards operands, ID and the in/out handshakes to the wrapped
        adder and hands over to "put_z" once the adder's output has
        triggered.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        # out_z is not created here: it is supplied through setup()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links module to inputs and outputs

            * a, b: operands, forwarded to the wrapped adder
            * add_stb: strobe from the previous stage (registered)
            * in_mid: incoming ID
            * out_z, out_mid: externally owned result port and outgoing
              ID, driven combinatorially from the wrapped adder
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        # sets to zero when not in active state
        m.d.sync += self.add_ack.eq(0)
        m.d.sync += self.out_z.ack.eq(0)  # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM body for "fpadd": accept an operand pair, wait for the
            wrapped adder, then move to "put_z"
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),   # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge and hand over to the output state.
            # out_mid, out_z.v and out_z.stb are already driven from the
            # comb domain in setup(); driving them from sync here as
            # well would be a driver conflict, so they are left alone.
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0),
                        ]
            m.next = "put_z"
def __init__(self, width, id_wid, rs_sz=2):
    """ builds an Array of rs_sz result ports plus one input port

        * width: bit-width of each port
        * id_wid: width of the in_mid signal
        * rs_sz: number of result ports (new, defaulted: the name was
          previously read without ever being defined)
    """
    self.width = width
    self.id_wid = id_wid
    res = []
    for i in range(rs_sz):
        # NOTE(review): FPOp assumed as the port type, matching
        # self.in_z below -- confirm.
        out_z = FPOp(width)
        out_z.name = "out_z_%d" % i
        res.append(out_z)
    self.res = Array(res)
    self.in_z = FPOp(width)
    self.in_mid = Signal(self.id_wid, reset_less=True)
1515 def setup(self
, m
, in_z
, in_mid
):
1516 m
.d
.comb
+= [self
.in_z
.copy(in_z
),
1517 self
.in_mid
.eq(in_mid
)]
def get_fragment(self, platform=None):
    """ creates the HDL code-fragment holding the input port and the
        result ports as submodules
    """
    m = Module()
    m.submodules.res_in_z = self.in_z
    m.submodules += self.res
    return m
1536 """ FPADD: stages as follows:
1542 FPAddBase---> FPAddBaseMod
1544 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
1546 FPAddBase is tricky: it is both a stage and *has* stages.
1547 Connection to FPAddBaseMod therefore requires an in stb/ack
1548 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
1549 needs to be the thing that raises the incoming stb.
# FPADD constructor (the enclosing `class` line is not present in this
# listing).  Stores id_wid and single_cycle, creates an FPID helper in
# self.ids, names rs_sz operand pairs "in_a_<i>" / "in_b_<i>" and rs_sz
# results "out_z_<i>", and wraps the results in an nmigen Array (self.res).
# NOTE(review): the statements that create `rs`, `res`, `in_a`, `in_b` and
# `out_z`, and those that set self.rs, self.width and self.states (all of
# which other methods read), are not visible here -- confirm against the
# complete file before changing this method.
1552 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
1555 * width: bit-width of IEEE754. supported: 16, 32, 64
1556 * id_wid: an identifier that is sync-connected to the input
1557 * single_cycle: True indicates each stage to complete in 1 clock
1560 self
.id_wid
= id_wid
1561 self
.single_cycle
= single_cycle
1563 #self.out_z = FPOp(width)
1564 self
.ids
= FPID(id_wid
)
1567 for i
in range(rs_sz
):
1570 in_a
.name
= "in_a_%d" % i
1571 in_b
.name
= "in_b_%d" % i
1572 rs
.append((in_a
, in_b
))
1576 for i
in range(rs_sz
):
1578 out_z
.name
= "out_z_%d" % i
1580 self
.res
= Array(res
)
def add_state(self, state):
    """ appends state to self.states and returns it

        The return value matters: callers chain on it, e.g.
        `ab = self.add_state(ab)` followed by `ab.setup(...)`.
    """
    self.states.append(state)
    return state
# FPADD.get_fragment: builds the top-level FSM.
# Visible steps: add self.rs to the submodules; take the first operand
# pair (self.rs[0][0], self.rs[0][1]); create a local FPOp `out_z` and an
# `out_mid` signal; register the states "get_a" -> "get_b" -> "fpadd"
# (an FPADDBase) -> "put_z" (an FPPutZIdx writing into self.res); then
# open one FSM state per entry of self.states.
# NOTE(review): several statements are cut off in this listing -- the
# module creation, the tails of both FPGetOp(...) calls, the definitions
# of `a` and `b`, the tails of ab.setup(...) and FPPutZIdx(...), the body
# of the `with m.State(...)` block and the return.  Confirm against the
# complete file before changing this method.
1588 def get_fragment(self
, platform
=None):
1589 """ creates the HDL code-fragment for FPAdd
1592 m
.submodules
+= self
.rs
1594 in_a
= self
.rs
[0][0]
1595 in_b
= self
.rs
[0][1]
1597 out_z
= FPOp(self
.width
)
1598 out_mid
= Signal(self
.id_wid
, reset_less
=True)
1599 m
.submodules
.out_z
= out_z
1601 geta
= self
.add_state(FPGetOp("get_a", "get_b",
1606 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
1611 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
1612 ab
= self
.add_state(ab
)
1613 ab
.setup(m
, a
, b
, getb
.out_decode
, self
.ids
.in_mid
,
1616 pz
= self
.add_state(FPPutZIdx("put_z", ab
.out_z
, self
.res
,
1619 with m
.FSM() as fsm
:
1621 for state
in self
.states
:
1622 with m
.State(state
.state_from
):
if __name__ == "__main__":
    # Hand exactly one design to the nmigen command-line driver per run;
    # building both and calling main() twice would process the command
    # line twice.
    build_full_fpadd = True
    if build_full_fpadd:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() +
                        alu.rs[0][1].ports() +
                        alu.res[0].ports() +
                        [alu.ids.in_mid, alu.ids.out_mid])
    else:
        # NOTE(review): FPADDBase only receives out_z in setup(), so
        # this alternative needs setup() to have run before the ports
        # list can be built -- confirm before enabling.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] +
                        alu.in_t.ports() +
                        alu.out_z.ports() +
                        [alu.in_mid, alu.out_mid])
1643 # works... but don't use, just do "python fname.py convert -t v"
1644 #print (verilog.convert(alu, ports=[
1645 # ports=alu.in_a.ports() + \
1646 # alu.in_b.ports() + \
1647 # alu.out_z.ports())