create specialcasesmod setup fn
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8
9 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
10 from fpbase import MultiShiftRMerge, Trigger
11 #from fpbase import FPNumShiftMultiRight
12
class FPState(FPBase):
    """ Common base for the per-state helper objects in this file.

        state_from is stored as given; subclasses in this file pass the
        FSM state name they represent (e.g. "align", "round").
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ remembers the inputs mapping and mirrors each entry as an
            attribute of the same name
        """
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """ remembers the outputs mapping and mirrors each entry as an
            attribute of the same name
        """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
26
27
class FPGetOpMod:
    """ Combinatorial operand fetch.

        out_decode is asserted when in_op has both ack and stb set; while
        it is asserted, out_op carries in_op.v.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        op = self.in_op
        m.submodules.get_op_in = op

        # handshake complete: both sides agree
        m.d.comb += self.out_decode.eq(op.ack & op.stb)

        # only forward the operand value on a completed handshake
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(op.v)

        return m
44
45
class FPGetOp(FPState):
    """ FSM state that fetches a single operand.

        Waits (raising in_op.ack) until the wrapped FPGetOpMod reports a
        completed handshake, then latches the operand and moves on to
        out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the module (under this state's name) and wires
            its input operand and decode flag
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.copy(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ per-state FSM behaviour """
        with m.If(self.out_decode):
            # operand received: drop ack, capture value, advance
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            # still waiting: keep acknowledging
            m.d.sync += self.in_op.ack.eq(1)
75
76
class FPGet2OpMod(Trigger):
    """ Combinatorial two-operand fetch built on Trigger.

        While the inherited trigger signal is asserted, both raw input
        operands are decoded into out_op1 / out_op2.
    """

    def __init__(self, width):
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        # start from the Trigger's own module, then extend it
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2

        with m.If(self.trigger):
            m.d.comb += self.out_op1.decode(self.in_op1)
            m.d.comb += self.out_op2.decode(self.in_op2)

        return m
96
97
class FPGet2Op(FPState):
    """ FSM state that fetches both operands at once.

        Keeps the module's ack raised until its trigger fires, then
        captures the two decoded operands and moves to out_state.
    """

    def __init__(self, in_state, out_state, in_op1, in_op2, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width)
        self.in_op1 = in_op1
        self.in_op2 = in_op2
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op1, in_op2, in_stb, in_ack):
        """ registers the module as "get_ops" and connects operands,
            strobe, ack (both local copy and caller's in_ack) and the
            decode flag
        """
        mod = self.mod
        m.submodules.get_ops = mod
        m.d.comb += [
            mod.in_op1.eq(in_op1),
            mod.in_op2.eq(in_op2),
            mod.stb.eq(in_stb),
            self.out_ack.eq(mod.ack),
            self.out_decode.eq(mod.trigger),
            in_ack.eq(mod.ack),
        ]

    def action(self, m):
        """ per-state FSM behaviour """
        with m.If(self.out_decode):
            # both operands available: stop acking, latch, advance
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.out_op1.copy(self.mod.out_op1)
            m.d.sync += self.out_op2.copy(self.mod.out_op2)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
137
138
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Purely combinatorial: out_do_z is 1 when one of the special cases
        matched (and out_z then holds the result), 0 otherwise.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_do_z):
        """ registers this module as "specialcases" and connects its
            operands and the caller's out_do_z flag
        """
        m.submodules.specialcases = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
            out_do_z.eq(self.out_do_z),
        ]

    def elaborate(self, platform):
        m = Module()

        # short names for the operands and the result
        a, b, z = self.in_a, self.in_b, self.out_z

        m.submodules.sc_in_a = a
        m.submodules.sc_in_b = b
        m.submodules.sc_out_z = z

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a.s != b.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(a.m == b.m)

        # either operand NaN: result is NaN
        with m.If(a.is_nan | b.is_nan):
            m.d.comb += [self.out_do_z.eq(1), z.nan(0)]

        # NOTE: an alternative FP16 non-canonical NaN handling (returning
        # a sign-adjusted copy of the NaN operand when the other operand
        # is zero) used to live here; it is disabled and under review.

        # a is inf: result is inf with a's sign...
        with m.Elif(a.is_inf):
            m.d.comb += [self.out_do_z.eq(1), z.inf(a.s)]
            # ...unless b has exponent 128 and opposite sign: NaN
            with m.If(b.exp_128 & s_nomatch):
                m.d.comb += z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b.is_inf):
            m.d.comb += [self.out_do_z.eq(1), z.inf(b.s)]

        # both zero: sign is the AND of both signs, rest taken from b
        with m.Elif(a.is_zero & b.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(a.s & b.s, b.e, b.m[3:-1]),
            ]

        # only a is zero: result is b
        with m.Elif(a.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(b.s, b.e, b.m[3:-1]),
            ]

        # only b is zero: result is a
        with m.Elif(b.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(a.s, a.e, a.m[3:-1]),
            ]

        # a == -b (same exponent and mantissa, opposite sign): +ve zero
        with m.Elif(s_nomatch & m_match & (a.e == b.e)):
            m.d.comb += [self.out_do_z.eq(1), z.zero(0)]

        # nothing special: caller carries on to the denormalise stage
        with m.Else():
            m.d.comb += self.out_do_z.eq(0)

        return m
242
243
class FPID:
    """ Mixin carrying an optional "multiplex ID" alongside the data.

        id_wid is the bit-width of the ID.  When it is falsy (None or 0)
        no ID signals are created: in_mid and out_mid are both None and
        idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers (sync domain) a copy of in_mid into out_mid.

            The guard tests the signal itself rather than id_wid, so it
            always agrees with __init__: a width of 0 creates no signals
            and must therefore not try to copy them.
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
257
258
class FPAddSpecialCases(FPState, FPID):
    """ FSM state "special_cases": NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        The detection itself is done by FPAddSpecialCasesMod; this class
        only latches the result and picks the next state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b, self.out_do_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        with m.If(self.out_do_z):
            # a special case matched: only the packed value is needed
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
286
287
class FPAddDeNormMod(FPState):
    """ Combinatorial denormalisation of both operands.

        Each output starts as a copy of its input; then, if the input's
        exp_n127 flag is set, the exponent is replaced with N126,
        otherwise the top mantissa bit is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b

        # identical treatment for a and b, in that order
        for inp, out in ((self.in_a, self.out_a), (self.in_b, self.out_b)):
            m.d.comb += out.copy(inp)
            with m.If(inp.exp_n127):
                m.d.comb += out.e.eq(inp.N126)  # limit exponent
            with m.Else():
                m.d.comb += out.m[-1].eq(1)  # set top mantissa bit

        return m
316
317
class FPAddDeNorm(FPState, FPID):
    """ FSM state "denormalise": registers the outputs of
        FPAddDeNormMod and always proceeds to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        FPID.__init__(self, id_wid)
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
        ]
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        m.next = "align"
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
342
343
class FPAddAlignMultiMod(FPState):
    """ Combinatorial part of multi-cycle alignment.

        Compares the two exponents.  The operand with the smaller
        exponent is passed through shift_down(); when the exponents are
        equal both operands pass through untouched and exp_eq is raised.
        This is *not* a single-cycle aligner: the owning state stays in
        "align" until exp_eq is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # defaults (selectively overridden in the branches below)
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.copy(self.in_a),
            self.out_b.copy(self.in_b),
        ]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        with m.If(agtb):
            # a's exponent is larger: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # b's exponent is larger: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: alignment finished
            m.d.comb += self.exp_eq.eq(1)

        return m
384
385
class FPAddAlignMulti(FPState, FPID):
    """ FSM state "align" (multi-cycle variant).

        Registers the module's outputs every cycle and only leaves for
        "add_0" once the module reports equal exponents.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        # no m.next unless aligned: the FSM remains in this state
        with m.If(self.exp_eq):
            m.next = "add_0"
414
415
class FPAddAlignSingleMod:
    """ Single-cycle alignment of the two operands.

        Only one (expensive) variable shifter is instantiated; whichever
        operand has the smaller exponent is muxed into it and the result
        is muxed back out to the matching output.
    """

    def __init__(self, width):
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  Done in a *single* cycle using a
            variable-width bit-shift (MultiShiftRMerge).
        """
        m = Module()
        a, b = self.in_a, self.in_b

        m.submodules.align_in_a = a
        m.submodules.align_in_b = b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporaries: the (muxed) shifter input and output
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(a.e), True)
        msr = MultiShiftRMerge(a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # shifter wiring: mantissa goes through msr, exponent is bumped
        # by the same amount, sign passes straight through
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences in both directions, and the comparisons
        m.d.comb += [
            ediff.eq(a.e - b.e),
            ediffr.eq(b.e - a.e),
            elz.eq(a.e < b.e),
            egz.eq(a.e > b.e),
        ]

        # default (equal exponents): both operands pass through
        m.d.comb += self.out_a.copy(a)
        m.d.comb += self.out_b.copy(b)

        with m.If(egz):
            # a's exponent is larger: b goes through the shifter
            m.d.comb += t_inp.copy(b)
            m.d.comb += tdiff.eq(ediff)
            m.d.comb += self.out_b.copy(t_out)
            m.d.comb += self.out_b.s.eq(b.s)  # keep b's sign
        with m.Elif(elz):
            # b's exponent is larger: a goes through the shifter
            m.d.comb += t_inp.copy(a)
            m.d.comb += tdiff.eq(ediffr)
            m.d.comb += self.out_a.copy(t_out)
            m.d.comb += self.out_a.s.eq(a.s)  # keep a's sign

        return m
488
489
class FPAddAlignSingle(FPState, FPID):
    """ FSM state "align" (single-cycle variant): registers the
        outputs of FPAddAlignSingleMod and always proceeds to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
        ]
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        # NOTE: could be done as comb
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        m.next = "add_0"
514
515
class FPAddStage0Mod:
    """ Combinatorial first add stage.

        Produces out_tot (the mantissa sum or difference, computed on
        mantissas zero-extended by one bit) together with the result's
        sign and exponent in out_z.  The exponent is always taken from a.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        a, b, z = self.in_a, self.in_b, self.out_z

        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = z

        m.d.comb += z.e.eq(a.e)

        # intermediate tests, and the zero-extended mantissas
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(a.m)+1, reset_less=True)
        bm0 = Signal(len(b.m)+1, reset_less=True)
        m.d.comb += seq.eq(a.s == b.s)
        m.d.comb += mge.eq(a.m >= b.m)
        m.d.comb += am0.eq(Cat(a.m, 0))
        m.d.comb += bm0.eq(Cat(b.m, 0))

        with m.If(seq):
            # same sign: add the mantissas, keep that sign
            m.d.comb += self.out_tot.eq(am0 + bm0)
            m.d.comb += z.s.eq(a.s)
        with m.Elif(mge):
            # signs differ, a's mantissa is >= b's: a - b, sign of a
            m.d.comb += self.out_tot.eq(am0 - bm0)
            m.d.comb += z.s.eq(a.s)
        with m.Else():
            # signs differ, b's mantissa is larger: b - a, sign of b
            m.d.comb += self.out_tot.eq(bm0 - am0)
            m.d.comb += z.s.eq(b.s)

        return m
562
563
class FPAddStage0(FPState, FPID):
    """ FSM state "add_0": first stage of add.  Covers same-sign (add)
        and subtract, special-casing which mantissa is greater or equal
        to give greatest accuracy.  Always proceeds to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
        ]
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += [
            self.out_z.copy(self.mod.out_z),
            self.out_tot.eq(self.mod.out_tot),
        ]
        m.next = "add_1"
592
593
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.

        Looks at the top bit of in_tot (a carry-like bit).  If it is
        set, the sum is taken one bit further up, the exponent is
        incremented and the two lowest bits are OR-ed into sticky;
        otherwise the sum is split directly into mantissa, guard, round
        and sticky.
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()
        tot = self.in_tot
        of = self.out_of

        # start from a straight copy; mantissa/exponent overridden below
        m.d.comb += self.out_z.copy(self.in_z)

        with m.If(tot[-1]):
            # top bit set: the sum overflowed, take it one bit higher
            m.d.comb += self.out_z.m.eq(tot[4:])
            m.d.comb += of.m0.eq(tot[4])
            m.d.comb += of.guard.eq(tot[3])
            m.d.comb += of.round_bit.eq(tot[2])
            m.d.comb += of.sticky.eq(tot[1] | tot[0])
            m.d.comb += self.out_z.e.eq(self.in_z.e + 1)
        with m.Else():
            # top bit clear: exponent stays as copied
            m.d.comb += self.out_z.m.eq(tot[3:])
            m.d.comb += of.m0.eq(tot[3])
            m.d.comb += of.guard.eq(tot[2])
            m.d.comb += of.round_bit.eq(tot[1])
            m.d.comb += of.sticky.eq(tot[0])

        return m
633
634
class FPAddStage1(FPState, FPID):
    """ FSM state "add_1": registers the outputs of FPAddStage1Mod,
        pulses norm_stb and proceeds to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self.mod
        m.submodules.add1_out_overflow = self.out_of

        m.d.comb += [
            self.mod.in_z.copy(in_z),
            self.mod.in_tot.eq(in_tot),
        ]

        # default outside the add_1 state; action() overrides it with 1
        m.d.sync += self.norm_stb.eq(0)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        m.d.sync += [
            self.out_of.copy(self.mod.out_of),
            self.out_z.copy(self.mod.out_z),
            self.norm_stb.eq(1),
        ]
        m.next = "normalise_1"
665
666
class FPNorm1ModSingle:
    """ Single-cycle normalisation.

        Two mutually exclusive corrections are applied to in_z/in_of:

        * "decrease": the mantissa's MSB is zero and the exponent is
          above the minimum: shift the mantissa UP by the number of
          leading zeros (limited so the exponent does not drop below
          the minimum) and reduce the exponent by the same amount.
        * "increase": the exponent is below the minimum: shift the
          mantissa DOWN through a MultiShiftRMerge and raise the
          exponent to N126.

        If neither applies the inputs are passed through unchanged.
    """

    def __init__(self, width):
        self.width = width
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, out_z):
        """ links module to inputs and outputs

            Registers this module as "normalise_1", drives its inputs
            from in_z/in_of and drives the caller's out_z from the
            module's result.
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += out_z.copy(self.out_z)

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.copy(self.in_z)
        m.d.comb += in_of.copy(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),      # guard
                self.out_of.round_bit.eq(temp_s[0]),  # round
            ]
        # increase exponent
        with m.Elif(increase):
            # NOTE(review): temp_m is mwid+1 bits wide while msr was
            # constructed with mwid; confirm MultiShiftRMerge's input
            # width is what is intended here.
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                # The overflow bits must come from the shifter OUTPUT.
                # (temp_s belongs to the "decrease" branch and is not
                # driven when this branch is active, so reading it here
                # always yielded zeros.)
                self.out_of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                self.out_of.guard.eq(msr.m[2]),      # guard
                self.out_of.round_bit.eq(msr.m[1]),  # round
                self.out_of.sticky.eq(msr.m[0]),     # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
764
765
class FPNorm1ModMulti:
    """ One step of multi-cycle normalisation.

        Each evaluation shifts the mantissa by a single bit (up or down)
        and adjusts the exponent by one.  out_norm is raised while a
        further step is still required.  in_select chooses whether the
        step starts from the fresh inputs (in_z/in_of) or from the
        previous step's registered result (temp_z/temp_of).
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, norm_stb,
                    in_select, temp_z, temp_of,
                    out_z, out_norm):
        """ links module to inputs and outputs

            Mirrors FPNorm1ModSingle.setup: registers this module as
            "normalise_1", drives in_z, in_of, in_select, temp_z and
            temp_of from the given sources, and drives the caller's
            out_z and out_norm from the module's results.

            norm_stb is accepted so that the argument list matches the
            call made by FPNorm1Multi.setup; it is not used here.
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += self.in_select.eq(in_select)
        m.d.comb += self.temp_z.copy(temp_z)
        m.d.comb += self.temp_of.copy(temp_of)

        m.d.comb += out_z.copy(self.out_z)
        m.d.comb += out_norm.eq(self.out_norm)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),      # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),     # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),      # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),   # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
832
833
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1" (single-cycle variant).

        out_z is driven combinatorially by FPNorm1ModSingle (via its
        setup); only the roundz flag is registered here.  Always
        proceeds to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        m.next = "round"
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
856
857
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1" (multi-cycle variant).

        Feeds the module's result back in (through temp_z/temp_of) one
        step per cycle for as long as the module raises out_norm, then
        registers the roundz flag and proceeds to "round".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs

            NOTE(review): relies on FPNorm1ModMulti offering a setup()
            that takes exactly this argument list.
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default outside the normalise_1 state; action() overrides it
        m.d.sync += self.ack.eq(0)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        # accept fresh input only when strobed and not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed this step's result back for the next one
        m.d.sync += [
            self.temp_of.copy(self.mod.out_of),
            self.temp_z.copy(self.out_z),
        ]
        with m.If(self.out_norm):
            # more normalisation needed: stay in this state
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
903
904
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" that chains normalisation, rounding,
        corrections and packing combinatorially, registering only the
        final packed value before going to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs

            Builds the four modules and the intermediate numbers that
            connect each one to the next.  self.pmod and self.out_z
            are created here (not in __init__).
        """
        width = self.width

        # stage 1: normalisation, fed from in_z + in_of
        nmod = FPNorm1ModSingle(width)
        n_out_z = FPNumBase(width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # stage 2: rounding, fed from normalisation
        rmod = FPRoundMod(width)
        r_out_z = FPNumBase(width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += [
            n_out_roundz.eq(nmod.out_of.roundz),
            r_out_z.copy(rmod.out_z),
        ]

        # stage 3: corrections, fed from rounding
        cmod = FPCorrectionsMod(width)
        c_out_z = FPNumBase(width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # stage 4: pack, fed from corrections
        self.pmod = FPPackMod(width)
        self.out_z = FPNumBase(width)
        self.pmod.setup(m, c_out_z)

        # multiplex ID
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)  # copies incoming ID to outgoing
        m.next = "pack_put_z"
        # register the packed result
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v)
948
949
class FPRoundMod:
    """ Combinatorial rounding.

        out_z is a copy of in_z unless in_roundz is set, in which case
        the mantissa is incremented, and the exponent too if the
        mantissa was all ones (equal to in_z.m1s).
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module as "roundz" and connects its inputs
        """
        m.submodules.roundz = self

        m.d.comb += [
            self.in_z.copy(in_z),
            self.in_roundz.eq(roundz),
        ]

    def elaborate(self, platform):
        m = Module()
        z_in, z_out = self.in_z, self.out_z

        m.d.comb += z_out.copy(z_in)
        with m.If(self.in_roundz):
            m.d.comb += z_out.m.eq(z_in.m + 1)  # mantissa rounds up
            with m.If(z_in.m == z_in.m1s):  # all 1s
                m.d.comb += z_out.e.eq(z_in.e + 1)  # exponent up

        return m
971
972
class FPRound(FPState, FPID):
    """ FSM state "round": registers the output of FPRoundMod and
        always proceeds to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        m.next = "corrections"
        m.d.sync += self.out_z.copy(self.mod.out_z)
993
994
class FPCorrectionsMod:
    """ Combinatorial post-rounding correction.

        out_z is a copy of in_z, except that when in_z reports
        is_denormalised the exponent is replaced with N127.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers this module as "corrections" and connects its
            input
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        z_in, z_out = self.in_z, self.out_z

        m.submodules.corr_in_z = z_in
        m.submodules.corr_out_z = z_out

        m.d.comb += z_out.copy(z_in)
        with m.If(z_in.is_denormalised):
            m.d.comb += z_out.e.eq(z_in.N127)

        return m
1015
1016
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": registers the output of
        FPCorrectionsMod and always proceeds to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ per-state FSM behaviour """
        self.idsync(m)
        m.next = "pack"
        m.d.sync += self.out_z.copy(self.mod.out_z)
1036
1037
class FPPackMod:
    """ combinatorial pack stage: out_z is set to infinity (with in_z's
        sign) when in_z.is_overflowed, otherwise it is created from
        in_z's sign, exponent and mantissa.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        setattr(m.submodules, "pack", self)
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ builds the pack logic
        """
        m = Module()
        z_in = self.in_z
        m.submodules.pack_in_z = z_in

        with m.If(z_in.is_overflowed):
            # overflow: result is infinity carrying the input sign
            m.d.comb += self.out_z.inf(z_in.s)
        with m.Else():
            m.d.comb += self.out_z.create(z_in.s, z_in.e, z_in.m)

        return m
1058
1059
class FPPack(FPState, FPID):
    """ FSM state "pack": wraps an FPPackMod and registers the packed
        value into out_z.v, then moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, state_from="pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return  # no id signal on this stage: nothing else to connect
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ state action: sync the id, latch the packed value, advance
        """
        self.idsync(m)
        packed = self.mod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)
        m.next = "pack_put_z"
1079
1080
class FPPutZ(FPState):
    """ output state: presents in_z on out_z and holds the strobe high
        until out_z.stb and out_z.ack are both set, then returns to
        "get_ops".
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid):
        FPState.__init__(self, state_from=state)
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ state action: copy id (if any) and value, run the handshake
        """
        # the id is only forwarded when one was supplied
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        m.d.sync += self.out_z.v.eq(self.in_z.v)

        handshake_done = self.out_z.stb & self.out_z.ack
        with m.If(handshake_done):
            # receiver acknowledged: drop the strobe and start over
            m.d.sync += self.out_z.stb.eq(0)
            m.next = "get_ops"
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
1101
1102
class FPADDBaseMod(FPID):
    """ the FP add state machine proper: get_ops, special cases,
        denormalise, align, add (two stages), then either the separate
        normalise/round/corrections/pack states or the single compact
        FPNormToPack state, and finally the put-z output states.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        # FSM states, in the order they were added
        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered stage, keyed on its state_from name
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def _add_front_stages(self, m):
        """ adds the stages shared by both fragment variants
            (get_ops, special cases, denorm, align, add0, add1).

            returns (sc, add0, add1): the special-cases stage and the
            two add stages, which the back-end stages connect to.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # the two alignment variants are set up identically
        if self.single_cycle:
            align_stage = FPAddAlignSingle
        else:
            align_stage = FPAddAlignMulti
        alm = self.add_state(align_stage(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        return sc, add0, add1

    def get_longer_fragment(self, m, platform=None):
        """ adds all stages, with normalise, round, corrections and pack
            as separate states.
        """
        sc, add0, add1 = self._add_front_stages(m)

        # NOTE(review): several stages below take their id from an
        # earlier stage than their direct predecessor (add0, rn, pa).
        # left as-is; confirm against FPID that this is intended.
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ adds all stages, with a single FPNormToPack state after the
            add stages.
        """
        sc, add0, add1 = self._add_front_stages(m)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1232
1233
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd": owns an FPADDBaseMod submodule, triggers it
        when an operand pair is strobed in, and returns to "get_a" once
        the submodule signals that its result is done.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        # out_z and out_mid are not created here: setup() receives them

        self.z_done = Signal(reset_less=True)  # driven by mod.out_z.trigger
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links module to inputs and outputs

            * a, b: operands, passed through to the submodule
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid, out_mid: id in/out; only connected when this stage
              has an id signal
            * out_z: output that the submodule's out_z is wired to
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        # same guard the other stages use: with no id there is no
        # in_mid signal to connect (id_wid defaults to None)
        if self.in_mid is not None:
            m.d.comb += [self.in_mid.eq(in_mid),
                         self.mod.in_mid.eq(self.in_mid),
                         self.out_mid.eq(self.mod.out_mid),
                        ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ state action: start the add on an accepted strobe, and
            leave for "get_a" when the submodule reports done.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),   # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                            ]
        with m.Else():
            # done: acknowledge and go back for the next operand.
            # (out_z and out_mid are already driven combinatorially
            # from the submodule in setup)
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "get_a"
1323
1324
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.in_a = FPOp(width)
        self.in_b = FPOp(width)
        self.out_z = FPOp(width)

        # FSM states, in the order they were added
        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.in_a = self.in_a
        m.submodules.in_b = self.in_b
        m.submodules.out_z = self.out_z

        # first operand: "get_a" hands over to "get_b"
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      self.in_a, self.width))
        geta.setup(m, self.in_a)
        op_a = geta.out_op

        # second operand: "get_b" hands over to "fpadd"
        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      self.in_b, self.width))
        getb.setup(m, self.in_b)
        op_b = getb.out_op

        # the add itself; its strobe is the decode of the second operand
        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        ab.setup(m, op_a, op_b, getb.out_decode, self.in_mid,
                 self.out_z, self.out_mid)

        # one FSM state per registered stage, keyed on its state_from name
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1397
1398
if __name__ == "__main__":
    # build the complete adder and hand it to the nmigen command line.
    # (the former "else" alternative built an FPADDBase directly and read
    # alu.out_z, which FPADDBase only gains in setup(); it was unreachable
    # behind "if True" and has been dropped)
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    ports = (alu.in_a.ports() +
             alu.in_b.ports() +
             alu.out_z.ports() +
             [alu.in_mid, alu.out_mid])
    main(alu, ports=ports)
1412
1413
1414 # works... but don't use, just do "python fname.py convert -t v"
1415 #print (verilog.convert(alu, ports=[
1416 # ports=alu.in_a.ports() + \
1417 # alu.in_b.ports() + \
1418 # alu.out_z.ports())