merge specialcases and denorm into single combinatorial chain
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8
9 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
10 from fpbase import MultiShiftRMerge, Trigger
11 #from fpbase import FPNumShiftMultiRight
12
class FPState(FPBase):
    """ common base for the per-state helper objects in this file.

        state_from is stored as given; the subclasses in this file pass
        a state-name string (e.g. "align", "round").
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def _publish(self, signals):
        """ makes each (name, value) pair of the dict an attribute of self
        """
        for name, value in signals.items():
            setattr(self, name, value)

    def set_inputs(self, inputs):
        """ records the inputs dict and exposes its entries as attributes
        """
        self.inputs = inputs
        self._publish(inputs)

    def set_outputs(self, outputs):
        """ records the outputs dict and exposes its entries as attributes
        """
        self.outputs = outputs
        self._publish(outputs)
26
27
class FPGetOpMod:
    """ combinatorial part of operand fetch: out_decode goes high when
        both ack and stb of the incoming operand are set, and while it
        is high the operand value is presented on out_op.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        op = self.in_op
        # handshake complete: both sides agree
        comb += self.out_decode.eq((op.ack) & (op.stb))
        m.submodules.get_op_in = op
        with m.If(self.out_decode):
            comb += self.out_op.eq(op.v)
        return m
44
45
class FPGetOp(FPState):
    """ FSM state that fetches a single operand, then moves to out_state
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the fetch module (under this state's name) and
            wires the operand into it
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.copy(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ latch the operand and advance once decoded, otherwise keep
            raising ack
        """
        ack = self.in_op.ack
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += ack.eq(1)
75
76
class FPGet2OpMod(Trigger):
    """ combinatorial part of the two-operand fetch: while the inherited
        trigger is high, both raw inputs are decoded into FPNumIn form.
    """

    def __init__(self, width):
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        # start from the Trigger's own module and add to it
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2
        with m.If(self.trigger):
            m.d.comb += self.out_op1.decode(self.in_op1)
            m.d.comb += self.out_op2.decode(self.in_op2)
        return m
96
97
class FPGet2Op(FPState):
    """ FSM state that fetches both operands at once, then moves on
        to out_state
    """

    def __init__(self, in_state, out_state, in_op1, in_op2, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width)
        self.in_op1 = in_op1
        self.in_op2 = in_op2
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op1, in_op2, in_stb, in_ack):
        """ registers the fetch module and wires operands and the
            stb/ack handshake to it
        """
        mod = self.mod
        m.submodules.get_ops = mod
        m.d.comb += [
            mod.in_op1.eq(in_op1),
            mod.in_op2.eq(in_op2),
            mod.stb.eq(in_stb),
            self.out_ack.eq(mod.ack),
            self.out_decode.eq(mod.trigger),
            in_ack.eq(mod.ack),
        ]

    def action(self, m):
        """ once triggered: drop ack, latch both decoded operands and
            advance.  otherwise keep ack raised.
        """
        mod = self.mod
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += mod.ack.eq(0)
            m.d.sync += self.out_op1.copy(mod.out_op1)
            m.d.sync += self.out_op2.copy(mod.out_op2)
        with m.Else():
            m.d.sync += mod.ack.eq(1)
137
138
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        out_do_z is raised whenever one of the special cases applies,
        in which case out_z holds the result.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_do_z):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
            out_do_z.eq(self.out_do_z),
        ]

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        a, b, z = self.in_a, self.in_b, self.out_z
        do_z = self.out_do_z

        m.submodules.sc_in_a = a
        m.submodules.sc_in_b = b
        m.submodules.sc_out_z = z

        # signs differ
        s_nomatch = Signal()
        comb += s_nomatch.eq(a.s != b.s)

        # mantissas identical
        m_match = Signal()
        comb += m_match.eq(a.m == b.m)

        # either operand NaN: result is NaN
        with m.If(a.is_nan | b.is_nan):
            comb += do_z.eq(1)
            comb += z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # a is inf: result is inf with a's sign ...
        with m.Elif(a.is_inf):
            comb += do_z.eq(1)
            comb += z.inf(a.s)
            # ... unless b has exp_128 set and the signs differ: NaN
            with m.If(b.exp_128 & s_nomatch):
                comb += z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b.is_inf):
            comb += do_z.eq(1)
            comb += z.inf(b.s)

        # both zero: sign is the AND of the two signs
        with m.Elif(a.is_zero & b.is_zero):
            comb += do_z.eq(1)
            comb += z.create(a.s & b.s, b.e, b.m[3:-1])

        # only a is zero: result is b
        with m.Elif(a.is_zero):
            comb += do_z.eq(1)
            comb += z.create(b.s, b.e, b.m[3:-1])

        # only b is zero: result is a
        with m.Elif(b.is_zero):
            comb += do_z.eq(1)
            comb += z.create(a.s, a.e, a.m[3:-1])

        # a == -b: result is (+ve) zero
        with m.Elif(s_nomatch & m_match & (a.e == b.e)):
            comb += do_z.eq(1)
            comb += z.zero(0)

        # nothing special: carry on with the normal add path
        with m.Else():
            comb += do_z.eq(0)

        return m
242
243
class FPID:
    """ mixin carrying an optional ID alongside the data.

        id_wid is the bit-width of the ID.  When it is falsy (None or 0)
        no ID signals are created: in_mid and out_mid are both None and
        idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ copies the incoming ID to the outgoing ID on the sync domain.

            guarded on the signal itself rather than on id_wid, so that
            id_wid == 0 (where __init__ creates no signals) is a no-op
            instead of an attempt to call .eq on None.
        """
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
257
258
class FPAddSpecialCases(FPState, FPID):
    """ FSM state "special_cases": NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b, self.out_do_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ special case hit: latch the result value and go to "put_z".
            otherwise continue to "denormalise".
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            # only the packed value of the module's result is taken
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
286
287
class FPAddSpecialCasesDeNorm(FPState, FPID):
    """ FSM state "special_cases" with the denormalise step merged in:
        the special-cases module and the denormalise module are both fed
        from the same inputs, so one state does the work of two.
        see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        # special-cases part
        self.smod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

        # denormalise part
        self.dmod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links both modules to the inputs
        """
        self.smod.setup(m, in_a, in_b, self.out_do_z)
        self.dmod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ special case hit: latch the result value and go to "put_z".
            otherwise latch the denormalised operands and go to "align".
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            # only the packed value of the module's result is taken
            m.d.sync += self.out_z.v.eq(self.smod.out_z.v)
            m.next = "put_z"
        with m.Else():
            m.next = "align"
            m.d.sync += [
                self.out_a.copy(self.dmod.out_a),
                self.out_b.copy(self.dmod.out_b),
            ]
322
323
class FPAddDeNormMod(FPState):
    """ denormalised-number adjustment, applied identically to a and b:
        when exp_n127 is set the exponent is limited to N126, otherwise
        the top mantissa bit is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b

        # same treatment for each operand: a first, then b
        operands = ((self.in_a, self.out_a), (self.in_b, self.out_b))
        for inp, out in operands:
            m.d.comb += out.copy(inp)
            with m.If(inp.exp_n127):
                m.d.comb += out.e.eq(inp.N126)  # limit exponent
            with m.Else():
                m.d.comb += out.m[-1].eq(1)  # set top mantissa bit

        return m
359
360
class FPAddDeNorm(FPState, FPID):
    """ FSM state "denormalise": registers the outputs of
        FPAddDeNormMod and unconditionally moves to "align"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        FPID.__init__(self, id_wid)
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # Denormalised Number checks
        m.next = "align"
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
383
384
class FPAddAlignMultiMod(FPState):
    """ one step of exponent alignment.  whichever operand has the
        smaller exponent gets a shift_down applied; exp_eq is raised
        when the exponents already match.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        a, b = self.in_a, self.in_b

        m.submodules.align_in_a = a
        m.submodules.align_in_b = b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       the user of this module *STAYS* in the align state
        #       until exponents match

        # defaults: not yet equal, operands passed straight through
        comb += self.exp_eq.eq(0)
        comb += self.out_a.copy(a)
        comb += self.out_b.copy(b)

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        comb += agtb.eq(a.e > b.e)
        comb += altb.eq(a.e < b.e)

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            comb += self.out_b.shift_down(b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            comb += self.out_a.shift_down(a)
        # exponents equal: signal that the next stage may be entered
        with m.Else():
            comb += self.exp_eq.eq(1)

        return m
425
426
class FPAddAlignMulti(FPState, FPID):
    """ FSM state "align" (multi-cycle variant): registers the output
        of FPAddAlignMultiMod every cycle and only leaves for "add_0"
        once the module reports equal exponents
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        with m.If(self.exp_eq):
            m.next = "add_0"
455
456
class FPAddAlignSingleMod:
    """ single-cycle exponent alignment using one shared shifter
    """

    def __init__(self, width):
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        comb = m.d.comb
        a, b = self.in_a, self.in_b

        m.submodules.align_in_a = a
        m.submodules.align_in_b = b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(a.e), True)
        msr = MultiShiftRMerge(a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences (both directions) and comparisons
        comb += [
            ediff.eq(a.e - b.e),
            ediffr.eq(b.e - a.e),
            elz.eq(a.e < b.e),
            egz.eq(a.e > b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        comb += self.out_a.copy(a)
        comb += self.out_b.copy(b)

        # only one shifter (muxed)
        # exponent of a greater than b: shift b down
        with m.If(egz):
            comb += t_inp.copy(b)
            comb += tdiff.eq(ediff)
            comb += self.out_b.copy(t_out)
            comb += self.out_b.s.eq(b.s)  # sign comes from the original
        # exponent of b greater than a: shift a down
        with m.Elif(elz):
            comb += t_inp.copy(a)
            comb += tdiff.eq(ediffr)
            comb += self.out_a.copy(t_out)
            comb += self.out_a.s.eq(a.s)  # sign comes from the original

        return m
529
530
class FPAddAlignSingle(FPState, FPID):
    """ FSM state "align" (single-cycle variant): registers the output
        of FPAddAlignSingleMod and unconditionally moves to "add_0"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
        ]
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # NOTE: could be done as comb
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        m.next = "add_0"
555
556
class FPAddStage0Mod:
    """ mantissa add/subtract.  same signs: mantissas are added.
        different signs: the smaller mantissa is subtracted from the
        larger and the result takes the sign of the larger.
        the result exponent is a's exponent.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        a, b, z = self.in_a, self.in_b, self.out_z

        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = z

        comb += z.e.eq(a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(a.m)+1, reset_less=True)
        bm0 = Signal(len(b.m)+1, reset_less=True)
        comb += seq.eq(a.s == b.s)
        comb += mge.eq(a.m >= b.m)
        comb += am0.eq(Cat(a.m, 0))
        comb += bm0.eq(Cat(b.m, 0))

        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            comb += self.out_tot.eq(am0 + bm0)
            comb += z.s.eq(a.s)
        # a mantissa greater than (or equal to) b, use a
        with m.Elif(mge):
            comb += self.out_tot.eq(am0 - bm0)
            comb += z.s.eq(a.s)
        # b mantissa greater than a, use b
        with m.Else():
            comb += self.out_tot.eq(bm0 - am0)
            comb += z.s.eq(b.s)

        return m
603
604
class FPAddStage0(FPState, FPID):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM state "add_0": registers the module's outputs and moves
        to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
        ]
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += [
            self.out_z.copy(self.mod.out_z),
            self.out_tot.eq(self.mod.out_tot),
        ]
        m.next = "add_1"
633
634
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the top bit of tot is kinda
        a carry bit) and splits tot into mantissa plus overflow bits
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        tot, of = self.in_tot, self.out_of

        comb += self.out_z.copy(self.in_z)

        # top bit of tot gets set when the sum overflows: take the
        # mantissa one bit further up and bump the exponent
        with m.If(tot[-1]):
            comb += self.out_z.m.eq(tot[4:])
            comb += of.m0.eq(tot[4])
            comb += of.guard.eq(tot[3])
            comb += of.round_bit.eq(tot[2])
            comb += of.sticky.eq(tot[1] | tot[0])
            comb += self.out_z.e.eq(self.in_z.e + 1)
        # top bit of tot zero case
        with m.Else():
            comb += self.out_z.m.eq(tot[3:])
            comb += of.m0.eq(tot[3])
            comb += of.guard.eq(tot[2])
            comb += of.round_bit.eq(tot[1])
            comb += of.sticky.eq(tot[0])

        return m
674
675
class FPAddStage1(FPState, FPID):
    """ FSM state "add_1": registers the outputs of FPAddStage1Mod,
        raises norm_stb and moves to "normalise_1"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self.mod
        m.submodules.add1_out_overflow = self.out_of

        m.d.comb += [
            self.mod.in_z.copy(in_z),
            self.mod.in_tot.eq(in_tot),
        ]

        # sets to zero when not in add1 state
        m.d.sync += self.norm_stb.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += [
            self.out_of.copy(self.mod.out_of),
            self.out_z.copy(self.mod.out_z),
            self.norm_stb.eq(1),
        ]
        m.next = "normalise_1"
706
707
class FPNorm1ModSingle:
    """ single-cycle normalisation.

        "decrease": the mantissa MSB is zero and the exponent is above
        the minimum, so the mantissa is shifted UP (by a limited
        count-leading-zeros amount) and the exponent decreased.

        "increase": the exponent is below the minimum, so the mantissa
        is shifted DOWN through a MultiShiftRMerge and the exponent
        increased by the same amount.
    """

    def __init__(self, width):
        self.width = width
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, out_z):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += out_z.copy(self.out_z)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.copy(self.in_z)
        m.d.comb += in_of.copy(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),      # guard
                self.out_of.round_bit.eq(temp_s[0]),  # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                # the overflow bits must come from the shifter output.
                # temp_s belongs to the "decrease" branch and is never
                # driven here, so reading it would zero all four bits.
                self.out_of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                self.out_of.guard.eq(msr.m[2]),      # guard
                self.out_of.round_bit.eq(msr.m[1]),  # round
                self.out_of.sticky.eq(msr.m[0]),     # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
805
806
class FPNorm1ModMulti:
    """ one step of multi-cycle normalisation: shifts the mantissa by a
        single bit (up or down) per evaluation.  out_norm stays high
        while a further step is still required.

        in_select chooses the source: in_z/in_of when set, otherwise
        temp_z/temp_of (the result of the previous step).
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, norm_stb, in_select, temp_z, temp_of,
              out_z, out_norm):
        """ links module to inputs and outputs

            argument order matches the call made by FPNorm1Multi.setup.
            norm_stb is accepted for that reason only: this module has
            no strobe input, so it is not connected here.
        """
        m.submodules.normalise_1 = self

        # inputs
        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)
        m.d.comb += self.in_select.eq(in_select)
        m.d.comb += self.temp_z.copy(temp_z)
        m.d.comb += self.temp_of.copy(temp_of)

        # outputs
        m.d.comb += out_z.copy(self.out_z)
        m.d.comb += out_norm.eq(self.out_norm)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),      # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),     # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),      # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),   # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
873
874
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1" (single-cycle variant): the normalised
        number is wired combinatorially to out_z by the module's setup;
        only the roundz flag is registered here, before moving to
        "round"
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        roundz = self.mod.out_of.roundz
        m.d.sync += self.out_roundz.eq(roundz)
        m.next = "round"
897
898
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1" (multi-cycle variant): stays in this
        state, feeding each step's result back in through temp_z and
        temp_of, until the module no longer asks for normalisation.
        then moves to "round".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # fresh input is accepted when strobed and not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # keep this step's result for the next step
        m.d.sync += [
            self.temp_of.copy(self.mod.out_of),
            self.temp_z.copy(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
944
945
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" that chains normalisation, rounding,
        corrections and packing combinatorially, registering only the
        packed value before moving to "pack_put_z"
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        width = self.width

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(width)
        n_out_z = FPNumBase(width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(width)
        r_out_z = FPNumBase(width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += [
            n_out_roundz.eq(nmod.out_of.roundz),
            r_out_z.copy(rmod.out_z),
        ]

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(width)
        c_out_z = FPNumBase(width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(width)
        self.out_z = FPNumBase(width)
        self.pmod.setup(m, c_out_z)

        # Multiplex ID
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)  # copies incoming ID to outgoing
        packed = self.pmod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)  # outputs packed result
        m.next = "pack_put_z"
989
990
class FPRoundMod:
    """ rounding: when in_roundz is set the mantissa is incremented,
        and if the mantissa was all 1s the exponent is incremented too
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ links module to inputs
        """
        m.submodules.roundz = self

        m.d.comb += [
            self.in_z.copy(in_z),
            self.in_roundz.eq(roundz),
        ]

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        z = self.in_z
        # default: pass straight through
        comb += self.out_z.copy(z)
        with m.If(self.in_roundz):
            comb += self.out_z.m.eq(z.m + 1)  # mantissa rounds up
            with m.If(z.m == z.m1s):  # all 1s
                comb += self.out_z.e.eq(z.e + 1)  # exponent up
        return m
1012
1013
class FPRound(FPState, FPID):
    """ FSM state "round": registers the output of FPRoundMod and
        moves to "corrections"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        rounded = self.mod.out_z
        m.d.sync += self.out_z.copy(rounded)
        m.next = "corrections"
1034
1035
class FPCorrectionsMod:
    """ combinatorial corrections module.

        out_z mirrors in_z, except that the exponent is replaced by
        in_z.N127 whenever in_z.is_denormalised is true.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ adds this module to m as submodule "corrections" and
            connects its input combinatorially
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        src = self.in_z
        dst = self.out_z
        m.submodules.corr_in_z = src
        m.submodules.corr_out_z = dst

        # default: pass the number through untouched
        m.d.comb += dst.copy(src)
        with m.If(src.is_denormalised):
            # denormalised result: override the exponent
            m.d.comb += dst.e.eq(src.N127)
        return m
1056
1057
class FPCorrections(FPState, FPID):
    """ "corrections" FSM state: owns an FPCorrectionsMod and
        registers its output before handing over to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ connects the corrections module (and, if present, the ID)
            to the supplied inputs
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the corrected number and advances the FSM
        """
        self.idsync(m)
        corrected = self.mod.out_z
        m.d.sync += self.out_z.copy(corrected)
        m.next = "pack"
1077
1078
class FPPackMod:
    """ combinatorial pack module.

        drives out_z with out_z.inf(sign) when in_z.is_overflowed is
        true, otherwise with out_z.create(sign, exponent, mantissa).
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ adds this module to m as submodule "pack" and connects
            its input combinatorially
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        src = self.in_z
        dst = self.out_z
        m.submodules.pack_in_z = src

        with m.If(src.is_overflowed):
            # overflow: emit infinity carrying the input sign
            m.d.comb += dst.inf(src.s)
        with m.Else():
            # otherwise assemble from sign, exponent and mantissa
            m.d.comb += dst.create(src.s, src.e, src.m)
        return m
1099
1100
class FPPack(FPState, FPID):
    """ "pack" FSM state: owns an FPPackMod and registers its packed
        value before handing over to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ connects the pack module (and, if present, the ID)
            to the supplied inputs
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers the packed value and advances the FSM
        """
        self.idsync(m)
        packed_value = self.mod.out_z.v
        m.d.sync += self.out_z.v.eq(packed_value)
        m.next = "pack_put_z"
1120
1121
class FPPutZ(FPState):
    """ output state: presents in_z on out_z using a stb/ack handshake
        and returns to "get_ops" once the handshake has completed.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid):
        FPState.__init__(self, state)
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ registers ID and value, then drives the output strobe
        """
        # the ID is optional: only forwarded when one is in use
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        m.d.sync += self.out_z.v.eq(self.in_z.v)

        handshake_done = self.out_z.stb & self.out_z.ack
        with m.If(handshake_done):
            # receiver acknowledged: drop the strobe, start over
            m.d.sync += self.out_z.stb.eq(0)
            m.next = "get_ops"
        with m.Else():
            # keep the strobe raised until acknowledged
            m.d.sync += self.out_z.stb.eq(1)
1142
1143
class FPADDBaseMod(FPID):
    """ FP adder core: an FSM running from "get_ops" through to the
        output states "pack_put_z" / "put_z".

        Two pipelines can be built (selected by ``compact``):

        * longer:  get_ops, special cases, denorm, align, add0, add1,
                   norm1, round, corrections, pack
        * compact: get_ops, special cases + denorm (one state), align,
                   add0, add1, norm-to-pack (one state)
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it,
            so that creation and registration can be chained
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # every registered state becomes one FSM state, named after
        # the state's state_from attribute
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the pipeline with one FSM state per processing step
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: both variants are wired to the denorm outputs
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # NOTE(review): normalisation receives add0.in_mid rather than
        # add1.in_mid - left as found, confirm this is intentional.
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        # NOTE(review): pack receives rn.in_mid rather than cor.in_mid -
        # left as found, confirm this is intentional.
        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the normal (packed) result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output of the special-cases result
        # NOTE(review): uses pa.in_mid here, whereas the compact variant
        # uses sc.in_mid for the same state - confirm which is intended.
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the pipeline with a reduced number of FSM states:
            special cases and denorm share one state, and normalisation
            through to pack share another
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        # alignment: there is no separate denorm state in this variant,
        # so both aligners must be fed from the merged special-cases +
        # denorm state.  (the multi-cycle branch previously referenced
        # an undefined name "dn", raising NameError.)
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # NOTE(review): receives add0.in_mid rather than add1.in_mid -
        # left as found, confirm this is intentional.
        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)

        # output of the normal (packed) result
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # output of the special-cases result
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1270
1271
class FPADDBase(FPState, FPID):
    """ "fpadd" FSM state that wraps an FPADDBaseMod submodule.

        It is a state of the outer FSM and at the same time drives the
        inner module through in_t (a Trigger) and watches the inner
        module's out_z trigger to learn when the result is ready.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        # out_z and out_mid are not created here: setup() receives them

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the inner adder module to inputs and outputs

            * m: Module receiving the statements and the submodule
            * a, b: operands, passed on to the inner module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: incoming ID
            * out_z, out_mid: outputs, driven from the inner module
        """
        self.out_z = out_z
        self.out_mid = out_mid
        # NOTE(review): in_mid is connected unconditionally here, unlike
        # the "is not None" guards used by the other states - confirm
        # this is safe when no id_wid is given.
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.in_mid.eq(in_mid),
                     self.mod.in_mid.eq(self.in_mid),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_mid.eq(self.mod.out_mid),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: starts the inner adder when an operand pair is
            offered, and returns to "get_a" once the result is done.

            out_z and out_mid are not handled here: setup() already
            drives them combinatorially from the inner module.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                            ]
        with m.Else():
            # done: acknowledge, drop the trigger, go back for operands
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "get_a"
1361
1362
class FPADD(FPID):
    """ FPADD: top-level adder.  stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is the tricky part, being a stage that itself *has*
        stages.  Its connection to FPAddBaseMod therefore needs an
        incoming stb/ack pair as well as an outgoing stb/ack pair.
        As with the Add1-Norm1 interaction, it is FPGetOp that has to
        raise the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.in_a = FPOp(width)
        self.in_b = FPOp(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ records state as one of the FSM states and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.in_a = self.in_a
        m.submodules.in_b = self.in_b
        m.submodules.out_z = self.out_z

        # first operand: "get_a" -> "get_b"
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      self.in_a, self.width))
        geta.setup(m, self.in_a)

        # second operand: "get_b" -> "fpadd"
        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      self.in_b, self.width))
        getb.setup(m, self.in_b)

        # the adder itself, strobed by the second operand's decode
        adder = self.add_state(FPADDBase(self.width, self.id_wid,
                                         self.single_cycle))
        adder.setup(m, geta.out_op, getb.out_op, getb.out_decode,
                    self.in_mid, self.out_z, self.out_mid)

        # one FSM state per registered stage, named by state_from
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1435
1436
if __name__ == "__main__":
    # hard-wired switch between the full adder and the bare FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = (alu.in_a.ports()
                     + alu.in_b.ports()
                     + alu.out_z.ports()
                     + [alu.in_mid, alu.out_mid])
        main(alu, ports=port_list)
    else:
        # NOTE(review): this branch is never taken; FPADDBase only
        # gains out_z inside setup(), so verify before enabling it.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = ([alu.in_a, alu.in_b]
                     + alu.in_t.ports()
                     + alu.out_z.ports()
                     + [alu.in_mid, alu.out_mid])
        main(alu, ports=port_list)
1450
1451
1452 # works... but don't use, just do "python fname.py convert -t v"
1453 #print (verilog.convert(alu, ports=[
1454 # ports=alu.in_a.ports() + \
1455 # alu.in_b.ports() + \
1456 # alu.out_z.ports())