allow code-creation
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8
9 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
10 from fpbase import MultiShiftRMerge, Trigger
11 #from fpbase import FPNumShiftMultiRight
12
class FPState(FPBase):
    """ Common base for the per-state helper objects in this file.

        state_from is stored as given; subclasses here pass FSM state
        names such as "align" or "add_0".
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ keeps the inputs mapping and mirrors each entry as an attribute
        """
        self.inputs = inputs
        for attr_name, value in inputs.items():
            setattr(self, attr_name, value)

    def set_outputs(self, outputs):
        """ keeps the outputs mapping and mirrors each entry as an attribute
        """
        self.outputs = outputs
        for attr_name, value in outputs.items():
            setattr(self, attr_name, value)
26
27
class FPGetOpMod:
    """ Operand fetch module: out_decode is (in_op.ack AND in_op.stb),
        and out_op is driven from in_op.v only while out_decode is high.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        handshake = self.in_op.ack & self.in_op.stb
        m.d.comb += self.out_decode.eq(handshake)
        m.submodules.get_op_in = self.in_op
        # operand is only passed through when the handshake completes
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
44
45
class FPGetOp(FPState):
    """ FSM state that waits for one operand and registers it in out_op
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ adds the fetch module as a submodule named after this state
            and wires it to in_op
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.copy(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ on out_decode: drop ack, capture the operand and move to
            out_state.  otherwise: keep ack raised.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
75
76
class FPGet2OpMod(Trigger):
    """ Two-operand fetch module: while the inherited trigger signal is
        high, both raw inputs are decoded into out_op1 / out_op2.
    """

    def __init__(self, width):
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        # start from the module built by Trigger and extend it
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2
        with m.If(self.trigger):
            m.d.comb += self.out_op1.decode(self.in_op1)
            m.d.comb += self.out_op2.decode(self.in_op2)
        return m
96
97
class FPGet2Op(FPState):
    """ FSM state that fetches both operands at once
    """

    def __init__(self, in_state, out_state, in_op1, in_op2, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width)
        self.in_op1 = in_op1
        self.in_op2 = in_op2
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op1, in_op2, in_stb, in_ack):
        """ adds the fetch module as submodule "get_ops" and connects
            operands, strobe and ack
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.in_op1.eq(in_op1),
            self.mod.in_op2.eq(in_op2),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            # the caller's ack follows the module's ack
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        """ on out_decode: drop ack, register both decoded operands and
            move to out_state.  otherwise: keep ack raised.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.out_op1.copy(self.mod.out_op1)
            m.d.sync += self.out_op2.copy(self.mod.out_op2)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
137
138
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        out_do_z is raised whenever one of the special cases produced
        the result in out_z; it is low when the normal add path is needed.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_do_z):
        """ registers this module as submodule "specialcases" and links
            it to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
            out_do_z.eq(self.out_do_z),
        ]

    def elaborate(self, platform):
        m = Module()
        a, b, z = self.in_a, self.in_b, self.out_z

        m.submodules.sc_in_a = a
        m.submodules.sc_in_b = b
        m.submodules.sc_out_z = z

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a.s != b.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(a.m == b.m)

        # either operand NaN: result is NaN
        with m.If(a.is_nan | b.is_nan):
            m.d.comb += [self.out_do_z.eq(1), z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling is under
        # review: the (+/-)zero-combined-with-NaN cases get no special
        # treatment here and fall under the NaN case above.

        # a is inf: result is inf with a's sign ...
        with m.Elif(a.is_inf):
            m.d.comb += [self.out_do_z.eq(1), z.inf(a.s)]
            # ... unless b has the maximum exponent too and the signs
            # differ, in which case the result is NaN
            with m.If(b.exp_128 & s_nomatch):
                m.d.comb += z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b.is_inf):
            m.d.comb += [self.out_do_z.eq(1), z.inf(b.s)]

        # both zero: sign is the AND of both signs
        with m.Elif(a.is_zero & b.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(a.s & b.s, b.e, b.m[3:-1]),
            ]

        # only a is zero: result is b
        with m.Elif(a.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(b.s, b.e, b.m[3:-1]),
            ]

        # only b is zero: result is a
        with m.Elif(b.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(a.s, a.e, a.m[3:-1]),
            ]

        # a == -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a.e == b.e)):
            m.d.comb += [self.out_do_z.eq(1), z.zero(0)]

        # nothing special: leave the work to the later stages
        with m.Else():
            m.d.comb += self.out_do_z.eq(0)

        return m
242
243
class FPID:
    """ Mixin that carries an optional multiplex ID next to the data.

        id_wid is the bit-width of the ID.  When it is falsy (None or 0)
        no ID signals are created and in_mid / out_mid are both None.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid (sync domain) when an ID exists
        """
        # check the signal itself rather than id_wid: __init__ creates
        # no signals for id_wid == 0, which "id_wid is not None" missed
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
257
258
class FPAddSpecialCases(FPState, FPID):
    """ FSM state "special_cases": NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links the special-cases module to the operands and, when an
            ID is in use, connects the incoming ID
        """
        self.mod.setup(m, in_a, in_b, self.out_do_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ special case hit: register the result value, go to "put_z".
            otherwise continue with "denormalise".
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            # only the packed value of the module's result is taken
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
286
287
class FPAddSpecialCasesDeNorm(FPState, FPID):
    """ FSM state "special_cases" with the denormalise step folded in:
        the special-cases module and the denormalise module both look at
        the same operands, and this state goes straight to "align".
        NOTE: some of the cases are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.smod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

        self.dmod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links both modules to the operands and, when an ID is in
            use, connects the incoming ID
        """
        self.smod.setup(m, in_a, in_b, self.out_do_z)
        self.dmod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ special case hit: register the result value, go to "put_z".
            otherwise register the denormalised operands, go to "align".
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            # only the packed value of the module's result is taken
            m.d.sync += self.out_z.v.eq(self.smod.out_z.v)
            m.next = "put_z"
        with m.Else():
            m.next = "align"
            m.d.sync += [
                self.out_a.copy(self.dmod.out_a),
                self.out_b.copy(self.dmod.out_b),
            ]
322
323
class FPAddDeNormMod(FPState):
    """ Denormalised-number handling, applied identically to a and b:
        an operand whose exp_n127 flag is set gets its exponent replaced
        by N126, any other operand gets its top mantissa bit set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b):
        """ registers this module as submodule "denormalise" and links
            it to the operands
        """
        m.submodules.denormalise = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b

        # same treatment for both operands, a first then b
        for inp, out in ((self.in_a, self.out_a), (self.in_b, self.out_b)):
            # default: pass through (selectively overridden below)
            m.d.comb += out.copy(inp)
            with m.If(inp.exp_n127):
                m.d.comb += out.e.eq(inp.N126)  # limit exponent
            with m.Else():
                m.d.comb += out.m[-1].eq(1)  # set top mantissa bit

        return m
359
360
class FPAddDeNorm(FPState, FPID):
    """ FSM state "denormalise": registers the outputs of FPAddDeNormMod
        and always moves on to "align"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        FPID.__init__(self, id_wid)
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links the module to the operands and, when an ID is in use,
            connects the incoming ID
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ unconditional: capture both adjusted operands, go to "align"
        """
        self.idsync(m)
        m.next = "align"
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
383
384
class FPAddAlignMultiMod(FPState):
    """ Multi-cycle alignment step.  Compares the two exponents and
        applies shift_down to whichever operand has the smaller one;
        exp_eq is raised only when the exponents are equal.

        NOTE: this does *not* do single-cycle multi-shifting: the FSM
        state using it stays in "align" until exp_eq is raised.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # defaults: not yet equal, operands passed through unchanged
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.copy(self.in_a),
            self.out_b.copy(self.in_b),
        ]

        # exponent comparisons
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        with m.If(agtb):
            # a's exponent is the larger one: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # b's exponent is the larger one: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: signal that alignment is complete
            m.d.comb += self.exp_eq.eq(1)
        return m
425
426
class FPAddAlignMulti(FPState, FPID):
    """ FSM state "align" (multi-cycle variant): registers the module's
        operands every cycle and leaves for "add_0" once exp_eq is set
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ adds the module as submodule "align", links it to the
            operands and, when an ID is in use, connects the incoming ID
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ capture the module outputs; no m.next is set until exp_eq
            is raised, at which point the next state is "add_0"
        """
        self.idsync(m)
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        with m.If(self.exp_eq):
            m.next = "add_0"
455
456
class FPAddAlignSingleMod:
    """ Single-cycle alignment using one shared variable-width shifter
    """

    def __init__(self, width):
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b):
        """ registers this module as submodule "align" and links it to
            the operands
        """
        m.submodules.align = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates,
            so there is only one: A or B is muxed into a temporary, and
            the shifted temporary is muxed back out, as only one of the
            operands ever needs to be aligned against the other
        """
        m = Module()
        a, b = self.in_a, self.in_b

        m.submodules.align_in_a = a
        m.submodules.align_in_b = b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(a.e), True)
        msr = MultiShiftRMerge(a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        m.d.comb += [
            # multi-shifter sits between t_inp and t_out, shifting by tdiff
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
            # exponent differences (both directions) and comparisons
            ediff.eq(a.e - b.e),
            ediffr.eq(b.e - a.e),
            elz.eq(a.e < b.e),
            egz.eq(a.e > b.e),
            # default: A-exp == B-exp, A and B untouched (fall through)
            self.out_a.copy(a),
            self.out_b.copy(b),
        ]

        # exponent of a greater than b: route b through the shifter
        with m.If(egz):
            m.d.comb += [
                t_inp.copy(b),
                tdiff.eq(ediff),
                self.out_b.copy(t_out),
                self.out_b.s.eq(b.s),  # sign is re-applied explicitly
            ]
        # exponent of b greater than a: route a through the shifter
        with m.Elif(elz):
            m.d.comb += [
                t_inp.copy(a),
                tdiff.eq(ediffr),
                self.out_a.copy(t_out),
                self.out_a.s.eq(a.s),  # sign is re-applied explicitly
            ]
        return m
536
537
class FPAddAlignSingle(FPState, FPID):
    """ FSM state "align" (single-cycle variant): registers the aligned
        operands and always moves on to "add_0"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links the module to the operands and, when an ID is in use,
            connects the incoming ID
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ capture the aligned operands, go to "add_0"
        """
        self.idsync(m)
        # NOTE: could be done as comb
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        m.next = "add_0"
560
561
class FPAddAlignSingleAdd(FPState, FPID):
    """ FSM state "align" that chains align, add stage 0 and add stage 1
        combinatorially.  Only the outputs of add stage 1 (out_z and
        out_of) are registered, after which the FSM goes to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)

        # alignment
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

        # add stage 0 (a0_out_z used to be constructed twice; the first
        # instance was thrown away, so it is now created only once)
        self.a0mod = FPAddStage0Mod(width)
        self.a0_out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)

        # add stage 1
        self.a1mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_a, in_b, in_mid):
        """ links the three modules together and to the operands

            all intermediate results (aligned operands, stage 0 sum and
            stage 0 z) are connected in the comb domain.
        """
        # align -> out_a / out_b
        self.mod.setup(m, in_a, in_b)
        m.d.comb += self.out_a.copy(self.mod.out_a)
        m.d.comb += self.out_b.copy(self.mod.out_b)

        # out_a / out_b -> add0 -> a0_out_z / out_tot
        self.a0mod.setup(m, self.out_a, self.out_b)
        m.d.comb += self.a0_out_z.copy(self.a0mod.out_z)
        m.d.comb += self.out_tot.eq(self.a0mod.out_tot)

        # a0_out_z / out_tot -> add1
        self.a1mod.setup(m, self.out_tot, self.a0_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ capture add stage 1's overflow and z, go to "normalise_1"
        """
        self.idsync(m)
        m.d.sync += self.out_of.copy(self.a1mod.out_of)
        m.d.sync += self.out_z.copy(self.a1mod.out_z)
        m.next = "normalise_1"
601
602
class FPAddStage0Mod:
    """ First stage of add: produces the mantissa sum (or difference)
        in out_tot, the result sign in out_z.s, and copies a's exponent
        into out_z.e.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b):
        """ registers this module as submodule "add0" and links it to
            the operands
        """
        m.submodules.add0 = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        m = Module()
        a, b, z = self.in_a, self.in_b, self.out_z

        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = z

        m.d.comb += z.e.eq(a.e)

        # intermediate tests, and mantissas zero-extended by one bit
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(a.m)+1, reset_less=True)
        bm0 = Signal(len(b.m)+1, reset_less=True)
        m.d.comb += seq.eq(a.s == b.s)    # signs equal
        m.d.comb += mge.eq(a.m >= b.m)    # a mantissa >= b mantissa
        m.d.comb += am0.eq(Cat(a.m, 0))
        m.d.comb += bm0.eq(Cat(b.m, 0))

        with m.If(seq):
            # same sign (both negative or both positive): add mantissas
            m.d.comb += self.out_tot.eq(am0 + bm0)
            m.d.comb += z.s.eq(a.s)
        with m.Elif(mge):
            # a mantissa is the larger (or equal): a - b, sign of a
            m.d.comb += self.out_tot.eq(am0 - bm0)
            m.d.comb += z.s.eq(a.s)
        with m.Else():
            # b mantissa is the larger: b - a, sign of b
            m.d.comb += self.out_tot.eq(bm0 - am0)
            m.d.comb += z.s.eq(b.s)
        return m
656
657
class FPAddStage0(FPState, FPID):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.  FSM state name is "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links the module to the operands and, when an ID is in use,
            connects the incoming ID
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ capture the stage's z and total, go to "add_1"
        """
        self.idsync(m)
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += [
            self.out_z.copy(self.mod.out_z),
            self.out_tot.eq(self.mod.out_tot),
        ]
        m.next = "add_1"
684
685
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the top bit of in_tot acts as
        a carry bit) and splits in_tot into mantissa and overflow bits.
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_tot, in_z):
        """ registers this module as submodule "add1" (and its overflow
            object as "add1_out_overflow") and links the inputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.out_of

        m.d.comb += [
            self.in_z.copy(in_z),
            self.in_tot.eq(in_tot),
        ]

    def elaborate(self, platform):
        m = Module()
        tot = self.in_tot
        ovf = self.out_of

        # default: z passes through (mantissa/exponent overridden below)
        m.d.comb += self.out_z.copy(self.in_z)

        with m.If(tot[-1]):
            # top bit set: the sum overflowed.  take the mantissa one bit
            # higher up, merge the two lowest bits into sticky and bump
            # the exponent
            m.d.comb += self.out_z.m.eq(tot[4:])
            m.d.comb += ovf.m0.eq(tot[4])
            m.d.comb += ovf.guard.eq(tot[3])
            m.d.comb += ovf.round_bit.eq(tot[2])
            m.d.comb += ovf.sticky.eq(tot[1] | tot[0])
            m.d.comb += self.out_z.e.eq(self.in_z.e + 1)
        with m.Else():
            # top bit clear: exponent stays as it is
            m.d.comb += self.out_z.m.eq(tot[3:])
            m.d.comb += ovf.m0.eq(tot[3])
            m.d.comb += ovf.guard.eq(tot[2])
            m.d.comb += ovf.round_bit.eq(tot[1])
            m.d.comb += ovf.sticky.eq(tot[0])
        return m
734
735
class FPAddStage1(FPState, FPID):
    """ FSM state "add_1": registers the outputs of FPAddStage1Mod,
        raises norm_stb and moves on to "normalise_1"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z, in_mid):
        """ links the module to its inputs and, when an ID is in use,
            connects the incoming ID
        """
        self.mod.setup(m, in_tot, in_z)

        # default for norm_stb: zero whenever not in the add1 state
        # (action() overrides this with 1)
        m.d.sync += self.norm_stb.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ capture overflow and z, strobe the normaliser, go to
            "normalise_1"
        """
        self.idsync(m)
        m.d.sync += [
            self.out_of.copy(self.mod.out_of),
            self.out_z.copy(self.mod.out_z),
            self.norm_stb.eq(1),
        ]
        m.next = "normalise_1"
762
763
class FPNorm1ModSingle:
    """ Single-cycle normalisation.

        Either shifts the mantissa up (decreasing the exponent) when its
        MSB is zero and the exponent is above the N126 limit, or shifts
        it down (increasing the exponent) when the exponent is below
        that limit.  Guard/round/sticky bits in out_of are updated to
        match the shifted mantissa.
    """

    def __init__(self, width):
        self.width = width
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, out_z):
        """ registers this module as submodule "normalise_1" and links
            it to inputs and outputs
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += out_z.copy(self.out_z)

    def elaborate(self, platform):
        m = Module()

        # working width: mantissa plus guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): msr is constructed with mwid, but the increase
        # branch feeds it the (mwid+1)-bit temp_m - confirm that the top
        # bit is not lost.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.copy(self.in_z)
        m.d.comb += in_of.copy(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            # (i.e. take the smaller of pe.o and exp_sub_n126)
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),     # guard
                self.out_of.round_bit.eq(temp_s[0]), # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                # the overflow bits are the low bits of the shifter
                # output.  (these used to read temp_s, which is only
                # driven in the decrease branch and is therefore zero
                # here, wiping guard/round/sticky.)
                self.out_of.m0.eq(msr.m[3]),        # copy of mantissa[0]
                self.out_of.guard.eq(msr.m[2]),     # guard
                self.out_of.round_bit.eq(msr.m[1]), # round
                self.out_of.sticky.eq(msr.m[0]),    # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
861
862
class FPNorm1ModMulti:
    """ Multi-cycle normalisation: each evaluation moves the mantissa by
        at most one bit position (up or down) and adjusts the exponent
        by one.  out_norm is raised while a further step is needed.

        in_select chooses the source: the fresh input (in_z / in_of)
        when high, the fed-back intermediate (temp_z / temp_of) when low.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, norm_stb,
              in_accept, temp_z, temp_of, out_z, out_norm):
        """ registers this module as submodule "normalise_1" and links
            it to inputs and outputs

            The parameter list matches the call made by
            FPNorm1Multi.setup, which previously failed because this
            class had no setup method.  norm_stb is accepted for that
            reason only and is not used here.
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        # source selection and fed-back intermediate values
        m.d.comb += self.in_select.eq(in_accept)
        m.d.comb += self.temp_z.copy(temp_z)
        m.d.comb += self.temp_of.copy(temp_of)

        m.d.comb += out_z.copy(self.out_z)
        m.d.comb += out_norm.eq(self.out_norm)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                # bits shifted out at the bottom accumulate into sticky
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
929
930
class FPNorm1Single(FPState, FPID):
    """ FSM state "normalise_1" (single-cycle variant): out_z is driven
        combinatorially by the module, out_roundz is registered, and the
        FSM always moves on to "round"
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links the module to z and overflow (its result lands in
            self.out_z) and, when an ID is in use, connects the incoming ID
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ capture the module's roundz flag, go to "round"
        """
        self.idsync(m)
        roundz = self.mod.out_of.roundz
        m.d.sync += self.out_roundz.eq(roundz)
        m.next = "round"
953
954
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1" (multi-cycle variant): feeds the
        module's output back in through temp_z / temp_of every cycle and
        stays in this state for as long as out_norm is raised
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links the module to inputs, feedback values and outputs,
            and, when an ID is in use, connects the incoming ID
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default for ack: zero whenever not in the normalise_1 state
        # (action() overrides this)
        m.d.sync += self.ack.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ in_accept is (stb and not ack).  while out_norm is high, ack
            is set to in_accept's value; once out_norm drops, ack is
            raised, roundz is captured and the FSM goes to "round"
        """
        self.idsync(m)
        m.d.comb += self.in_accept.eq(~self.ack & self.stb)
        # feedback path: this cycle's result is next cycle's temp input
        m.d.sync += [
            self.temp_of.copy(self.mod.out_of),
            self.temp_z.copy(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1000
1001
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" that chains normalisation, rounding,
        corrections and pack combinatorially.  Only the packed value is
        registered, after which the FSM goes to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ builds the four modules and links them in a chain.

            self.pmod and self.out_z are created here, not in __init__.
        """
        width = self.width

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(width)
        n_out_z = FPNumBase(width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(width)
        r_out_z = FPNumBase(width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += [
            n_out_roundz.eq(nmod.out_of.roundz),
            r_out_z.copy(rmod.out_z),
        ]

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(width)
        c_out_z = FPNumBase(width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(width)
        self.out_z = FPNumBase(width)
        self.pmod.setup(m, c_out_z)

        # Multiplex ID
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ capture ID and packed result, go to "pack_put_z"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        packed = self.pmod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)  # outputs packed result
        m.next = "pack_put_z"
1045
1046
class FPRoundMod:
    """ combinatorial rounding: passes in_z through to out_z, bumping
        the mantissa (and, on an all-1s mantissa, the exponent) when
        in_roundz is set.
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module as submodule "roundz" of m and
            connects its inputs
        """
        m.submodules.roundz = self

        m.d.comb += self.in_roundz.eq(roundz)
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        z = self.in_z

        # default: output is an unmodified copy of the input.  the
        # assignments below come later and so take priority.
        m.d.comb += self.out_z.copy(z)

        with m.If(self.in_roundz):
            # mantissa rounds up
            m.d.comb += self.out_z.m.eq(z.m + 1)
            # all 1s: exponent goes up as well
            with m.If(z.m == z.m1s):
                m.d.comb += self.out_z.e.eq(z.e + 1)

        return m
1068
1069
class FPRound(FPState, FPID):
    """ "round" state: registers the output of an FPRoundMod and moves
        on to "corrections"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs

            * in_z: number to be rounded
            * roundz: round-up flag
            * in_mid: incoming id (only connected when ids are in use)
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ latches id and rounded result, then goes to "corrections"
        """
        self.idsync(m)
        m.next = "corrections"
        m.d.sync += self.out_z.copy(self.mod.out_z)
1090
1091
class FPCorrectionsMod:
    """ combinatorial corrections: out_z is a copy of in_z, except that
        the exponent is replaced by in_z.N127 when in_z reports
        is_denormalised.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs

            registers this module as submodule "corrections" of m and
            copies in_z into the module's own input
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        z_in, z_out = self.in_z, self.out_z

        m.submodules.corr_in_z = z_in
        m.submodules.corr_out_z = z_out

        # default is a straight copy: the exponent override below is
        # added afterwards and so takes priority
        m.d.comb += z_out.copy(z_in)
        with m.If(z_in.is_denormalised):
            m.d.comb += z_out.e.eq(z_in.N127)

        return m
1112
1113
class FPCorrections(FPState, FPID):
    """ "corrections" state: registers the output of an FPCorrectionsMod
        and moves on to "pack"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs

            * in_z: number to be corrected
            * in_mid: incoming id (only connected when ids are in use)
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ latches id and corrected result, then goes to "pack"
        """
        self.idsync(m)
        m.next = "pack"
        m.d.sync += self.out_z.copy(self.mod.out_z)
1133
1134
class FPPackMod:
    """ combinatorial packing of in_z into out_z: out_z.inf() is used
        when in_z reports is_overflowed, out_z.create() otherwise.
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs

            registers this module as submodule "pack" of m and copies
            in_z into the module's own input
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        z = self.in_z
        m.submodules.pack_in_z = z

        with m.If(z.is_overflowed):
            # overflow: result built by inf() from the sign alone
            m.d.comb += self.out_z.inf(z.s)
        with m.Else():
            # otherwise built from sign, exponent and mantissa
            m.d.comb += self.out_z.create(z.s, z.e, z.m)

        return m
1155
1156
class FPPack(FPState, FPID):
    """ "pack" state: registers the packed value produced by an
        FPPackMod and moves on to "pack_put_z"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs

            * in_z: number to be packed
            * in_mid: incoming id (only connected when ids are in use)
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ latches id and packed value (.v only), then goes to
            "pack_put_z"
        """
        self.idsync(m)
        m.next = "pack_put_z"
        m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
1176
1177
class FPPutZ(FPState):
    """ output state: presents in_z (and the id, if in use) on out_z
        and holds it there until the stb/ack handshake completes.

        * state: name of the FSM state this object implements
        * in_z, out_z: source and destination (only .v is copied)
        * in_mid, out_mid: source and destination id; in_mid may be
          None, in which case no id is copied
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid):
        FPState.__init__(self, state)
        self.in_z, self.out_z = in_z, out_z
        self.in_mid, self.out_mid = in_mid, out_mid

    def action(self, m):
        """ copies id and value across, and runs the output handshake
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += self.out_z.v.eq(self.in_z.v)

        handshake_done = self.out_z.stb & self.out_z.ack
        with m.If(handshake_done):
            # value has been taken: drop the strobe, start over
            m.next = "get_ops"
            m.d.sync += self.out_z.stb.eq(0)
        with m.Else():
            # keep the strobe up until the receiver acknowledges
            m.d.sync += self.out_z.stb.eq(1)
1198
1199
class FPADDBaseMod(FPID):
    """ the FP adder's inner state machine: takes a pair of operands
        (in_a, in_b, triggered through in_t) and delivers the sum on
        out_z.  Two alternative stage layouts are available, selected
        by the "compact" argument.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        # FSM states, in the order they were added by add_state()
        self.states = []

    def add_state(self, state):
        """ records a state for inclusion in the FSM and returns it,
            so that creation and registration can be done in one go
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered stage, named by its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ creates and links up the full-length chain of stages:
            get_ops, special cases, denormalise, align, add (two
            stages), normalise, round, corrections, pack, plus the two
            output states "pack_put_z" and "put_z".

            each stage receives the id (in_mid) of the stage directly
            before it.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # only the alignment class differs: the link-up is identical
        if self.single_cycle:
            align_cls = FPAddAlignSingle
        else:
            align_cls = FPAddAlignMulti
        alm = self.add_state(align_cls(self.width, self.id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # id taken from add1 (the preceding stage), not add0
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.in_mid)
        else:
            # multi-cycle normalisation additionally needs add1's strobe
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add1.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        # id taken from cor (the preceding stage), not rn
        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, cor.in_mid)

        # normal exit: packed result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # exit for special cases: value and id both come from sc,
        # matching get_compact_fragment
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ creates and links up the reduced chain of stages: get_ops,
            special cases + denormalise, align + add, normalise-to-pack,
            plus the two output states "pack_put_z" and "put_z".
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, self.width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        # normal exit: packed result
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # exit for special cases
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1316
1317
class FPADDBase(FPState, FPID):
    """ the "fpadd" state of the outer FSM.  It wraps an FPADDBaseMod
        (which has an FSM of its own): operands are handed in with a
        stb/ack handshake, and the state is left once the inner module
        signals that its result has been taken.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        # note: out_z is not created here, it is supplied to setup()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links module to inputs and outputs

            * a, b: operands
            * add_stb: strobe indicating that a and b are valid
              (registered here, so seen one clock later)
            * in_mid: incoming id.  ignored when ids are not in use
            * out_z: where the result is to be delivered (.v/.stb/.ack)
            * out_mid: where the id is to be delivered.  ignored when
              ids are not in use
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        # ids are optional: in_mid is None when not in use (same test
        # as every other stage), in which case there is nothing to wire
        if self.in_mid is not None:
            m.d.comb += [self.in_mid.eq(in_mid),
                         self.mod.in_mid.eq(self.in_mid),
                         self.out_mid.eq(self.mod.out_mid),
                        ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM actions for the "fpadd" state: starts the inner adder
            when an operand pair is accepted, and returns to "get_a"
            once z_done is raised.

            result value, strobe and id are connected combinatorially
            in setup(), so nothing needs to be copied out here.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                            ]
        with m.Else():
            # done: acknowledge, and go back for the next operands
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "get_a"
1407
1408
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPADDBase---> FPADDBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPADDBase is both a stage of this FSM and the owner of a second
        FSM (FPADDBaseMod), so it is connected with an incoming stb/ack
        as well as an outgoing stb/ack.  The incoming strobe is raised
        by the FPGetOp stage for operand b.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=1):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b, out_z) operand sets to create.
              get_fragment currently connects only the first one.
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        # ids are held in a separate FPID object rather than on self
        self.ids = FPID(id_wid)

        # one (in_a, in_b, out_z) tuple per operand set, each FPOp
        # named after its role and index
        rs = []
        for i in range(rs_sz):
            ops = []
            for name_fmt in ("in_a_%d", "in_b_%d", "out_z_%d"):
                op = FPOp(width)
                ops.append(op)
            for op, name_fmt in zip(ops, ("in_a_%d", "in_b_%d", "out_z_%d")):
                op.name = name_fmt % i
            rs.append(tuple(ops))
        self.rs = Array(rs)

        self.states = []

    def add_state(self, state):
        """ records a state for inclusion in the FSM and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand set is used
        in_a, in_b, out_z = self.rs[0]

        # fetch a, then b ...
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)

        # ... then add: started by b's decode strobe, result delivered
        # straight to out_z / ids.out_mid
        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        ab.setup(m, geta.out_op, getb.out_op, getb.out_decode,
                 self.ids.in_mid, out_z, self.ids.out_mid)

        # one FSM state per registered stage, named by its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1491
1492
if __name__ == "__main__":
    # command-line entry point: hands the design and its ports over to
    # nmigen's main().  change "True" to "False" to build the inner
    # adder on its own instead of the full FPADD.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        in_a, in_b, out_z = alu.rs[0]
        main(alu, ports=(in_a.ports() +
                         in_b.ports() +
                         out_z.ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        # FPADDBaseMod, not FPADDBase: it is the one that provides
        # get_fragment() and creates out_z in its constructor
        # (FPADDBase only receives out_z through setup()).
        alu = FPADDBaseMod(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))
1506
1507
1508 # works... but don't use, just do "python fname.py convert -t v"
1509 #print (verilog.convert(alu, ports=[
1510 # ports=alu.in_a.ports() + \
1511 # alu.in_b.ports() + \
1512 # alu.out_z.ports())