create combined combinatorial align and add0
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8
9 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
10 from fpbase import MultiShiftRMerge, Trigger
11 #from fpbase import FPNumShiftMultiRight
12
class FPState(FPBase):
    """ Common base for one named state of the adder state machine.

        Keeps the state's name in ``state_from`` and offers two helpers
        that store a dict of inputs / outputs and additionally expose each
        dict entry as an attribute of the same name.
    """

    def __init__(self, state_from):
        # name of the FSM state this object implements
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ records the inputs dict and mirrors its entries as attributes
        """
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """ records the outputs dict and mirrors its entries as attributes
        """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
26
27
class FPGetOpMod:
    """ Combinatorial operand fetch.

        ``out_decode`` is raised when both ``in_op.ack`` and ``in_op.stb``
        are set; while it is raised, ``out_op`` carries ``in_op.v``.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op

        # handshake complete: ack and stb both asserted
        handshake = self.in_op.ack & self.in_op.stb
        m.d.comb += self.out_decode.eq(handshake)

        # only pass the operand value through on a completed handshake
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)

        return m
44
45
class FPGetOp(FPState):
    """ FSM state that fetches one operand through an FPGetOpMod.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state      # state to move to once decoded
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the fetch module (under this state's name) and
            connects it to the incoming operand
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.copy(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ FSM body: latch the operand and advance once decoded,
            otherwise keep requesting it by raising ack
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
75
76
class FPGet2OpMod(Trigger):
    """ Decodes two raw operands into FPNumIn records.

        The decode is only applied while ``self.trigger`` is raised
        (presumably provided by the Trigger base class - see fpbase).
    """

    def __init__(self, width):
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        # start from the module built by Trigger, then add the decoders
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2

        with m.If(self.trigger):
            m.d.comb += self.out_op1.decode(self.in_op1)
            m.d.comb += self.out_op2.decode(self.in_op2)

        return m
96
97
class FPGet2Op(FPState):
    """ FSM state that fetches both operands at once via FPGet2OpMod.
    """

    def __init__(self, in_state, out_state, in_op1, in_op2, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state      # state to move to once decoded
        self.mod = FPGet2OpMod(width)
        self.in_op1 = in_op1
        self.in_op2 = in_op2
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op1, in_op2, in_stb, in_ack):
        """ registers the decode module and wires operands, strobe and
            acknowledge to and from it
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.in_op1.eq(in_op1),
            self.mod.in_op2.eq(in_op2),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            # the caller-supplied ack follows the module's ack
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        """ FSM body: on trigger, drop ack, latch both decoded operands
            and advance; otherwise keep ack raised
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.out_op1.copy(self.mod.out_op1)
            m.d.sync += self.out_op2.copy(self.mod.out_op2)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
137
138
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        ``out_do_z`` is raised whenever one of the special cases applies,
        in which case ``out_z`` holds the result to emit directly.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_do_z):
        """ registers this module and links it to its inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
            out_do_z.eq(self.out_do_z),
        ]

    def elaborate(self, platform):
        m = Module()

        # short aliases for readability of the case table below
        a, b, z = self.in_a, self.in_b, self.out_z

        m.submodules.sc_in_a = a
        m.submodules.sc_in_b = b
        m.submodules.sc_out_z = z

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a.s != b.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(a.m == b.m)

        def special(result):
            # a special case applies: flag it and drive the given result
            return [self.out_do_z.eq(1), result]

        # NOTE: alternative FP16 non-canonical NaN handling (zero +/- NaN
        # returning a modified copy of the NaN operand) was under review
        # and is not enabled here.

        # either operand NaN: result is NaN
        with m.If(a.is_nan | b.is_nan):
            m.d.comb += special(z.nan(0))

        # a is inf: result is inf with a's sign ...
        with m.Elif(a.is_inf):
            m.d.comb += special(z.inf(a.s))
            # ... unless b has the max exponent with opposite sign: NaN
            with m.If(b.exp_128 & s_nomatch):
                m.d.comb += z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b.is_inf):
            m.d.comb += special(z.inf(b.s))

        # both zero: sign is the AND of both signs
        with m.Elif(a.is_zero & b.is_zero):
            m.d.comb += special(z.create(a.s & b.s, b.e, b.m[3:-1]))

        # a is zero: result is b
        with m.Elif(a.is_zero):
            m.d.comb += special(z.create(b.s, b.e, b.m[3:-1]))

        # b is zero: result is a
        with m.Elif(b.is_zero):
            m.d.comb += special(z.create(a.s, a.e, a.m[3:-1]))

        # a == -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a.e == b.e)):
            m.d.comb += special(z.zero(0))

        # none of the above: carry on with the normal add pipeline
        with m.Else():
            m.d.comb += self.out_do_z.eq(0)

        return m
242
243
class FPID:
    """ Mix-in carrying an optional multiplexer ID through a stage.

        When ``id_wid`` is falsy (None or 0) no ID signals are created and
        ``in_mid`` / ``out_mid`` are both None; otherwise they are
        ``id_wid``-bit signals.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ copies the incoming ID to the outgoing ID on the sync domain.

            Does nothing when no ID signals exist.  The test is on the
            signal itself (not on ``id_wid``) so that ``id_wid == 0``,
            which __init__ treats as "no ID", is handled as well.
        """
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
257
258
class FPAddSpecialCases(FPState, FPID):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper ("special_cases" state) around FPAddSpecialCasesMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links the special-cases module to the operands, and the
            multiplexer ID (when present) to in_mid
        """
        self.mod.setup(m, in_a, in_b, self.out_do_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: a special case goes straight to "put_z" with the
            packed result, anything else continues to "denormalise"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            # only the packed output value is taken from the module
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
286
287
class FPAddSpecialCasesDeNorm(FPState, FPID):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Combined state: runs the special-cases check and the
        denormalisation side by side, then either emits the special
        result or hands the denormalised operands to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)

        # special-cases part
        self.smod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

        # denormalisation part
        self.dmod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ feeds the same operands to both sub-modules and links the
            multiplexer ID (when present)
        """
        self.smod.setup(m, in_a, in_b, self.out_do_z)
        self.dmod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: special result goes to "put_z", otherwise the
            denormalised operands are latched and the FSM moves to "align"
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            # only the packed output value is taken from the module
            m.d.sync += self.out_z.v.eq(self.smod.out_z.v)
            m.next = "put_z"
        with m.Else():
            m.next = "align"
            m.d.sync += [
                self.out_a.copy(self.dmod.out_a),
                self.out_b.copy(self.dmod.out_b),
            ]
322
323
class FPAddDeNormMod(FPState):
    """ Denormalised-number adjustment of both operands (combinatorial).

        Each output is a copy of its input, except that when the input
        exponent flags ``exp_n127`` the exponent is replaced by ``N126``,
        and otherwise the top mantissa bit is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b):
        """ registers this module and links it to its inputs
        """
        m.submodules.denormalise = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b

        # identical treatment for operand a, then operand b
        operand_pairs = (
            (self.in_a, self.out_a),
            (self.in_b, self.out_b),
        )
        for src, dst in operand_pairs:
            m.d.comb += dst.copy(src)
            with m.If(src.exp_n127):
                m.d.comb += dst.e.eq(src.N126)  # limit exponent
            with m.Else():
                m.d.comb += dst.m[-1].eq(1)     # set top mantissa bit

        return m
359
360
class FPAddDeNorm(FPState, FPID):
    """ FSM wrapper ("denormalise" state) around FPAddDeNormMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        FPID.__init__(self, id_wid)
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links the denormalise module to the operands, and the
            multiplexer ID (when present) to in_mid
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: latch the adjusted operands, always go to "align"
        """
        self.idsync(m)
        m.next = "align"
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
383
384
class FPAddAlignMultiMod(FPState):
    """ Multi-cycle exponent alignment step (combinatorial).

        Compares the two exponents and applies ``shift_down`` to the
        operand with the smaller one.  ``exp_eq`` is raised only when the
        exponents are equal; the enclosing state is expected to stay in
        "align" until then.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       the FSM *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through unchanged
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.copy(self.in_a),
            self.out_b.copy(self.in_b),
        ]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        with m.If(agtb):
            # exponent of a greater than b: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # exponent of b greater than a: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: ready for the next stage
            m.d.comb += self.exp_eq.eq(1)

        return m
425
426
class FPAddAlignMulti(FPState, FPID):
    """ FSM wrapper ("align" state) around FPAddAlignMultiMod: remains in
        this state until the module reports equal exponents.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ registers the align module and links it to the operands;
            the multiplexer ID (when present) is linked to in_mid
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: latch the module outputs every cycle; only move on
            to "add_0" once the exponents are equal
        """
        self.idsync(m)
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        with m.If(self.exp_eq):
            m.next = "add_0"
455
456
class FPAddAlignSingleMod:
    """ Single-cycle exponent alignment (combinatorial).
    """

    def __init__(self, width):
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b):
        """ registers this module and links it to its inputs
        """
        m.submodules.align = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output of the one shared shifter
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.in_a.e), True)    # signed, exponent-wide
        msr = MultiShiftRMerge(self.in_a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)      # a.e - b.e
        ediffr = Signal(espec, reset_less=True)     # b.e - a.e
        tdiff = Signal(espec, reset_less=True)      # selected shift amount
        elz = Signal(reset_less=True)               # a.e < b.e
        egz = Signal(reset_less=True)               # a.e > b.e

        m.d.comb += [
            # multi-shifter sits between t_inp and t_out, shifting by tdiff
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
            # exponent differences and comparisons
            ediff.eq(self.in_a.e - self.in_b.e),
            ediffr.eq(self.in_b.e - self.in_a.e),
            elz.eq(self.in_a.e < self.in_b.e),
            egz.eq(self.in_a.e > self.in_b.e),
            # default: exponents equal, A and B pass through untouched
            self.out_a.copy(self.in_a),
            self.out_b.copy(self.in_b),
        ]

        with m.If(egz):
            # exponent of a greater than b: route b through the shifter
            m.d.comb += t_inp.copy(self.in_b)
            m.d.comb += tdiff.eq(ediff)
            m.d.comb += self.out_b.copy(t_out)
            m.d.comb += self.out_b.s.eq(self.in_b.s)    # keep b's sign
        with m.Elif(elz):
            # exponent of b greater than a: route a through the shifter
            m.d.comb += t_inp.copy(self.in_a)
            m.d.comb += tdiff.eq(ediffr)
            m.d.comb += self.out_a.copy(t_out)
            m.d.comb += self.out_a.s.eq(self.in_a.s)    # keep a's sign

        return m
536
537
class FPAddAlignSingle(FPState, FPID):
    """ FSM wrapper ("align" state) around the single-cycle aligner.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links the align module to the operands, and the multiplexer
            ID (when present) to in_mid
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: latch the aligned operands and go to "add_0"
        """
        self.idsync(m)
        # NOTE: could be done as comb
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        m.next = "add_0"
560
561
class FPAddAlignSingleAdd(FPState, FPID):
    """ Combined "align" state: single-cycle alignment chained
        combinatorially into add stage 0, with only the add-0 results
        registered.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)

        # alignment part
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

        # add stage 0 part
        self.a0mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links aligner to the operands, then add-0 to the aligner's
            (combinatorial) outputs; links the multiplexer ID if present
        """
        self.mod.setup(m, in_a, in_b)
        m.d.comb += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]

        self.a0mod.setup(m, self.out_a, self.out_b)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: latch the add-0 result and total, go to "add_1"
        """
        self.idsync(m)
        m.d.sync += [
            self.out_z.copy(self.a0mod.out_z),
            self.out_tot.eq(self.a0mod.out_tot),
        ]
        m.next = "add_1"
592
593
class FPAddStage0Mod:
    """ First stage of add (combinatorial): adds the mantissas when the
        signs match, otherwise subtracts the smaller mantissa from the
        larger and takes the sign of the larger.  The result exponent is
        taken from operand a.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b):
        """ registers this module and links it to its inputs
        """
        m.submodules.add0 = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # intermediate tests, and mantissas extended by one zero top bit
        seq = Signal(reset_less=True)       # signs equal
        mge = Signal(reset_less=True)       # a mantissa >= b mantissa
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += seq.eq(self.in_a.s == self.in_b.s)
        m.d.comb += mge.eq(self.in_a.m >= self.in_b.m)
        m.d.comb += am0.eq(Cat(self.in_a.m, 0))
        m.d.comb += bm0.eq(Cat(self.in_b.m, 0))

        with m.If(seq):
            # same sign (both negative or both positive): add mantissas
            m.d.comb += self.out_tot.eq(am0 + bm0)
            m.d.comb += self.out_z.s.eq(self.in_a.s)
        with m.Elif(mge):
            # a mantissa greater or equal: a - b, sign of a
            m.d.comb += self.out_tot.eq(am0 - bm0)
            m.d.comb += self.out_z.s.eq(self.in_a.s)
        with m.Else():
            # b mantissa greater: b - a, sign of b
            m.d.comb += self.out_tot.eq(bm0 - am0)
            m.d.comb += self.out_z.s.eq(self.in_b.s)

        return m
647
648
class FPAddStage0(FPState, FPID):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper ("add_0" state) around FPAddStage0Mod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links the add-0 module to the operands, and the multiplexer
            ID (when present) to in_mid
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: latch result and total, then go to "add_1"
        """
        self.idsync(m)
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += [
            self.out_z.copy(self.mod.out_z),
            self.out_tot.eq(self.mod.out_tot),
        ]
        m.next = "add_1"
675
676
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the top bit of tot acts as a
        carry bit) and splits tot into mantissa plus overflow bits.
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()
        tot = self.in_tot
        of = self.out_of

        # start from a copy of the incoming result; m (and e) set below
        m.d.comb += self.out_z.copy(self.in_z)

        with m.If(tot[-1]):
            # top bit set: the sum overflowed.  take the mantissa one
            # bit higher up and bump the exponent
            m.d.comb += self.out_z.m.eq(tot[4:])
            m.d.comb += of.m0.eq(tot[4])
            m.d.comb += of.guard.eq(tot[3])
            m.d.comb += of.round_bit.eq(tot[2])
            m.d.comb += of.sticky.eq(tot[1] | tot[0])
            m.d.comb += self.out_z.e.eq(self.in_z.e + 1)
        with m.Else():
            # top bit clear: no overflow, exponent unchanged
            m.d.comb += self.out_z.m.eq(tot[3:])
            m.d.comb += of.m0.eq(tot[3])
            m.d.comb += of.guard.eq(tot[2])
            m.d.comb += of.round_bit.eq(tot[1])
            m.d.comb += of.sticky.eq(tot[0])

        return m
716
717
class FPAddStage1(FPState, FPID):
    """ FSM wrapper ("add_1" state) around FPAddStage1Mod.  Raises
        ``norm_stb`` for the following normalisation state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z, in_mid):
        """ registers the add-1 module and the overflow record, links
            the module inputs, and links the multiplexer ID if present
        """
        m.submodules.add1 = self.mod
        m.submodules.add1_out_overflow = self.out_of

        m.d.comb += [
            self.mod.in_z.copy(in_z),
            self.mod.in_tot.eq(in_tot),
        ]

        # default: norm_stb is zero whenever not in the add_1 state
        m.d.sync += self.norm_stb.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: latch overflow bits and result, raise norm_stb,
            then go to "normalise_1"
        """
        self.idsync(m)
        m.d.sync += [
            self.out_of.copy(self.mod.out_of),
            self.out_z.copy(self.mod.out_z),
            self.norm_stb.eq(1),
        ]
        m.next = "normalise_1"
748
749
class FPNorm1ModSingle:
    """ Single-cycle normalisation (combinatorial).

        Either shifts the mantissa up and decreases the exponent (count
        leading zeros with a PriorityEncoder), or shifts the mantissa down
        with the multi-shifter and increases the exponent up to N126.
        When neither applies the input is passed through unchanged.
    """

    def __init__(self, width):
        self.width = width
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, out_z):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += out_z.copy(self.out_z)

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): msr is created mwid wide but is fed the
        # (mwid+1)-wide temp_m below - confirm against MultiShiftRMerge
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.copy(self.in_z)
        m.d.comb += in_of.copy(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),     # guard
                self.out_of.round_bit.eq(temp_s[0]), # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                # the overflow bits come from the shifter output, the
                # same source as the mantissa.  (temp_s is only driven
                # in the decrease branch and reads as zero here.)
                self.out_of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                self.out_of.guard.eq(msr.m[2]),      # guard
                self.out_of.round_bit.eq(msr.m[1]),  # round
                self.out_of.sticky.eq(msr.m[0]),     # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
847
848
class FPNorm1ModMulti:
    """ Multi-cycle normalisation step (combinatorial): shifts by at most
        one bit per evaluation.

        ``in_select`` chooses between the fresh input (in_z/in_of) and the
        fed-back intermediate (temp_z/temp_of).  ``out_norm`` is raised
        while a further increase or decrease step is still required.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, norm_stb,
              in_select, temp_z, temp_of, out_z, out_norm):
        """ links module to inputs and outputs

            Parameter list matches the call made by FPNorm1Multi.setup.
            Wiring follows the same pattern as FPNorm1ModSingle.setup.
            norm_stb is accepted for call compatibility only: it is not
            needed inside this module.
        """
        m.submodules.normalise_1 = self

        # fresh input and fed-back intermediate values
        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)
        m.d.comb += self.in_select.eq(in_select)
        m.d.comb += self.temp_z.copy(temp_z)
        m.d.comb += self.temp_of.copy(temp_of)

        # results back out to the caller
        m.d.comb += out_z.copy(self.out_z)
        m.d.comb += out_norm.eq(self.out_norm)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
915
916
class FPNorm1Single(FPState, FPID):
    """ FSM wrapper ("normalise_1" state) around the single-cycle
        normaliser.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links the normaliser to its inputs and to out_z, and the
            multiplexer ID (when present) to in_mid
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: latch the round-up flag, then go to "round"
        """
        self.idsync(m)
        roundz = self.mod.out_of.roundz
        m.d.sync += self.out_roundz.eq(roundz)
        m.next = "round"
939
940
class FPNorm1Multi(FPState, FPID):
    """ FSM wrapper ("normalise_1" state) around the multi-cycle
        normaliser: stays in this state while the module reports that
        more normalisation is required.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs.

            The wiring of the normaliser itself is delegated to
            self.mod.setup, which must accept this argument list.
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default: ack is zero whenever not in the normalise_1 state
        m.d.sync += self.ack.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: feed the module's outputs back as the intermediate
            value every cycle; leave for "round" once no further
            normalisation step is needed
        """
        self.idsync(m)
        # accept fresh input when strobed and not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        m.d.sync += [
            self.temp_of.copy(self.mod.out_of),
            self.temp_z.copy(self.out_z),
        ]
        with m.If(self.out_norm):
            # still normalising: ack follows whether input was accepted
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
986
987
class FPNormToPack(FPState, FPID):
    """ Combined "normalise_1" state: normalisation, rounding,
        corrections and packing chained combinatorially, with only the
        packed value registered.

        ``pmod`` and ``out_z`` are created in setup(), so setup() must
        have been called before action().
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ builds and links the norm -> round -> corrections -> pack
            chain, and links the multiplexer ID (when present)
        """
        width = self.width

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(width)
        n_out_z = FPNumBase(width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(width)
        r_out_z = FPNumBase(width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += [
            n_out_roundz.eq(nmod.out_of.roundz),
            r_out_z.copy(rmod.out_z),
        ]

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(width)
        c_out_z = FPNumBase(width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(width)
        self.out_z = FPNumBase(width)
        self.pmod.setup(m, c_out_z)

        # Multiplex ID
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM body: register ID and packed result, go to "pack_put_z"
        """
        self.idsync(m)  # copies incoming ID to outgoing
        packed = self.pmod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)  # outputs packed result
        m.next = "pack_put_z"
1031
1032
class FPRoundMod:
    """ Combinatorial rounding stage.

        out_z mirrors in_z; when in_roundz is set the mantissa is
        incremented, and if the incoming mantissa equals in_z.m1s
        (all ones) the exponent is incremented as well.
    """

    def __init__(self, width):
        """ * width: bit-width passed to the FPNumBase input and output
        """
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module as m.submodules.roundz and drives its
            inputs combinatorially from in_z and roundz
        """
        m.submodules.roundz = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_roundz.eq(roundz)

    def elaborate(self, platform):
        """ returns the Module implementing the rounding logic
        """
        m = Module()
        src = self.in_z
        dst = self.out_z

        # default: straight copy.  The assignments below come later and
        # therefore take priority when their conditions hold.
        m.d.comb += dst.copy(src)
        with m.If(self.in_roundz):
            # mantissa rounds up
            m.d.comb += dst.m.eq(src.m + 1)
            # mantissa was all 1s: exponent goes up as well
            with m.If(src.m == src.m1s):
                m.d.comb += dst.e.eq(src.e + 1)
        return m
1054
1055
class FPRound(FPState, FPID):
    """ FSM state "round": wraps FPRoundMod and registers its result
        before moving on to "corrections".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width for the rounding module and out_z
            * id_wid: forwarded to FPID.__init__
        """
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ connects the rounding module to in_z / roundz and, when an
            id signal exists, connects in_mid to it
        """
        self.mod.setup(m, in_z, roundz)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers id and rounded number, then moves to "corrections"
        """
        self.idsync(m)
        rounded = self.mod.out_z
        m.d.sync += self.out_z.copy(rounded)
        m.next = "corrections"
1076
1077
class FPCorrectionsMod:
    """ Combinatorial corrections stage.

        out_z mirrors in_z, except that the exponent is replaced by
        in_z.N127 whenever in_z.is_denormalised is asserted.
    """

    def __init__(self, width):
        """ * width: bit-width passed to the FPNumOut input and output
        """
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers this module as m.submodules.corrections and drives
            its input combinatorially from in_z
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ returns the Module implementing the correction logic
        """
        m = Module()
        src = self.in_z
        dst = self.out_z
        m.submodules.corr_in_z = src
        m.submodules.corr_out_z = dst

        # default: pass-through; the override below is added afterwards
        # and so wins when the number is flagged as denormalised.
        m.d.comb += dst.copy(src)
        with m.If(src.is_denormalised):
            # N127 is defined by the number class in fpbase - presumably
            # the exponent used for denormals; confirm there.
            m.d.comb += dst.e.eq(src.N127)
        return m
1098
1099
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": wraps FPCorrectionsMod and registers
        its result before moving on to "pack".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width for the corrections module and out_z
            * id_wid: forwarded to FPID.__init__
        """
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ connects the corrections module to in_z and, when an id
            signal exists, connects in_mid to it
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers id and corrected number, then moves to "pack"
        """
        self.idsync(m)
        corrected = self.mod.out_z
        m.d.sync += self.out_z.copy(corrected)
        m.next = "pack"
1119
1120
class FPPackMod:
    """ Combinatorial packing stage.

        Produces out_z from the sign / exponent / mantissa of in_z, or
        a signed infinity when in_z.is_overflowed is asserted.
    """

    def __init__(self, width):
        """ * width: bit-width passed to the FPNumOut input and output
        """
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers this module as m.submodules.pack and drives its
            input combinatorially from in_z
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ returns the Module implementing the packing logic
        """
        m = Module()
        src = self.in_z
        m.submodules.pack_in_z = src

        with m.If(src.is_overflowed):
            # overflow: emit infinity carrying the input's sign
            m.d.comb += self.out_z.inf(src.s)
        with m.Else():
            # normal case: assemble the result from s, e and m
            m.d.comb += self.out_z.create(src.s, src.e, src.m)
        return m
1141
1142
class FPPack(FPState, FPID):
    """ FSM state "pack": wraps FPPackMod and registers the packed
        value before moving on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width for the pack module and out_z
            * id_wid: forwarded to FPID.__init__
        """
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ connects the pack module to in_z and, when an id signal
            exists, connects in_mid to it
        """
        self.mod.setup(m, in_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ registers id and packed value, then moves to "pack_put_z"
        """
        self.idsync(m)
        packed = self.mod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)
        m.next = "pack_put_z"
1162
1163
class FPPutZ(FPState):
    """ Output state: presents in_z on out_z with a stb/ack handshake
        and returns the FSM to "get_ops" once the value is taken.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid):
        """ * state: name of the FSM state this object implements
            * in_z: source of the value (its .v is read)
            * out_z: destination (its .v, .stb and .ack are used)
            * in_mid, out_mid: id source / destination; in_mid may be
              None, in which case no id is copied
        """
        FPState.__init__(self, state)
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ adds this state's synchronous behaviour to m
        """
        dest = self.out_z

        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [dest.v.eq(self.in_z.v)]

        # handshake complete when strobe and ack are both high
        accepted = dest.stb & dest.ack
        with m.If(accepted):
            m.d.sync += dest.stb.eq(0)
            m.next = "get_ops"
        with m.Else():
            # keep (or start) offering the result
            m.d.sync += dest.stb.eq(1)
1184
1185
class FPADDBaseMod(FPID):
    """ FSM-based adder core: raw operand signals in, FPOp result out.

        The list of states is assembled by get_compact_fragment() or
        get_longer_fragment() and turned into an FSM by get_fragment().
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ appends state to the FSM state list and returns it, so that
            construction and registration fit in one expression
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t

        if self.compact:
            build_stages = self.get_compact_fragment
        else:
            build_stages = self.get_longer_fragment
        build_stages(m, platform)

        # one FSM state per registered stage, in registration order
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the full set of stages (one state per step)

            get_ops -> special cases -> denorm -> align -> add0 -> add1
            -> norm -> round -> corrections -> pack -> pack_put_z, plus
            a separate "put_z" state fed by the special-cases output.
        """
        wid = self.width
        idw = self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, wid))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a, b = get.out_op1, get.out_op2

        sc = self.add_state(FPAddSpecialCases(wid, idw))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(wid, idw))
        dn.setup(m, a, b, sc.in_mid)

        # both aligners take the same setup arguments; only the class
        # depends on single_cycle
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(wid, idw))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(wid, idw))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(wid, idw))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # NOTE(review): the normaliser takes its id from add0 rather
        # than add1 - confirm this is intended.
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(wid, idw))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            # the multi-cycle normaliser additionally needs add1's strobe
            n1 = self.add_state(FPNorm1Multi(wid, idw))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(wid, idw))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(wid, idw))
        cor.setup(m, rn.out_z, rn.in_mid)

        # NOTE(review): pack takes its id from rn rather than cor -
        # confirm this is intended.
        pa = self.add_state(FPPack(wid, idw))
        pa.setup(m, cor.out_z, rn.in_mid)

        # normal result path
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # special-case result path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced set of stages

            get_ops -> special cases + denorm -> align + add0 -> add1
            -> norm-to-pack -> pack_put_z, plus a separate "put_z"
            state fed by the special-cases output.
        """
        wid = self.width
        idw = self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, wid))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a, b = get.out_op1, get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(wid, idw))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(wid, idw))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        add1 = self.add_state(FPAddStage1(wid, idw))
        add1.setup(m, alm.out_tot, alm.out_z, alm.in_mid)

        n1 = self.add_state(FPNormToPack(wid, idw))
        n1.setup(m, add1.out_z, add1.out_of, add1.in_mid)

        # normal result path
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # special-case result path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1305
1306
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd": wraps an FPADDBaseMod as a single state.

        setup() wires the inner adder's operands, trigger, result and
        id to this object; action() runs the stb/ack handshake that
        starts the inner adder and returns to "get_a" when it is done.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        # out_z is not created here: it is supplied by the caller
        # through setup().

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the inner adder module to inputs and outputs

            * m: the Module receiving the sub-module and statements
            * a, b: operand values driving in_a / in_b
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: incoming id (ignored when this object has no id)
            * out_z: result object; its v/stb are driven from the inner
              adder and its ack is passed back to it
            * out_mid: outgoing id, driven from the inner adder
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        # The id signals only exist when an id width was given.  Guard
        # them the same way the other stages in this file do, so that
        # the default id_wid=None does not end up calling .eq on None.
        if self.in_mid is not None:
            m.d.comb += [self.in_mid.eq(in_mid),
                         self.mod.in_mid.eq(self.in_mid),
                         self.out_mid.eq(self.mod.out_mid),
                        ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ adds the "fpadd" state's handshake logic to m

            While the inner adder has not signalled completion, an
            accepted strobe raises add_ack and the inner trigger strobe
            for a cycle.  On completion add_ack is raised, the trigger
            strobe is dropped and the FSM returns to "get_a".
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                            ]
        with m.Else():
            # done: acknowledge and go back for the next operand pair
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "get_a"

        # Nothing else to do here: the result value, its strobe/ack and
        # the id are all connected combinatorially in setup().
1396
1397
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        # operand inputs and result output, each an FPOp
        self.in_a = FPOp(width)
        self.in_b = FPOp(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.in_a = self.in_a
        m.submodules.in_b = self.in_b
        m.submodules.out_z = self.out_z

        # state "get_a": fetch operand a, then go to "get_b"
        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      self.in_a, self.width))
        geta.setup(m, self.in_a)

        # state "get_b": fetch operand b, then go to "fpadd"
        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      self.in_b, self.width))
        getb.setup(m, self.in_b)

        # state "fpadd": the wrapped adder.  Its strobe comes from
        # get_b's decode signal; result and id go straight to this
        # object's out_z / out_mid.
        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        ab.setup(m, geta.out_op, getb.out_op, getb.out_decode, self.in_mid,
                 self.out_z, self.out_mid)

        # one FSM state per registered stage, in registration order
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1470
1471
if __name__ == "__main__":
    # Developer toggle: True converts the complete FPADD unit (FPOp
    # operand ports); False converts only the inner adder core, which
    # takes raw operand signals plus a trigger.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.in_a.ports() +
                 alu.in_b.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
    else:
        # FPADDBaseMod (not FPADDBase) is used here: it is the class
        # that owns in_a/in_b/in_t/out_z from construction and provides
        # get_fragment.  FPADDBase only receives out_z inside setup().
        alu = FPADDBaseMod(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b] +
                 alu.in_t.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
    main(alu, ports=ports)
1485
1486
1487 # works... but don't use, just do "python fname.py convert -t v"
1488 #print (verilog.convert(alu, ports=[
1489 # ports=alu.in_a.ports() + \
1490 # alu.in_b.ports() + \
1491 # alu.out_z.ports())