121f61b22ab54654bd7147bf8972a524699a362b
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8
9 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
10 from fpbase import MultiShiftRMerge, Trigger
11 #from fpbase import FPNumShiftMultiRight
12
13
class FPState(FPBase):
    """ common base for the state objects in this file

        state_from is stored as given; FPGetOp uses it as the name
        under which its module is registered in m.submodules.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the inputs mapping and additionally publishes every
            entry as an attribute named after its key
        """
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """ stores the outputs mapping and additionally publishes every
            entry as an attribute named after its key
        """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
27
28
class FPGetOpMod:
    """ combinatorial part of the single-operand fetch: flags when
        both ack and stb of the incoming operand are set, and passes
        the operand value through while that flag is raised
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op
        #m.submodules.get_op_out = self.out_op

        # operand is taken when it is both strobed and acknowledged
        handshake = (self.in_op.ack) & (self.in_op.stb)
        m.d.comb += self.out_decode.eq(handshake)

        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
45
46
class FPGetOp(FPState):
    """ state that fetches a single operand

        in_state is the name of this state, out_state the state moved
        to once the operand has been captured.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the fetch module (under this state's name) and
            wires the operand in and the decode flag out
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.copy(in_op),
            #self.out_op.eq(self.mod.out_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ on decode: latch the operand, drop ack and move on.
            otherwise: keep ack raised and stay in this state.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
76
77
class FPGet2OpMod(Trigger):
    """ combinatorial part of the two-operand fetch: while the
        inherited trigger is raised, both raw inputs are decoded
        into FPNumIn outputs
    """

    def __init__(self, width):
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        # start from the module built by Trigger and extend it
        m = Trigger.elaborate(self, platform)
        #m.submodules.get_op_in = self.in_op
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2

        with m.If(self.trigger):
            m.d.comb += self.out_op1.decode(self.in_op1)
            m.d.comb += self.out_op2.decode(self.in_op2)
        return m
97
98
class FPGet2Op(FPState):
    """ state that fetches both operands at once

        in_state is the name of this state, out_state the state moved
        to once the decoded operands have been captured.
    """

    def __init__(self, in_state, out_state, in_op1, in_op2, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width)
        self.in_op1 = in_op1
        self.in_op2 = in_op2
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op1, in_op2, in_stb, in_ack):
        """ registers the fetch module as "get_ops", feeds it the raw
            operands and strobe, and exports its ack and trigger
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.in_op1.eq(in_op1),
            self.mod.in_op2.eq(in_op2),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        """ on trigger: latch both decoded operands, drop ack and move
            on.  otherwise: keep ack raised and stay in this state.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            #m.d.sync += self.out_op1.v.eq(self.mod.out_op1.v)
            #m.d.sync += self.out_op2.v.eq(self.mod.out_op2.v)
            m.d.sync += self.out_op1.copy(self.mod.out_op1)
            m.d.sync += self.out_op2.copy(self.mod.out_op2)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
138
139
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        out_do_z is raised when one of the special cases applies, in
        which case out_z carries the result.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_do_z):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
            out_do_z.eq(self.out_do_z),
        ]

    def elaborate(self, platform):
        m = Module()
        a, b, z = self.in_a, self.in_b, self.out_z

        m.submodules.sc_in_a = a
        m.submodules.sc_in_b = b
        m.submodules.sc_out_z = z

        # helper flags: signs differ / mantissas identical
        s_nomatch = Signal()
        m_match = Signal()
        m.d.comb += [
            s_nomatch.eq(a.s != b.s),
            m_match.eq(a.m == b.m),
        ]

        # the order of the chain below is the priority of the cases

        # either operand NaN: result is NaN
        with m.If(a.is_nan | b.is_nan):
            m.d.comb += [self.out_do_z.eq(1), z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # a is inf: result is inf with a's sign...
        with m.Elif(a.is_inf):
            m.d.comb += [self.out_do_z.eq(1), z.inf(a.s)]
            # ...unless b has exp_128 set with the opposite sign: NaN
            with m.If(b.exp_128 & s_nomatch):
                m.d.comb += z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b.is_inf):
            m.d.comb += [self.out_do_z.eq(1), z.inf(b.s)]

        # both zero: sign is the AND of both signs
        with m.Elif(a.is_zero & b.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(a.s & b.s, b.e, b.m[3:-1]),
            ]

        # only a is zero: result is b
        with m.Elif(a.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(b.s, b.e, b.m[3:-1]),
            ]

        # only b is zero: result is a
        with m.Elif(b.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(a.s, a.e, a.m[3:-1]),
            ]

        # a == -b: result is (+ve) zero
        with m.Elif(s_nomatch & m_match & (a.e == b.e)):
            m.d.comb += [self.out_do_z.eq(1), z.zero(0)]

        # none of the above: no special-case result
        with m.Else():
            m.d.comb += self.out_do_z.eq(0)

        return m
243
244
class FPID:
    """ mix-in carrying an optional multiplexer ID alongside the data

        id_wid is the bit-width of the ID.  When it is falsy (None or
        0) no ID signals are created: in_mid and out_mid are None and
        idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid on the sync domain (if an ID
            is in use at all)
        """
        # test the signal itself rather than "id_wid is not None":
        # __init__ also leaves out_mid as None for id_wid == 0, which
        # would otherwise end in an AttributeError on None.eq()
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
258
259
class FPAddSpecialCases(FPState, FPID):
    """ "special_cases" state: wraps FPAddSpecialCasesMod.

        when the module reports a special case, its packed result is
        latched and the state machine goes to "put_z"; otherwise it
        continues with "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b, self.out_do_z)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
            # only the packed value of the result is taken
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        with m.Else():
            m.next = "denormalise"
287
288
class FPAddSpecialCasesDeNorm(FPState, FPID):
    """ "special_cases" state combined with denormalisation.

        runs FPAddSpecialCasesMod and FPAddDeNormMod on the same
        inputs.  a special case goes straight to "put_z" with the
        packed result; otherwise the denormalised operands are latched
        and the state machine continues with "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.smod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

        self.dmod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.smod.setup(m, in_a, in_b, self.out_do_z)
        self.dmod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
            # only the packed value of the result is taken
            m.d.sync += self.out_z.v.eq(self.smod.out_z.v)
        with m.Else():
            m.next = "align"
            m.d.sync += [
                self.out_a.copy(self.dmod.out_a),
                self.out_b.copy(self.dmod.out_b),
            ]
323
324
class FPAddDeNormMod(FPState):
    """ denormalisation of both operands (combinatorial).

        each output starts as a copy of its input.  when the input
        has exp_n127 set, the output exponent is replaced by N126;
        otherwise the top mantissa bit of the output is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b

        # identical treatment for a, then b
        operands = ((self.in_a, self.out_a), (self.in_b, self.out_b))
        for inp, out in operands:
            m.d.comb += out.copy(inp)
            with m.If(inp.exp_n127):
                m.d.comb += out.e.eq(inp.N126)  # limit exponent
            with m.Else():
                m.d.comb += out.m[-1].eq(1)  # set top mantissa bit

        return m
360
361
class FPAddDeNorm(FPState, FPID):
    """ "denormalise" state: latches the outputs of FPAddDeNormMod
        and unconditionally continues with "align"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        FPID.__init__(self, id_wid)
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.next = "align"
        # register the denormalised operands for the next state
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
384
385
class FPAddAlignMultiMod(FPState):
    """ multi-cycle alignment step (combinatorial).

        compares the two exponents and applies shift_down to the
        operand with the smaller one; exp_eq is raised once the
        exponents are equal.  the enclosing state stays in "align"
        until that happens.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            # defaults: not yet equal, operands passed through
            self.exp_eq.eq(0),
            self.out_a.copy(self.in_a),
            self.out_b.copy(self.in_b),
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
426
427
class FPAddAlignMulti(FPState, FPID):
    """ "align" state (multi-cycle variant): latches the module's
        outputs every cycle and only moves to "add_0" once the module
        reports equal exponents
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
            #self.out_a.copy(self.mod.out_a),
            #self.out_b.copy(self.mod.out_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        # no m.next otherwise: remain in this state
        with m.If(self.exp_eq):
            m.next = "add_0"
456
457
class FPAddAlignSingleMod:
    """ single-cycle alignment of the two operands (combinatorial)
    """

    def __init__(self, width):
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.in_a.e), True)
        msr = MultiShiftRMerge(self.in_a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        m.d.comb += [
            # connect multi-shifter to t_inp/out mantissa (and tdiff)
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
            # exponent differences in both directions, and comparisons
            ediff.eq(self.in_a.e - self.in_b.e),
            ediffr.eq(self.in_b.e - self.in_a.e),
            elz.eq(self.in_a.e < self.in_b.e),
            egz.eq(self.in_a.e > self.in_b.e),
            # default: A-exp == B-exp, A and B untouched (fall through)
            self.out_a.copy(self.in_a),
            self.out_b.copy(self.in_b),
        ]
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)

        # exponent of a greater than b: shift b down
        with m.If(egz):
            m.d.comb += t_inp.copy(self.in_b)
            m.d.comb += tdiff.eq(ediff)
            m.d.comb += self.out_b.copy(t_out)
            m.d.comb += self.out_b.s.eq(self.in_b.s)  # whoops forgot sign
        # exponent of b greater than a: shift a down
        with m.Elif(elz):
            m.d.comb += t_inp.copy(self.in_a)
            m.d.comb += tdiff.eq(ediffr)
            m.d.comb += self.out_a.copy(t_out)
            m.d.comb += self.out_a.s.eq(self.in_a.s)  # whoops forgot sign
        return m
537
538
class FPAddAlignSingle(FPState, FPID):
    """ "align" state (single-cycle variant): latches the outputs of
        FPAddAlignSingleMod and unconditionally continues with "add_0"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.next = "add_0"
        # NOTE: could be done as comb
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
561
562
class FPAddAlignSingleAdd(FPState, FPID):
    """ "align" state that chains alignment, add stage 0 and add
        stage 1 combinatorially, so that a single state covers all
        three.  only the outputs of add stage 1 are registered, and
        the state machine continues with "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

        self.a0mod = FPAddStage0Mod(width)
        # created once only (it was previously constructed twice, the
        # second instance simply replacing the first)
        self.a0_out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)

        self.a1mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        # alignment, exported combinatorially
        self.mod.setup(m, in_a, in_b)
        m.d.comb += self.out_a.copy(self.mod.out_a)
        m.d.comb += self.out_b.copy(self.mod.out_b)

        # add stage 0, chained to the aligned operands
        self.a0mod.setup(m, self.out_a, self.out_b)
        m.d.comb += self.a0_out_z.copy(self.a0mod.out_z)
        m.d.comb += self.out_tot.eq(self.a0mod.out_tot)

        # add stage 1, chained to add stage 0
        self.a1mod.setup(m, self.out_tot, self.a0_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += self.out_of.copy(self.a1mod.out_of)
        m.d.sync += self.out_z.copy(self.a1mod.out_z)
        m.next = "normalise_1"
602
603
class FPAddStage0Mod:
    """ first stage of add (combinatorial).

        the output exponent is taken from a.  when the signs are equal
        the mantissas are added; otherwise the smaller mantissa is
        subtracted from the larger and the sign of the larger is used.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
        ]

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += seq.eq(self.in_a.s == self.in_b.s)
        m.d.comb += mge.eq(self.in_a.m >= self.in_b.m)
        m.d.comb += am0.eq(Cat(self.in_a.m, 0))
        m.d.comb += bm0.eq(Cat(self.in_b.m, 0))

        # same-sign (both negative or both positive) add mantissas
        with m.If(seq):
            m.d.comb += self.out_tot.eq(am0 + bm0)
            m.d.comb += self.out_z.s.eq(self.in_a.s)
        # a mantissa greater than (or equal to) b, use a
        with m.Elif(mge):
            m.d.comb += self.out_tot.eq(am0 - bm0)
            m.d.comb += self.out_z.s.eq(self.in_a.s)
        # b mantissa greater than a, use b
        with m.Else():
            m.d.comb += self.out_tot.eq(bm0 - am0)
            m.d.comb += self.out_z.s.eq(self.in_b.s)
        return m
657
658
class FPAddStage0(FPState, FPID):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        latches the module's result and total, then continues
        with "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.next = "add_1"
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += [
            self.out_z.copy(self.mod.out_z),
            self.out_tot.eq(self.mod.out_tot),
        ]
685
686
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)

        splits the total into the result mantissa and the overflow
        bits (m0, guard, round, sticky); when the top bit of the total
        is set everything is taken one bit higher and the exponent is
        incremented.
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_tot, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.out_of

        m.d.comb += [
            self.in_z.copy(in_z),
            self.in_tot.eq(in_tot),
        ]

    def elaborate(self, platform):
        m = Module()
        #m.submodules.norm1_in_overflow = self.in_of
        #m.submodules.norm1_out_overflow = self.out_of
        #m.submodules.norm1_in_z = self.in_z
        #m.submodules.norm1_out_z = self.out_z
        tot = self.in_tot
        of = self.out_of

        # default: pass z through (mantissa/exponent overridden below)
        m.d.comb += self.out_z.copy(self.in_z)

        # tot[27] gets set when the sum overflows. shift result down
        with m.If(tot[-1]):
            m.d.comb += self.out_z.m.eq(tot[4:])
            m.d.comb += of.m0.eq(tot[4])
            m.d.comb += of.guard.eq(tot[3])
            m.d.comb += of.round_bit.eq(tot[2])
            m.d.comb += of.sticky.eq(tot[1] | tot[0])
            m.d.comb += self.out_z.e.eq(self.in_z.e + 1)
        # tot[27] zero case
        with m.Else():
            m.d.comb += self.out_z.m.eq(tot[3:])
            m.d.comb += of.m0.eq(tot[3])
            m.d.comb += of.guard.eq(tot[2])
            m.d.comb += of.round_bit.eq(tot[1])
            m.d.comb += of.sticky.eq(tot[0])
        return m
735
736
class FPAddStage1(FPState, FPID):
    """ "add_1" state: latches the outputs of FPAddStage1Mod, raises
        norm_stb and continues with "normalise_1"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_tot, in_z)

        # sets to zero when not in add1 state
        m.d.sync += self.norm_stb.eq(0)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.d.sync += [
            self.out_of.copy(self.mod.out_of),
            self.out_z.copy(self.mod.out_z),
            self.norm_stb.eq(1),
        ]
        m.next = "normalise_1"
763
764
class FPNorm1ModSingle:
    """ single-cycle normalisation (combinatorial).

        * "decrease": mantissa MSB is zero and the exponent is above
          the minimum.  leading zeros are counted with a
          PriorityEncoder, the mantissa (with guard and round bits) is
          shifted up and the exponent reduced by the same amount.
        * "increase": exponent is below the minimum.  the mantissa
          (with guard, round and sticky bits) is shifted down through
          a MultiShiftRMerge and the exponent raised to N126.

        in all other cases the input is passed through unchanged.
    """

    def __init__(self, width):
        self.width = width
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, out_z):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += out_z.copy(self.out_z)

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the shifter is created mwid bits wide but is
        # fed the (mwid+1)-bit temp_m in the "increase" branch below;
        # confirm against MultiShiftRMerge that this is intended.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.copy(self.in_z)
        m.d.comb += in_of.copy(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),      # guard
                self.out_of.round_bit.eq(temp_s[0]),  # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                # the overflow bits are the low bits of the *shifter
                # output*.  (they used to be read from temp_s, which is
                # only driven in the "decrease" branch and is therefore
                # always zero here.)
                self.out_of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                self.out_of.guard.eq(msr.m[2]),      # guard
                self.out_of.round_bit.eq(msr.m[1]),  # round
                self.out_of.sticky.eq(msr.m[0]),     # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
862
863
class FPNorm1ModMulti:
    """ multi-cycle normalisation step (combinatorial).

        performs at most a one-bit normalisation per evaluation.
        in_select chooses the source: the fresh input (in_z/in_of)
        when raised, otherwise the fed-back intermediate
        (temp_z/temp_of).  out_norm is raised while a further step
        was required.
    """

    def __init__(self, width, single_cycle=True):
        # single_cycle is accepted but not used by this class
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, norm_stb, in_select, temp_z, temp_of,
              out_z, out_norm):
        """ links module to inputs and outputs

            this method was missing although FPNorm1Multi.setup calls
            it with exactly these arguments; the wiring pairs each
            argument with the attribute of the same role, following
            the other *Mod.setup functions.  norm_stb is accepted for
            call compatibility only: the calling state handles the
            strobe itself.
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += self.in_select.eq(in_select)
        m.d.comb += self.temp_z.copy(temp_z)
        m.d.comb += self.temp_of.copy(temp_of)

        m.d.comb += out_z.copy(self.out_z)
        m.d.comb += out_norm.eq(self.out_norm)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.copy(self.in_z)
            m.d.comb += in_of.copy(self.in_of)
        with m.Else():
            m.d.comb += in_z.copy(self.temp_z)
            m.d.comb += in_of.copy(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),      # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),     # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),      # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),   # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
930
931
class FPNorm1Single(FPState, FPID):
    """ "normalise_1" state (single-cycle variant): the module drives
        out_z combinatorially; only the roundz flag is registered
        before continuing with "round"
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # single_cycle is accepted but not used by this class
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        m.next = "round"
        m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
954
955
class FPNorm1Multi(FPState, FPID):
    """ "normalise_1" state (multi-cycle variant).

        feeds the module's outputs back through temp_z/temp_of every
        cycle and stays in this state for as long as the module
        reports (out_norm) that normalisation is still required; then
        registers the roundz flag and continues with "round".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ links module to inputs and outputs
        """
        # NOTE(review): relies on the module class providing a
        # setup() accepting these nine arguments - confirm
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)
        # fresh input is accepted when strobed and not yet acked
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # feed this cycle's result back as next cycle's intermediate
        m.d.sync += [
            self.temp_of.copy(self.mod.out_of),
            self.temp_z.copy(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1001
1002
class FPNormToPack(FPState, FPID):
    """ "normalise_1" state that chains normalisation, rounding,
        corrections and packing combinatorially.  only the packed
        value is registered, after which the state machine continues
        with "pack_put_z".

        NOTE: pmod and out_z only exist once setup() has been called.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        width = self.width

        # Normalisation (chained to input in_z+in_of)
        nmod = FPNorm1ModSingle(width)
        n_out_z = FPNumBase(width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # Rounding (chained to normalisation)
        rmod = FPRoundMod(width)
        r_out_z = FPNumBase(width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += [
            n_out_roundz.eq(nmod.out_of.roundz),
            r_out_z.copy(rmod.out_z),
        ]

        # Corrections (chained to rounding)
        cmod = FPCorrectionsMod(width)
        c_out_z = FPNumBase(width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # Pack (chained to corrections)
        self.pmod = FPPackMod(width)
        self.out_z = FPNumBase(width)
        self.pmod.setup(m, c_out_z)

        # Multiplex ID
        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        self.idsync(m)  # copies incoming ID to outgoing
        m.next = "pack_put_z"
        # outputs packed result
        m.d.sync += self.out_z.v.eq(self.pmod.out_z.v)
1046
1047
class FPRoundMod:
    """ combinatorial rounding: out_z is in_z, rounded up when in_roundz
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module under m and connects its inputs
        """
        m.submodules.roundz = self

        m.d.comb += [
            self.in_z.copy(in_z),
            self.in_roundz.eq(roundz),
        ]

    def elaborate(self, platform):
        m = Module()
        src = self.in_z
        dst = self.out_z
        # default: pass the number straight through...
        m.d.comb += dst.copy(src)
        # ...then override mantissa (and possibly exponent) when rounding
        with m.If(self.in_roundz):
            # mantissa rounds up
            m.d.comb += dst.m.eq(src.m + 1)
            # mantissa was all 1s: exponent goes up as well
            with m.If(src.m == src.m1s):
                m.d.comb += dst.e.eq(src.e + 1)
        return m
1069
1070
class FPRound(FPState, FPID):
    """ the "round" FSM state: registers FPRoundMod's output
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, roundz)

        has_id = self.in_mid is not None
        if has_id:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ latch the rounded value, then go to "corrections"
        """
        self.idsync(m)
        rounded = self.mod.out_z
        m.d.sync += self.out_z.copy(rounded)
        m.next = "corrections"
1091
1092
class FPCorrectionsMod:
    """ combinatorial corrections: a denormalised in_z gets exponent N127
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        src = self.in_z
        dst = self.out_z
        m.submodules.corr_in_z = src
        m.submodules.corr_out_z = dst
        # default: pass through unchanged; the exponent is overridden below
        m.d.comb += dst.copy(src)
        with m.If(src.is_denormalised):
            m.d.comb += dst.e.eq(src.N127)
        return m
1113
1114
class FPCorrections(FPState, FPID):
    """ the "corrections" FSM state: registers FPCorrectionsMod's output
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        has_id = self.in_mid is not None
        if has_id:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ latch the corrected value, then go to "pack"
        """
        self.idsync(m)
        corrected = self.mod.out_z
        m.d.sync += self.out_z.copy(corrected)
        m.next = "pack"
1134
1135
class FPPackMod:
    """ combinatorial pack: builds out_z from in_z's sign/exponent/mantissa,
        or an infinity of the same sign when in_z has overflowed
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        m = Module()
        z = self.in_z
        m.submodules.pack_in_z = z
        with m.If(z.is_overflowed):
            m.d.comb += self.out_z.inf(z.s)
        with m.Else():
            m.d.comb += self.out_z.create(z.s, z.e, z.m)
        return m
1156
1157
class FPPack(FPState, FPID):
    """ the "pack" FSM state: registers FPPackMod's packed value
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        has_id = self.in_mid is not None
        if has_id:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ latch the packed bits, then go to "pack_put_z"
        """
        self.idsync(m)
        packed = self.mod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)
        m.next = "pack_put_z"
1177
1178
class FPPutZ(FPState):
    """ output state: presents in_z on out_z and waits for stb & ack

        * state: name of the FSM state this object implements
        * in_z, out_z: source and destination of the value (.v is copied)
        * in_mid, out_mid: optional ID to copy alongside (in_mid may be None)
        * to_state: state to enter after the handshake; "get_ops" if None
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ statements executed while the FSM is in this state
        """
        dest = self.out_z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z.v)
        with m.If(dest.stb & dest.ack):
            # handshake seen: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep the strobe raised until it is acknowledged
            m.d.sync += dest.stb.eq(1)
1202
1203
class FPPutZIdx(FPState):
    """ output state: like FPPutZ, but the destination is out_zs[in_mid]

        * state: name of the FSM state this object implements
        * in_z: source of the value (.v is copied)
        * out_zs: indexable collection of outputs, selected by in_mid
        * in_mid: index of the output to write to
        * to_state: state to enter after the handshake; "get_ops" if None
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ statements executed while the FSM is in this state
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # the output selected by the current ID
        dest = self.out_zs[self.in_mid]
        m.d.comb += [
            outz_stb.eq(dest.stb),
            outz_ack.eq(dest.ack),
        ]
        m.d.sync += dest.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # handshake seen: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep the strobe raised until it is acknowledged
            m.d.sync += dest.stb.eq(1)
1229
1230
class FPADDBaseMod(FPID):
    """ the adder proper: a single FSM built from a list of stage objects

        Stages are appended with add_state() by one of the two builder
        methods; get_fragment() then emits one FSM state per stage, in
        the order they were added.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        self.states = []

    def add_state(self, state):
        """ appends a stage to the FSM list and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t

        # populate self.states with the selected pipeline
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the full-length pipeline, one stage per operation
        """
        width, id_wid = self.width, self.id_wid

        getops = self.add_state(FPGet2Op("get_ops", "special_cases",
                                         self.in_a, self.in_b, width))
        getops.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = getops.out_op1
        b = getops.out_op2

        specials = self.add_state(FPAddSpecialCases(width, id_wid))
        specials.setup(m, a, b, self.in_mid)

        denorm = self.add_state(FPAddDeNorm(width, id_wid))
        denorm.setup(m, a, b, specials.in_mid)

        # both alignment variants are connected identically
        if self.single_cycle:
            align = self.add_state(FPAddAlignSingle(width, id_wid))
        else:
            align = self.add_state(FPAddAlignMulti(width, id_wid))
        align.setup(m, denorm.out_a, denorm.out_b, denorm.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, align.out_a, align.out_b, align.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # the multi-cycle normaliser additionally needs add1's strobe
        if self.single_cycle:
            norm = self.add_state(FPNorm1Single(width, id_wid))
            norm.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            norm = self.add_state(FPNorm1Multi(width, id_wid))
            norm.setup(m, add1.out_z, add1.out_of, add1.norm_stb,
                       add0.in_mid)

        rnd = self.add_state(FPRound(width, id_wid))
        rnd.setup(m, norm.out_z, norm.out_roundz, norm.in_mid)

        corr = self.add_state(FPCorrections(width, id_wid))
        corr.setup(m, rnd.out_z, rnd.in_mid)

        pack = self.add_state(FPPack(width, id_wid))
        pack.setup(m, corr.out_z, rnd.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", pack.out_z, self.out_z,
                              pack.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", specials.out_z, self.out_z,
                              pack.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced pipeline, with several operations per stage
        """
        width, id_wid = self.width, self.id_wid

        getops = self.add_state(FPGet2Op("get_ops", "special_cases",
                                         self.in_a, self.in_b, width))
        getops.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = getops.out_op1
        b = getops.out_op2

        specials = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
        specials.setup(m, a, b, self.in_mid)

        align = self.add_state(FPAddAlignSingleAdd(width, id_wid))
        align.setup(m, specials.out_a, specials.out_b, specials.in_mid)

        norm = self.add_state(FPNormToPack(width, id_wid))
        norm.setup(m, align.out_z, align.out_of, align.in_mid)

        # output of the normal path
        self.add_state(FPPutZ("pack_put_z", norm.out_z, self.out_z,
                              norm.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", specials.out_z, self.out_z,
                              specials.in_mid, self.out_mid))
1347
1348
class FPADDBase(FPState, FPID):
    """ the "fpadd" FSM state, which itself owns a complete FPADDBaseMod

        setup() wires the operands, the in_t trigger and the out_z
        handshake through to self.mod.  action() acknowledges an incoming
        strobe, starts the add, and moves to "put_z" once self.mod reports
        a result on z_done.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the embedded adder to inputs and outputs

            * a, b: operands, passed on to self.mod.in_a / in_b
            * add_stb: incoming strobe; registered into self.add_stb
            * in_mid: incoming ID (only connected when this stage has one)
            * out_z: destination for the adder's result and handshake
            * out_mid: destination for the adder's outgoing ID
        """
        self.out_z = out_z
        self.out_mid = out_mid

        # Same guard as the other stages: only touch the ID signals when
        # this stage actually has one (id_wid defaults to None here).
        has_id = self.in_mid is not None

        conns = [self.in_a.eq(a),
                 self.in_b.eq(b),
                 self.mod.in_a.eq(self.in_a),
                 self.mod.in_b.eq(self.in_b),
                ]
        if has_id:
            conns += [self.in_mid.eq(in_mid),
                      self.mod.in_mid.eq(self.in_mid),
                     ]
        conns += [self.z_done.eq(self.mod.out_z.trigger),
                  self.mod.in_t.stb.eq(self.in_t.stb),
                  self.in_t.ack.eq(self.mod.in_t.ack),
                 ]
        if has_id:
            conns.append(self.out_mid.eq(self.mod.out_mid))
        conns += [self.out_z.v.eq(self.mod.out_z.v),
                  self.out_z.stb.eq(self.mod.out_z.stb),
                  self.mod.out_z.ack.eq(self.out_z.ack),
                 ]
        m.d.comb += conns

        # the incoming strobe is registered, not connected combinatorially
        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ statements executed while the FSM is in "fpadd"
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),   # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [
                    self.add_ack.eq(0),
                    self.in_t.stb.eq(0),
                    self.out_z.ack.eq(1),
                ]
        with m.Else():
            # done: acknowledge, and go write out id and value
            m.d.sync += [
                self.add_ack.eq(1),
                self.in_t.stb.eq(0),
            ]
            m.next = "put_z"
1440
class ResArray:
    """ an Array of rs_sz result ports plus a single input (in_z, in_mid)
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width of each FPOp
            * id_wid: bit-width of the in_mid signal
            * rs_sz: number of out_z result ports to create (default 2,
              the same default FPADD uses)
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ connects the incoming value and ID to in_z / in_mid
        """
        m.d.comb += [self.in_z.copy(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for the result array
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports() of every result port
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1472
1473
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPADDBase---> FPADDBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPADDBase is tricky: it is both a stage and *has* stages.
        Connection to FPADDBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of out_z
              result ports to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        def named_op(name_fmt, idx):
            # an FPOp whose name is name_fmt filled in with its index
            op = FPOp(width)
            op.name = name_fmt % idx
            return op

        # operand pairs, one (in_a, in_b) tuple per index
        self.rs = Array([(named_op("in_a_%d", idx), named_op("in_b_%d", idx))
                         for idx in range(rs_sz)])

        # result ports, one per index
        self.res = Array([named_op("out_z_%d", idx)
                          for idx in range(rs_sz)])

        self.states = []

    def add_state(self, state):
        """ appends a stage to the FSM list and hands it back
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected
        in_a, in_b = self.rs[0]

        out_z = FPOp(self.width)
        out_mid = Signal(self.id_wid, reset_less=True)
        m.submodules.out_z = out_z

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        # the adder is strobed by the decode of the second operand
        adder = self.add_state(FPADDBase(self.width, self.id_wid,
                                         self.single_cycle))
        adder.setup(m, geta.out_op, getb.out_op, getb.out_decode,
                    self.ids.in_mid, out_z, out_mid)

        # result goes to res[out_mid]; afterwards start over at "get_a"
        self.add_state(FPPutZIdx("put_z", adder.out_z, self.res,
                                 out_mid, "get_a"))

        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1565
1566
if __name__ == "__main__":
    # hand-edited switch: True builds the full FPADD, False the bare
    # FPADDBase stage
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        ports = (alu.rs[0][0].ports() +
                 alu.rs[0][1].ports() +
                 alu.res[0].ports() +
                 [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=ports)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        ports = ([alu.in_a, alu.in_b] +
                 alu.in_t.ports() +
                 alu.out_z.ports() +
                 [alu.in_mid, alu.out_mid])
        main(alu, ports=ports)
1580
1581
1582 # works... but don't use, just do "python fname.py convert -t v"
1583 #print (verilog.convert(alu, ports=[
1584 # ports=alu.in_a.ports() + \
1585 # alu.in_b.ports() + \
1586 # alu.out_z.ports())