split out normalisation to separate module
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 from fpbase import FPState
18 from fpcommon.getop import (FPGetOpMod, FPGetOp, FPNumBase2Ops, FPADDBaseData, FPGet2OpMod, FPGet2Op)
19 from fpcommon.denorm import (FPSCData, FPAddDeNormMod, FPAddDeNorm)
20 from fpcommon.postcalc import FPAddStage1Data
21 from fpcommon.postnormalise import (FPNorm1Data, FPNorm1ModSingle,
22 FPNorm1ModMulti, FPNorm1Single, FPNorm1Multi)
23
24
class FPAddSpecialCasesMod:
    """ Combinatorial special-case detection for FP add.

        Decodes both operands and walks a priority chain (NaN, infinity,
        zero, exact cancellation).  When a case matches, the result is
        produced directly in o.z and o.out_do_z is set; otherwise the
        decoded operands are forwarded as o.a and o.b.
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as a submodule of m and connects i
            to its input
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output of this stage (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o

        m.submodules.sc_out_z = out.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += a1.decode(inp.a)
        m.d.comb += b1.decode(inp.b)

        # helper flags: signs differ / mantissas identical
        s_nomatch = Signal()
        m_match = Signal()
        m.d.comb += [s_nomatch.eq(a1.s != b1.s),
                     m_match.eq(a1.m == b1.m)]

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [out.out_do_z.eq(1), out.z.nan(0)]
            # XXX FP16 non-canonical NaN handling (returning a modified
            # copy of the NaN operand when the other one is +/- zero)
            # is under review and is not implemented here.

        # a is inf: result is inf with the sign of a ...
        with m.Elif(a1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(a1.s)]
            # ... unless b has exponent 128 and the signs differ: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += out.z.nan(0)

        # b is inf: result is inf with the sign of b
        with m.Elif(b1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(b1.s)]

        # both zero: zero, negative only when both signs are set
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(b1.s, b1.e, b1.m[3:-1])]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s, a1.e, a1.m[3:-1])]

        # a == -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [out.out_do_z.eq(1), out.z.zero(0)]

        # no special case: Denormalised Number checks come next, so the
        # decoded a/b data is passed through
        with m.Else():
            m.d.comb += [out.out_do_z.eq(0),
                         out.a.eq(a1),
                         out.b.eq(b1)]

        # packed result and id are always forwarded
        m.d.comb += [out.oz.eq(out.z.v),
                     out.mid.eq(inp.mid)]

        return m
143
144
class FPID:
    """ Optional identifier ("mid") pair: in_mid is registered to out_mid.

        When id_wid is zero or None no signals are created and both
        in_mid and out_mid are None.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ adds the sync assignment out_mid <= in_mid to module m,
            if identifiers are in use (no-op otherwise)
        """
        # test the signal itself rather than id_wid: __init__ leaves the
        # signals as None for id_wid == 0 as well as for id_wid == None
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
158
159
class FPAddSpecialCases(FPState):
    """ FSM state "special_cases", wrapping FPAddSpecialCasesMod.

        special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod takes both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPAddSpecialCasesMod.setup() accepts (m, i) only: the early-out
        # flag is taken from the module's output data instead
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # the module's result lives in mod.o (there is no mod.out_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        """ next state: "put_z" when a special case produced the result,
            otherwise "denormalise"
        """
        # NOTE(review): idsync() is not defined in this class; presumably
        # provided by FPState - confirm.
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
185
186
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases (NaNs, infs, zeros) chained with denormalisation.

        Serves both as the "special_cases" FSM state and as an
        UnbufferedPipeline that uses itself as its stage.
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.width, self.id_wid = width, id_wid
        UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        """ input format (that of the special-cases module)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format (that of the denormalisation module)
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ chains a special-cases module and a denormalisation module
            onto input i; the denormalisation output becomes self.o
        """
        smod = FPAddSpecialCasesMod(self.width, self.id_wid)
        dmod = FPAddDeNormMod(self.width, self.id_wid)
        StageChain([smod, dmod]).setup(m, i)

        # smod.o.out_do_z would only be needed for break-out (early-out)
        self.o = dmod.o

    def process(self, i):
        """ returns the chained output (the argument is not used)
        """
        return self.o

    def action(self, m):
        """ registers the chain output into self.out, then moves to "align"
        """
        # break-out (early-out) to "put_z" on out_do_z is disabled here
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
230
231
class FPAddAlignMultiMod(FPState):
    """ One alignment step for the multi-cycle aligner.

        Compares the exponents of in_a and in_b and applies shift_down
        to whichever has the smaller exponent; exp_eq is raised when the
        exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        for name, sub in (("align_in_a", self.in_a),
                          ("align_in_b", self.in_b),
                          ("align_out_a", self.out_a),
                          ("align_out_b", self.out_b)):
            setattr(m.submodules, name, sub)

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            # defaults: not yet equal, operands passed through unchanged
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
            # exponent comparisons
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)

        return m
272
273
class FPAddAlignMulti(FPState):
    """ FSM state "align" (multi-cycle), wrapping FPAddAlignMultiMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs: operands in (comb),
            exp_eq out (comb), aligned operands out (sync)
        """
        mod = self.mod
        m.submodules.align = mod
        m.d.comb += [mod.in_a.eq(in_a),
                     mod.in_b.eq(in_b),
                     self.exp_eq.eq(mod.exp_eq)]
        m.d.sync += [self.out_a.eq(mod.out_a),
                     self.out_b.eq(mod.out_b)]

    def action(self, m):
        """ moves on to "add_0" only once the exponents are equal
        """
        with m.If(self.exp_eq):
            m.next = "add_0"
296
297
class FPNumIn2Ops:
    """ Data bundle: two operands (a, b), result number z, early-out
        flag out_do_z with its packed value oz, and the identifier mid.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of field-by-field assignments from i
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
311
312
class FPAddAlignSingleMod:
    """ Single-cycle operand alignment using one shared shifter.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the output of this stage (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        i, o = self.i, self.o

        m.submodules.align_in_a = i.a
        m.submodules.align_in_b = i.b
        m.submodules.align_out_a = o.a
        m.submodules.align_out_b = o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(i.a.e), True)  # shape shared by all exponent diffs
        msr = MultiShiftRMerge(i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences (both directions) and comparisons
        m.d.comb += [
            ediff.eq(i.a.e - i.b.e),
            ediffr.eq(i.b.e - i.a.e),
            elz.eq(i.a.e < i.b.e),
            egz.eq(i.a.e > i.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [o.a.eq(i.a), o.b.eq(i.b)]

        # only one shifter (muxed): skipped entirely on early-out
        with m.If(~i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += t_inp.eq(i.b)
                m.d.comb += tdiff.eq(ediff)
                m.d.comb += o.b.eq(t_out)
                m.d.comb += o.b.s.eq(i.b.s)  # sign comes from the input
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += t_inp.eq(i.a)
                m.d.comb += tdiff.eq(ediffr)
                m.d.comb += o.a.eq(t_out)
                m.d.comb += o.a.s.eq(i.a.s)  # sign comes from the input

        # pass-through of id and early-out data
        m.d.comb += [
            o.mid.eq(i.mid),
            o.z.eq(i.z),
            o.out_do_z.eq(i.out_do_z),
            o.oz.eq(i.oz),
        ]

        return m
406
407
class FPAddAlignSingle(FPState):
    """ FSM state "align" (single-cycle), wrapping FPAddAlignSingleMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the aligned operands are fields of the module's output data
        # (mod.o.a / mod.o.b): the module has no out_a / out_b attributes
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """ alignment takes one cycle: always proceed to "add_0"
        """
        m.next = "add_0"
427
428
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ FSM state "align" that chains alignment, add stage 0 and add
        stage 1; also an UnbufferedPipeline that is its own stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width, self.id_wid = width, id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (that of add stage 1)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        stages = [FPAddAlignSingleMod(self.width, self.id_wid),
                  FPAddStage0Mod(self.width, self.id_wid),
                  FPAddStage1Mod(self.width, self.id_wid)]
        StageChain(stages).setup(m, i)

        # output of the last stage in the chain (add stage 1)
        self.o = stages[-1].o

    def process(self, i):
        """ returns the chained output (the argument is not used)
        """
        return self.o

    def action(self, m):
        """ registers the chain output into a1o, then moves on to
            "normalise_1"
        """
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
464
465
class FPAddStage0Data:
    """ Data bundle produced by add stage 0: result number z, raw
        total tot, early-out flag out_do_z with value oz, identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        # total is 4 bits wider than the mantissa width of z
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of field-by-field assignments from i
        """
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
478
479
class FPAddStage0Mod:
    """ Add stage 0: adds or subtracts the two mantissas.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output of this stage (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        a, b = self.i.a, self.i.b
        z, tot = self.o.z, self.o.tot

        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(a.m)+1, reset_less=True)
        bm0 = Signal(len(b.m)+1, reset_less=True)
        m.d.comb += seq.eq(a.s == b.s)     # signs equal
        m.d.comb += mge.eq(a.m >= b.m)     # mantissa of a >= that of b
        m.d.comb += am0.eq(Cat(a.m, 0))
        m.d.comb += bm0.eq(Cat(b.m, 0))

        # arithmetic is skipped on early-out
        with m.If(~self.i.out_do_z):
            m.d.comb += z.e.eq(a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [tot.eq(am0 + bm0), z.s.eq(a.s)]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [tot.eq(am0 - bm0), z.s.eq(a.s)]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [tot.eq(bm0 - am0), z.s.eq(b.s)]

        # pass-through of early-out data and id
        m.d.comb += [self.o.oz.eq(self.i.oz),
                     self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.mid.eq(self.i.mid)]
        return m
544
545
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM state "add_0": registers the output of FPAddStage0Mod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod takes both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """ always proceeds to "add_1"
        """
        m.next = "add_1"
567
568
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot acts as a
        carry bit) and extracts mantissa plus guard/round/sticky bits
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output of this stage (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z, of = self.o.z, self.o.of

        # default: z copied from the input (m and e overridden below)
        m.d.comb += z.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows: take the
            # mantissa one bit higher and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += [
                    z.m.eq(tot[4:]),
                    of.m0.eq(tot[4]),
                    of.guard.eq(tot[3]),
                    of.round_bit.eq(tot[2]),
                    of.sticky.eq(tot[1] | tot[0]),
                    z.e.eq(self.i.z.e + 1),
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    z.m.eq(tot[3:]),
                    of.m0.eq(tot[3]),
                    of.guard.eq(tot[2]),
                    of.round_bit.eq(tot[1]),
                    of.sticky.eq(tot[0]),
                ]

        # pass-through of early-out data and id
        m.d.comb += [self.o.out_do_z.eq(self.i.out_do_z),
                     self.o.oz.eq(self.i.oz),
                     self.o.mid.eq(self.i.mid)]

        return m
626
627
class FPAddStage1(FPState):
    """ FSM state "add_1": registers the z and overflow outputs of
        FPAddStage1Mod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod takes both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): both norm_stb assignments are unconditional sync
        # statements, so the later one (1) is the one that takes effect;
        # the "zero when not in add1 state" intent is not implemented
        # here - confirm.
        m.d.sync += self.norm_stb.eq(0)

        # the module's results are fields of its output data (mod.o.of,
        # mod.o.z): it has no out_of / out_z attributes
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        """ always proceeds to "normalise_1"
        """
        m.next = "normalise_1"
650
651
652
653
def __init__(self, width, single_cycle=True):
    """ creates the select input, the in/temp/out number and overflow
        bundles, and the out_norm flag of a normalisation module

        * width: passed to each FPNumBase
        * single_cycle: accepted but not used here
    """
    # NOTE(review): no class header precedes this method in the file; it
    # looks like a remnant of a normalisation class that was moved to
    # fpcommon.postnormalise - confirm, then remove it.
    self.width = width
    self.in_select = Signal(reset_less=True)
    self.in_z = FPNumBase(width, False)
    self.in_of = Overflow()
    self.temp_z = FPNumBase(width, False)
    self.temp_of = Overflow()
    self.out_z = FPNumBase(width, False)
    self.out_of = Overflow()
    # driven by the companion elaborate(), which fails without it
    self.out_norm = Signal(reset_less=True)
663
def elaborate(self, platform):
    """ one normalisation step: selects in_* or temp_* as input, then
        shifts the mantissa up (exponent down) or down (exponent up);
        out_norm is set while either adjustment is still being applied
    """
    # NOTE(review): no class header precedes this method in the file; it
    # looks like a remnant of a normalisation class that was moved to
    # fpcommon.postnormalise - confirm, then remove it.
    m = Module()

    for name, sub in (("norm1_out_z", self.out_z),
                      ("norm1_out_overflow", self.out_of),
                      ("norm1_temp_z", self.temp_z),
                      ("norm1_temp_of", self.temp_of),
                      ("norm1_in_z", self.in_z),
                      ("norm1_in_overflow", self.in_of)):
        setattr(m.submodules, name, sub)

    in_z = FPNumBase(self.width, False)
    in_of = Overflow()
    m.submodules.norm1_insel_z = in_z
    m.submodules.norm1_insel_overflow = in_of

    # select which of temp or in z/of to use
    with m.If(self.in_select):
        m.d.comb += [in_z.eq(self.in_z), in_of.eq(self.in_of)]
    with m.Else():
        m.d.comb += [in_z.eq(self.temp_z), in_of.eq(self.temp_of)]

    # initialise out from in (overridden below)
    m.d.comb += [self.out_z.eq(in_z), self.out_of.eq(in_of)]

    # normalisation increase/decrease conditions
    decrease = Signal(reset_less=True)
    increase = Signal(reset_less=True)
    m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
    m.d.comb += increase.eq(in_z.exp_lt_n126)
    m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end

    # decrease exponent
    with m.If(decrease):
        m.d.comb += self.out_z.e.eq(in_z.e - 1)            # exponent DOWN
        m.d.comb += self.out_z.m.eq(in_z.m << 1)           # mantissa UP
        m.d.comb += self.out_z.m[0].eq(in_of.guard)        # steal guard
        m.d.comb += self.out_of.guard.eq(in_of.round_bit)  # round -> guard
        m.d.comb += self.out_of.round_bit.eq(0)            # reset round bit
        m.d.comb += self.out_of.m0.eq(in_of.guard)
    # increase exponent
    with m.Elif(increase):
        m.d.comb += self.out_z.e.eq(in_z.e + 1)            # exponent UP
        m.d.comb += self.out_z.m.eq(in_z.m >> 1)           # mantissa DOWN
        m.d.comb += self.out_of.guard.eq(in_z.m[0])
        m.d.comb += self.out_of.m0.eq(in_z.m[1])
        m.d.comb += self.out_of.round_bit.eq(in_of.guard)
        m.d.comb += self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)

    return m
717
718
class FPNormToPack(FPState, UnbufferedPipeline):
    """ FSM state "normalise_1" chaining normalisation, rounding,
        corrections and packing; also an UnbufferedPipeline that is its
        own stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        """ input data format (that of the normalisation module)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format (that of the pack module)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        nmod = FPNorm1ModSingle(self.width, self.id_wid)
        rmod = FPRoundMod(self.width, self.id_wid)
        cmod = FPCorrectionsMod(self.width, self.id_wid)
        pmod = FPPackMod(self.width, self.id_wid)
        StageChain([nmod, rmod, cmod, pmod]).setup(m, i)

        # out_z: register bundle written by action(); o: chain output
        self.out_z = pmod.ospec()
        self.o = pmod.o

    def process(self, i):
        """ returns the chained output (the argument is not used)
        """
        return self.o

    def action(self, m):
        """ registers the chain output into out_z, then moves on to
            "pack_put_z"
        """
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
754
755
class FPRoundData:
    """ Data bundle: number z, early-out flag out_do_z with its packed
        value oz, and the identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of field-by-field assignments from i
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
767
768
class FPRoundMod:
    """ Rounding: increments the mantissa when the input's roundz flag
        is set, bumping the exponent when the mantissa was all 1s.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output of this stage (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        z_in, z_out = self.i.z, self.out_z.z

        # default: straight copy (copies mid, z, out_do_z)
        m.d.comb += self.out_z.eq(self.i)

        # rounding is skipped on early-out
        with m.If(~self.i.out_do_z):
            with m.If(self.i.roundz):
                m.d.comb += z_out.m.eq(z_in.m + 1)      # mantissa up
                with m.If(z_in.m == z_in.m1s):          # all 1s
                    m.d.comb += z_out.e.eq(z_in.e + 1)  # exponent up

        return m
800
801
class FPRound(FPState):
    """ FSM state "round": registers the output of FPRoundMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod takes both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync() is not defined in this class; presumably
        # provided by FPState - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ always proceeds to "corrections"
        """
        m.next = "corrections"
826
827
class FPCorrectionsMod:
    """ Post-rounding correction: a denormalised result gets its
        exponent replaced by the input number's N127 value.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (same as the input format)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output of this stage (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        z_in, z_out = self.i.z, self.out_z.z
        m.submodules.corr_in_z = z_in
        m.submodules.corr_out_z = z_out

        # default: straight copy (copies mid, z, out_do_z)
        m.d.comb += self.out_z.eq(self.i)

        # correction is skipped on early-out
        with m.If(~self.i.out_do_z):
            with m.If(z_in.is_denormalised):
                m.d.comb += z_out.e.eq(z_in.N127)
        return m
860
861
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of
        FPCorrectionsMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod takes both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ always proceeds to "pack"
        """
        m.next = "pack"
885
886
class FPPackData:
    """ Data bundle: packed result z (width bits) and identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of field-by-field assignments from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns the signals of this bundle as a list
        """
        return [getattr(self, name) for name in ("z", "mid")]
898
899
class FPPackMod:
    """ Packs the result number into its output bit representation,
        or forwards the early-out value oz when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output of this stage (the argument is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        num = self.i.z
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z

        m.d.comb += self.o.mid.eq(self.i.mid)

        with m.If(~self.i.out_do_z):
            # overflowed result becomes infinity with the same sign
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        # early-out: the already-packed value is used unchanged
        with m.Else():
            m.d.comb += z.v.eq(self.i.oz)

        m.d.comb += self.o.z.eq(z.v)
        return m
938
939
class FPPack(FPState):
    """ FSM state "pack": registers the output of FPPackMod.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod takes both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format (that of the wrapped module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format (that of the wrapped module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o", an FPPackData whose packed
        # value is the signal "z" (there is no out_z and no .v)
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ always proceeds to "pack_put_z"
        """
        m.next = "pack_put_z"
963
964
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z and waits for the
        stb/ack handshake before moving to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        # default destination state is "get_ops"
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ copies id (when present) and value to the output; raises stb
            until it is seen together with ack, then drops it and moves on
        """
        port = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z)

        with m.If(port.stb & port.ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
988
989
class FPPutZIdx(FPState):
    """ Like FPPutZ, but the output is the entry of out_zs selected by
        in_mid.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        # default destination state is "get_ops"
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ copies the value to the selected output; raises its stb until
            it is seen together with ack, then drops it and moves on
        """
        target = self.out_zs[self.in_mid]

        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += outz_stb.eq(target.stb)
        m.d.comb += outz_ack.eq(target.ack)

        m.d.sync += target.v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += target.stb.eq(1)
1015
1016
class FPOpData:
    """ Data bundle: an FPOp z and the identifier mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of field-by-field assignments from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns the members of this bundle as a list
        """
        return [getattr(self, name) for name in ("z", "mid")]
1027
1028
1029 class FPADDBaseMod:
1030
1031 def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
1032 """ IEEE754 FP Add
1033
1034 * width: bit-width of IEEE754. supported: 16, 32, 64
1035 * id_wid: an identifier that is sync-connected to the input
1036 * single_cycle: True indicates each stage to complete in 1 clock
1037 * compact: True indicates a reduced number of stages
1038 """
1039 self.width = width
1040 self.id_wid = id_wid
1041 self.single_cycle = single_cycle
1042 self.compact = compact
1043
1044 self.in_t = Trigger()
1045 self.i = self.ispec()
1046 self.o = self.ospec()
1047
1048 self.states = []
1049
1050 def ispec(self):
1051 return FPADDBaseData(self.width, self.id_wid)
1052
1053 def ospec(self):
1054 return FPOpData(self.width, self.id_wid)
1055
1056 def add_state(self, state):
1057 self.states.append(state)
1058 return state
1059
1060 def get_fragment(self, platform=None):
1061 """ creates the HDL code-fragment for FPAdd
1062 """
1063 m = Module()
1064 m.submodules.out_z = self.o.z
1065 m.submodules.in_t = self.in_t
1066 if self.compact:
1067 self.get_compact_fragment(m, platform)
1068 else:
1069 self.get_longer_fragment(m, platform)
1070
1071 with m.FSM() as fsm:
1072
1073 for state in self.states:
1074 with m.State(state.state_from):
1075 state.action(m)
1076
1077 return m
1078
def get_longer_fragment(self, m, platform=None):
    """ builds the longer stage list: one registered state per step

        every stage is registered through add_state in the order it is
        created here, and set up from the outputs of an earlier stage.

        NOTE(review): this path reads self.in_mid, self.out_z and
        self.out_mid, and constructs FPGet2Op without id_wid (unlike
        get_compact_fragment) - confirm it is still in step with the
        rest of the class before relying on it.
    """
    width, id_wid = self.width, self.id_wid
    single = self.single_cycle

    # operand fetch, triggered from the incoming stb/ack pair
    get = self.add_state(FPGet2Op("get_ops", "special_cases", width))
    get.setup(m, self.i)
    a, b = get.out_op1, get.out_op2
    get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

    sc = self.add_state(FPAddSpecialCases(width, id_wid))
    sc.setup(m, a, b, self.in_mid)

    dn = self.add_state(FPAddDeNorm(width, id_wid))
    dn.setup(m, a, b, sc.in_mid)

    # alignment: both variants are constructed and set up the same way
    align_cls = FPAddAlignSingle if single else FPAddAlignMulti
    alm = self.add_state(align_cls(width, id_wid))
    alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

    add0 = self.add_state(FPAddStage0(width, id_wid))
    add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

    add1 = self.add_state(FPAddStage1(width, id_wid))
    add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

    # normalisation: the multi variant is additionally given
    # add1.norm_stb.
    # NOTE(review): in_mid is taken from add0 here, not add1 - confirm
    norm_cls = FPNorm1Single if single else FPNorm1Multi
    n1 = self.add_state(norm_cls(width, id_wid))
    if single:
        n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
    else:
        n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

    rn = self.add_state(FPRound(width, id_wid))
    rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

    cor = self.add_state(FPCorrections(width, id_wid))
    cor.setup(m, rn.out_z, rn.in_mid)

    # NOTE(review): in_mid is taken from rn here, not cor - confirm
    pa = self.add_state(FPPack(width, id_wid))
    pa.setup(m, cor.out_z, rn.in_mid)

    # two output states targeting self.out_z: "pack_put_z" is given
    # pa.out_z, "put_z" is given sc.out_z; both are given pa.in_mid
    self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                          pa.in_mid, self.out_mid))
    self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                          pa.in_mid, self.out_mid))
1128
def get_compact_fragment(self, m, platform=None):
    """ builds the compact stage list: four chained stages plus output

        FPGet2Op, FPAddSpecialCasesDeNorm, FPAddAlignSingleAdd and
        FPNormToPack are handed to a StageChain (specallocate=True) that
        is set up with self.i as input.  each of the four is registered
        as a state, followed by a final "pack_put_z" state.  no separate
        "put_z" state is registered on this path.
    """
    width, id_wid = self.width, self.id_wid

    get = FPGet2Op("get_ops", "special_cases", width, id_wid)
    stages = [
        get,
        FPAddSpecialCasesDeNorm(width, id_wid),
        FPAddAlignSingleAdd(width, id_wid),
        FPNormToPack(width, id_wid),
    ]
    n1 = stages[-1]

    get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

    StageChain(stages, specallocate=True).setup(m, self.i)

    for stage in stages:
        self.add_state(stage)

    # output state: given the last stage's out_z.z / out_z.mid
    self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                          n1.out_z.mid, self.o.mid))
1151
1152
class FPADDBase(FPState):
    """ FPState ("fpadd") wrapping an FPADDBaseMod instance (self.mod)

        setup() ties this object's input, trigger and output to those of
        self.mod; action() is the body of the "fpadd" FSM state, which
        strobes an operand pair into self.mod and moves on to "put_z"
        once z_done is raised.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input specification: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output specification: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links this state to its input and to the wrapped module

            * m: the Module being built
            * i: input data, combinatorially copied into self.i and on
                 into self.mod.i
            * add_stb: incoming strobe, registered (sync) into self.add_stb
            * in_mid: not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action for the "fpadd" state

            while z_done is low: an accepted operand pair raises add_ack
            and in_t.stb; otherwise both are lowered and o.z.ack is
            raised.  once z_done is high: add_ack is raised, in_t.stb is
            lowered and the FSM moves to "put_z".
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1242
1243
class FPADDBasePipe(ControlBase):
    """ ControlBase pipeline made of three connected stages

        pipe1 (FPAddSpecialCasesDeNorm), pipe2 (FPAddAlignSingleAdd) and
        pipe3 (FPNormToPack) are joined, in that order, by self.connect;
        the assignments it returns are applied in elaborate().
    """
    def __init__(self, width, id_wid):
        super().__init__()
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages as named submodules and applies
            the stage-to-stage connections combinatorially
        """
        m = Module()
        named_stages = (
            ("scnorm", self.pipe1),
            ("addalign", self.pipe2),
            ("normpack", self.pipe3),
        )
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1260
1261
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe with num_rows inputs (p_len), using a
        PassThroughStage whose data spec is FPADDBaseData
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # spec factory handed to the pass-through stage
            return FPADDBaseData(width, id_wid)

        passthru = PassThroughStage(iospec)
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=num_rows)
1268
1269
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe with num_rows outputs (n_len), using a
        PassThroughStage whose data spec is FPPackData
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            # spec factory handed to the pass-through stage
            return FPPackData(width, id_wid)

        passthru = PassThroughStage(iospec)
        CombMuxOutPipe.__init__(self, passthru, n_len=num_rows)
1276
1277
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        fan_in = FPADDInMuxPipe(width, id_wid, num_rows)
        adder = FPADDBasePipe(width, id_wid)
        fan_out = FPADDMuxOutPipe(width, id_wid, num_rows)
        self.inpipe = fan_in     # fan-in
        self.fpadd = adder       # add stage
        self.outpipe = fan_out   # fan-out

        # this class's own in/out are those of the two mux pipes
        self.p = fan_in.p
        self.n = fan_out.n
        self._ports = fan_in.ports() + fan_out.ports()

    def elaborate(self, platform):
        """ registers the three pipes and connects them in sequence:
            fan-in -> adder -> fan-out
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        in_to_add = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += in_to_add
        add_to_out = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += add_to_out

        return m

    def ports(self):
        """ returns the port list gathered at construction time (fan-in
            ports followed by fan-out ports)
        """
        return self._ports
1310
1311
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs in self.rs, and
                     of result operands in self.res
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        operand_pairs = []
        for idx in range(rs_sz):
            first, second = FPOp(width), FPOp(width)
            first.name = "in_a_%d" % idx
            second.name = "in_b_%d" % idx
            operand_pairs.append((first, second))
        self.rs = Array(operand_pairs)

        # results, named out_z_<n>
        results = []
        for idx in range(rs_sz):
            z = FPOp(width)
            z.name = "out_z_%d" % idx
            results.append(z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to self.states and returns it
        """
        registered = self.states
        registered.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd

            only the first operand pair (self.rs[0]) is wired up: states
            "get_a" -> "get_b" -> "fpadd" -> "put_z" are registered in
            that order and then emitted as an FSM.
        """
        m = Module()
        m.submodules += self.rs

        slot0 = self.rs[0]
        in_a = slot0[0]
        in_b = slot0[1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a),
                     abd.b.eq(b),
                     abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        result = ab.o

        self.add_state(FPPutZIdx("put_z", result.z, self.res,
                                 result.mid, "get_a"))

        with m.FSM():
            for st in self.states:
                with m.State(st.state_from):
                    st.action(m)

        return m
1401
1402
if __name__ == "__main__":
    # the literal below selects which design is handed to main()
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        fpadd_ports = (alu.rs[0][0].ports() +
                       alu.rs[0][1].ports() +
                       alu.res[0].ports() +
                       [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=fpadd_ports)
    else:
        # NOTE(review): FPADDBase.__init__ in this file assigns none of
        # in_a / in_b / out_z / in_mid / out_mid - unless FPState
        # provides them this branch looks stale; confirm before enabling
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        base_ports = ([alu.in_a, alu.in_b] +
                      alu.in_t.ports() +
                      alu.out_z.ports() +
                      [alu.in_mid, alu.out_mid])
        main(alu, ports=base_ports)
1416
1417
1418 # works... but don't use, just do "python fname.py convert -t v"
1419 #print (verilog.convert(alu, ports=[
1420 # ports=alu.in_a.ports() + \
1421 # alu.in_b.ports() + \
1422 # alu.out_z.ports())