Solve sync/comb issue when using a state machine or pipeline in the DeNorm block
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 #from fpbase import FPNumShiftMultiRight
18
19
class FPState(FPBase):
    """ common base for the named states defined in this file

        state_from is stored unchanged; the subclasses in this file pass
        a state name string (e.g. "align", "add_0").
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ keeps the inputs mapping and also exposes every entry of it
            as an attribute of the same name on this object
        """
        self.inputs = inputs
        for entry in inputs.items():
            # entry is a (name, value) pair
            setattr(self, *entry)

    def set_outputs(self, outputs):
        """ keeps the outputs mapping and also exposes every entry of it
            as an attribute of the same name on this object
        """
        self.outputs = outputs
        for entry in outputs.items():
            setattr(self, *entry)
33
34
class FPGetOpMod:
    """ combinatorial part of fetching one operand

        out_decode is high while both ack and stb of in_op are high;
        while it is high out_op carries in_op.v.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        op = self.in_op
        m.submodules.get_op_in = op
        # (out_op is a plain Signal, so it is not added as a submodule)
        handshake = (op.ack) & (op.stb)
        m.d.comb += self.out_decode.eq(handshake)
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(op.v)
        return m
51
52
class FPGetOp(FPState):
    """ state that fetches a single operand

        in_state is the name of this state, out_state the state moved to
        once the operand has been captured.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the combinatorial module (under this state's name)
            and wires it to in_op and to out_decode
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ on decode: latch the operand, drop ack and advance;
            otherwise keep ack raised
        """
        with m.If(self.out_decode):
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
            m.next = self.out_state
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
81
82
class FPNumBase2Ops:
    """ data bundle: two FPNumBase operands (a, b) plus an id (mid)
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying a, b and mid (in that order)
            from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("a", "b", "mid")]

    def ports(self):
        """ returns [a, b, mid]
        """
        return [getattr(self, field) for field in ("a", "b", "mid")]
95
96
class FPADDBaseData:
    """ data bundle: two raw operands (a, b), each a Signal of the given
        width, plus an id (mid) of id_wid bits
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying a, b and mid (in that order)
            from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("a", "b", "mid")]

    def ports(self):
        """ returns [a, b, mid]
        """
        return [getattr(self, field) for field in ("a", "b", "mid")]
111
112
class FPGet2OpMod(Trigger):
    """ combinatorial part of fetching both operands: while the
        Trigger's trigger signal is high, the output bundle follows the
        input bundle
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        # width/id_wid must be set before ispec()/ospec() are called
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the argument is unused, the output bundle is
            returned
        """
        return self.o

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
137
138
class FPGet2Op(FPState):
    """ state that fetches both operands (and the id) in one go
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ registers the combinatorial module and wires data, strobe,
            ack and trigger
        """
        mod = self.mod
        m.submodules.get_ops = mod
        m.d.comb += mod.i.eq(i)
        m.d.comb += [
            mod.stb.eq(in_stb),
            self.out_ack.eq(mod.ack),
            self.out_decode.eq(mod.trigger),
            in_ack.eq(mod.ack),
        ]

    def action(self, m):
        """ on trigger: latch the operands, drop ack and advance;
            otherwise keep ack raised
        """
        with m.If(self.out_decode):
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
            m.next = self.out_state
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
171
172
class FPSCData:
    """ data bundle produced by the special-cases stage: operands a and
        b, result number z, raw result oz, the out_do_z flag and the id
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i, in the
            order z, out_do_z, oz, a, b, mid
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in fields]
186
187
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        when a special case applies, out_do_z is set and the result is
        placed in z (and oz); otherwise the decoded operands are passed
        on in a and b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPADDBaseData (raw operands and id)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as "specialcases" and connects its
            input to i
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the argument is unused, the output bundle is
            returned
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        o = self.o

        m.submodules.sc_out_z = o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += a1.decode(self.i.a)
        m.d.comb += b1.decode(self.i.b)

        # signs differ / mantissas identical
        s_nomatch = Signal()
        m_match = Signal()
        m.d.comb += [
            s_nomatch.eq(a1.s != b1.s),
            m_match.eq(a1.m == b1.m),
        ]

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [o.out_do_z.eq(1), o.z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # a is inf: result is inf with a's sign ...
        with m.Elif(a1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(a1.s)]
            # ... unless b has exp_128 set and the signs differ: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += o.z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(b1.s)]

        # both zero: sign is the AND of both signs, rest taken from b
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [
                o.out_do_z.eq(1),
                o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1]),
            ]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [
                o.out_do_z.eq(1),
                o.z.create(b1.s, b1.e, b1.m[3:-1]),
            ]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [
                o.out_do_z.eq(1),
                o.z.create(a1.s, a1.e, a1.m[3:-1]),
            ]

        # a == -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [o.out_do_z.eq(1), o.z.zero(0)]

        # no special case: hand the decoded operands to the next stage
        # (denormalised number checks)
        with m.Else():
            m.d.comb += o.out_do_z.eq(0)
            m.d.comb += o.a.eq(a1)
            m.d.comb += o.b.eq(b1)

        # raw result and id are always passed on
        m.d.comb += [
            o.oz.eq(o.z.v),
            o.mid.eq(self.i.mid),
        ]

        return m
306
307
class FPID:
    """ optional id ("mid") that travels alongside an operation

        in_mid/out_mid are only created when id_wid is truthy; for an
        id_wid of None or 0 both are None and idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ adds a sync assignment out_mid <= in_mid to module m

            uses the same truthiness test as __init__: the previous
            "is not None" test let id_wid == 0 through, where in_mid and
            out_mid are None.
        """
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
321
322
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        state-machine wrapper around FPAddSpecialCasesMod: registers the
        module's result and picks the next state from out_do_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod takes (width, id_wid)
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPAddSpecialCasesMod.setup takes (m, i); its out_do_z is a
        # field of its output bundle, so it is connected here
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid)

    def action(self, m):
        """ special case found: go straight to "put_z", otherwise carry
            on with "denormalise"
        """
        # NOTE(review): idsync() is not defined by FPState in this file;
        # it is only called when a base class provides it.  The id is
        # already forwarded through out_z.mid in setup().
        idsync = getattr(self, "idsync", None)
        if idsync is not None:
            idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
348
349
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        combines FPAddSpecialCasesMod and FPAddDeNormMod; usable both as
        the "special_cases" state and as a pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # the two modules are created first: ispec()/ospec() below
        # delegate to them
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        UnbufferedPipeline.__init__(self, self) # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        """ input format: that of the special-cases module
        """
        return self.smod.ispec()

    def ospec(self):
        """ output format: that of the denormalise module
        """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ chains special-cases into denormalise (combinatorially) and
            takes the denormalise output as this stage's output
        """
        self.smod.setup(m, i)
        self.dmod.setup(m, self.smod.o)

        # no early-out here.  a split (early-out) pipeline would also
        # need, from the special-cases module:
        #   out_z = self.smod.ospec()
        #   out_do_z = Signal(reset_less=True)
        #   m.d.comb += out_do_z.eq(self.smod.o.out_do_z)
        #   m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
        #   m.d.sync += out_z.mid.eq(self.smod.o.mid)  # (and mid)

        self.o = self.dmod.o

    def process(self, i):
        """ stage API: the argument is unused, the output bundle is
            returned
        """
        return self.o

    def action(self, m):
        """ registers the result and always moves on to "align"
            (the "put_z" early-out is disabled)
        """
        result = self.process(None)
        m.d.sync += self.out.eq(result)
        m.next = "align"
395
396
class FPAddDeNormMod(FPState):
    """ denormalised-number handling for both operands

        unless out_do_z is set, each operand is copied through; then,
        if its exp_n127 flag is set, its exponent is replaced by N126,
        otherwise the top mantissa bit is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as "denormalise" and connects its
            input to i
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # same treatment for a, then b
            for src, dst in ((self.i.a, self.o.a), (self.i.b, self.o.b)):
                m.d.comb += dst.eq(src)
                with m.If(src.exp_n127):
                    m.d.comb += dst.e.eq(src.N126) # limit exponent
                with m.Else():
                    m.d.comb += dst.m[-1].eq(1) # set top mantissa bit

        # id, early-out result and flag are always passed through
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
444
445
class FPAddDeNorm(FPState):
    """ "denormalise" state: registers the operands produced by
        FPAddDeNormMod and moves on to "align"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod takes (width, id_wid)
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's operands live in its output bundle (o.a / o.b)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
465
466
class FPAddAlignMultiMod(FPState):
    """ one alignment step

        the operand with the smaller exponent gets shift_down() applied
        (presumably a shift by one position - confirm in fpbase);
        exp_eq is raised only when both exponents are equal.  The
        caller is expected to repeat the step until exp_eq is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through untouched
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        with m.If(agtb):
            # exponent of a greater than b: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # exponent of b greater than a: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: move to next stage.
            m.d.comb += self.exp_eq.eq(1)
        return m
507
508
class FPAddAlignMulti(FPState):
    """ "align" state (multi-cycle variant): stays in this state until
        the alignment module reports equal exponents
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ registers the alignment module, feeds it in_a/in_b and
            registers its (partially) aligned outputs
        """
        mod = self.mod
        m.submodules.align = mod
        # combinatorial: inputs and the "exponents equal" flag
        m.d.comb += mod.in_a.eq(in_a)
        m.d.comb += mod.in_b.eq(in_b)
        m.d.comb += self.exp_eq.eq(mod.exp_eq)
        # clocked: the module's outputs
        m.d.sync += self.out_a.eq(mod.out_a)
        m.d.sync += self.out_b.eq(mod.out_b)

    def action(self, m):
        """ only leaves for "add_0" once the exponents are equal
        """
        with m.If(self.exp_eq):
            m.next = "add_0"
531
532
class FPNumIn2Ops:
    """ data bundle: two FPNumIn operands (a, b), result number z, raw
        result oz, the out_do_z flag and the id
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i, in the
            order z, out_do_z, oz, a, b, mid
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in fields]
546
547
class FPAddAlignSingleMod:
    """ single-cycle alignment of the two operands' exponents
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPNumIn2Ops
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the argument is unused, the output bundle is
            returned
        """
        return self.o

    def setup(self, m, i):
        """ registers this module as "align" and connects its input to i
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        ina, inb = self.i.a, self.i.b
        outa, outb = self.o.a, self.o.b

        m.submodules.align_in_a = ina
        m.submodules.align_in_b = inb
        m.submodules.align_out_a = outa
        m.submodules.align_out_b = outb

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(ina.e), True)
        msr = MultiShiftRMerge(ina.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences (both directions) and comparisons
        m.d.comb += [
            ediff.eq(ina.e - inb.e),
            ediffr.eq(inb.e - ina.e),
            elz.eq(ina.e < inb.e),
            egz.eq(ina.e > inb.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += outa.eq(ina)
        m.d.comb += outb.eq(inb)

        # only one shifter (muxed); nothing is shifted when the result
        # has already been decided (out_do_z)
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += t_inp.eq(inb)
                m.d.comb += tdiff.eq(ediff)
                m.d.comb += outb.eq(t_out)
                m.d.comb += outb.s.eq(inb.s) # sign comes from the input
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += t_inp.eq(ina)
                m.d.comb += tdiff.eq(ediffr)
                m.d.comb += outa.eq(t_out)
                m.d.comb += outa.s.eq(ina.s) # sign comes from the input

        # id, early-out result and flag are always passed through
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
641
642
class FPAddAlignSingle(FPState):
    """ "align" state (single-cycle variant): registers the operands
        aligned by FPAddAlignSingleMod and moves on to "add_0"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's aligned operands live in its output bundle
        # (o.a / o.b).  NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
662
663
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ align, add-stage-0 and add-stage-1 chained together; usable
        both as the "align" state and as a pipeline stage
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        # width/id_wid are needed by ispec()/ospec()
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPAddStage1Data (the AddStage1 ospec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ builds the three modules, chains them with StageChain and
            takes the last module's output as this stage's output
        """
        args = (self.width, self.id_wid)
        align = FPAddAlignSingleMod(*args)
        add0 = FPAddStage0Mod(*args)
        add1 = FPAddStage1Mod(*args)

        StageChain([align, add0, add1]).setup(m, i)

        self.o = add1.o

    def process(self, i):
        """ stage API: the argument is unused, the output bundle is
            returned
        """
        return self.o

    def action(self, m):
        """ registers the result and moves on to "normalise_1"
        """
        result = self.process(None)
        m.d.sync += self.a1o.eq(result)
        m.next = "normalise_1"
699
700
class FPAddStage0Data:
    """ data bundle produced by add stage 0: result number z, the
        mantissa total tot (z.m_width + 4 bits), raw result oz, the
        out_do_z flag and the id
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i, in the
            order z, out_do_z, oz, tot, mid
        """
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in fields]
713
714
class FPAddStage0Mod:
    """ first stage of add: adds the mantissas when the signs match,
        otherwise subtracts the smaller mantissa from the larger one
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPAddStage0Data
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the argument is unused, the output bundle is
            returned
        """
        return self.o

    def setup(self, m, i):
        """ registers this module as "add0" and connects its input to i
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        ina, inb = self.i.a, self.i.b
        m.submodules.add0_in_a = ina
        m.submodules.add0_in_b = inb
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(ina.m)+1, reset_less=True)
        bm0 = Signal(len(inb.m)+1, reset_less=True)
        m.d.comb += seq.eq(ina.s == inb.s)      # signs equal
        m.d.comb += mge.eq(ina.m >= inb.m)      # a mantissa >= b mantissa
        m.d.comb += am0.eq(Cat(ina.m, 0))
        m.d.comb += bm0.eq(Cat(inb.m, 0))

        # nothing is computed when the result is already decided
        with m.If(~self.i.out_do_z):
            m.d.comb += self.o.z.e.eq(ina.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += self.o.tot.eq(am0 + bm0)
                m.d.comb += self.o.z.s.eq(ina.s)
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += self.o.tot.eq(am0 - bm0)
                m.d.comb += self.o.z.s.eq(ina.s)
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += self.o.tot.eq(bm0 - am0)
                m.d.comb += self.o.z.s.eq(inb.s)

        # early-out result, flag and id are always passed through
        m.d.comb += [
            self.o.oz.eq(self.i.oz),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.mid.eq(self.i.mid),
        ]
        return m
779
780
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        state-machine wrapper: registers the output of FPAddStage0Mod
        and moves on to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod takes (width, id_wid)
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
802
803
class FPAddStage1Data:
    """ data bundle produced by add stage 1: result number z, the
        Overflow record of, raw result oz, the out_do_z flag and the id
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i, in the
            order z, out_do_z, oz, of, mid
        """
        fields = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in fields]
816
817
818
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot acts as a
        carry bit) and splits tot into mantissa and
        guard/round/sticky bits
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPAddStage0Data
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the argument is unused, the output bundle is
            returned
        """
        return self.o

    def setup(self, m, i):
        """ registers this module as "add1" (and its overflow record as
            "add1_out_overflow") and connects its input to i
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        zout, ofl = self.o.z, self.o.of

        # default: z passed through; mantissa (and possibly exponent)
        # overridden below
        m.d.comb += zout.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows. shift result down
            with m.If(tot[-1]):
                m.d.comb += zout.m.eq(tot[4:])
                m.d.comb += ofl.m0.eq(tot[4])
                m.d.comb += ofl.guard.eq(tot[3])
                m.d.comb += ofl.round_bit.eq(tot[2])
                m.d.comb += ofl.sticky.eq(tot[1] | tot[0])
                m.d.comb += zout.e.eq(self.i.z.e + 1)
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += zout.m.eq(tot[3:])
                m.d.comb += ofl.m0.eq(tot[3])
                m.d.comb += ofl.guard.eq(tot[2])
                m.d.comb += ofl.round_bit.eq(tot[1])
                m.d.comb += ofl.sticky.eq(tot[0])

        # flag, early-out result and id are always passed through
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
876
877
class FPAddStage1(FPState):
    """ "add_1" state: registers the number and overflow record
        produced by FPAddStage1Mod and moves on to "normalise_1"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod takes (width, id_wid)
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module's results live in its output bundle (o.of / o.z)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): when both norm_stb assignments end up in the
        # same scope, this later one takes priority over the eq(0) above
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
900
901
class FPNormaliseModSingle:
    """ single-cycle normalisation (exponent decrease only): when the
        mantissa's MSB is zero, the mantissa is shifted up by the number
        of leading zeros and the exponent reduced by the same amount
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow records: elaborate() reads in_of and drives out_of
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input format: FPNumBase
        """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output format: FPNumBase
        """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input number is held in in_z (this class has no "i")
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): the shifter is instantiated but never connected
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
968
969
class FPNorm1Data:
    """ data bundle produced by normalisation: result number z, the
        roundz flag, raw result oz, the out_do_z flag and the id
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i, in the
            order z, out_do_z, oz, roundz, mid
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in fields]
982
983
class FPNorm1ModSingle:
    """ single-cycle normalisation

        decrease: mantissa MSB is zero and the exponent may still be
        lowered -> shift the mantissa up (limited by exp_sub_n126) and
        lower the exponent.
        increase: exponent is below the minimum (exp_lt_n126) -> shift
        the mantissa down with the multi-shifter and raise the exponent.
        Nothing is done when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the argument is unused, the output bundle is
            returned
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input bundle
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the shifter is mwid wide, but the value fed to
        # it in the "increase" branch is mwid+1 bits - confirm against
        # MultiShiftRMerge that no mantissa bit is lost
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come from the shifter output:
                    # temp_s is only driven in the "decrease" branch and
                    # reads as zero here
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1090
1091
class FPNorm1ModMulti:
    """ Normalisation module that moves the mantissa one bit per pass.

        The number/overflow pair to work on is chosen by in_select:
        in_z/in_of when set, temp_z/temp_of otherwise.  The result is
        placed on out_z/out_of, and out_norm is driven with
        (decrease | increase).
    """

    def __init__(self, width, single_cycle=True):
        """ creates the in/temp/out numbers and overflow records

            * width: bit-width handed to every FPNumBase created here
            * single_cycle: not used by this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        # out_norm is assigned in elaborate(): it has to exist beforehand,
        # otherwise elaboration stops with an AttributeError.
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ builds and returns the combinatorial normalisation logic
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies of whichever input pair has been selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                # must follow the shift above: it overrides bit 0 only
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1157
1158
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModSingle; next state "round"
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width passed to the module and to FPNumBase
            * id_wid: identifier width, passed through to the module
            * single_cycle: not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # FPNormToPack constructs FPNorm1ModSingle as (width, id_wid);
        # pass id_wid here as well instead of silently dropping it.
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: unconditionally move on to the "round" state
        """
        m.next = "round"
1181
1182
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1", multi-pass variant using FPNorm1ModMulti.

        Stays in this state while out_norm is set, and moves on to
        "round" once it clears.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to the module and to FPNumBase
            * id_wid: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti as defined in this file has no
        # setup() method - confirm where this call is meant to resolve.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: latch the intermediate result, loop or finish
        """
        # accept on strobe HIGH while ack is still LOW
        m.d.comb += self.in_accept.eq(~self.ack & self.stb)
        # carry this pass's result over to the next one
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            # another pass needed: ack follows in_accept
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1223
1224
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalise, round, correct and pack, chained as one stage.

        Usable both as the FSM state "normalise_1" and as an
        UnbufferedPipeline whose stage is this object itself.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to every chained module
            * id_wid: identifier width passed to every chained module
        """
        FPState.__init__(self, "normalise_1")
        # width/id_wid are assigned before the pipeline initialiser runs
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        """ input spec (Norm1ModSingle ispec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output spec (FPPackMod ospec)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        # the pack module is last in the chain and supplies the output
        pmod = stages[-1]
        self.out_z = pmod.ospec()
        self.o = pmod.o

    def process(self, i):
        """ returns the chain output; the argument is ignored
        """
        return self.o

    def action(self, m):
        """ FSM action: register the output, then go to "pack_put_z"
        """
        result = self.process(None)
        m.d.sync += self.out_z.eq(result)
        m.next = "pack_put_z"
1260
1261
class FPRoundData:
    """ Data record with fields z, out_do_z, oz and mid
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z (an FPNumBase) and of oz
            * id_wid: bit-width of mid
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying each field from i
        """
        field_names = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name))
                for name in field_names]
1273
1274
class FPRoundMod:
    """ Rounding module: conditionally adds one to the mantissa
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: passed on to the input and output specs
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: a new FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output spec: a new FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the module output; the argument is ignored
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds and returns the combinatorial rounding logic
        """
        m = Module()
        inp = self.i
        out = self.out_z

        # default: pass the input through (copies mid, z, out_do_z)
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.roundz):
                # mantissa up
                m.d.comb += out.z.m.eq(inp.z.m + 1)
                # mantissa was all 1s: exponent up as well
                with m.If(inp.z.m == inp.z.m1s):
                    m.d.comb += out.z.e.eq(inp.z.e + 1)

        return m
1306
1307
class FPRound(FPState):
    """ FSM state "round" wrapping FPRoundMod; next state "corrections"
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: passed to FPRoundMod
        """
        FPState.__init__(self, "round")
        # FPRoundMod.__init__ requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined by FPState in this file;
        # presumably it is inherited from FPBase - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally move on to "corrections"
        """
        m.next = "corrections"
1332
1333
class FPCorrectionsMod:
    """ Corrections module: replaces the exponent of a denormalised z
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: passed on to the input and output specs
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: a new FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output spec: a new FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the module output; the argument is ignored
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds and returns the combinatorial corrections logic
        """
        m = Module()
        inp = self.i
        out = self.out_z
        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = out.z

        # default: pass the input through (copies mid, z, out_do_z)
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                # denormalised: exponent is replaced with N127
                m.d.comb += out.z.e.eq(inp.z.N127)
        return m
1366
1367
class FPCorrections(FPState):
    """ FSM state "corrections" wrapping FPCorrectionsMod; next state "pack"
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: passed to FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally move on to "pack"
        """
        m.next = "pack"
1391
1392
class FPPackData:
    """ Data record holding a packed result z and its identifier mid
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z
            * id_wid: bit-width of mid
        """
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns [z, mid]
        """
        port_list = [self.z]
        port_list.append(self.mid)
        return port_list
1404
1405
class FPPackMod:
    """ Pack module: turns an FPRoundData input into an FPPackData output
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: passed on to the input and output specs
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input spec: a new FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output spec: a new FPPackData
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the module output; the argument is ignored
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ builds and returns the combinatorial packing logic
        """
        m = Module()
        inp = self.i
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = inp.z
        m.submodules.pack_out_z = z

        m.d.comb += self.o.mid.eq(inp.mid)
        with m.If(~inp.out_do_z):
            # result comes from z: infinity on overflow, else s/e/m
            with m.If(inp.z.is_overflowed):
                m.d.comb += z.inf(inp.z.s)
            with m.Else():
                m.d.comb += z.create(inp.z.s, inp.z.e, inp.z.m)
        with m.Else():
            # out_do_z set: use the value supplied in oz
            m.d.comb += z.v.eq(inp.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1444
1445
class FPPack(FPState):
    """ FSM state "pack" wrapping FPPackMod; next state "pack_put_z"
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: passed to FPPackMod
        """
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # out_z and mod.o are both FPPackData: fields are z and mid
        # (there is no "v" field, and FPPackMod has no "out_z")
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: unconditionally move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1469
1470
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z using stb/ack
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value assigned to out_z.z.v
            * out_z: object whose .z carries v, stb and ack
            * in_mid, out_mid: identifier source and destination
              (in_mid may be None, in which case neither is used)
            * to_state: state entered afterwards (default "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: raise stb until stb and ack are both set
        """
        dest = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += [dest.v.eq(self.in_z)]
        with m.If(dest.stb & dest.ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1494
1495
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on out_zs[in_mid] using stb/ack
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its .v is assigned to the selected output
            * out_zs: indexable collection of outputs (v, stb, ack)
            * in_mid: index into out_zs
            * to_state: state entered afterwards (default "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: raise stb until stb and ack are both set
        """
        # the output selected by in_mid
        dest = self.out_zs[self.in_mid]

        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [
            outz_stb.eq(dest.stb),
            outz_ack.eq(dest.ack),
        ]
        m.d.sync += [dest.v.eq(self.in_z.v)]
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1521
class FPOpData:
    """ Data record holding an FPOp z and its identifier mid
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to FPOp
            * id_wid: bit-width of mid
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns [z, mid]
        """
        port_list = [self.z]
        port_list.append(self.mid)
        return port_list
1532
1533
class FPADDBaseMod:
    """ FSM-based FP adder: a list of states, each run in its own m.State
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input spec: a new FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output spec: a new FPOpData
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with either the short or the long chain
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        # one FSM state per registered stage, keyed on its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the full (one state per step) chain of stages
        """
        # NOTE(review): this path reads self.in_mid, self.out_z and
        # self.out_mid, none of which are assigned in __init__ - confirm
        # whether it is still expected to work.
        w, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", w))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(w, idw))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(w, idw))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: both variants are set up identically
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(w, idw))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(w, idw))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(w, idw))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-pass variant also takes the strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(w, idw))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(w, idw))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(w, idw))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(w, idw))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(w, idw))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one after pack, one for special cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced chain of stages
        """
        w, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", w, idw))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(w, idw))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(w, idw))
        alm.setup(m, sc.out)

        n1 = self.add_state(FPNormToPack(w, idw))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1653
1654
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps FPADDBaseMod behind stb/ack handshakes
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * i: input data, copied into self.i and on into the module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: start the add, then wait for z_done

            The unreachable statements that used to follow an
            unconditional return here have been removed; they referred
            to attributes this class never defines.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1744
1745
class FPADDBasePipe(ControlBase):
    """ Three pipeline stages connected in sequence:
        special-cases/denorm, align/add, normalise-to-pack
    """

    def __init__(self, width, id_wid):
        """ * width, id_wid: passed to each of the three stages
        """
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection assignments, added to comb in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the stages as submodules and wires them together
        """
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1762
1763
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in pipe with num_rows inputs, each an FPADDBaseData
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to FPADDBaseData
            * num_rows: number of inputs (p_len)
        """
        self.num_rows = num_rows
        # the same spec factory serves as both input and output spec
        stage = PassThroughStage(lambda: FPADDBaseData(width, id_wid))
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)
1770
1771
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out pipe with num_rows outputs, each an FPPackData
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to FPPackData
            * num_rows: number of outputs (n_len)
        """
        self.num_rows = num_rows
        # the same spec factory serves as both input and output spec
        stage = PassThroughStage(lambda: FPPackData(width, id_wid))
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
1778
1779
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to all three pipes
            * num_rows: number of fan-in inputs and fan-out outputs
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # use pipe in/out as this class in/out
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes and connects them in sequence
        """
        m = Module()
        for name in ("inpipe", "fpadd", "outpipe"):
            setattr(m.submodules, name, getattr(self, name))

        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list assembled in __init__
        """
        return self._ports
1812
1813
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) input pairs and of outputs
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # input operand pairs, named in_a_<n> / in_b_<n>
        operand_pairs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            operand_pairs.append((in_a, in_b))
        self.rs = Array(operand_pairs)

        # result outputs, named out_z_<n>
        results = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired to the get_a/get_b states
        first_pair = self.rs[0]
        in_a = first_pair[0]
        in_b = first_pair[1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # output state: returns to "get_a" once the result is taken
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        # one FSM state per registered stage, keyed on its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1903
1904
if __name__ == "__main__":
    # developer toggle: True builds FPADD, False builds FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = (alu.rs[0][0].ports()
                     + alu.rs[0][1].ports()
                     + alu.res[0].ports()
                     + [alu.ids.in_mid, alu.ids.out_mid])
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = ([alu.in_a, alu.in_b]
                     + alu.in_t.ports()
                     + alu.out_z.ports()
                     + [alu.in_mid, alu.out_mid])
    main(alu, ports=port_list)
1918
1919
1920 # works... but don't use, just do "python fname.py convert -t v"
1921 #print (verilog.convert(alu, ports=[
1922 # ports=alu.in_a.ports() + \
1923 # alu.in_b.ports() + \
1924 # alu.out_z.ports())