use StageChain for SCDeNorm
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 #from fpbase import FPNumShiftMultiRight
18
19
class FPState(FPBase):
    """Base class for one named state of the adder state machine.

    ``state_from`` is the name of the state this object represents
    (subclasses in this file pass e.g. "align" or "add_0").
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """Store the ``inputs`` mapping and mirror each entry as an attribute."""
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """Store the ``outputs`` mapping and mirror each entry as an attribute."""
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
33
34
class FPGetOpMod:
    """Combinatorial operand capture.

    ``out_decode`` is high when both ``in_op.ack`` and ``in_op.stb`` are
    high; while it is high ``out_op`` is driven from ``in_op.v``.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op

        # the operand is valid only when strobe and acknowledge coincide
        handshake = self.in_op.ack & self.in_op.stb
        m.d.comb += self.out_decode.eq(handshake)

        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
51
52
class FPGetOp(FPState):
    """State that fetches a single operand through an ``FPGetOpMod``."""

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """Register the helper module under this state's name and wire it up."""
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """Latch the operand and advance once decoded; otherwise raise ack."""
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.in_op.ack.eq(0),
                self.out_op.eq(self.mod.out_op),
            ]
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
81
82
class FPNumBase2Ops:
    """Record holding two ``FPNumBase`` operands (``a``, ``b``) and ``mid``."""

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying ``i``'s fields into this record."""
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("a", "b", "mid")]

    def ports(self):
        """Return the record's fields as a list: a, b, mid."""
        return [self.a, self.b, self.mid]
95
96
class FPADDBaseData:
    """Raw adder input record: two ``width``-bit operands plus ``mid``."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying ``i``'s fields into this record."""
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("a", "b", "mid")]

    def ports(self):
        """Return the record's signals as a list: a, b, mid."""
        return [self.a, self.b, self.mid]
111
112
class FPGet2OpMod(Trigger):
    """Passes the input record to the output while ``trigger`` is high."""

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input record."""
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output record (same layout as the input)."""
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's output record."""
        return self.o

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
137
138
class FPGet2Op(FPState):
    """State that captures both operands through an ``FPGet2OpMod``."""

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """Add the helper module and connect data, strobe and ack."""
        mod = self.mod
        m.submodules.get_ops = mod
        m.d.comb += [
            mod.i.eq(i),
            mod.stb.eq(in_stb),
            self.out_ack.eq(mod.ack),
            self.out_decode.eq(mod.trigger),
            in_ack.eq(mod.ack),
        ]

    def action(self, m):
        """Latch the operands and advance once triggered; otherwise raise ack."""
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += [
                self.mod.ack.eq(0),
                self.o.eq(self.mod.o),
            ]
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
171
172
class FPSCData:
    """Record passed on by the special-cases stage.

    Holds the decoded operands ``a``/``b``, the result number ``z``, the
    ``width``-bit value ``oz``, the ``out_do_z`` flag and ``mid``.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying ``i``'s fields into this record."""
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("z", "out_do_z", "oz", "a", "b", "mid")]
186
187
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Sets ``o.out_do_z`` when the result is fully determined here;
        otherwise the decoded operands are passed on in ``o.a``/``o.b``.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input record (raw operands)."""
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output record."""
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """Stage API: the result is always this module's output record."""
        return self.o

    def elaborate(self, platform):
        m = Module()
        out = self.o

        m.submodules.sc_out_z = out.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [
            a1.decode(self.i.a),
            b1.decode(self.i.b),
        ]

        # signs differ / mantissas identical
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.nan(0),
            ]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # a is inf: result is inf with a's sign ...
        with m.Elif(a1.is_inf):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.inf(a1.s),
            ]
            # ... unless b has exp_128 set and the opposite sign: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += out.z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.inf(b1.s),
            ]

        # both zero: result sign is the AND of the two signs
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.create(a1.s & b1.s, b1.e, b1.m[3:-1]),
            ]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.create(b1.s, b1.e, b1.m[3:-1]),
            ]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.create(a1.s, a1.e, a1.m[3:-1]),
            ]

        # a equal to -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.zero(0),
            ]

        # no special case: hand the decoded operands to the next stage
        with m.Else():
            m.d.comb += [
                out.out_do_z.eq(0),
                out.a.eq(a1),
                out.b.eq(b1),
            ]

        # unconditional: expose z's value as oz and pass mid through
        m.d.comb += [
            out.oz.eq(out.z.v),
            out.mid.eq(self.i.mid),
        ]

        return m
306
307
class FPID:
    """Optional ``in_mid``/``out_mid`` signal pair.

    When ``id_wid`` is falsy (``None`` or ``0``) no signals are created:
    both attributes are ``None`` and ``idsync`` does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """Add ``out_mid <= in_mid`` to the sync domain, if the pair exists."""
        # Use the same test as __init__: with ``is not None`` an id_wid of 0
        # got here with out_mid set to None and crashed on ``None.eq``.
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
321
322
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        State wrapper around ``FPAddSpecialCasesMod``: goes to "put_z" when
        the module reports a special case, otherwise to "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup takes (m, i) only; its special-case flag is
        # read back from the module's output record instead
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # only take the output (and mid).  The module's result lives in its
        # output record ``o`` (z.v, mirrored in oz), not in ``mod.out_z``.
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.oz.eq(self.mod.o.oz)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """Pick the next state from the special-case flag."""
        # NOTE(review): idsync is not defined by any class visible in this
        # file (FPID has one, but is not a base here).  Call it only if the
        # instance provides it; mid is already latched in setup().
        idsync = getattr(self, "idsync", None)
        if idsync is not None:
            idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
348
349
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Chains the special-cases module and the denormalise module into
        one stage; the object acts as both pipeline and stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # width/id_wid are set before the pipeline constructor runs, since
        # this object is passed to it as the stage providing ispec/ospec
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        """Input record: same as the special-cases module's input."""
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """Output record: same as the denormalise module's output."""
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        smod = FPAddSpecialCasesMod(self.width, self.id_wid)
        dmod = FPAddDeNormMod(self.width, self.id_wid)

        StageChain([smod, dmod]).setup(m, i)

        # only needed for break-out (early-out)
        # self.out_do_z = smod.o.out_do_z

        # result of the chain is the last module's output
        self.o = dmod.o

    def process(self, i):
        """Stage API: the result is the chained output set up in setup()."""
        return self.o

    def action(self, m):
        """Latch the chained result and move to "align"."""
        # for break-out (early-out)
        #with m.If(self.out_do_z):
        #    m.next = "put_z"
        #with m.Else():
        m.d.sync += self.out.eq(self.process(None))
        m.next = "align"
393
394
class FPAddDeNormMod(FPState):
    """Denormalise stage: adjusts both operands unless ``out_do_z`` is set.

    For each operand: if ``exp_n127`` is set the exponent is replaced by
    ``N126``, otherwise the top mantissa bit is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input record."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output record (same layout as the input)."""
        return FPSCData(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's output record."""
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o
        m.submodules.denorm_in_a = inp.a
        m.submodules.denorm_in_b = inp.b
        m.submodules.denorm_out_a = out.a
        m.submodules.denorm_out_b = out.b

        with m.If(~inp.out_do_z):
            # XXX hmmm, don't like repeating identical code
            m.d.comb += out.a.eq(inp.a)
            with m.If(inp.a.exp_n127):
                m.d.comb += out.a.e.eq(inp.a.N126)  # limit a exponent
            with m.Else():
                m.d.comb += out.a.m[-1].eq(1)  # set top mantissa bit

            m.d.comb += out.b.eq(inp.b)
            with m.If(inp.b.exp_n127):
                m.d.comb += out.b.e.eq(inp.b.N126)  # limit b exponent
            with m.Else():
                m.d.comb += out.b.m[-1].eq(1)  # set top mantissa bit

        # everything else is passed through untouched
        m.d.comb += [
            out.mid.eq(inp.mid),
            out.z.eq(inp.z),
            out.out_do_z.eq(inp.out_do_z),
            out.oz.eq(inp.oz),
        ]

        return m
445
446
class FPAddDeNorm(FPState):
    """State wrapper around ``FPAddDeNormMod``; always proceeds to "align"."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its results in its output record ``o``
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
466
467
class FPAddAlignMultiMod(FPState):
    """One alignment step per evaluation.

    Shifts down whichever operand has the smaller exponent (via
    ``shift_down``) and raises ``exp_eq`` once both exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet aligned, operands passed straight through
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
508
509
class FPAddAlignMulti(FPState):
    """State wrapper around ``FPAddAlignMultiMod``.

    Leaves for "add_0" only once the module reports equal exponents.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        mod = self.mod
        m.submodules.align = mod
        m.d.comb += [
            mod.in_a.eq(in_a),
            mod.in_b.eq(in_b),
            self.exp_eq.eq(mod.exp_eq),
        ]
        m.d.sync += [
            self.out_a.eq(mod.out_a),
            self.out_b.eq(mod.out_b),
        ]

    def action(self, m):
        with m.If(self.exp_eq):
            m.next = "add_0"
532
533
class FPNumIn2Ops:
    """Record with two ``FPNumIn`` operands plus z, out_do_z, oz and mid."""

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying ``i``'s fields into this record."""
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("z", "out_do_z", "oz", "a", "b", "mid")]
547
548
class FPAddAlignSingleMod:
    """Single-pass exponent alignment of the two operands."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input record."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output record."""
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's output record."""
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        inp, out = self.i, self.o

        m.submodules.align_in_a = inp.a
        m.submodules.align_in_b = inp.b
        m.submodules.align_out_a = out.a
        m.submodules.align_out_b = out.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(inp.a.e), True)
        msr = MultiShiftRMerge(inp.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences in both directions, and their comparison
        m.d.comb += [
            ediff.eq(inp.a.e - inp.b.e),
            ediffr.eq(inp.b.e - inp.a.e),
            elz.eq(inp.a.e < inp.b.e),
            egz.eq(inp.a.e > inp.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [
            out.a.eq(inp.a),
            out.b.eq(inp.b),
        ]
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        with m.If(~inp.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [
                    t_inp.eq(inp.b),
                    tdiff.eq(ediff),
                    out.b.eq(t_out),
                    out.b.s.eq(inp.b.s),  # whoops forgot sign
                ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [
                    t_inp.eq(inp.a),
                    tdiff.eq(ediffr),
                    out.a.eq(t_out),
                    out.a.s.eq(inp.a.s),  # whoops forgot sign
                ]

        # everything else is passed through untouched
        m.d.comb += [
            out.mid.eq(inp.mid),
            out.z.eq(inp.z),
            out.out_do_z.eq(inp.out_do_z),
            out.oz.eq(inp.oz),
        ]

        return m
642
643
class FPAddAlignSingle(FPState):
    """State wrapper around ``FPAddAlignSingleMod``; proceeds to "add_0"."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module exposes its aligned operands in its output record ``o``
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
663
664
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """Chains align, add-stage-0 and add-stage-1 into one stage.

    The object acts as both pipeline and stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        # width/id_wid are set before the pipeline constructor runs, since
        # this object is passed to it as the stage providing ispec/ospec
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """Input record of the chain."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Output record: same as add-stage-1's output."""
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        mod = FPAddAlignSingleMod(self.width, self.id_wid)
        a0mod = FPAddStage0Mod(self.width, self.id_wid)
        a1mod = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([mod, a0mod, a1mod]).setup(m, i)

        # result of the chain is the last module's output
        self.o = a1mod.o

    def process(self, i):
        """Stage API: the result is the chained output set up in setup()."""
        return self.o

    def action(self, m):
        """Latch the chained result and move to "normalise_1"."""
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
700
701
class FPAddStage0Data:
    """Output record of add stage 0.

    ``tot`` is four bits wider than ``z``'s mantissa (``z.m_width + 4``).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying ``i``'s fields into this record."""
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("z", "out_do_z", "oz", "tot", "mid")]
714
715
class FPAddStage0Mod:
    """Add stage 0: adds or subtracts the two aligned mantissas.

    Same-sign operands are added; otherwise the smaller mantissa is
    subtracted from the larger and the larger operand's sign is used.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input record."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output record."""
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's output record."""
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o
        m.submodules.add0_in_a = inp.a
        m.submodules.add0_in_b = inp.b
        m.submodules.add0_out_z = out.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(inp.a.m)+1, reset_less=True)
        bm0 = Signal(len(inp.b.m)+1, reset_less=True)
        m.d.comb += [
            seq.eq(inp.a.s == inp.b.s),
            mge.eq(inp.a.m >= inp.b.m),
            am0.eq(Cat(inp.a.m, 0)),
            bm0.eq(Cat(inp.b.m, 0)),
        ]
        with m.If(~inp.out_do_z):
            m.d.comb += out.z.e.eq(inp.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [
                    out.tot.eq(am0 + bm0),
                    out.z.s.eq(inp.a.s),
                ]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [
                    out.tot.eq(am0 - bm0),
                    out.z.s.eq(inp.a.s),
                ]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [
                    out.tot.eq(bm0 - am0),
                    out.z.s.eq(inp.b.s),
                ]

        # everything else is passed through untouched
        m.d.comb += [
            out.oz.eq(inp.oz),
            out.out_do_z.eq(inp.out_do_z),
            out.mid.eq(inp.mid),
        ]
        return m
780
781
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        State wrapper around ``FPAddStage0Mod``; proceeds to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
803
804
class FPAddStage1Data:
    """Output record of add stage 1: z, out_do_z, oz, overflow and mid."""

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying ``i``'s fields into this record."""
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("z", "out_do_z", "oz", "of", "mid")]
817
818
819
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot acts as a carry
        bit) and splits tot into mantissa plus guard/round/sticky bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input record."""
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output record."""
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """Stage API: the result is always this module's output record."""
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o
        tot, z, of = inp.tot, out.z, out.of

        m.d.comb += z.eq(inp.z)
        with m.If(~inp.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows. shift result down
            with m.If(tot[-1]):
                m.d.comb += [
                    z.m.eq(tot[4:]),
                    of.m0.eq(tot[4]),
                    of.guard.eq(tot[3]),
                    of.round_bit.eq(tot[2]),
                    of.sticky.eq(tot[1] | tot[0]),
                    z.e.eq(inp.z.e + 1),
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    z.m.eq(tot[3:]),
                    of.m0.eq(tot[3]),
                    of.guard.eq(tot[2]),
                    of.round_bit.eq(tot[1]),
                    of.sticky.eq(tot[0]),
                ]

        # everything else is passed through untouched
        m.d.comb += [
            out.out_do_z.eq(inp.out_do_z),
            out.oz.eq(inp.oz),
            out.mid.eq(inp.mid),
        ]

        return m
877
878
class FPAddStage1(FPState):
    """State wrapper around ``FPAddStage1Mod``; proceeds to "normalise_1"."""

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # the module exposes its results in its output record ``o``
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): this follows the eq(0) above in the same domain, so
        # whether norm_stb ever reads 0 depends on where setup() is called
        # from - confirm against the caller.
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
901
902
class FPNormaliseModSingle:
    """Normalises ``in_z`` into ``out_z`` by removing leading zeros.

    When the mantissa MSB of the input is zero, the mantissa (with the
    round and guard bits appended) is shifted up by its leading-zero count
    and the exponent is decreased by the same amount.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # elaborate() reads in_of and drives out_of, so they must exist
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """Create a fresh input number."""
        return FPNumBase(self.width, False)

    def ospec(self):
        """Create a fresh output number."""
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input of this module is ``in_z`` (there is no ``self.i``)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of
        m.submodules.norm1_out_overflow = self.out_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # NOTE(review): this shifter is instantiated but nothing in this
        # module connects to it - kept so the module hierarchy is unchanged.
        espec = (len(in_z.e), True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
969
970
class FPNorm1Data:
    """Output record of normalise-1: roundz, z, out_do_z, oz and mid."""

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying ``i``'s fields into this record."""
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("z", "out_do_z", "oz", "roundz", "mid")]
983
984
class FPNorm1ModSingle:
    """Single-pass normalisation of the add result.

    Unless ``out_do_z`` is set, either shifts the mantissa up and
    decreases the exponent ("decrease") or shifts it down through the
    multi-shifter and increases the exponent ("increase").
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input record."""
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output record."""
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """Stage API: the result is always this module's output record."""
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input record
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the shifter is mwid wide while the value fed to it
        # in the "increase" branch is mwid+1 wide - confirm the width.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        with m.If(~self.i.out_do_z):
            # decrease exponent
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # The low bits must come from the shifter output.  The
                    # previous code read temp_s, which is only driven in the
                    # "decrease" branch and so is never driven here.
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1091
1092
class FPNorm1ModMulti:
    """One normalisation step per evaluation.

    ``in_select`` chooses between ``in_z``/``in_of`` and
    ``temp_z``/``temp_of`` as the source.  The source is shifted by one bit
    up or down as required, and ``out_norm`` is high while a further step
    is needed.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        # driven by elaborate(); high while decrease or increase is active
        self.out_norm = Signal(reset_less=True)
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),            # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),           # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),        # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),            # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),   # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit),
            ]

        return m
1158
1159
class FPNorm1Single(FPState):
    """ FSM state "normalise_1": normalisation performed by a single
        FPNorm1ModSingle, after which the FSM moves on to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: number bit-width, passed to the module and to out_z
            * id_wid: identifier width, passed to the module
            * single_cycle: not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # FPNormToPack constructs this module with (width, id_wid);
        # id_wid is passed here too instead of being dropped
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data format: whatever the wrapped module takes
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: whatever the wrapped module produces
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: unconditionally proceed to rounding
        """
        m.next = "round"
1182
1183
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1", iterative flavour: stays in this state
        while out_norm is set and moves to "round" once it is clear.
    """

    def __init__(self, width, id_wid):
        """ * width: number bit-width for the module, temp_z and out_z
            * id_wid: not used by this class
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs

            NOTE(review): FPNorm1ModMulti, as it appears in this file,
            has no setup() method - confirm before relying on this path.
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default: ack is zero whenever action() does not override it,
        # i.e. when not in the normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: strobe/ack handshake while normalising, then
            hand over to "round" together with the roundz flag
        """
        # accept on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # keep the intermediate result for the next iteration
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            # still normalising
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1224
1225
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalise, round, correct and pack, chained combinatorially.

        Acts both as the "normalise_1" FSM state and as an
        UnbufferedPipeline that uses itself as its stage.
    """

    def __init__(self, width, id_wid):
        """ * width: number bit-width handed to every chained module
            * id_wid: identifier width handed to every chained module
        """
        FPState.__init__(self, "normalise_1")
        # width / id_wid are assigned before the pipeline constructor
        # runs, as this object is passed to it as the stage
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        """ input data format (Norm1ModSingle ispec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format (FPPackMod ospec)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        wid, idw = self.width, self.id_wid

        # Normalisation, Rounding Corrections, Pack - in a chain
        stages = [
            FPNorm1ModSingle(wid, idw),
            FPRoundMod(wid, idw),
            FPCorrectionsMod(wid, idw),
            FPPackMod(wid, idw),
        ]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()  # fresh record, written by action()
        self.o = packer.o            # combinatorial output of the chain

    def process(self, i):
        """ returns the chain output; the argument is ignored
        """
        return self.o

    def action(self, m):
        """ FSM action: latch the packed result, go to "pack_put_z"
        """
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
1261
1262
class FPRoundData:
    """ Data record: number z, bypass flag out_do_z with bypass value oz,
        and the identifier mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z and oz
            * id_wid: bit-width of mid
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z, out_do_z, oz and
            mid from i into this record (in that order)
        """
        pairs = (
            (self.z, i.z),
            (self.out_do_z, i.out_do_z),
            (self.oz, i.oz),
            (self.mid, i.mid),
        )
        return [dest.eq(src) for dest, src in pairs]
1274
1275
class FPRoundMod:
    """ Rounding stage: copies the input to the output and, unless the
        out_do_z bypass is set, increments the mantissa when roundz is
        set (also bumping the exponent if the mantissa was all 1s).
    """

    def __init__(self, width, id_wid):
        """ * width: number bit-width
            * id_wid: identifier bit-width
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; the argument is ignored
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as submodule "roundz" and connects i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the combinatorial rounding logic
        """
        m = Module()
        src = self.i
        dst = self.out_z
        m.d.comb += dst.eq(src)  # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                m.d.comb += dst.z.m.eq(src.z.m + 1)  # mantissa up
                with m.If(src.z.m == src.z.m1s):  # all 1s
                    m.d.comb += dst.z.e.eq(src.z.e + 1)  # exponent up

        return m
1307
1308
class FPRound(FPState):
    """ FSM state "round": runs FPRoundMod and latches its result into
        out_z, then moves on to "corrections".
    """

    def __init__(self, width, id_wid):
        """ * width: number bit-width
            * id_wid: identifier bit-width
        """
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined anywhere in the visible
        # part of this file - confirm that FPState/FPBase provides it
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the module's output record is "out_z" (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally proceed to corrections
        """
        m.next = "corrections"
1333
1334
class FPCorrectionsMod:
    """ Corrections stage: copies the input to the output and, unless the
        out_do_z bypass is set, replaces the exponent with N127 when the
        number is flagged as denormalised.
    """

    def __init__(self, width, id_wid):
        """ * width: number bit-width
            * id_wid: identifier bit-width
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (same record type as the input)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; the argument is ignored
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the combinatorial corrections logic
        """
        m = Module()
        src = self.i
        dst = self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        m.d.comb += dst.eq(src)  # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1367
1368
class FPCorrections(FPState):
    """ FSM state "corrections": runs FPCorrectionsMod and latches its
        result into out_z, then moves on to "pack".
    """

    def __init__(self, width, id_wid):
        """ * width: number bit-width
            * id_wid: identifier bit-width
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # the module's output record is "out_z" (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally proceed to packing
        """
        m.next = "pack"
1392
1393
class FPPackData:
    """ Data record: packed result z plus the identifier mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z
            * id_wid: bit-width of mid
        """
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z, then mid
        """
        return [self.z, self.mid]
1405
1406
class FPPackMod:
    """ Pack stage: produces the output value from the rounded number,
        or passes the bypass value oz through when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        """ * width: number bit-width
            * id_wid: identifier bit-width
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; the argument is ignored
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ builds the combinatorial packing logic
        """
        m = Module()
        z = FPNumOut(self.width, False)
        num = self.i.z
        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            # regular result: infinity on overflow, else assemble fields
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            # bypass: the value was already decided upstream
            m.d.comb += z.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1445
1446
class FPPack(FPState):
    """ FSM state "pack": runs FPPackMod and latches its result into
        out_z, then moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: number bit-width
            * id_wid: identifier bit-width
        """
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the module's output record is "o" (an FPPackData with z and
        # mid); FPPackData.eq copies both members in one go
        m.d.sync += self.out_z.eq(self.mod.o)

    def action(self, m):
        """ FSM action: unconditionally proceed to output
        """
        m.next = "pack_put_z"
1470
1471
class FPPutZ(FPState):
    """ FSM output state: presents in_z on out_z.z with a strobe and
        waits for the acknowledge before moving to the next state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value to be written out
            * out_z: destination; its "z" member supplies v, stb and ack
            * in_mid / out_mid: identifier source and destination
              (the copy is skipped when in_mid is None)
            * to_state: state entered on completion (default "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: drive value (and id), then strobe until acked
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        dest = self.out_z.z
        m.d.sync += dest.v.eq(self.in_z)
        with m.If(dest.stb & dest.ack):
            # receiver took the value: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1495
1496
class FPPutZIdx(FPState):
    """ FSM output state: like a strobed put, but the destination is
        picked out of out_zs using in_mid as the index.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its "v" member is written out
            * out_zs: indexable collection of destinations (v, stb, ack)
            * in_mid: index into out_zs
            * to_state: state entered on completion (default "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: drive the selected output, strobe until acked
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        chosen = self.out_zs[self.in_mid]
        m.d.comb += [
            outz_stb.eq(chosen.stb),
            outz_ack.eq(chosen.ack),
        ]
        m.d.sync += chosen.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # receiver took the value: drop the strobe and move on
            m.d.sync += chosen.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += chosen.stb.eq(1)
1522
1523
class FPOpData:
    """ Data record: an FPOp z plus the identifier mid.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPOp
            * id_wid: bit-width of mid
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z, then mid
        """
        return [self.z, self.mid]
1534
1535
class FPADDBaseMod:
    """ FSM-based FP adder core: collects a list of states, wires them
        together (compact or longer variant) and emits one FSM in which
        every state runs its own action().
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states and connect the stages to each other
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        # one FSM state per registered stage
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ one FSM state per processing step

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which __init__ creates - TODO confirm
            whether this variant is still meant to be usable.
        """
        wid, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(wid, idw))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(wid, idw))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: both flavours are connected identically
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(wid, idw))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(wid, idw))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(wid, idw))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle flavour also takes the strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(wid, idw))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(wid, idw))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(wid, idw))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(wid, idw))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(wid, idw))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the regular path
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output of the special-cases path
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ reduced number of FSM states: get, special-cases+denorm,
            align+add, normalise-to-pack, put
        """
        wid, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      wid, idw))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(wid, idw))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(wid, idw))
        alm.setup(m, sc.out)

        n1 = self.add_state(FPNormToPack(wid, idw))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1655
1656
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod submodule, starting it
        on an incoming strobe and moving to "put_z" once the submodule's
        output trigger fires.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * i: input data, copied into self.i and on to the submodule
            * add_stb: start strobe, registered into self.add_stb
            * in_mid: not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: start the add on an accepted strobe, otherwise
            wait for the result; once done, move to "put_z".

            The statements that used to follow an unconditional "return"
            here could never execute and have been removed.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),   # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1746
1747
class FPADDBasePipe(ControlBase):
    """ Three pipeline stages connected in sequence: special-cases and
        denormalise, align and add, normalise through to pack.
    """

    def __init__(self, width, id_wid):
        """ * width: number bit-width handed to every stage
            * id_wid: identifier bit-width handed to every stage
        """
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are kept until elaborate() adds them
        chain = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ registers the three stages and applies the connections
        """
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
1764
1765
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in: num_rows inputs of FPADDBaseData, passed through as-is.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width / id_wid: parameters of the FPADDBaseData records
            * num_rows: number of inputs (p_len of the mux pipe)
        """
        self.num_rows = num_rows

        def iospec():
            # the same record type serves as input and output spec
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
1772
1773
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out: FPPackData records distributed over num_rows outputs.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width / id_wid: parameters of the FPPackData records
            * num_rows: number of outputs (n_len of the mux pipe)
        """
        self.num_rows = num_rows

        def iospec():
            # the same record type serves as input and output spec
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
1780
1781
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width: number bit-width
            * id_wid: identifier bit-width
            * num_rows: number of fan-in inputs and fan-out outputs
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)    # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)                # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # this class's own in/out are simply those of the two mux pipes
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes and connects them back-to-back
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        # adder -> fan-out
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
1814
1815
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of results
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs, one per row
        pairs = []
        for row in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % row
            in_b.name = "in_b_%d" % row
            pairs.append((in_a, in_b))
        self.rs = Array(pairs)

        # results, one per row
        results = []
        for row in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % row
            results.append(out_z)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected here
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [
            abd.a.eq(a),
            abd.b.eq(b),
            abd.mid.eq(self.ids.in_mid),
        ]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # result goes to the entry of self.res selected by o.mid
        self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                 o.mid, "get_a"))

        # one FSM state per registered stage
        with m.FSM():
            for stage in self.states:
                with m.State(stage.state_from):
                    stage.action(m)

        return m
1905
1906
if __name__ == "__main__":
    # hand-edited toggle: True builds the full FPADD, False the bare
    # FPADDBase
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports()
                         + alu.rs[0][1].ports()
                         + alu.res[0].ports()
                         + [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b]
                         + alu.in_t.ports()
                         + alu.out_z.ports()
                         + [alu.in_mid, alu.out_mid]))
1920
1921
1922 # works... but don't use, just do "python fname.py convert -t v"
1923 #print (verilog.convert(alu, ports=[
1924 # ports=alu.in_a.ports() + \
1925 # alu.in_b.ports() + \
1926 # alu.out_z.ports())