solve sync/comb for stage/state
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 #from fpbase import FPNumShiftMultiRight
18
19
class FPState(FPBase):
    """Common base for the per-state helper objects defined in this file.

    Subclasses pass an identifier (for example ``"align"``) which is kept
    in ``state_from``.  Two helpers publish a dictionary of inputs or
    outputs as attributes of the instance.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """Remember *inputs* and expose every entry as an attribute."""
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """Remember *outputs* and expose every entry as an attribute."""
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
33
34
class FPGetOpMod:
    """Combinatorial pass-through of a single operand.

    ``out_decode`` is asserted when both ``ack`` and ``stb`` of the input
    operand are asserted; while it is asserted ``out_op`` is driven with
    the operand value ``in_op.v``.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        """Build and return the module implementing the pass-through."""
        m = Module()
        # handshake complete: both acknowledge and strobe are high
        m.d.comb += self.out_decode.eq(self.in_op.ack & self.in_op.stb)
        m.submodules.get_op_in = self.in_op
        with m.If(self.out_decode):
            m.d.comb += [self.out_op.eq(self.in_op.v)]
        return m
51
52
class FPGetOp(FPState):
    """State that fetches one operand through an ``FPGetOpMod``.

    in_state:  identifier of this state (also used as the submodule name)
    out_state: state to move to once the operand has been captured
    in_op:     operand whose ``ack`` is driven by ``action``
    width:     bit-width of the operand
    """

    def __init__(self, in_state, out_state, in_op, width):
        super().__init__(in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """Register the helper module in *m* and wire it to *in_op*."""
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """Capture the operand and advance, or keep acknowledging."""
        with m.If(self.out_decode):
            m.next = self.out_state
            # drop ack and register the operand value
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
81
82
class FPNumBase2Ops:
    """Data bundle: two ``FPNumBase`` operands (a, b) plus an id (mid)."""

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying a, b and mid from *i*."""
        copy_a = self.a.eq(i.a)
        copy_b = self.b.eq(i.b)
        copy_mid = self.mid.eq(i.mid)
        return [copy_a, copy_b, copy_mid]

    def ports(self):
        """Return the members of the bundle as a list."""
        return [self.a, self.b, self.mid]
95
96
class FPADDBaseData:
    """Data bundle: two raw operand signals (a, b) plus an id (mid)."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying a, b and mid from *i*."""
        copy_a = self.a.eq(i.a)
        copy_b = self.b.eq(i.b)
        copy_mid = self.mid.eq(i.mid)
        return [copy_a, copy_b, copy_mid]

    def ports(self):
        """Return the members of the bundle as a list."""
        return [self.a, self.b, self.mid]
111
112
class FPGet2OpMod(Trigger):
    """Trigger-gated pass-through of a pair of operands.

    While the inherited ``trigger`` signal is high the output bundle ``o``
    is driven combinatorially from the input bundle ``i``.
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input data bundle."""
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output data bundle."""
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        """Return the output bundle (*i* itself is not used)."""
        return self.o

    def elaborate(self, platform):
        """Extend the module built by ``Trigger`` with the gated copy."""
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += [self.o.eq(self.i)]
        return m
137
138
class FPGet2Op(FPState):
    """State that fetches both operands through an ``FPGet2OpMod``.

    in_state:  identifier of this state
    out_state: state to move to once the operands have been captured
    """

    def __init__(self, in_state, out_state, width, id_wid):
        super().__init__(in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """Register the helper module in *m* and wire up data/handshake.

        *i* feeds the module input, *in_stb* drives its strobe and
        *in_ack* is driven from its acknowledge.
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.i.eq(i),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        """Capture the operands and advance, or keep acknowledging."""
        with m.If(self.out_decode):
            m.next = self.out_state
            # drop ack and register the operand bundle
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
171
172
class FPSCData:
    """Data bundle produced by the special-cases stage.

    Carries the decoded operands (a, b), a result number (z), the packed
    result (oz), the "result already decided" flag (out_do_z) and the
    id (mid).
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying every member from *i*."""
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        operand_part = [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
        return result_part + operand_part
186
187
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Decodes both raw operands, then walks a priority chain of special
        cases.  When a case matches, ``o.out_do_z`` is set and ``o.z``
        holds the result; otherwise the decoded operands are passed on in
        ``o.a`` / ``o.b``.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input bundle (raw operands)."""
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output bundle."""
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """Return the output bundle (*i* itself is not used)."""
        return self.o

    def elaborate(self, platform):
        """Build and return the special-case detection logic."""
        m = Module()

        m.submodules.sc_out_z = self.o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a), b1.decode(self.i.b)]

        # operand signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # operand mantissas are identical
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [self.o.out_do_z.eq(1), self.o.z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling (under review):
        # four further branches for (+/-zero, NaN) operand pairs, each
        # returning a sign/mantissa-adjusted copy of the NaN operand, used
        # to sit here and are currently disabled.

        # a is inf: result is inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += [self.o.out_do_z.eq(1), self.o.z.inf(a1.s)]
            # b has the maximum exponent too and signs differ: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # b is inf: result is inf
        with m.Elif(b1.is_inf):
            m.d.comb += [self.o.out_do_z.eq(1), self.o.z.inf(b1.s)]

        # both zero: result is zero, negative only if both are negative
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [
                self.o.out_do_z.eq(1),
                self.o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1]),
            ]

        # a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [
                self.o.out_do_z.eq(1),
                self.o.z.create(b1.s, b1.e, b1.m[3:-1]),
            ]

        # b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [
                self.o.out_do_z.eq(1),
                self.o.z.create(a1.s, a1.e, a1.m[3:-1]),
            ]

        # a equals -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [self.o.out_do_z.eq(1), self.o.z.zero(0)]

        # no special case: denormalised-number checks come next, so
        # pass the decoded a/b through
        with m.Else():
            m.d.comb += [
                self.o.out_do_z.eq(0),
                self.o.a.eq(a1),
                self.o.b.eq(b1),
            ]

        m.d.comb += [self.o.oz.eq(self.o.z.v), self.o.mid.eq(self.i.mid)]

        return m
306
307
class FPID:
    """Optional id ("mid") carried alongside the data.

    When *id_wid* is falsy (``None`` or ``0``) no id signals are created
    and ``in_mid`` / ``out_mid`` are ``None``.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """Register ``in_mid`` into ``out_mid`` in the sync domain.

        Does nothing when no id signals exist.  The guard mirrors the one
        in ``__init__`` (truthiness of ``id_wid``): testing only for
        ``None`` would dereference the missing signals when the width is 0.
        """
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
321
322
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        State wrapper around ``FPAddSpecialCasesMod``: registers the
        module's result and branches on whether a special case decided
        the answer.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both the width and the id width
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup takes (m, i); its "result decided" flag is
        # read from its output bundle
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        """Go to "put_z" when a special case matched, else "denormalise"."""
        # NOTE(review): idsync is not defined by FPState in this file;
        # presumably inherited from FPBase -- confirm.
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
348
349
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Combines the special-cases module and the denormalise module into
        one stage; the object acts as its own pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # both helper modules are created before the pipeline base class
        # is initialised with this object as its stage
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
        self.o = self.ospec()

    def ispec(self):
        """Input bundle: that of the special-cases module."""
        return self.smod.ispec()

    def ospec(self):
        """Output bundle: that of the denormalise module."""
        return self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # An early-out (split pipeline) variant would additionally
        # register smod's z value and mid and expose its out_do_z flag;
        # that path is not built here.
        self.smod.setup(m, i)
        self.dmod.setup(m, self.smod.o)

        # out_do_z=False
        # XXX TODO: sync for state-based
        m.d.comb += self.o.eq(self.dmod.o)

    def process(self, i):
        """Return the output bundle (*i* itself is not used)."""
        return self.o

    def action(self, m):
        """Always continue with the "align" state (no early-out)."""
        m.next = "align"
395
396
class FPAddDeNormMod(FPState):
    """Denormalisation step applied to both operands.

    Unless ``i.out_do_z`` is set, each operand is copied to the output;
    an operand whose ``exp_n127`` flag is set gets its exponent replaced
    by ``N126``, any other operand gets its top mantissa bit set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input bundle."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output bundle."""
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """Build and return the denormalisation logic."""
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # identical treatment for a, then b
            for num_out, num_in in ((self.o.a, self.i.a),
                                    (self.o.b, self.i.b)):
                m.d.comb += num_out.eq(num_in)
                with m.If(num_in.exp_n127):
                    m.d.comb += num_out.e.eq(num_in.N126)  # limit exponent
                with m.Else():
                    m.d.comb += num_out.m[-1].eq(1)  # set top mantissa bit

        # everything else is passed through untouched
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
444
445
class FPAddDeNorm(FPState):
    """State wrapper around ``FPAddDeNormMod``.

    Registers the denormalised operands into ``out_a`` / ``out_b`` and
    then moves to the "align" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod requires both the width and the id width
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module publishes its operands inside its output bundle "o"
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """Unconditionally continue with the "align" state."""
        # Denormalised Number checks
        m.next = "align"
465
466
class FPAddAlignMultiMod(FPState):
    """One alignment step between operands a and b.

    Whichever operand has the smaller exponent is passed through
    ``shift_down``; ``exp_eq`` is raised once the exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        """Build and return the single-step alignment logic."""
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: not yet equal, operands passed through unchanged
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
507
508
class FPAddAlignMulti(FPState):
    """State wrapper around ``FPAddAlignMultiMod``.

    Leaves the "align" state (for "add_0") only once the module reports
    equal exponents.
    """

    def __init__(self, width, id_wid):
        super().__init__("align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.eq(in_a),
            self.mod.in_b.eq(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        # register the (possibly shifted) operands
        m.d.sync += [
            self.out_a.eq(self.mod.out_a),
            self.out_b.eq(self.mod.out_b),
        ]

    def action(self, m):
        """Advance to "add_0" when the exponents match."""
        with m.If(self.exp_eq):
            m.next = "add_0"
531
532
class FPNumIn2Ops:
    """Data bundle produced by the single-cycle align stage.

    Same members as ``FPSCData`` but with ``FPNumIn`` operands.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying every member from *i*."""
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        operand_part = [self.a.eq(i.a), self.b.eq(i.b), self.mid.eq(i.mid)]
        return result_part + operand_part
546
547
class FPAddAlignSingleMod:
    """Single-cycle exponent alignment of operands a and b."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input bundle."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output bundle."""
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """Return the output bundle (*i* itself is not used)."""
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)   # a.e - b.e
        ediffr = Signal(espec, reset_less=True)  # b.e - a.e
        tdiff = Signal(espec, reset_less=True)   # shift amount in use
        elz = Signal(reset_less=True)            # a.e < b.e
        egz = Signal(reset_less=True)            # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        m.d.comb += [
            ediff.eq(self.i.a.e - self.i.b.e),
            ediffr.eq(self.i.b.e - self.i.a.e),
            elz.eq(self.i.a.e < self.i.b.e),
            egz.eq(self.i.a.e > self.i.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [self.o.a.eq(self.i.a), self.o.b.eq(self.i.b)]

        # only one shifter (muxed)
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [
                    t_inp.eq(self.i.b),
                    tdiff.eq(ediff),
                    self.o.b.eq(t_out),
                    self.o.b.s.eq(self.i.b.s),  # sign comes from the input
                ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [
                    t_inp.eq(self.i.a),
                    tdiff.eq(ediffr),
                    self.o.a.eq(t_out),
                    self.o.a.s.eq(self.i.a.s),  # sign comes from the input
                ]

        # everything else is passed through untouched
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
641
642
class FPAddAlignSingle(FPState):
    """State wrapper around ``FPAddAlignSingleMod``.

    Registers the aligned operands into ``out_a`` / ``out_b`` and then
    moves to the "add_0" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module publishes its operands inside its output bundle "o"
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """Unconditionally continue with the "add_0" state."""
        m.next = "add_0"
662
663
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """Align, add-stage-0 and add-stage-1 chained into a single stage.

    The object acts as its own pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """Input bundle of the first module of the chain."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Output bundle of the last module of the chain (AddStage1)."""
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        align_mod = FPAddAlignSingleMod(self.width, self.id_wid)
        add0_mod = FPAddStage0Mod(self.width, self.id_wid)
        add1_mod = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([align_mod, add0_mod, add1_mod]).setup(m, i)

        # the stage output is that of the final module
        self.o = add1_mod.o

    def process(self, i):
        """Return the output bundle (*i* itself is not used)."""
        return self.o

    def action(self, m):
        """Register the chain output and continue with "normalise_1"."""
        m.d.sync += self.a1o.eq(self.process(None))
        m.next = "normalise_1"
699
700
class FPAddStage0Data:
    """Data bundle produced by the first add stage.

    ``tot`` is four bits wider than the mantissa of ``z``.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying every member from *i*."""
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        return result_part + [self.tot.eq(i.tot), self.mid.eq(i.mid)]
713
714
class FPAddStage0Mod:
    """First add stage: add or subtract the aligned mantissas.

    Same-sign operands are added; otherwise the smaller mantissa is
    subtracted from the larger and the result takes the sign of the
    operand with the larger mantissa.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input bundle."""
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output bundle."""
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """Return the output bundle (*i* itself is not used)."""
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """Build and return the mantissa add/subtract logic."""
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs equal
        mge = Signal(reset_less=True)  # a mantissa >= b mantissa
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += [
            seq.eq(self.i.a.s == self.i.b.s),
            mge.eq(self.i.a.m >= self.i.b.m),
            am0.eq(Cat(self.i.a.m, 0)),
            bm0.eq(Cat(self.i.b.m, 0)),
        ]

        with m.If(~self.i.out_do_z):
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [self.o.tot.eq(am0 + bm0),
                             self.o.z.s.eq(self.i.a.s)]
            # a mantissa greater than (or equal to) b, use a
            with m.Elif(mge):
                m.d.comb += [self.o.tot.eq(am0 - bm0),
                             self.o.z.s.eq(self.i.a.s)]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [self.o.tot.eq(bm0 - am0),
                             self.o.z.s.eq(self.i.b.s)]

        # everything else is passed through untouched
        m.d.comb += [
            self.o.oz.eq(self.i.oz),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.mid.eq(self.i.mid),
        ]
        return m
779
780
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both the width and the id width
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """Unconditionally continue with the "add_1" state."""
        m.next = "add_1"
802
803
class FPAddStage1Data:
    """Data bundle produced by the second add stage (z plus overflow)."""

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying every member from *i*."""
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        return result_part + [self.of.eq(i.of), self.mid.eq(i.mid)]
816
817
818
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot acts as a
        carry bit) and splits tot into mantissa and overflow bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input bundle."""
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output bundle."""
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """Return the output bundle (*i* itself is not used)."""
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """Build and return the carry-detect / bit-split logic."""
        m = Module()
        tot = self.i.tot
        # default: z copied from the input (m and e overridden below)
        m.d.comb += self.o.z.eq(self.i.z)
        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows: take the
            # mantissa one bit higher and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += [
                    self.o.z.m.eq(tot[4:]),
                    self.o.of.m0.eq(tot[4]),
                    self.o.of.guard.eq(tot[3]),
                    self.o.of.round_bit.eq(tot[2]),
                    self.o.of.sticky.eq(tot[1] | tot[0]),
                    self.o.z.e.eq(self.i.z.e + 1),
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    self.o.z.m.eq(tot[3:]),
                    self.o.of.m0.eq(tot[3]),
                    self.o.of.guard.eq(tot[2]),
                    self.o.of.round_bit.eq(tot[1]),
                    self.o.of.sticky.eq(tot[0]),
                ]

        # everything else is passed through untouched
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
876
877
class FPAddStage1(FPState):
    """State wrapper around ``FPAddStage1Mod``.

    Registers the module's z and overflow results into ``out_z`` /
    ``out_of`` and then moves to the "normalise_1" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both the width and the id width
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # the module publishes its results inside its output bundle "o"
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # later assignment wins over the eq(0) above wherever both apply
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        """Unconditionally continue with the "normalise_1" state."""
        m.next = "normalise_1"
900
901
class FPNormaliseModSingle:
    """Single-cycle "decrease exponent" normalisation.

    When the top mantissa bit of the input is zero, the leading zeros are
    counted, the mantissa (with round and guard bits appended) is shifted
    up by that amount and the exponent is reduced accordingly.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow bits in/out: elaborate() reads both, so they must exist
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """Create a fresh input number."""
        return FPNumBase(self.width, False)

    def ospec(self):
        """Create a fresh output number."""
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input of this module is named in_z (there is no "i")
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        """Build and return the normalisation logic."""
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): this shifter is instantiated but never connected
        # in this module; kept so the module hierarchy is unchanged.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # bit-reversed
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
968
969
class FPNorm1Data:
    """Data bundle produced by the normalisation stage (z plus roundz)."""

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """Return the assignments copying every member from *i*."""
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        return result_part + [self.roundz.eq(i.roundz), self.mid.eq(i.mid)]
982
983
class FPNorm1ModSingle:
    """Single-cycle normalisation of the add result.

    Unless ``i.out_do_z`` is set, one of two adjustments is made:

    * decrease: the top mantissa bit is zero and the exponent is above
      the minimum -- shift the mantissa up (by the leading-zero count,
      limited by ``exp_sub_n126``) and reduce the exponent;
    * increase: the exponent is below the minimum -- shift the mantissa
      down through the multi-shifter and raise the exponent to ``N126``.

    ``o.roundz`` is taken from the resulting overflow bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """Create a fresh input bundle."""
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """Create a fresh output bundle."""
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """Return the output bundle (*i* itself is not used)."""
        return self.o

    def elaborate(self, platform):
        """Build and return the normalisation logic."""
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input bundle
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        with m.If(~self.i.out_do_z):
            # decrease exponent
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),       # bit-reversed
                    clz.eq(limclz),              # count zeros from MSB down
                    temp_s.eq(temp_m << clz),    # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),  # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),   # exclude bits 0&1
                    of.m0.eq(temp_s[2]),         # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),      # guard
                    of.round_bit.eq(temp_s[0]),  # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # The overflow bits must come from the right-shifter
                    # output: temp_s is only driven in the decrease
                    # branch and would read as zero here.
                    of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),      # guard
                    of.round_bit.eq(msr.m[1]),  # round
                    of.sticky.eq(msr.m[0]),     # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # everything else is passed through untouched
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1090
1091
class FPNorm1ModMulti:
    """ One-bit-per-pass normalisation module.

        Works on either in_z/in_of (in_select HIGH) or temp_z/temp_of
        (in_select LOW).  Each pass moves the mantissa by at most one bit
        position and adjusts the exponent by one.  out_norm is raised
        whenever such an adjustment was made ("loop-end" indicator).
    """

    def __init__(self, width, single_cycle=True):
        """ creates the input, temporary and output records

            * width: bit-width handed to FPNumBase
            * single_cycle: not used by this class (kept for callers)
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        # out_norm is driven in elaborate(), so it has to exist on the
        # instance: without it elaborate() raises AttributeError.
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ builds the combinatorial select-and-shift logic
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        # local copies that hold whichever source was selected
        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                # bits falling off the end are folded into sticky
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1157
1158
class FPNorm1Single(FPState):
    """ "normalise_1" state wrapping FPNorm1ModSingle; moves on to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width handed to the module and to FPNumBase
            * id_wid: identifier width, forwarded to FPNorm1ModSingle
            * single_cycle: not used by this class (kept for callers)
        """
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle is built with (width, id_wid) elsewhere in this
        # file (see FPNormToPack.setup): id_wid must be forwarded here too.
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: unconditionally proceed to the "round" state
        """
        m.next = "round"
1181
1182
class FPNorm1Multi(FPState):
    """ Multi-cycle "normalise_1" state built around FPNorm1ModMulti.

        Stays in this state while out_norm is set, feeding the module's
        output back in through temp_z/temp_of, then moves to "round".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the module and to FPNumBase
            * id_wid: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        # handshake signals
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        # feedback registers and results
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs

            NOTE(review): this relies on self.mod.setup(); the
            FPNorm1ModMulti class visible in this file defines no setup()
            method - confirm where it is expected to come from.
        """
        self.mod.setup(
            m, in_z, in_of, norm_stb,
            self.in_accept, self.temp_z, self.temp_of,
            self.out_z, self.out_norm,
        )

        m.d.comb += self.stb.eq(norm_stb)
        # default: ack is zero whenever the normalise_1 state is not active
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: loop while normalising, otherwise go to "round"
        """
        # accept when the strobe is HIGH and ack is still LOW
        m.d.comb += self.in_accept.eq(self.stb & ~self.ack)
        # latch this pass's results for the next pass
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1223
1224
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalise, round, correct and pack, chained into a single stage.

        Registered under the state name "normalise_1" and also initialised
        as an UnbufferedPipeline that uses this same object as its stage.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to every chained module
            * id_wid: identifier width handed to every chained module
        """
        FPState.__init__(self, "normalise_1")
        # stored before the pipeline base is initialised: ispec()/ospec()
        # read both values
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        """ input spec: same as Norm1ModSingle ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output spec: same as FPPackMod ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stages = [mod_cls(self.width, self.id_wid)
                  for mod_cls in (FPNorm1ModSingle, FPRoundMod,
                                  FPCorrectionsMod, FPPackMod)]
        StageChain(stages).setup(m, i)
        packer = stages[-1]
        self.out_z = packer.ospec()

        # XXX TODO: sync for state-based
        m.d.comb += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z),  # outputs packed result
        ]

    def process(self, i):
        """ stage output: the record filled in by setup()
        """
        return self.out_z

    def action(self, m):
        """ FSM action: unconditionally proceed to "pack_put_z"
        """
        m.next = "pack_put_z"
1261
1262
class FPRoundData:
    """ Data record carrying z, out_do_z, oz and mid between stages.
    """

    # names of the fields copied by eq(), in copy order
    _FIELDS = ("z", "out_do_z", "oz", "mid")

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number z and of the raw value oz
            * id_wid: bit-width of the identifier mid
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying each field from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in self._FIELDS]
1274
1275
class FPRoundMod:
    """ Combinatorial rounding module: FPNorm1Data in, FPRoundData out.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the data specs
            * id_wid: identifier width handed to the data specs
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ creates a fresh input record
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ creates a fresh output record
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ stage output: always this module's own output record
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as submodule "roundz" and feeds it from i
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ copies input to output, bumping the mantissa when roundz is set
        """
        m = Module()
        src = self.i
        rounded = self.out_z.z
        m.d.comb += self.out_z.eq(src) # copies z, out_do_z, oz, mid
        # out_do_z set: leave the copied values untouched
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                m.d.comb += rounded.m.eq(src.z.m + 1) # mantissa up
                with m.If(src.z.m == src.z.m1s): # all 1s
                    m.d.comb += rounded.e.eq(src.z.e + 1) # exponent up

        return m
1307
1308
class FPRound(FPState):
    """ "round" state: registers the output of an FPRoundMod; next state
        is "corrections".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width forwarded to FPRoundMod
            * id_wid: identifier width forwarded to FPRoundMod
        """
        FPState.__init__(self, "round")
        # FPRoundMod.__init__ requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync() is not defined in the FPState class shown
        # in this file - confirm that a base class provides it.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as "out_z" (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally proceed to "corrections"
        """
        m.next = "corrections"
1333
1334
class FPCorrectionsMod:
    """ Combinatorial corrections module: FPRoundData in, FPRoundData out.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the data specs
            * id_wid: identifier width handed to the data specs
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ creates a fresh input record
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ creates a fresh output record
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ stage output: always this module's own output record
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ copies input to output; a denormalised z gets exponent N127
        """
        m = Module()
        src, dst = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        m.d.comb += dst.eq(src) # copies z, out_do_z, oz, mid
        # out_do_z set: leave the copied values untouched
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1367
1368
class FPCorrections(FPState):
    """ "corrections" state: registers the output of an FPCorrectionsMod;
        next state is "pack".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width forwarded to FPCorrectionsMod
            * id_wid: identifier width forwarded to FPCorrectionsMod
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod.__init__ requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally proceed to "pack"
        """
        m.next = "pack"
1392
1393
class FPPackData:
    """ Data record holding a packed result z and its identifier mid.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z
            * id_wid: bit-width of mid
        """
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z then mid
        """
        return list((self.z, self.mid))
1405
1406
class FPPackMod:
    """ Combinatorial pack module: FPRoundData in, FPPackData out.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the data specs and to FPNumOut
            * id_wid: identifier width handed to the data specs
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ creates a fresh input record
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ creates a fresh output record
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ stage output: always this module's own output record
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ selects between oz, infinity and a number created from s/e/m
        """
        m = Module()
        src = self.i
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = src.z
        m.submodules.pack_out_z = z
        with m.If(src.out_do_z):
            # bypass: the raw value oz is passed straight through
            m.d.comb += z.v.eq(src.oz)
        with m.Elif(src.z.is_overflowed):
            m.d.comb += z.inf(src.z.s)
        with m.Else():
            m.d.comb += z.create(src.z.s, src.z.e, src.z.m)
        m.d.comb += [
            self.o.mid.eq(src.mid),
            self.o.z.eq(z.v),
        ]
        return m
1445
1446
class FPPack(FPState):
    """ "pack" state: registers the output of an FPPackMod; next state is
        "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width forwarded to FPPackMod
            * id_wid: identifier width forwarded to FPPackMod
        """
        FPState.__init__(self, "pack")
        # FPPackMod.__init__ requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # both records are FPPackData (fields "z" and "mid"), and FPPackMod
        # exposes its result as "o": there is no ".v" or ".out_z" here.
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: unconditionally proceed to "pack_put_z"
        """
        m.next = "pack_put_z"
1470
1471
class FPPutZ(FPState):
    """ Output state: writes in_z to out_z.z.v and raises its strobe until
        the strobe/ack pair is seen, then moves to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this state
            * in_z: value to output
            * out_z: destination; its "z" member provides v, stb and ack
            * in_mid, out_mid: identifier source/destination (in_mid may
              be None, in which case the identifier is not copied)
            * to_state: next state; defaults to "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: drive the output and wait for it to be accepted
        """
        dest = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z)
        with m.If(dest.stb & dest.ack):
            # accepted: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1495
1496
class FPPutZIdx(FPState):
    """ Output state that writes in_z.v to the entry of out_zs selected by
        in_mid, holding that entry's strobe until strobe and ack are both
        seen, then moves to to_state.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this state
            * in_z: source; its "v" member is the value to output
            * out_zs: indexable collection of destinations (v, stb, ack)
            * in_mid: index into out_zs
            * to_state: next state; defaults to "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: drive the selected output and wait for acceptance
        """
        slot = self.out_zs[self.in_mid]
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += [
            outz_stb.eq(slot.stb),
            outz_ack.eq(slot.ack),
        ]
        m.d.sync += slot.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # accepted: drop the strobe and move on
            m.d.sync += slot.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += slot.stb.eq(1)
1522
class FPOpData:
    """ Data record pairing an FPOp (z) with an identifier (mid).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to FPOp
            * id_wid: bit-width of mid
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's members as a list: z then mid
        """
        return list((self.z, self.mid))
1533
1534
class FPADDBaseMod:
    """ FSM-based adder: collects a list of states and emits one FSM state
        per entry, each running that entry's action().
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact
        self.states = []

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ creates a fresh input record
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ creates a fresh output record
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with either the short or the long chain
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        # one FSM state per registered state object, in registration order
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers the full, one-operation-per-state chain

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are assigned in __init__ - confirm
            whether the non-compact path is still expected to work.
        """
        w, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", w))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a, b = get.out_op1, get.out_op2

        sc = self.add_state(FPAddSpecialCases(w, idw))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(w, idw))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: same wiring, different implementation
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(w, idw))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(w, idw))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(w, idw))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant also takes a strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(w, idw))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(w, idw))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(w, idw))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(w, idw))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(w, idw))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states: one after packing, one for special cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced chain: get ops, special-cases/denorm,
            align/add, norm-to-pack, put z
        """
        w, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", w, idw))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(w, idw))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(w, idw))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(w, idw))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1654
1655
class FPADDBase(FPState):
    """ "fpadd" state: owns an FPADDBaseMod, triggers it when an operand
        pair is accepted and moves to "put_z" once the result is flagged.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data spec: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data spec: delegated to the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ wires this state to the wrapped module

            * m: the Module being built
            * i: input record, copied into self.i and on into the module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: accept operands, then wait for the result

            The statements that used to follow the early "return" could
            never execute (and referenced attributes this class does not
            define), so they have been removed.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1745
1746
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: special-cases/denorm, align/add and
        normalise-to-pack, connected in that order.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to every stage
            * id_wid: identifier width handed to every stage
        """
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements are stored here and applied in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages and applies the stored connections
        """
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for name, stage in named_stages:
            setattr(m.submodules, name, stage)
        m.d.comb += self._eqs
        return m
1763
1764
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in pipe: num_rows inputs, each carrying FPADDBaseData.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to FPADDBaseData
            * num_rows: number of inputs (passed as p_len)
        """
        def iospec():
            # factory for the pass-through stage's data record
            return FPADDBaseData(width, id_wid)

        self.num_rows = num_rows
        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=num_rows)
1771
1772
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out pipe: num_rows outputs, each carrying FPPackData.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to FPPackData
            * num_rows: number of outputs (passed as n_len)
        """
        def iospec():
            # factory for the pass-through stage's data record
            return FPPackData(width, id_wid)

        self.num_rows = num_rows
        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=num_rows)
1779
1780
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: handed to all three sub-pipes
            * num_rows: number of fan-in inputs and fan-out outputs
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # this class's in/out are simply the first pipe's p and last pipe's n
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes and connects them end to end
        """
        m = Module()
        for name in ("inpipe", "fpadd", "outpipe"):
            setattr(m.submodules, name, getattr(self, name))

        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list computed at construction time
        """
        return self._ports
1813
1814
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of results
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        def named_op(label):
            # creates an FPOp of this adder's width carrying the given name
            op = FPOp(width)
            op.name = label
            return op

        # operand pairs, one (a, b) tuple per row
        self.rs = Array([(named_op("in_a_%d" % idx),
                          named_op("in_b_%d" % idx))
                         for idx in range(rs_sz)])

        # results, one per row
        self.res = Array([named_op("out_z_%d" % idx)
                          for idx in range(rs_sz)])

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only row 0 of the operand array is wired up here
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [
            abd.a.eq(geta.out_op),
            abd.b.eq(getb.out_op),
            abd.mid.eq(self.ids.in_mid),
        ]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        # result goes to the entry of self.res selected by the output id
        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1904
1905
if __name__ == "__main__":
    # Command-line entry point for the complete FPADD unit.  The former
    # "if True: ... else: ..." toggle has been dropped: its else-branch was
    # unreachable and read attributes (in_a, in_b, out_z, in_mid, out_mid)
    # that FPADDBase does not define.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=alu.rs[0][0].ports() + \
                    alu.rs[0][1].ports() + \
                    alu.res[0].ports() + \
                    [alu.ids.in_mid, alu.ids.out_mid])
1919
1920
1921 # works... but don't use, just do "python fname.py convert -t v"
1922 #print (verilog.convert(alu, ports=[
1923 # ports=alu.in_a.ports() + \
1924 # alu.in_b.ports() + \
1925 # alu.out_z.ports())