make FPGet2Ops conform to Stage API, use in compact StageChain
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 #from fpbase import FPNumShiftMultiRight
18
19
class FPState(FPBase):
    """ Common base for the named states of the FP adder.

        state_from: identifier stored for this state (subclasses in this
        file pass strings such as "align" or "add_0").
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the inputs dict and mirrors each entry as an attribute
        """
        self.inputs = inputs
        for name, value in inputs.items():
            setattr(self, name, value)

    def set_outputs(self, outputs):
        """ stores the outputs dict and mirrors each entry as an attribute
        """
        self.outputs = outputs
        for name, value in outputs.items():
            setattr(self, name, value)
33
34
class FPGetOpMod:
    """ Combinatorial operand-fetch module.

        out_decode is raised when both ack and stb of in_op are set;
        while it is raised, out_op follows in_op.v.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # handshake complete: both sides agree
        handshake = (self.in_op.ack) & (self.in_op.stb)
        m.d.comb += self.out_decode.eq(handshake)
        m.submodules.get_op_in = self.in_op
        #m.submodules.get_op_out = self.out_op
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
51
52
class FPGetOp(FPState):
    """ State that fetches a single operand.

        Keeps in_op.ack raised until FPGetOpMod reports a completed
        handshake, then latches the operand and moves to out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the fetch module (under this state's name) and
            wires its operand input and decode output
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ FSM behaviour: latch the operand on decode, else keep acking
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
81
82
class FPNumBase2Ops:
    """ Data bundle: two FPNumBase operands (a, b) plus an id (mid).
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying a, b and mid from i
        """
        fields = ("a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]

    def ports(self):
        """ returns the members of this bundle as a list
        """
        return [self.a, self.b, self.mid]
95
96
class FPADDBaseData:
    """ Data bundle: two raw operand signals (a, b), each `width` bits,
        plus an id (mid) of `id_wid` bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying a, b and mid from i
        """
        assigns = []
        for dst, src in ((self.a, i.a), (self.b, i.b), (self.mid, i.mid)):
            assigns.append(dst.eq(src))
        return assigns

    def ports(self):
        """ returns the members of this bundle as a list
        """
        return [self.a, self.b, self.mid]
111
112
class FPGet2OpMod(Trigger):
    """ Two-operand fetch module following the Stage API
        (ispec / ospec / process).

        While the inherited trigger signal is raised, the output bundle
        follows the input bundle.
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (same as the input format)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output bundle (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
137
138
class FPGet2Op(FPState):
    """ State that fetches both operands, wrapping FPGet2OpMod.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def ispec(self):
        """ input data format: delegated to the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: delegated to the wrapped module
        """
        return self.mod.ospec()

    def trigger_setup(self, m, in_stb, in_ack):
        """ links stb/ack: in_stb drives the module's stb, and the
            module's ack drives in_ack
        """
        m.d.comb += [
            self.mod.stb.eq(in_stb),
            in_ack.eq(self.mod.ack),
        ]

    def setup(self, m, i):
        """ registers the module, feeds it the input data and exposes
            its ack and trigger signals
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.i.eq(i),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
        ]

    def process(self, i):
        """ returns the registered output bundle (argument not used)
        """
        return self.o

    def action(self, m):
        """ FSM behaviour: on trigger, drop ack, latch the operands and
            advance to out_state; otherwise keep ack raised
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
184
185
class FPSCData:
    """ Data bundle passed out of the special-cases / denormalise stages:
        decoded operands (a, b), candidate result (z), its packed value
        (oz), the early-out flag (out_do_z) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
199
200
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Takes raw operands (FPADDBaseData) and produces FPSCData: when a
        special case applies, out_do_z is raised and z/oz hold the result;
        otherwise the decoded operands are passed through in a and b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output bundle (argument not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        out = self.o
        z = out.z

        m.submodules.sc_out_z = z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += a1.decode(self.i.a)
        m.d.comb += b1.decode(self.i.b)

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # if a is NaN or b is NaN return NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [out.out_do_z.eq(1), z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(a1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), z.inf(a1.s)]
            # b also has the all-ones exponent and the signs differ:
            # override the inf above with NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += z.nan(0)

        # if b is inf return inf
        with m.Elif(b1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), z.inf(b1.s)]

        # if a is zero and b zero return signed-a/b
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # if a is zero return b
        with m.Elif(a1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         z.create(b1.s, b1.e, b1.m[3:-1])]

        # if b is zero return a
        with m.Elif(b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         z.create(a1.s, a1.e, a1.m[3:-1])]

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [out.out_do_z.eq(1), z.zero(0)]

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += [out.out_do_z.eq(0),
                         out.a.eq(a1),
                         out.b.eq(b1)]

        # packed result and id are always forwarded
        m.d.comb += [out.oz.eq(z.v), out.mid.eq(self.i.mid)]

        return m
319
320
class FPID:
    """ Holds an optional id ("mid") input/output signal pair.

        id_wid: width of the id in bits.  When it is falsy (None or 0)
        no signals are created: in_mid and out_mid are None and idsync
        does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid on the sync domain, if present
        """
        # must use the same test as __init__: with id_wid == 0 the
        # signals are None, so "is not None" would crash on None.eq
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
334
335
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper around FPAddSpecialCasesMod: registers the module's
        result and id, then branches to "put_z" (early-out) or
        "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # the module takes (width, id_wid); passing width only raises
        # TypeError
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup takes (m, i) only; the early-out flag is
        # read from its output bundle instead
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        # the module exposes its result as o (an FPSCData), not out_z
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid)

    def action(self, m):
        """ FSM behaviour: early-out to "put_z" when a special case was
            detected, otherwise continue to "denormalise"
        """
        # NOTE(review): idsync is not defined by FPState in this file;
        # presumably inherited via FPBase - confirm
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
361
362
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Chains the special-cases and denormalise modules
        combinatorially and acts as its own pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # width/id_wid are assigned before the pipeline is initialised,
        # as in the original ordering
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipe is its own stage
        self.out = self.ospec()

    def ispec(self):
        """ input format: that of the special-cases module
        """
        return FPADDBaseData(self.width, self.id_wid) # SpecialCases ispec

    def ospec(self):
        """ output format: that of the denormalise module
        """
        return FPSCData(self.width, self.id_wid) # DeNorm ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        stages = [FPAddSpecialCasesMod(self.width, self.id_wid),
                  FPAddDeNormMod(self.width, self.id_wid)]
        StageChain(stages).setup(m, i)

        # only needed for break-out (early-out)
        # self.out_do_z = smod.o.out_do_z

        # result of the chain is the last stage's output
        self.o = stages[-1].o

    def process(self, i):
        """ returns the chained output set up by setup() (argument unused)
        """
        return self.o

    def action(self, m):
        """ FSM behaviour: register the chain output, go to "align"
        """
        # for break-out (early-out)
        #with m.If(self.out_do_z):
        #    m.next = "put_z"
        #with m.Else():
        result = self.process(None)
        m.d.sync += self.out.eq(result)
        m.next = "align"
406
407
class FPAddDeNormMod(FPState):
    """ Denormalise stage: unless out_do_z is raised, each operand is
        copied through and then either has its exponent limited to N126
        (when exp_n127 is set) or has its top mantissa bit set.
        z, oz, out_do_z and mid are always forwarded unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (same as the input format)
        """
        return FPSCData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output bundle (argument not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # identical treatment for a, then b
            for num_out, num_in in ((self.o.a, self.i.a),
                                    (self.o.b, self.i.b)):
                m.d.comb += num_out.eq(num_in)
                with m.If(num_in.exp_n127):
                    m.d.comb += num_out.e.eq(num_in.N126) # limit exponent
                with m.Else():
                    m.d.comb += num_out.m[-1].eq(1) # set top mantissa bit

        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
458
459
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: registers the denormalised
        operands into out_a / out_b and moves on to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # the module takes (width, id_wid); passing width only raises
        # TypeError
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its operands as o.a / o.b (it has no
        # out_a / out_b attributes)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """ FSM behaviour: unconditionally continue to "align"
        """
        # Denormalised Number checks
        m.next = "align"
479
480
class FPAddAlignMultiMod(FPState):
    """ One alignment step: whichever operand has the smaller exponent is
        passed through shift_down; exp_eq is raised once the exponents
        are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: exponents not (yet) equal, operands passed through
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
521
522
class FPAddAlignMulti(FPState):
    """ FSM wrapper around FPAddAlignMultiMod: registers the module's
        operands every cycle and leaves the "align" state once the
        module reports equal exponents.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.eq(in_a),
            self.mod.in_b.eq(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        m.d.sync += [
            self.out_a.eq(self.mod.out_a),
            self.out_b.eq(self.mod.out_b),
        ]

    def action(self, m):
        """ FSM behaviour: advance to "add_0" only when exponents match
        """
        with m.If(self.exp_eq):
            m.next = "add_0"
545
546
class FPNumIn2Ops:
    """ Data bundle produced by the align stage: operands as FPNumIn
        (a, b), candidate result (z), its packed value (oz), the
        early-out flag (out_do_z) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
560
561
class FPAddAlignSingleMod:
    """ Single-cycle alignment stage (Stage API: ispec/ospec/process).
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the output bundle (argument not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        a_in, b_in = self.i.a, self.i.b
        a_out, b_out = self.o.a, self.o.b

        m.submodules.align_in_a = a_in
        m.submodules.align_in_b = b_in
        m.submodules.align_out_a = a_out
        m.submodules.align_out_b = b_out

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(a_in.e), True)
        msr = MultiShiftRMerge(a_in.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences (both directions) and comparisons
        m.d.comb += [
            ediff.eq(a_in.e - b_in.e),
            ediffr.eq(b_in.e - a_in.e),
            elz.eq(a_in.e < b_in.e),
            egz.eq(a_in.e > b_in.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [a_out.eq(a_in), b_out.eq(b_in)]
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += t_inp.eq(b_in)
                m.d.comb += tdiff.eq(ediff)
                m.d.comb += b_out.eq(t_out)
                m.d.comb += b_out.s.eq(b_in.s) # whoops forgot sign
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += t_inp.eq(a_in)
                m.d.comb += tdiff.eq(ediffr)
                m.d.comb += a_out.eq(t_out)
                m.d.comb += a_out.s.eq(a_in.s) # whoops forgot sign

        # pass-through of result, early-out flag and id
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
655
656
class FPAddAlignSingle(FPState):
    """ FSM wrapper around FPAddAlignSingleMod: registers the aligned
        operands into out_a / out_b and moves on to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module exposes its operands as o.a / o.b (it has no
        # out_a / out_b attributes)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        """ FSM behaviour: unconditionally continue to "add_0"
        """
        m.next = "add_0"
676
677
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Chains align, add stage 0 and add stage 1 combinatorially and
        acts as its own pipeline stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        # width/id_wid are assigned before the pipeline is initialised,
        # as in the original ordering
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: that of add stage 1
        """
        return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        stages = [FPAddAlignSingleMod(self.width, self.id_wid),
                  FPAddStage0Mod(self.width, self.id_wid),
                  FPAddStage1Mod(self.width, self.id_wid)]
        StageChain(stages).setup(m, i)

        # result of the chain is the last stage's output
        self.o = stages[-1].o

    def process(self, i):
        """ returns the chained output set up by setup() (argument unused)
        """
        return self.o

    def action(self, m):
        """ FSM behaviour: register the chain output, go to "normalise_1"
        """
        result = self.process(None)
        m.d.sync += self.a1o.eq(result)
        m.next = "normalise_1"
713
714
class FPAddStage0Data:
    """ Data bundle produced by add stage 0: partial result (z), the
        mantissa total (tot, 4 bits wider than z's mantissa), the
        early-out value and flag (oz, out_do_z) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
727
728
class FPAddStage0Mod:
    """ First add stage (Stage API): adds or subtracts the aligned
        mantissas into tot and chooses the result sign.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output bundle (argument not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        a_in, b_in = self.i.a, self.i.b
        z_out = self.o.z

        m.submodules.add0_in_a = a_in
        m.submodules.add0_in_b = b_in
        m.submodules.add0_out_z = z_out

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += seq.eq(a_in.s == b_in.s)    # signs equal
        m.d.comb += mge.eq(a_in.m >= b_in.m)    # a mantissa >= b mantissa
        m.d.comb += am0.eq(Cat(a_in.m, 0))
        m.d.comb += bm0.eq(Cat(b_in.m, 0))

        with m.If(~self.i.out_do_z):
            m.d.comb += z_out.e.eq(a_in.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += self.o.tot.eq(am0 + bm0)
                m.d.comb += z_out.s.eq(a_in.s)
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += self.o.tot.eq(am0 - bm0)
                m.d.comb += z_out.s.eq(a_in.s)
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += self.o.tot.eq(bm0 - am0)
                m.d.comb += z_out.s.eq(b_in.s)

        # pass-through of early-out value, flag and id
        m.d.comb += [
            self.o.oz.eq(self.i.oz),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.mid.eq(self.i.mid),
        ]
        return m
793
794
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper around FPAddStage0Mod: registers the module's output
        and moves on to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # the module takes (width, id_wid); passing width only raises
        # TypeError
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        """ FSM behaviour: unconditionally continue to "add_1"
        """
        m.next = "add_1"
816
817
class FPAddStage1Data:
    """ Data bundle produced by add stage 1: result (z), overflow /
        rounding bits (of), the early-out value and flag (oz, out_do_z)
        and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
830
831
832
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the output bundle (argument not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        of = self.o.of

        m.d.comb += self.o.z.eq(self.i.z)
        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows. shift result
            # down by one extra bit and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += self.o.z.m.eq(tot[4:])
                m.d.comb += of.m0.eq(tot[4])
                m.d.comb += of.guard.eq(tot[3])
                m.d.comb += of.round_bit.eq(tot[2])
                m.d.comb += of.sticky.eq(tot[1] | tot[0])
                m.d.comb += self.o.z.e.eq(self.i.z.e + 1)
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += self.o.z.m.eq(tot[3:])
                m.d.comb += of.m0.eq(tot[3])
                m.d.comb += of.guard.eq(tot[2])
                m.d.comb += of.round_bit.eq(tot[1])
                m.d.comb += of.sticky.eq(tot[0])

        # pass-through of early-out flag, value and id
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
890
891
class FPAddStage1(FPState):
    """ FSM wrapper around FPAddStage1Mod: registers the module's result
        and overflow bits, then moves on to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # the module takes (width, id_wid); passing width only raises
        # TypeError
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module exposes its results as o.of / o.z (it has no
        # out_of / out_z attributes)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): this is added unconditionally after the eq(0)
        # above, so it presumably always takes precedence - confirm intent
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        """ FSM behaviour: unconditionally continue to "normalise_1"
        """
        m.next = "normalise_1"
914
915
class FPNormaliseModSingle:
    """ Single-cycle normalisation (exponent-decrease direction only):
        when the mantissa MSB is zero, shifts the mantissa up by its
        leading-zero count and decreases the exponent by the same amount.

        in_z / out_z: number to normalise and the normalised result.
        in_of / out_of: overflow (guard / round) bits in and out; the
        round and guard bits are shifted into the mantissa along with it.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # elaborate() reads in_of and drives out_of, so they must exist
        # (same attribute names as FPNorm1ModMulti)
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input data format
        """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output data format
        """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input is held in in_z (this class has no "i" attribute)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # (the right-shifter and exponent-difference signal that used to
        # be created here were never connected, so they are not built)

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
982
983
class FPNorm1Data:
    """ Data bundle produced by the normalise stage: result (z), the
        round-up flag (roundz), the early-out value and flag (oz,
        out_do_z) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
996
997
class FPNorm1ModSingle:
    """ Single-cycle normalisation stage (Stage API).

        Unless out_do_z is raised, either shifts the mantissa up and
        decreases the exponent (mantissa MSB zero, exponent above the
        minimum) or shifts it down and increases the exponent (exponent
        below the minimum), updating the guard/round/sticky bits to
        match.  out_do_z, oz and mid are forwarded unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output bundle (argument not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input bundle
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the shifter is created with mwid, but the value
        # fed to it below is mwid+1 bits wide - confirm no truncation
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        with m.If(~self.i.out_do_z):
            # decrease exponent
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the rounding bits come from the right-shifter
                    # output; temp_s is only driven in the decrease
                    # branch, so reading it here would always give zero
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1104
1105
class FPNorm1ModMulti:
    """ performs one single-bit normalisation step per evaluation

        The operand is taken from in_z/in_of when in_select is set and
        from temp_z/temp_of otherwise.  out_norm is raised whenever an
        increase or a decrease of the exponent was required.
    """

    def __init__(self, width, single_cycle=True):
        # NOTE(review): single_cycle is accepted but not used in this class
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven by elaborate(): set when a shift was needed this step
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ creates the combinatorial logic for one normalisation step
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),  # shift mantissa UP
                # must follow the shift above: replaces bit 0 of the result
                self.out_z.m[0].eq(in_of.guard),  # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit),  # round (was tot[1])
                self.out_of.round_bit.eq(0),  # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),  # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1171
1172
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModSingle
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # NOTE(review): single_cycle is accepted but not used here
        FPState.__init__(self, "normalise_1")
        # id_wid is forwarded: FPNorm1ModSingle is constructed with
        # (width, id_wid) elsewhere in this file
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()  # needs self.mod, so must come after it
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: always moves on to the "round" state
        """
        m.next = "round"
1195
1196
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1", multi-cycle variant: remains in this
        state while out_norm is raised, then moves on to "round"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti as visible in this file defines no
        # setup() method - confirm where this call is expected to resolve
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: latch the intermediate result, then either carry
            on normalising or hand over to "round"
        """
        m.d.comb += self.in_accept.eq(~self.ack & self.stb)
        # keep this step's result as the "temp" input of the next step
        m.d.sync += self.temp_z.eq(self.out_z)
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
            m.d.sync += self.ack.eq(1)
            m.next = "round"
1237
1238
class FPNormToPack(FPState, UnbufferedPipeline):
    """ normalise, round, correct and pack, chained combinatorially and
        presented both as an FSM state and as a pipeline stage
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        # width/id_wid are set before the pipeline is initialised with
        # this object as its stage, since ispec/ospec read them
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage

    def ispec(self):
        """ input data format (Norm1ModSingle ispec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format (FPPackMod ospec)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()
        self.o = packer.o

    def process(self, i):
        """ returns the output of the chain; the argument is ignored
        """
        return self.o

    def action(self, m):
        """ FSM action: latch the packed result and go to "pack_put_z"
        """
        m.d.sync += self.out_z.eq(self.process(None))
        m.next = "pack_put_z"
1274
1275
class FPRoundData:
    """ data record holding z, out_do_z, oz and mid
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of field-by-field assignments from i
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1287
1288
class FPRoundMod:
    """ rounding stage: increments the mantissa when roundz is set
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; the argument is ignored
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial rounding logic
        """
        m = Module()
        inp = self.i
        out = self.out_z
        # default: pass the input through (copies mid, z, out_do_z);
        # the conditional assignments below override parts of it
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.roundz):
                m.d.comb += out.z.m.eq(inp.z.m + 1)  # mantissa up
                with m.If(inp.z.m == inp.z.m1s):  # all 1s
                    m.d.comb += out.z.e.eq(inp.z.e + 1)  # exponent up

        return m
1320
1321
class FPRound(FPState):
    """ FSM state "round": registers the output of FPRoundMod
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in the visible part of this
        # file - confirm that FPState/FPBase provides it
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: always moves on to the "corrections" state
        """
        m.next = "corrections"
1346
1347
class FPCorrectionsMod:
    """ corrections stage: replaces the exponent with N127 when the
        input number is flagged as denormalised
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; the argument is ignored
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the combinatorial corrections logic
        """
        m = Module()
        inp = self.i
        out = self.out_z
        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = out.z
        # default: pass the input through (copies mid, z, out_do_z)
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                m.d.comb += out.z.e.eq(inp.z.N127)
        return m
1380
1381
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of FPCorrectionsMod
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: always moves on to the "pack" state
        """
        m.next = "pack"
1405
1406
class FPPackData:
    """ data record holding the packed result z and the id mid
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments of i.z and i.mid to this record
        """
        assign_z = self.z.eq(i.z)
        assign_mid = self.mid.eq(i.mid)
        return [assign_z, assign_mid]

    def ports(self):
        """ returns the record's fields as a list: z, then mid
        """
        return list((self.z, self.mid))
1418
1419
class FPPackMod:
    """ pack stage: produces the output value, either from the number
        in i.z or, when out_do_z is set, directly from i.oz
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record; the argument is ignored
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ creates the combinatorial packing logic
        """
        m = Module()
        z = FPNumOut(self.width, False)
        inp = self.i
        m.submodules.pack_in_z = inp.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(inp.mid)
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_overflowed):
                m.d.comb += z.inf(inp.z.s)
            with m.Else():
                m.d.comb += z.create(inp.z.s, inp.z.e, inp.z.m)
        with m.Else():
            # result was already decided upstream: pass oz straight through
            m.d.comb += z.v.eq(inp.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1458
1459
class FPPack(FPState):
    """ FSM state "pack": registers the output of FPPackMod
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # the output record (FPPackData) carries the value in its "z"
        # field, and FPPackMod exposes its result as "o"
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: always moves on to the "pack_put_z" state
        """
        m.next = "pack_put_z"
1483
1484
class FPPutZ(FPState):
    """ output state: writes in_z to out_z.z.v and holds out_z.z.stb
        until stb and ack are both set, then moves to to_state
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        # default destination once the result has been accepted
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action for this state
        """
        port = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z)
        with m.If(port.stb & port.ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1508
1509
class FPPutZIdx(FPState):
    """ output state: as FPPutZ, but the output port is selected from
        out_zs by indexing it with in_mid
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        # default destination once the result has been accepted
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action for this state
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        port = self.out_zs[self.in_mid]  # the selected output port
        m.d.comb += [outz_stb.eq(port.stb),
                     outz_ack.eq(port.ack),
                     ]
        m.d.sync += port.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1535
1536
class FPOpData:
    """ data record holding an FPOp (z) and the id mid
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments of i.z and i.mid to this record
        """
        assign_z = self.z.eq(i.z)
        assign_mid = self.mid.eq(i.mid)
        return [assign_z, assign_mid]

    def ports(self):
        """ returns the record's fields as a list: z, then mid
        """
        return list((self.z, self.mid))
1547
1548
class FPADDBaseMod:
    """ FP adder built as an FSM from a list of state objects; each state
        supplies a state_from name and an action(m) method
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers and wires up the long (one state per step) FSM

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which are set by __init__, and passes
            argument lists to several setup() methods that differ from
            the signatures visible in this file - it looks stale and
            should be checked before use.
        """
        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i)
        a = get.out_op1
        b = get.out_op2
        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers and wires up the compact FSM: the stages are linked
            with a StageChain and each one is also added as an FSM state
        """
        get = FPGet2Op("get_ops", "special_cases", self.width, self.id_wid)
        sc = FPAddSpecialCasesDeNorm(self.width, self.id_wid)
        alm = FPAddAlignSingleAdd(self.width, self.id_wid)
        n1 = FPNormToPack(self.width, self.id_wid)

        get.trigger_setup(m, self.in_t.stb, self.in_t.ack)

        chainlist = [get, sc, alm, n1]
        chain = StageChain(chainlist, specallocate=True)
        chain.setup(m, self.i)

        # the result is deliberately not bound to "sc": doing so would
        # overwrite the special-cases stage reference created above
        for mod in chainlist:
            self.add_state(mod)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                           sc.o.mid, self.o.mid))
1671
1672
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod, starts it through in_t
        and moves to "put_z" once the module's output has triggered
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            NOTE(review): in_mid is accepted but not used here.
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                     ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0)  # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: accept an operand pair and start the add, then
            wait for z_done before moving to "put_z"
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                             ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                         ]
            m.next = "put_z"
1762
1763
class FPADDBasePipe(ControlBase):
    """ three connected pipeline stages: special-cases/denorm,
        align/add, and normalise-to-pack
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # the connection equations are added to the module in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        """ registers the three stages as submodules and connects them
        """
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
1780
1781
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ fan-in pipe with num_rows inputs, built around a pass-through
        stage whose data format is FPADDBaseData
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            return FPADDBaseData(width, id_wid)

        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospec),
                                       p_len=self.num_rows)
1788
1789
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ fan-out pipe with num_rows outputs, built around a pass-through
        stage whose data format is FPPackData
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows

        def iospec():
            return FPPackData(width, id_wid)

        CombMuxOutPipe.__init__(self, PassThroughStage(iospec),
                                n_len=self.num_rows)
1796
1797
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * 3-stage adder pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)  # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)  # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows)  # fan-out

        # use pipe in/out as this class in/out (kinda annoying)
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes as submodules and links them
            fan-in -> adder -> fan-out
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
1830
1831
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of outputs
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        rs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # results, named out_z_<n>
        res = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            res.append(out_z)
        self.res = Array(res)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up here
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                 o.mid, "get_a"))

        # one FSM state per registered state object
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1921
1922
if __name__ == "__main__":
    # script entry point: hands the design and its ports to nmigen's main()
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        alu_ports = (alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=alu_ports)
    else:
        # alternative top-level, currently disabled by the test above
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        alu_ports = ([alu.in_a, alu.in_b] +
                     alu.in_t.ports() +
                     alu.out_z.ports() +
                     [alu.in_mid, alu.out_mid])
        main(alu, ports=alu_ports)
1936
1937
1938 # works... but don't use, just do "python fname.py convert -t v"
1939 #print (verilog.convert(alu, ports=[
1940 # ports=alu.in_a.ports() + \
1941 # alu.in_b.ports() + \
1942 # alu.out_z.ports())