cleanup: remove redundant classes/code
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
13 from multipipe import CombMuxOutPipe
14 from multipipe import PriorityCombMuxInPipe
15
16 #from fpbase import FPNumShiftMultiRight
17
18
class FPState(FPBase):
    """ base class for the FSM-style states in this file.

        records the name of the state (state_from) and lets named
        inputs / outputs be attached to the instance as attributes.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the inputs mapping and mirrors each entry onto self
        """
        self.inputs = inputs
        for entry in inputs.items():
            setattr(self, *entry)

    def set_outputs(self, outputs):
        """ stores the outputs mapping and mirrors each entry onto self
        """
        self.outputs = outputs
        for entry in outputs.items():
            setattr(self, *entry)
32
33
class FPGetSyncOpsMod:
    """ combinatorial pass-through of num_ops operands.

        "ready" is raised when stb equals the all-ones constant; the
        operands are copied from in_op to out_op only while both ack
        and ready are high (out_decode).
    """

    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        # signals are created pairwise (input first, then output)
        pairs = [(Signal(width, reset_less=True),
                  Signal(width, reset_less=True))
                 for _ in range(num_ops)]
        self.in_op = [inp for inp, _ in pairs]
        self.out_op = [out for _, out in pairs]
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # -1 as an unsigned num_ops-bit constant
        all_strobes = Const(-1, (self.num_ops, False))
        m.d.comb += [
            self.ready.eq(self.stb == all_strobes),
            self.out_decode.eq(self.ack & self.ready),
        ]
        with m.If(self.out_decode):
            m.d.comb += [dst.eq(src)
                         for src, dst in zip(self.in_op, self.out_op)]
        return m

    def ports(self):
        """ all operand inputs, all operand outputs, then stb and ack
        """
        return [*self.in_op, *self.out_op, self.stb, self.ack]
63
64
class FPOps(Trigger):
    """ a Trigger carrying num_ops operand signals, each "width" bits,
        held in the Array self.v
    """

    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops
        self.v = Array([Signal(width) for _ in range(num_ops)])

    def ports(self):
        """ operand signals in index order, followed by ack and stb
        """
        operands = [self.v[idx] for idx in range(self.num_ops)]
        return operands + [self.ack, self.stb]
83
84
class InputGroup:
    """ groups num_rows FPGetSyncOpsMod inputs behind a PriorityEncoder.

        When at least one row is ready and the output trigger fires,
        the operands of the row picked by the encoder are registered
        into out_op and the row index is recorded in mid.

        * width:    bit-width of each operand
        * num_ops:  number of operands per row
        * num_rows: number of input rows being multiplexed
    """

    def __init__(self, width, num_ops=2, num_rows=4):
        self.width = width
        self.num_ops = num_ops
        self.num_rows = num_rows
        # number of bits needed to hold any row index 0..num_rows-1.
        # integer arithmetic: int(log(n)/log(2)) is a truncated log2,
        # which is one bit short whenever num_rows is not a power of
        # two (and is at the mercy of floating-point rounding).
        self.mmax = max(self.num_rows - 1, 0).bit_length()
        self.rs = []
        self.mid = Signal(self.mmax, reset_less=True) # multiplex id
        for i in range(num_rows):
            self.rs.append(FPGetSyncOpsMod(width, num_ops))
        self.rs = Array(self.rs)

        self.out_op = FPOps(width, num_ops)

    def elaborate(self, platform):
        m = Module()

        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe
        m.submodules.out_op = self.out_op
        m.submodules += self.rs

        # connect priority encoder: one input bit per row's "ready"
        in_ready = []
        for i in range(self.num_rows):
            in_ready.append(self.rs[i].ready)
        m.d.comb += pe.i.eq(Cat(*in_ready))

        active = Signal(reset_less=True)
        out_en = Signal(reset_less=True)
        m.d.comb += active.eq(~pe.n) # encoder active
        m.d.comb += out_en.eq(active & self.out_op.trigger)

        # encoder active: ack relevant input, record MID, pass output
        with m.If(out_en):
            rs = self.rs[pe.o]
            m.d.sync += self.mid.eq(pe.o)
            m.d.sync += rs.ack.eq(0)
            m.d.sync += self.out_op.stb.eq(0)
            for j in range(self.num_ops):
                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
        with m.Else():
            m.d.sync += self.out_op.stb.eq(1)
            # otherwise: ack is set to 1 on every row
            for i in range(self.num_rows):
                m.d.sync += self.rs[i].ack.eq(1)

        return m

    def ports(self):
        """ out_op ports, then each row's operand inputs and stb, then mid
        """
        res = []
        for i in range(self.num_rows):
            inop = self.rs[i]
            res += inop.in_op + [inop.stb]
        return self.out_op.ports() + res + [self.mid]
140
141
class FPGetOpMod:
    """ combinatorially copies in_op.v to out_op while both the ack and
        stb of in_op are high (flagged on out_decode)
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op
        handshake = (self.in_op.ack) & (self.in_op.stb)
        m.d.comb += self.out_decode.eq(handshake)
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
158
159
class FPGetOp(FPState):
    """ FSM state that fetches a single operand: waits for the
        FPGetOpMod decode, latches the operand and moves to out_state
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the module (under the state name) and wires it up
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        with m.If(self.out_decode):
            # operand accepted: drop ack, capture it, advance the FSM
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            # keep ack raised until the operand is accepted
            m.d.sync += self.in_op.ack.eq(1)
188
189
class FPNumBase2Ops:
    """ data bundle: two FPNumBase operands (a, b) plus a mux id (mid)
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying a, b and mid from another bundle
        """
        copy_a = self.a.eq(i.a)
        copy_b = self.b.eq(i.b)
        copy_mid = self.mid.eq(i.mid)
        return [copy_a, copy_b, copy_mid]

    def ports(self):
        return [self.a, self.b, self.mid]
202
203
class FPADDBaseData:
    """ data bundle for the adder input: two "width"-bit operand
        signals (a, b) plus a mux id (mid) of id_wid bits
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying a, b and mid from another bundle
        """
        copy_a = self.a.eq(i.a)
        copy_b = self.b.eq(i.b)
        copy_mid = self.mid.eq(i.mid)
        return [copy_a, copy_b, copy_mid]

    def ports(self):
        return [self.a, self.b, self.mid]
218
219
class FPGet2OpMod(Trigger):
    """ Trigger-based stage: copies the input bundle (i) to the output
        bundle (o) combinatorially while the trigger is active
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        # start from the Module built by Trigger, then add the copy
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
244
245
class FPGet2Op(FPState):
    """ FSM state that fetches both operands through FPGet2OpMod,
        latches them into self.o and then moves on to out_state
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ registers the module and connects data, strobe and ack
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.i.eq(i),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        with m.If(self.out_decode):
            # operands accepted: drop ack, capture them, advance the FSM
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
278
279
class FPSCData:
    """ data bundle produced by the special-cases stage: decoded
        operands (a, b), result number (z), raw result bits (oz), the
        out_do_z flag and the mux id (mid)
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from another bundle
        """
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        operand_part = [self.a.eq(i.a),
                        self.b.eq(i.b),
                        self.mid.eq(i.mid)]
        return result_part + operand_part
293
294
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        decodes both operands, then walks a priority chain of special
        cases.  when one matches, out_do_z is set and z holds the
        result; otherwise the decoded operands are passed on in o.a/o.b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Module()
        o = self.o

        m.submodules.sc_out_z = o.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += a1.decode(self.i.a)
        m.d.comb += b1.decode(self.i.b)

        # intermediate comparisons shared by the cases below
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [o.out_do_z.eq(1), o.z.nan(0)]

        # XXX an FP16 non-canonical NaN handling experiment (zero + NaN
        # combinations returning a modified NaN) used to sit here,
        # commented out and marked "under review".

        # a is inf: result is inf with a's sign...
        with m.Elif(a1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(a1.s)]
            # ...unless b has exp_128 and the signs differ: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += o.z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(b1.s)]

        # both zero: zero, sign is the AND of both signs
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         o.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         o.z.create(b1.s, b1.e, b1.m[3:-1])]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [o.out_do_z.eq(1),
                         o.z.create(a1.s, a1.e, a1.m[3:-1])]

        # a == -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [o.out_do_z.eq(1), o.z.zero(0)]

        # no special case: hand the decoded operands to the next stage
        with m.Else():
            m.d.comb += o.out_do_z.eq(0)
            m.d.comb += o.a.eq(a1)
            m.d.comb += o.b.eq(b1)

        # always: expose the packed result and forward the mux id
        m.d.comb += o.oz.eq(o.z.v)
        m.d.comb += o.mid.eq(self.i.mid)

        return m
413
414
class FPID:
    """ optional mux-id pass-through.

        when id_wid is truthy, in_mid / out_mid are id_wid-bit signals;
        otherwise both are None and idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid on the sync domain (if present)
        """
        # must use the same test as __init__: with "is not None" an
        # id_wid of 0 would reach .eq() on a None out_mid
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
428
429
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper around FPAddSpecialCasesMod: registers the result
        and mux id, then branches on out_do_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup only takes (m, i); out_do_z is read back
        # from the module's output bundle instead
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid)

    def action(self, m):
        # NOTE(review): a call to self.idsync(m) used to be here.  FPState
        # as defined in this file has no idsync, and the mux id is
        # already registered in setup() - confirm FPBase does not
        # provide one before relying on this.
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
455
456
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        pipeline stage chaining the special-cases module straight into
        the denormalise module.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        UnbufferedPipeline.__init__(self, self) # pipe is its own stage
        self.o = self.ospec()

    def ispec(self):
        """ input format: that of the special-cases module
        """
        return self.smod.ispec()

    def ospec(self):
        """ output format: that of the denormalise module
        """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # special-cases feeds denormalise
        self.smod.setup(m, i)
        self.dmod.setup(m, self.smod.o)

        # an early-out (split pipeline) variant would additionally
        # register smod.o.z.v and smod.o.mid when out_do_z is set;
        # that is not done here: the result always goes through dmod.
        m.d.comb += self.o.eq(self.dmod.o)

    def process(self, i):
        return self.o

    def action(self, m):
        # no early-out to "put_z": always continue to alignment
        m.next = "align"
501
502
class FPAddDeNormMod(FPState):
    """ denormalisation stage: for each operand, either limits the
        exponent (when exp_n127 is set) or sets the top mantissa bit.
        skipped when out_do_z is set; all other fields pass through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def _denorm_operand(self, m, num_in, num_out):
        """ copies one operand across, then applies the exponent limit
            or the top-mantissa-bit adjustment on top of the copy
        """
        m.d.comb += num_out.eq(num_in)
        with m.If(num_in.exp_n127):
            m.d.comb += num_out.e.eq(num_in.N126) # limit exponent
        with m.Else():
            m.d.comb += num_out.m[-1].eq(1) # set top mantissa bit

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            self._denorm_operand(m, self.i.a, self.o.a)
            self._denorm_operand(m, self.i.b, self.o.b)

        # everything else is forwarded unchanged
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
550
551
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: registers the denormalised
        operands into out_a / out_b, then moves to the "align" state
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its results through its output bundle "o"
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
571
572
class FPAddAlignMultiMod(FPState):
    """ alignment step: compares the two exponents and applies
        shift_down to the operand with the smaller one.  exp_eq is
        raised when the exponents already match.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: the user of this module (the "align" state) *STAYS* in
        # that state until exp_eq is raised

        # defaults: exponents not equal, operands passed through
        m.d.comb += self.exp_eq.eq(0)
        m.d.comb += self.out_a.eq(self.in_a)
        m.d.comb += self.out_b.eq(self.in_b)

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        with m.If(agtb):
            # exponent of a greater than b: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # exponent of b greater than a: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: move to next stage.
            m.d.comb += self.exp_eq.eq(1)
        return m
613
614
class FPAddAlignMulti(FPState):
    """ FSM wrapper around FPAddAlignMultiMod: registers the module's
        operands each cycle and only leaves the "align" state once
        exp_eq is raised
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        # inputs and the exp_eq flag are combinatorial...
        m.d.comb += [
            self.mod.in_a.eq(in_a),
            self.mod.in_b.eq(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        # ...whereas the operands are registered
        m.d.sync += [
            self.out_a.eq(self.mod.out_a),
            self.out_b.eq(self.mod.out_b),
        ]

    def action(self, m):
        with m.If(self.exp_eq):
            m.next = "add_0"
639
640
class FPNumIn2Ops:
    """ data bundle produced by the alignment stage: two FPNumIn
        operands (a, b), result number (z), raw result bits (oz), the
        out_do_z flag and the mux id (mid)
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from another bundle
        """
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        operand_part = [self.a.eq(i.a),
                        self.b.eq(i.b),
                        self.mid.eq(i.mid)]
        return result_part + operand_part
654
655
class FPAddAlignSingleMod:
    """ single-cycle alignment stage (see elaborate for details)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences (both directions) and comparisons
        m.d.comb += [
            ediff.eq(self.i.a.e - self.i.b.e),
            ediffr.eq(self.i.b.e - self.i.a.e),
            elz.eq(self.i.a.e < self.i.b.e),
            egz.eq(self.i.a.e > self.i.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.o.a.eq(self.i.a)
        m.d.comb += self.o.b.eq(self.i.b)

        # only one shifter (muxed): pick the operand that needs it
        with m.If(~self.i.out_do_z):
            with m.If(egz):
                # exponent of a greater than b: shift b down
                m.d.comb += t_inp.eq(self.i.b)
                m.d.comb += tdiff.eq(ediff)
                m.d.comb += self.o.b.eq(t_out)
                m.d.comb += self.o.b.s.eq(self.i.b.s) # keep b's sign
            with m.Elif(elz):
                # exponent of b greater than a: shift a down
                m.d.comb += t_inp.eq(self.i.a)
                m.d.comb += tdiff.eq(ediffr)
                m.d.comb += self.o.a.eq(t_out)
                m.d.comb += self.o.a.s.eq(self.i.a.s) # keep a's sign

        # everything else is forwarded unchanged
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
749
750
class FPAddAlignSingle(FPState):
    """ FSM wrapper around FPAddAlignSingleMod: registers the aligned
        operands into out_a / out_b, then moves to the "add_0" state
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module exposes its results through its output bundle "o"
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
770
771
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ pipeline stage chaining three modules together: single-cycle
        align, add stage 0 and add stage 1
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input format: FPSCData
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: that of add stage 1
        """
        return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        align = FPAddAlignSingleMod(self.width, self.id_wid)
        add0 = FPAddStage0Mod(self.width, self.id_wid)
        add1 = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([align, add0, add1]).setup(m, i)

        # the output of the last module in the chain is the result
        m.d.comb += self.a1o.eq(add1.o)

    def process(self, i):
        return self.a1o

    def action(self, m):
        m.next = "normalise_1"
807
808
class FPAddStage0Data:
    """ data bundle produced by add stage 0: result number (z), the
        mantissa total (tot, 4 bits wider than z's mantissa), raw result
        bits (oz), the out_do_z flag and the mux id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from another bundle
        """
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        return result_part + [self.tot.eq(i.tot), self.mid.eq(i.mid)]
821
822
class FPAddStage0Mod:
    """ first stage of add: adds the mantissas when the signs match,
        otherwise subtracts the smaller mantissa from the larger one.
        skipped when out_do_z is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += seq.eq(self.i.a.s == self.i.b.s)
        m.d.comb += mge.eq(self.i.a.m >= self.i.b.m)
        m.d.comb += am0.eq(Cat(self.i.a.m, 0))
        m.d.comb += bm0.eq(Cat(self.i.b.m, 0))

        with m.If(~self.i.out_do_z):
            # result exponent is taken from a
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            with m.If(seq):
                # same-sign (both negative or both positive) add mantissas
                m.d.comb += self.o.tot.eq(am0 + bm0)
                m.d.comb += self.o.z.s.eq(self.i.a.s)
            with m.Elif(mge):
                # a mantissa greater than (or equal to) b: a - b, a's sign
                m.d.comb += self.o.tot.eq(am0 - bm0)
                m.d.comb += self.o.z.s.eq(self.i.a.s)
            with m.Else():
                # b mantissa greater than a: b - a, b's sign
                m.d.comb += self.o.tot.eq(bm0 - am0)
                m.d.comb += self.o.z.s.eq(self.i.b.s)

        # everything else is forwarded unchanged
        m.d.comb += [
            self.o.oz.eq(self.i.oz),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.mid.eq(self.i.mid),
        ]
        return m
887
888
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper around FPAddStage0Mod: registers the module's
        output bundle into self.o, then moves to the "add_1" state.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
910
911
class FPAddStage1Data:
    """ data bundle produced by add stage 1: result number (z), the
        Overflow record (of), raw result bits (oz), the out_do_z flag
        and the mux id (mid)
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from another bundle
        """
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        return result_part + [self.of.eq(i.of), self.mid.eq(i.mid)]
924
925
926
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        of = self.o.of

        # default: pass z straight through (fields overridden below)
        m.d.comb += self.o.z.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            with m.If(tot[-1]):
                # tot[-1] (MSB) gets set when the sum overflows:
                # take the mantissa one bit higher, bump the exponent
                m.d.comb += self.o.z.m.eq(tot[4:])
                m.d.comb += of.m0.eq(tot[4])
                m.d.comb += of.guard.eq(tot[3])
                m.d.comb += of.round_bit.eq(tot[2])
                m.d.comb += of.sticky.eq(tot[1] | tot[0])
                m.d.comb += self.o.z.e.eq(self.i.z.e + 1)
            with m.Else():
                # tot[-1] (MSB) zero case
                m.d.comb += self.o.z.m.eq(tot[3:])
                m.d.comb += of.m0.eq(tot[3])
                m.d.comb += of.guard.eq(tot[2])
                m.d.comb += of.round_bit.eq(tot[1])
                m.d.comb += of.sticky.eq(tot[0])

        # everything else is forwarded unchanged
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
988
989
class FPAddStage1(FPState):
    """ FSM wrapper around FPAddStage1Mod: registers the module's z and
        overflow outputs, then moves to the "normalise_1" state
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): norm_stb is assigned 0 here and 1 a few lines
        # below, both in the sync domain from this same method - check
        # whether the second assignment was meant to live in action()
        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module exposes its results through its output bundle "o"
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
1012
1013
class FPNormaliseModSingle:
    """ single-cycle normalisation: when in_z.m_msbzero is set, counts
        leading zeros with a PriorityEncoder (on the bit-reversed
        mantissa), shifts the mantissa up by that amount and decreases
        the exponent to match.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # elaborate() reads in_of and drives out_of, so both must exist
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        return FPNumBase(self.width, False)

    def ospec(self):
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input of this module is in_z (there is no self.i)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of
        m.submodules.norm1_out_overflow = self.out_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): msr is registered but never connected here
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1080
class FPNorm1Data:
    """ data bundle produced by the normalisation stage: result number
        (z), the roundz flag, raw result bits (oz), the out_do_z flag
        and the mux id (mid)
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from another bundle
        """
        result_part = [self.z.eq(i.z),
                       self.out_do_z.eq(i.out_do_z),
                       self.oz.eq(i.oz)]
        return result_part + [self.roundz.eq(i.roundz), self.mid.eq(i.mid)]
1093
1094
class FPNorm1ModSingle:
    """ Combinatorial (single-cycle) normalisation stage.

        Takes an FPAddStage1Data (number z plus overflow bits "of") and
        produces an FPNorm1Data.  Depending on the input, the mantissa is
        either shifted up (exponent decreased) or shifted down (exponent
        increased); otherwise it is passed through unchanged.  mid,
        out_do_z and oz are always copied straight through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format: FPAddStage1Data
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format: FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output object (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # mantissa plus the two extra (guard and round) bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input, used for all the decisions below
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits must come from the shifter output:
                    # temp_s is only driven in the "decrease" branch, so
                    # reading it here would always yield zero.
                    of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),      # guard
                    of.round_bit.eq(msr.m[1]),  # round
                    of.sticky.eq(msr.m[0]),     # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # pass-through fields (unconditional)
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1201
1202
class FPNorm1ModMulti:
    """ One step of iterative (multi-cycle) normalisation.

        Each evaluation shifts the mantissa by at most one bit, adjusting
        the exponent by one.  The working value is taken either from
        in_z/in_of (when in_select is set) or from temp_z/temp_of.
        out_norm is raised whenever a shift was performed.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven in elaborate(): must exist before elaboration
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                # must follow the whole-mantissa assignment to override bit 0
                self.out_z.m[0].eq(in_of.guard),       # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),           # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1268
1269
class FPNorm1Single(FPState):
    """ FSM state "normalise_1", wrapping FPNorm1ModSingle.

        Moves unconditionally to the "round" state.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data format, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"
1292
1293
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" for iterative normalisation.

        Stays in this state while out_norm is set; moves on to "round"
        once it is clear.  stb/ack form the handshake with the previous
        stage; temp_z/temp_of are registers holding the intermediate
        value between iterations.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): the FPNorm1ModMulti class visible in this file
        # defines no setup() method - confirm this call still resolves.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        # accept new input on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # latch this iteration's result for use by the next one
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            # a shift was performed: remain in this state
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1334
1335
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalise, round, correct and pack - chained into one stage.

        Acts both as an FSM state ("normalise_1", followed by
        "pack_put_z") and as an unbuffered pipeline whose stage is the
        object itself.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        """ input format: same as FPNorm1ModSingle's ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: same as FPPackMod's ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()

        m.d.comb += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z), # outputs packed result
        ]

    def process(self, i):
        """ returns the packed output object created by setup()
        """
        return self.out_z

    def action(self, m):
        m.next = "pack_put_z"
1371
1372
class FPRoundData:
    """ Data bundle passed between the round, corrections and pack stages.

        * z: the number being processed (FPNumBase)
        * out_do_z: 1-bit flag; when set, the pack stage outputs oz as-is
        * oz: width-bit value that accompanies out_do_z
        * mid: id_wid-bit identifier carried alongside the data
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        # order of the returned assignments: z, out_do_z, oz, mid
        names = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in names]
1384
1385
class FPRoundMod:
    """ Combinatorial rounding stage.

        Copies the input to the output; when out_do_z is clear and roundz
        is set, the mantissa is incremented, and if the mantissa was all
        ones the exponent is incremented as well.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: FPNorm1Data
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as "roundz" and connects its input
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output object (the argument is not used)
        """
        return self.out_z

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z
        num = src.z

        # default: straight copy of mid, z, out_do_z (and oz)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # mantissa up
                m.d.comb += dst.z.m.eq(num.m + 1)
                # mantissa was all 1s: exponent up
                with m.If(num.m == num.m1s):
                    m.d.comb += dst.z.e.eq(num.e + 1)

        return m
1417
1418
class FPRound(FPState):
    """ FSM state "round", wrapping FPRoundMod.

        The module's combinatorial result is registered into out_z on
        every clock; the state then moves to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined by FPState as visible in this
        # file; presumably inherited from FPBase - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1443
1444
class FPCorrectionsMod:
    """ Combinatorial exponent-correction stage.

        Copies the input to the output; when out_do_z is clear and the
        number reports is_denormalised, the output exponent is replaced
        by the number's N127 value.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output object (the argument is not used)
        """
        return self.out_z

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z

        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z

        # default: straight copy of mid, z, out_do_z (and oz)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1477
1478
class FPCorrections(FPState):
    """ FSM state "corrections", wrapping FPCorrectionsMod.

        The module's combinatorial result is registered into out_z on
        every clock; the state then moves to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as "out_z" (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
1502
1503
class FPPackData:
    """ Packed result: a width-bit value z plus its id_wid-bit identifier.
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid (in that order) from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the two signals as a list: z first, then mid
        """
        return list((self.z, self.mid))
1515
1516
class FPPackMod:
    """ Combinatorial pack stage: FPRoundData in, FPPackData out.

        When out_do_z is set the incoming oz value is output unchanged.
        Otherwise the number is packed with FPNumOut.create(), or
        replaced by FPNumOut.inf() if it reports is_overflowed.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format: FPRoundData
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: FPPackData
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def process(self, i):
        """ returns the output object (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        src = self.i
        num = src.z

        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z

        # the identifier always passes straight through
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            m.d.comb += z.v.eq(src.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1555
1556
class FPPack(FPState):
    """ FSM state "pack", wrapping FPPackMod.

        The module's packed value and identifier are registered into
        out_z on every clock; the state then moves to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # out_z is an FPPackData (fields "z" and "mid"), and FPPackMod
        # exposes its result as "o"
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
1580
1581
class FPPutZ(FPState):
    """ FSM state that presents a result on out_z.z and waits for it to
        be acknowledged before moving to to_state ("get_ops" by default).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        dest = self.out_z.z

        # the identifier is only forwarded when one was supplied
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z)

        handshake_done = dest.stb & dest.ack
        with m.If(handshake_done):
            # accepted: drop the strobe and leave this state
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep (or start) offering the value
            m.d.sync += dest.stb.eq(1)
1605
1606
class FPPutZIdx(FPState):
    """ FSM state that presents a result on the output selected by in_mid
        (out_zs[in_mid]) and waits for it to be acknowledged before
        moving to to_state ("get_ops" by default).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # the output addressed by the identifier
        slot = self.out_zs[self.in_mid]

        m.d.comb += [
            outz_stb.eq(slot.stb),
            outz_ack.eq(slot.ack),
        ]
        m.d.sync += slot.v.eq(self.in_z.v)

        with m.If(outz_stb & outz_ack):
            # accepted: drop the strobe and leave this state
            m.d.sync += slot.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep (or start) offering the value
            m.d.sync += slot.stb.eq(1)
1632
class FPOpData:
    """ An FPOp (z) together with its id_wid-bit identifier (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying z and mid (in that order) from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns a two-element list: z first, then mid
        """
        return list((self.z, self.mid))
1643
1644
class FPADDBaseMod:
    """ FSM-based floating-point adder core.

        Builds a list of FPState objects (self.states), wires them up in
        either the "compact" or the "longer" arrangement, then emits one
        FSM state per entry.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FPState objects, in the order they were added
        self.states = []

    def ispec(self):
        """ input data format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: FPOpData
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states (and wire the stages together)
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered stage, named by its state_from
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ wires up the one-stage-per-state version of the adder

            NOTE(review): this method reads self.in_mid, self.out_z and
            self.out_mid, none of which __init__ assigns, and it calls
            stage setup() methods with argument lists that differ from
            the signatures visible in this file (e.g. FPRound.setup,
            FPNorm1Multi.setup).  It looks out of step with the compact
            path - confirm before using compact=False.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ wires up the reduced-stage version of the adder:
            get operands -> special cases + denormalise -> align + add ->
            normalise-to-pack -> put result
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        # final state: hand the packed value and its id to self.o
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1764
1765
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod, which is itself an FSM.

        Handles the stb/ack handshake for handing operands to the inner
        module, and moves to "put_z" once the inner module's output has
        triggered (z_done).
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format, as defined by the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format, as defined by the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * i: input data, copied combinatorially into self.i
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1855
1856
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: special-cases/denormalise, then
        align/add, then normalise-to-pack.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # connection statements, added to the module in elaborate()
        stages = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(stages)

    def elaborate(self, platform):
        m = Module()
        named_stages = (("scnorm", self.pipe1),
                        ("addalign", self.pipe2),
                        ("normpack", self.pipe3))
        for label, stage in named_stages:
            setattr(m.submodules, label, stage)
        m.d.comb += self._eqs
        return m
1873
1874
class FPAddInPassThruStage:
    """ Pass-through stage whose input and output are both FPADDBaseData.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input data format: FPADDBaseData
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: identical to the input format
        """
        return self.ispec()

    def process(self, i):
        """ returns the input unchanged
        """
        return i
1881
1882
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in multiplexer: num_rows inputs of FPADDBaseData, one output.
    """

    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        passthru = FPAddInPassThruStage(width, id_width)
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=self.num_rows)

    def ports(self):
        """ returns, for every input in turn, its valid/ready signals and
            data ports, followed by the same for the single output
        """
        collected = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            collected += [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        nxt = self.n
        collected += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return collected
1897
1898
1899
1900
class FPAddOutPassThruStage:
    """ Pass-through stage whose input and output are both FPPackData.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input data format: FPPackData
        """
        return FPPackData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: identical to the input format
        """
        return self.ispec()

    def process(self, i):
        """ returns the input unchanged
        """
        return i
1907
1908
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ Fan-out multiplexer: one input of FPPackData, num_rows outputs.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        CombMuxOutPipe.__init__(self, passthru, n_len=self.num_rows)

    def ports(self):
        """ returns the single input's valid/ready signals and data ports,
            followed by the same for every output in turn
        """
        prev = self.p
        collected = [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            collected += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return collected
1922
1923
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        Fan-in (num_rows inputs) -> adder pipeline -> fan-out (num_rows
        outputs).  The fan-in's inputs and the fan-out's outputs are
        exposed as this object's own p and n.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # use pipe in/out as this class in/out
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        in_ports = self.inpipe.ports()
        out_ports = self.outpipe.ports()
        self._ports = in_ports + out_ports

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # fan-in -> adder, then adder -> fan-out
        feed_adder = self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += feed_adder
        feed_fanout = self.fpadd.connect_to_next(self.outpipe)
        m.d.comb += feed_fanout

        return m

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
1952
1953
class ResArray:
    """ Array of rs_sz result operands (FPOp), plus a single input
        operand and identifier.
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width of each FPOp
            * id_wid: bit-width of the identifier signal
            * rs_sz: number of result entries to create (default 2)
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially connects the input operand and identifier
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every result entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1985
1986
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of results
        """
        # NOTE(review): FPID.__init__ is not called here; a separate FPID
        # instance is held in self.ids instead - confirm this is intended.
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # input operand pairs, named in_a_<n> / in_b_<n>
        rs = []
        for i in range(rs_sz):
            in_a  = FPOp(width)
            in_b  = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # result operands, named out_z_<n>
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # FPState objects, in the order they were added
        self.states = []

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair (row 0) is wired to the get states
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        # operands and identifier are registered into the input object
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # write the result to the output selected by its identifier,
        # then return to "get_a"
        pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                    o.mid, "get_a"))

        # one FSM state per registered stage, named by its state_from
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2076
2077
if __name__ == "__main__":
    # build the FSM-based adder and hand it to the nmigen command-line
    # entry point, exposing the first operand pair, the first result and
    # the identifier signals as top-level ports.
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=alu.rs[0][0].ports() + \
                    alu.rs[0][1].ports() + \
                    alu.res[0].ports() + \
                    [alu.ids.in_mid, alu.ids.out_mid])
2091
2092
2093 # works... but don't use, just do "python fname.py convert -t v"
2094 #print (verilog.convert(alu, ports=[
2095 # ports=alu.in_a.ports() + \
2096 # alu.in_b.ports() + \
2097 # alu.out_z.ports())