move PriorityCombMuxInPipe to multipipe
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
13 from multipipe import CombMultiOutPipeline
14 from multipipe import PriorityCombMuxInPipe
15
16 #from fpbase import FPNumShiftMultiRight
17
18
class FPState(FPBase):
    """ Named state object.

        Stores the name of the state it represents (state_from) and lets
        a caller attach dictionaries of inputs and outputs; every entry of
        such a dictionary is additionally exposed as an attribute.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ keeps the inputs dict and mirrors each key as an attribute
        """
        self.inputs = inputs
        for name in inputs:
            setattr(self, name, inputs[name])

    def set_outputs(self, outputs):
        """ keeps the outputs dict and mirrors each key as an attribute
        """
        self.outputs = outputs
        for name in outputs:
            setattr(self, name, outputs[name])
32
33
class FPGetSyncOpsMod:
    """ Passes a group of operands through once all strobes are set.

        * in_op / out_op: lists of num_ops Signals, each width bits
        * stb: one strobe bit per operand
        * ready: set when every bit of stb is set
        * out_decode: ack AND ready; in_op is copied (combinatorially)
          to out_op only while out_decode is set
    """

    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        # input and output Signals are created pairwise, operand by operand
        pairs = [(Signal(width, reset_less=True),
                  Signal(width, reset_less=True))
                 for _ in range(num_ops)]
        self.in_op = [src for src, _ in pairs]
        self.out_op = [dst for _, dst in pairs]
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # -1 as an unsigned num_ops-wide constant == all strobe bits set
        all_strobes = Const(-1, (self.num_ops, False))
        m.d.comb += [
            self.ready.eq(self.stb == all_strobes),
            self.out_decode.eq(self.ack & self.ready),
        ]
        with m.If(self.out_decode):
            for src, dst in zip(self.in_op, self.out_op):
                m.d.comb += dst.eq(src)
        return m

    def ports(self):
        """ inputs first, then outputs, then stb and ack
        """
        handshake = [self.stb, self.ack]
        return self.in_op + self.out_op + handshake
63
64
class FPOps(Trigger):
    """ A Trigger carrying num_ops operand Signals (each width bits),
        held in the Array self.v
    """

    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops

        self.v = Array([Signal(width) for _ in range(num_ops)])

    def ports(self):
        """ the operands in index order, followed by ack then stb
        """
        operands = [self.v[idx] for idx in range(self.num_ops)]
        return operands + [self.ack, self.stb]
83
84
class InputGroup:
    """ Selects one of num_rows operand groups using a priority encoder.

        Each row is an FPGetSyncOpsMod.  The "ready" flags of all rows
        feed a PriorityEncoder; when the encoder is active and the output
        FPOps trigger is set, the selected row's operands are latched
        into out_op and the row index is latched into mid.

        * width:    operand width in bits
        * num_ops:  number of operands per row
        * num_rows: number of input rows
    """

    @staticmethod
    def _mid_width(num_rows):
        """ number of bits needed to hold any row index 0..num_rows-1

            Uses integer arithmetic: the previous int(log(n)/log(2)) was
            subject to floating-point rounding and, for a num_rows that
            is not a power of two, gave one bit too few, truncating the
            row index recorded in mid.
        """
        return max(num_rows - 1, 0).bit_length()

    def __init__(self, width, num_ops=2, num_rows=4):
        self.width = width
        self.num_ops = num_ops
        self.num_rows = num_rows
        self.mmax = self._mid_width(num_rows)
        self.mid = Signal(self.mmax, reset_less=True) # multiplex id
        self.rs = Array([FPGetSyncOpsMod(width, num_ops)
                         for _ in range(num_rows)])

        self.out_op = FPOps(width, num_ops)

    def elaborate(self, platform):
        m = Module()

        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe
        m.submodules.out_op = self.out_op
        m.submodules += self.rs

        # connect priority encoder: one ready bit per row
        in_ready = [self.rs[i].ready for i in range(self.num_rows)]
        m.d.comb += pe.i.eq(Cat(*in_ready))

        active = Signal(reset_less=True)
        out_en = Signal(reset_less=True)
        m.d.comb += active.eq(~pe.n) # encoder active
        m.d.comb += out_en.eq(active & self.out_op.trigger)

        # encoder active: ack relevant input, record MID, pass output
        with m.If(out_en):
            rs = self.rs[pe.o]
            m.d.sync += self.mid.eq(pe.o)
            m.d.sync += rs.ack.eq(0)
            m.d.sync += self.out_op.stb.eq(0)
            for j in range(self.num_ops):
                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
        with m.Else():
            m.d.sync += self.out_op.stb.eq(1)
            # otherwise: set ack on every row
            for i in range(self.num_rows):
                m.d.sync += self.rs[i].ack.eq(1)

        return m

    def ports(self):
        """ out_op ports, then each row's inputs and strobe, then mid
        """
        res = []
        for i in range(self.num_rows):
            inop = self.rs[i]
            res += inop.in_op + [inop.stb]
        return self.out_op.ports() + res + [self.mid]
140
141
class FPGetOpMod:
    """ Combinatorial operand capture: while both ack and stb of in_op
        are set (out_decode), in_op.v is passed to out_op.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        handshake = (self.in_op.ack) & (self.in_op.stb)
        m.d.comb += self.out_decode.eq(handshake)
        m.submodules.get_op_in = self.in_op
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
158
159
class FPGetOp(FPState):
    """ FSM state that fetches a single operand.

        in_state names this state, out_state is the state moved to once
        the operand has been accepted.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the module (under this state's name) and wires it up
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ on out_decode: drop ack, latch the operand, go to out_state;
            otherwise keep ack raised
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
188
189
class FPNumBase2Ops:
    """ Record of two FPNumBase operands (a, b) plus a multiplex id (mid)
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying a, b and mid (in that order) from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("a", "b", "mid")]

    def ports(self):
        ports = [self.a, self.b]
        ports.append(self.mid)
        return ports
202
203
class FPADDBaseData:
    """ Record of the two raw operands (a, b: width-bit Signals) and
        the multiplex id (mid: id_wid bits)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying a, b and mid (in that order) from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in ("a", "b", "mid")]

    def ports(self):
        ports = [self.a, self.b]
        ports.append(self.mid)
        return ports
218
219
class FPGet2OpMod(Trigger):
    """ Trigger-gated pass-through of an FPADDBaseData record: while
        self.trigger is set, the input record i is copied to o.
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record format (same as the input format)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
244
245
class FPGet2Op(FPState):
    """ FSM state that fetches both operands (and the mid) in one go,
        via an FPGet2OpMod
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ registers the module as "get_ops" and connects data, strobe
            and ack
        """
        mod = self.mod
        m.submodules.get_ops = mod
        m.d.comb += [
            mod.i.eq(i),
            mod.stb.eq(in_stb),
            self.out_ack.eq(mod.ack),
            self.out_decode.eq(mod.trigger),
            in_ack.eq(mod.ack),
        ]

    def action(self, m):
        """ on out_decode: drop ack, latch the module output, move to
            out_state; otherwise keep ack raised
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
278
279
class FPSCData:
    """ Record produced by the special-cases stage: decoded operands
        a and b, result number z, packed result oz, the out_do_z flag
        and the multiplex id.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from i
        """
        order = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in order]
293
294
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Decodes the two raw operands and, for each special case, sets
        o.out_do_z and builds the result in o.z.  When no special case
        applies, out_do_z is cleared and the decoded operands are passed
        on in o.a / o.b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input: raw operands + mid
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output: special-cases record
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as "specialcases" and connects its input
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Module()
        out = self.o

        m.submodules.sc_out_z = out.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a), b1.decode(self.i.b)]

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # NaN in, NaN out
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [out.out_do_z.eq(1), out.z.nan(0)]

        # XXX the FP16 non-canonical NaN handling (signed zero combined
        # with a NaN operand) was under review and is not enabled here

        # a is inf: result is inf with a's sign...
        with m.Elif(a1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(a1.s)]
            # ...unless b has exponent 128 and the opposite sign: NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += out.z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(b1.s)]

        # both zero: zero, signed with the AND of both signs
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s & b1.s, b1.e, b1.m[3:-1])]

        # only a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(b1.s, b1.e, b1.m[3:-1])]

        # only b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [out.out_do_z.eq(1),
                         out.z.create(a1.s, a1.e, a1.m[3:-1])]

        # a == -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [out.out_do_z.eq(1), out.z.zero(0)]

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += [out.out_do_z.eq(0),
                         out.a.eq(a1),
                         out.b.eq(b1)]

        m.d.comb += out.oz.eq(out.z.v)
        m.d.comb += out.mid.eq(self.i.mid)

        return m
413
414
class FPID:
    """ Optional multiplex-id carrier.

        When id_wid is truthy, in_mid and out_mid are id_wid-bit Signals;
        otherwise (None or 0) both are None and idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ latches in_mid into out_mid on the sync domain, if ids exist

            Tests the signals themselves rather than "id_wid is not None":
            __init__ creates no signals for any falsy id_wid (e.g. 0),
            and the old test then called .eq on None.
        """
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
428
429
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper around FPAddSpecialCasesMod: latches the special-case
        result and id, and branches to "put_z" when out_do_z is set,
        otherwise to "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup takes (m, i) only; out_do_z is read from
        # its output record instead of being passed in
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid) # (and mid)

    def action(self, m):
        # NOTE(review): the former self.idsync(m) call was dropped - this
        # class creates no in_mid/out_mid, and the id is already latched
        # through out_z.mid in setup().  Confirm FPBase does not expect it.
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
455
456
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Pipeline stage chaining the special-cases module (smod) into the
        denormalise module (dmod).  The object acts as its own stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        UnbufferedPipeline.__init__(self, self) # pipe is its own stage
        self.o = self.ospec()

    def ispec(self):
        """ stage input format: that of the special-cases module
        """
        return self.smod.ispec()

    def ospec(self):
        """ stage output format: that of the denormalise module
        """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # special-cases first, its output record feeding denormalise.
        # (an early-out / split pipeline would additionally latch
        # smod.o.z.v, smod.o.mid and smod.o.out_do_z here; not done)
        self.smod.setup(m, i)
        self.dmod.setup(m, self.smod.o)

        # out_do_z=False
        m.d.comb += self.o.eq(self.dmod.o)

    def process(self, i):
        return self.o

    def action(self, m):
        # no early-out branch to "put_z": always continue to align
        m.next = "align"
501
502
class FPAddDeNormMod(FPState):
    """ Denormalise stage: unless out_do_z is set, copies a and b
        through and then, per operand, either limits the exponent to
        N126 (when exp_n127 is set) or sets the top mantissa bit.
        mid, z, out_do_z and oz are always passed through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as "denormalise" and connects its input
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o
        m.submodules.denorm_in_a = inp.a
        m.submodules.denorm_in_b = inp.b
        m.submodules.denorm_out_a = out.a
        m.submodules.denorm_out_b = out.b

        with m.If(~inp.out_do_z):
            # operand a
            m.d.comb += out.a.eq(inp.a)
            with m.If(inp.a.exp_n127):
                m.d.comb += out.a.e.eq(inp.a.N126) # limit a exponent
            with m.Else():
                m.d.comb += out.a.m[-1].eq(1) # set top mantissa bit

            # operand b: same treatment
            m.d.comb += out.b.eq(inp.b)
            with m.If(inp.b.exp_n127):
                m.d.comb += out.b.e.eq(inp.b.N126) # limit b exponent
            with m.Else():
                m.d.comb += out.b.m[-1].eq(1) # set top mantissa bit

        m.d.comb += [
            out.mid.eq(inp.mid),
            out.z.eq(inp.z),
            out.out_do_z.eq(inp.out_do_z),
            out.oz.eq(inp.oz),
        ]

        return m
550
551
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: latches the denormalised
        operands into out_a / out_b, then moves to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its results in its output record (o.a, o.b);
        # it has no out_a / out_b attributes
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
571
572
class FPAddAlignMultiMod(FPState):
    """ One step of multi-cycle alignment: the operand with the smaller
        exponent is passed through shift_down; exp_eq is set when the
        exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # defaults: not yet equal, operands passed through unchanged
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
613
614
class FPAddAlignMulti(FPState):
    """ FSM wrapper around FPAddAlignMultiMod: latches the module's
        outputs every cycle and moves to "add_0" once exp_eq is set.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ registers the module as "align" and connects both operands
        """
        mod = self.mod
        m.submodules.align = mod
        m.d.comb += [
            mod.in_a.eq(in_a),
            mod.in_b.eq(in_b),
            self.exp_eq.eq(mod.exp_eq),
        ]
        # outputs are registered (sync), not combinatorial
        m.d.sync += [
            self.out_a.eq(mod.out_a),
            self.out_b.eq(mod.out_b),
        ]

    def action(self, m):
        with m.If(self.exp_eq):
            m.next = "add_0"
639
640
class FPNumIn2Ops:
    """ Record of two FPNumIn operands (a, b) with the result number z,
        the out_do_z flag, the packed result oz and the multiplex id.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from i
        """
        order = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in order]
654
655
class FPAddAlignSingleMod:
    """ Single-cycle alignment of the two operands (see elaborate)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ registers this module as "align" and connects its input
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        inp, out = self.i, self.o

        m.submodules.align_in_a = inp.a
        m.submodules.align_in_b = inp.b
        m.submodules.align_out_a = out.a
        m.submodules.align_out_b = out.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(inp.a.e), True)
        msr = MultiShiftRMerge(inp.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences (both directions) and comparisons
        m.d.comb += [
            ediff.eq(inp.a.e - inp.b.e),
            ediffr.eq(inp.b.e - inp.a.e),
            elz.eq(inp.a.e < inp.b.e),
            egz.eq(inp.a.e > inp.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [out.a.eq(inp.a), out.b.eq(inp.b)]

        # only one shifter (muxed)
        with m.If(~inp.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [
                    t_inp.eq(inp.b),
                    tdiff.eq(ediff),
                    out.b.eq(t_out),
                    out.b.s.eq(inp.b.s), # sign taken from the original b
                ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [
                    t_inp.eq(inp.a),
                    tdiff.eq(ediffr),
                    out.a.eq(t_out),
                    out.a.s.eq(inp.a.s), # sign taken from the original a
                ]

        # pass-through fields
        m.d.comb += [
            out.mid.eq(inp.mid),
            out.z.eq(inp.z),
            out.out_do_z.eq(inp.out_do_z),
            out.oz.eq(inp.oz),
        ]

        return m
749
750
class FPAddAlignSingle(FPState):
    """ FSM wrapper around FPAddAlignSingleMod: latches the aligned
        operands into out_a / out_b, then moves to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its results in its output record (o.a, o.b);
        # it has no out_a / out_b attributes.
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
770
771
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Pipeline stage chaining align-single, add stage 0 and add
        stage 1.  The object acts as its own stage.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input format: that of FPAddAlignSingleMod
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format: that of FPAddStage1Mod
        """
        return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        align_mod = FPAddAlignSingleMod(self.width, self.id_wid)
        add0_mod = FPAddStage0Mod(self.width, self.id_wid)
        add1_mod = FPAddStage1Mod(self.width, self.id_wid)

        StageChain([align_mod, add0_mod, add1_mod]).setup(m, i)

        # the stage output is whatever the last module in the chain emits
        m.d.comb += self.a1o.eq(add1_mod.o)

    def process(self, i):
        return self.a1o

    def action(self, m):
        m.next = "normalise_1"
807
808
class FPAddStage0Data:
    """ Record produced by add stage 0: result number z, the out_do_z
        flag, the packed result oz, the mantissa total tot (4 bits wider
        than z's mantissa) and the multiplex id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from i
        """
        order = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in order]
821
822
class FPAddStage0Mod:
    """ First stage of add: same-sign operands have their mantissas
        added; otherwise the smaller mantissa is subtracted from the
        larger and the result takes the sign of the larger.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ registers this module as "add0" and connects its input
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o
        m.submodules.add0_in_a = inp.a
        m.submodules.add0_in_b = inp.b
        m.submodules.add0_out_z = out.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(inp.a.m)+1, reset_less=True)
        bm0 = Signal(len(inp.b.m)+1, reset_less=True)
        m.d.comb += [
            seq.eq(inp.a.s == inp.b.s),     # signs equal
            mge.eq(inp.a.m >= inp.b.m),     # a mantissa >= b mantissa
            am0.eq(Cat(inp.a.m, 0)),
            bm0.eq(Cat(inp.b.m, 0)),
        ]

        with m.If(~inp.out_do_z):
            m.d.comb += out.z.e.eq(inp.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [out.tot.eq(am0 + bm0),
                             out.z.s.eq(inp.a.s)]
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += [out.tot.eq(am0 - bm0),
                             out.z.s.eq(inp.a.s)]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [out.tot.eq(bm0 - am0),
                             out.z.s.eq(inp.b.s)]

        # pass-through fields
        m.d.comb += [
            out.oz.eq(inp.oz),
            out.out_do_z.eq(inp.out_do_z),
            out.mid.eq(inp.mid),
        ]
        return m
887
888
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper around FPAddStage0Mod: latches the module's output
        record into self.o, then moves to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
910
911
class FPAddStage1Data:
    """ Record produced by add stage 1: result number z, the out_do_z
        flag, the packed result oz, the Overflow bits (of) and the
        multiplex id.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from i
        """
        order = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in order]
924
925
926
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ registers this module as "add1" (and its output overflow
            as "add1_out_overflow") and connects its input
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.o
        tot = inp.tot

        # default: z passed through (mantissa/exponent overridden below)
        m.d.comb += out.z.eq(inp.z)

        with m.If(~inp.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows. shift result down
            with m.If(tot[-1]):
                m.d.comb += [
                    out.z.m.eq(tot[4:]),
                    out.of.m0.eq(tot[4]),
                    out.of.guard.eq(tot[3]),
                    out.of.round_bit.eq(tot[2]),
                    out.of.sticky.eq(tot[1] | tot[0]),
                    out.z.e.eq(inp.z.e + 1),
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    out.z.m.eq(tot[3:]),
                    out.of.m0.eq(tot[3]),
                    out.of.guard.eq(tot[2]),
                    out.of.round_bit.eq(tot[1]),
                    out.of.sticky.eq(tot[0]),
                ]

        # pass-through fields
        m.d.comb += [
            out.out_do_z.eq(inp.out_do_z),
            out.oz.eq(inp.oz),
            out.mid.eq(inp.mid),
        ]

        return m
988
989
class FPAddStage1(FPState):
    """ FSM wrapper around FPAddStage1Mod: latches the module's overflow
        bits and result number, then moves to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module exposes its results in its output record (o.of, o.z);
        # it has no out_of / out_z attributes
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): this unconditional assignment comes after the
        # eq(0) above; kept in the original order - confirm intent
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
1012
1013
class FPNormaliseModSingle:
    """ Single-cycle normalisation: when the mantissa MSB of the input
        is zero (m_msbzero), counts the leading zeros with a
        PriorityEncoder, shifts the mantissa up by that amount and
        decreases the exponent accordingly.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # elaborate() reads in_of and drives out_of; they were never
        # created before, which made elaboration fail
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        return FPNumBase(self.width, False)

    def ospec(self):
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input number is held in in_z (there is no self.i)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        # mantissa plus the guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of
        m.submodules.norm1_out_overflow = self.out_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1080
class FPNorm1Data:
    """ Record produced by normalisation: the roundz flag, result
        number z, the out_do_z flag, the packed result oz and the
        multiplex id.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ assignments copying every field from i
        """
        order = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, field).eq(getattr(i, field))
                for field in order]
1093
1094
class FPNorm1ModSingle:
    """ Single-cycle (combinatorial) normalisation stage.

        Input is an FPAddStage1Data record, output an FPNorm1Data record.
        When out_do_z is clear, the mantissa is either shifted up
        (exponent decreased) or shifted down (exponent increased), and
        the overflow bits (m0/guard/round/sticky) are recomputed.
        mid, out_do_z and oz are passed straight through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ stage API: the result is always this module's output record
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input record
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): temp_m in the "increase" branch is mwid+1 bits wide
        # while the shifter is constructed with mwid - confirm the widths.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come out of the shifter: temp_s is
                    # only driven in the "decrease" branch, so reading it
                    # here would always give zero.
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1201
1202
class FPNorm1ModMulti:
    """ Multi-cycle normalisation: one single-bit shift per evaluation.

        in_select chooses between the fresh input (in_z/in_of) and the
        fed-back intermediate (temp_z/temp_of).  out_norm is raised while
        a further increase or decrease step is still required.

        NOTE(review): FPNorm1Multi.setup calls self.mod.setup(...), but
        no setup method is defined on this class - confirm where it
        is meant to come from.
    """

    def __init__(self, width, single_cycle=True):
        # single_cycle is accepted but not used by this class
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven in elaborate(): must exist before elaboration
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),           # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),          # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard),       # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),           # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),           # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),          # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1268
1269
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModSingle; next state
        is "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # single_cycle is accepted but not used by this class
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input record spec: same as the wrapped module's
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record spec: same as the wrapped module's
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: unconditionally move on to rounding
        """
        m.next = "round"
1292
1293
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" for multi-cycle normalisation, wrapping
        FPNorm1ModMulti.  Stays in this state while out_norm is set and
        moves to "round" once it clears.
    """

    def __init__(self, width, id_wid):
        # id_wid is accepted but not used by this constructor
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): no setup method is visible on FPNorm1ModMulti in
        # this file - confirm this call resolves.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        """ FSM action: accept input, latch intermediates, loop or exit
        """
        # accept when a strobe is present and not yet acknowledged
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # store this step's result as the next step's intermediate
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1334
1335
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Combined normalise / round / corrections / pack stage.

        Acts both as the FSM state "normalise_1" and as an
        UnbufferedPipeline whose stage is this object itself.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        # width and id_wid are set before the pipeline constructor runs
        self.id_wid = id_wid
        self.width = width
        UnbufferedPipeline.__init__(self, self) # pipeline is its own stage

    def ispec(self):
        """ input record: the same as FPNorm1ModSingle's ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record: the same as FPPackMod's ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding, Corrections, Pack - chained in that order
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()
        m.d.comb += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z),    # outputs packed result
        ]

    def process(self, i):
        """ stage API: result is the record created in setup()
        """
        return self.out_z

    def action(self, m):
        """ FSM action: unconditionally move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1371
1372
class FPRoundData:
    """ Record passed between the round, corrections and pack stages:
        the number (z), the out_do_z flag with its companion value (oz),
        and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        field_order = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name))
                for name in field_order]
1384
1385
class FPRoundMod:
    """ Combinatorial rounding stage: FPNorm1Data in, FPRoundData out.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's output record
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        # default: pass the input through; overridden below when rounding
        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            with m.If(self.i.roundz):
                m.d.comb += self.out_z.z.m.eq(self.i.z.m + 1) # mantissa up
                with m.If(self.i.z.m == self.i.z.m1s): # all 1s
                    m.d.comb += self.out_z.z.e.eq(self.i.z.e + 1) # exponent up

        return m
1417
1418
class FPRound(FPState):
    """ FSM state "round" wrapping FPRoundMod; next state is
        "corrections".  The module's combinatorial output is registered
        into out_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record spec: same as the wrapped module's
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record spec: same as the wrapped module's
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in the visible part of this
        # file - presumably inherited; confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally move on to "corrections"
        """
        m.next = "corrections"
1443
1444
class FPCorrectionsMod:
    """ Combinatorial corrections stage: FPRoundData in, FPRoundData out.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's output record
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.corr_in_z = self.i.z
        m.submodules.corr_out_z = self.out_z.z
        # default: pass the input through; exponent overridden below
        m.d.comb += self.out_z.eq(self.i) # copies mid, z, out_do_z
        with m.If(~self.i.out_do_z):
            # a denormalised input gets its exponent replaced with z.N127
            with m.If(self.i.z.is_denormalised):
                m.d.comb += self.out_z.z.e.eq(self.i.z.N127)
        return m
1477
1478
class FPCorrections(FPState):
    """ FSM state "corrections" wrapping FPCorrectionsMod; next state
        is "pack".  The module's combinatorial output is registered
        into out_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record spec: same as the wrapped module's
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record spec: same as the wrapped module's
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally move on to "pack"
        """
        m.next = "pack"
1502
1503
class FPPackData:
    """ Output record of the pack stage: the packed value (z) and the
        id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
1512
1513
class FPPackMod:
    """ Combinatorial pack stage: FPRoundData in, FPPackData out.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a fresh input record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPPackData)
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ stage API: the result is always this module's output record
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        # scratch number used to build the packed value
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = self.i.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            # overflowed input: z.inf() with the input's sign
            with m.If(self.i.z.is_overflowed):
                m.d.comb += z.inf(self.i.z.s)
            with m.Else():
                m.d.comb += z.create(self.i.z.s, self.i.z.e, self.i.z.m)
        with m.Else():
            # out_do_z set: use the oz value as-is
            m.d.comb += z.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1552
1553
class FPPack(FPState):
    """ FSM state "pack" wrapping FPPackMod; next state is
        "pack_put_z".  The module's combinatorial output is registered
        into out_z (an FPPackData record: z and mid).
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record spec: same as the wrapped module's
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record spec: same as the wrapped module's
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o" (FPPackData), whose packed
        # value is the plain Signal "z": there is no out_z / v to read.
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: unconditionally move on to "pack_put_z"
        """
        m.next = "pack_put_z"
1577
1578
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z using a stb/ack
        handshake, then moves to to_state ("get_ops" if not given).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy the id (if any) and value, run the handshake
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)

        dest = self.out_z.z
        m.d.sync += [dest.v.eq(self.in_z)]

        # strobe and ack both high: transfer complete
        with m.If(dest.stb & dest.ack):
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1602
1603
class FPPutZIdx(FPState):
    """ As FPPutZ, but the destination is picked from the out_zs array
        using in_mid as the index.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy the value to the selected output and run
            the stb/ack handshake on it
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        dest = self.out_zs[self.in_mid]

        m.d.comb += [
            outz_stb.eq(dest.stb),
            outz_ack.eq(dest.ack),
        ]
        m.d.sync += [dest.v.eq(self.in_z.v)]

        # strobe and ack both high: transfer complete
        with m.If(outz_stb & outz_ack):
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1629
class FPOpData:
    """ Output record of the adder: an FPOp (z) and the id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the fields as a list: z first, then mid
        """
        return [getattr(self, name) for name in ("z", "mid")]
1640
1641
class FPADDBaseMod:
    """ FSM-based FP adder: builds a list of states (compact or long
        form) and emits one FSM state per entry.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FSM states, in the order they were added
        self.states = []

    def ispec(self):
        """ returns a fresh input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            # each state's action runs inside the FSM state named
            # by its state_from
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ long form: one FSM state per processing step

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which __init__ assigns, and several
            setup() calls pass more arguments than the setup methods
            visible in this file accept (e.g. FPNorm1Single, FPRound).
            It looks stale - confirm before using compact=False.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ compact form: get-ops, special-cases+denorm, align+add,
            norm-to-pack and put-z, each fed from the previous one
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1761
1762
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod as a submodule and
        bridges its input trigger and output strobe/ack to this state.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input record spec: same as the wrapped module's
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record spec: same as the wrapped module's
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            in_mid is accepted but not used here: the id travels inside
            the input record i.
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: start the add on an accepted strobe, then wait
            for z_done and move to "put_z"
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
        # (the statements that used to follow an unconditional "return"
        # here were unreachable and referred to attributes this class
        # never sets; they have been removed.)
1852
1853
class FPADDStageOut:
    """ Output record of the pipelined adder: result value (z) and
        id (mid).
    """

    def __init__(self, width, id_wid):
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the fields as a list: z first, then mid
        """
        return [getattr(self, name) for name in ("z", "mid")]
1864
1865
class PlaceHolder:
    """ Empty attribute holder: given z and mid attributes it matches
        the format of FPADDStageOut, which allows that class's eq
        function to do assignments from it.
    """
1868
1869
class FPAddBaseStage:
    """ Simple stage: FPADDBaseData in, FPADDStageOut out, where the
        result is the expression i.a + i.b.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a fresh input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a fresh output record (FPADDStageOut)
        """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ returns a PlaceHolder carrying z = i.a + i.b and mid = i.mid
        """
        result = PlaceHolder()
        total = i.a + i.b
        result.z = total
        result.mid = i.mid
        return result
1886
1887
class FPADDBasePipe1(UnbufferedPipeline):
    """ UnbufferedPipeline built around a single FPAddBaseStage.
    """

    def __init__(self, width, id_wid):
        super().__init__(FPAddBaseStage(width, id_wid))
1892
1893
class FPADDBasePipe(ControlBase):
    """ Three-stage adder pipeline: special-cases/denorm, then
        align/add, then normalise-to-pack, connected in that order.
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        #self.pipe1 = FPADDBasePipe1(width, id_wid)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        chain = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        m = Module()
        named_pipes = (("scnorm", self.pipe1),
                       ("addalign", self.pipe2),
                       ("normpack", self.pipe3))
        for name, pipe in named_pipes:
            setattr(m.submodules, name, pipe)
        # the connections were computed once, in the constructor
        m.d.comb += self._eqs
        return m
1911
1912
class FPAddInPassThruStage:
    """ Pass-through stage: input and output specs are both
        FPADDBaseData and process() hands its argument back unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a fresh FPADDBaseData record
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output spec is identical to the input spec
        """
        return self.ispec()

    def process(self, i):
        """ returns i itself
        """
        return i
1919
1920
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ Fan-in: PriorityCombMuxInPipe with num_rows inputs around a
        pass-through stage.
    """

    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        passthru = FPAddInPassThruStage(width, id_width)
        PriorityCombMuxInPipe.__init__(self, passthru, p_len=self.num_rows)

    def ports(self):
        """ returns valid/ready/data ports of every input, followed by
            ready/valid/data ports of the single output
        """
        res = []
        for prev in self.p:
            res.extend([prev.i_valid, prev.o_ready])
            res.extend(prev.i_data.ports())
        res.extend([self.n.i_ready, self.n.o_valid])
        res.extend(self.n.o_data.ports())
        return res
1935
1936
class MuxCombPipeline(CombMultiOutPipeline):
    """ CombMultiOutPipeline where the stage doubles as the n-way
        multiplexer selector.
    """

    def __init__(self, stage, n_len):
        # HACK: stage is also the n-way multiplexer
        super().__init__(stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        stage.m_id = self.p.i_data.mid

    def ports(self):
        """ returns the ports of self.p_mux
        """
        mux = self.p_mux
        return mux.ports()
1947
1948
class FPAddOutPassThruStage:
    """ Pass-through stage: input and output specs are both
        FPADDStageOut and process() hands its argument back unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a fresh FPADDStageOut record
        """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ output spec is identical to the input spec
        """
        return self.ispec()

    def process(self, i):
        """ returns i itself
        """
        return i
1955
1956
class FPADDMuxOutPipe(MuxCombPipeline):
    """ Fan-out: MuxCombPipeline with num_rows outputs around a
        pass-through stage.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        MuxCombPipeline.__init__(self, passthru, n_len=self.num_rows)
        #self.p.i_data = stage.ispec()
        #self.n.o_data = stage.ospec()

    def ports(self):
        """ returns valid/ready/data ports of the single input, followed
            by ready/valid/data ports of every output
        """
        res = [self.p.i_valid, self.p.o_ready]
        res.extend(self.p.i_data.ports())
        for nxt in self.n:
            res.extend([nxt.i_ready, nxt.o_valid])
            res.extend(nxt.o_data.ports())
        return res
1972
1973
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        num_rows inputs are fanned in (FPADDInMuxPipe) to a single
        FPADDBasePipe, whose output is fanned out again
        (FPADDMuxOutPipe) to num_rows outputs.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        self.p = self.inpipe.p  # kinda annoying,
        self.n = self.outpipe.n # use pipe in/out as this class in/out
        # port list is computed once here and returned by ports()
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        # chain: inpipe -> fpadd -> outpipe
        m.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        m.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return m

    def ports(self):
        """ returns the in-pipe ports followed by the out-pipe ports
        """
        return self._ports
2002
2003
class ResArray:
    """ Array of rs_sz result operands (FPOp), plus an input operand
        and input id.
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width passed to every FPOp
            * id_wid: bit-width of in_mid
            * rs_sz: number of result entries.  This used to be read
              from an undefined name; it is now a keyword argument
              (default 2, matching FPADD's rs_sz default).
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ combinatorially links in_z and in_mid to this object's copies
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for the result array
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every result entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2035
2036
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs and of result
              operands that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        # NOTE(review): the FPID base constructor is not called here; a
        # separate FPID instance is kept in self.ids instead - confirm
        # that is intended.
        self.ids = FPID(id_wid)

        # input operand pairs, one (in_a, in_b) tuple per row
        rs = []
        for i in range(rs_sz):
            in_a  = FPOp(width)
            in_b  = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # result operands, one per row
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # FSM states, in the order they were added
        self.states = []

    def add_state(self, state):
        """ appends state to the FSM state list and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair (row 0) is read here
        in_a = self.rs[0][0]
        in_b = self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = FPADDBase(self.width, self.id_wid, self.single_cycle)
        ab = self.add_state(ab)
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # the result is written to the res entry selected by o.mid, after
        # which the FSM returns to "get_a"
        pz = self.add_state(FPPutZIdx("put_z", o.z, self.res,
                                    o.mid, "get_a"))

        with m.FSM() as fsm:

            # each state's action runs inside the FSM state named
            # by its state_from
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2126
2127
# script entry point: builds an adder and hands it to nmigen's cli main()
if __name__ == "__main__":
    # hard-wired developer switch: only the FPADD branch ever runs
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=alu.rs[0][0].ports() + \
                        alu.rs[0][1].ports() + \
                        alu.res[0].ports() + \
                        [alu.ids.in_mid, alu.ids.out_mid])
    else:
        # NOTE(review): FPADDBase.__init__ as visible in this file does
        # not create in_a / in_b / out_z / in_mid / out_mid - this branch
        # looks stale; confirm before enabling.
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=[alu.in_a, alu.in_b] + \
                        alu.in_t.ports() + \
                        alu.out_z.ports() + \
                        [alu.in_mid, alu.out_mid])
2141
2142
2143 # works... but don't use, just do "python fname.py convert -t v"
2144 #print (verilog.convert(alu, ports=[
2145 # ports=alu.in_a.ports() + \
2146 # alu.in_b.ports() + \
2147 # alu.out_z.ports())