move flexible ports fn to MultiOutControlBase
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline,
13 PassThroughStage)
14 from multipipe import CombMuxOutPipe
15 from multipipe import PriorityCombMuxInPipe
16
17 #from fpbase import FPNumShiftMultiRight
18
19
class FPState(FPBase):
    """ Common base for the FP state/stage classes in this file.

        Stores the identifier handed in by the subclass (for example
        "align" or "add_0") and offers helpers that record a dict of
        inputs or outputs while also exposing each entry as an attribute.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ keeps the inputs dict and mirrors each entry as an attribute
        """
        self.inputs = inputs
        for name in inputs:
            setattr(self, name, inputs[name])

    def set_outputs(self, outputs):
        """ keeps the outputs dict and mirrors each entry as an attribute
        """
        self.outputs = outputs
        for name in outputs:
            setattr(self, name, outputs[name])
33
34
class FPGetSyncOpsMod:
    """ Copies num_ops operands from in_op to out_op (combinatorially)
        once every bit of stb is set and ack is raised.

        * in_op / out_op : lists of num_ops Signals, each width bits
        * stb            : one strobe bit per operand
        * ready          : high when all strobe bits are set
        * out_decode     : ack & ready
    """

    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        # build (input, output) pairs so the signals are created in the
        # same interleaved order as before
        pairs = [(Signal(width, reset_less=True),
                  Signal(width, reset_less=True))
                 for _ in range(num_ops)]
        self.in_op = [pair[0] for pair in pairs]
        self.out_op = [pair[1] for pair in pairs]
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # -1 as an unsigned num_ops-bit constant: compare against all-ones
        all_strobes = Const(-1, (self.num_ops, False))
        m.d.comb += self.ready.eq(self.stb == all_strobes)
        m.d.comb += self.out_decode.eq(self.ack & self.ready)
        with m.If(self.out_decode):
            m.d.comb += [dst.eq(src)
                         for src, dst in zip(self.in_op, self.out_op)]
        return m

    def ports(self):
        """ returns in_op, then out_op, then stb and ack
        """
        return [*self.in_op, *self.out_op, self.stb, self.ack]
64
65
class FPOps(Trigger):
    """ A Trigger carrying num_ops operand Signals (each width bits),
        held in the Array self.v.
    """

    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops
        self.v = Array([Signal(width) for _ in range(num_ops)])

    def ports(self):
        """ returns the operand signals followed by ack and stb
        """
        operands = [self.v[idx] for idx in range(self.num_ops)]
        return operands + [self.ack, self.stb]
84
85
class InputGroup:
    """ Funnels num_rows sets of operands into one output (self.out_op).

        Each row is an FPGetSyncOpsMod.  A PriorityEncoder picks one row
        whose "ready" is high; that row's operands are latched into
        out_op and its index is latched into self.mid (multiplex id).

        * width    : bit-width of each operand
        * num_ops  : operands per row
        * num_rows : number of input rows
    """

    def __init__(self, width, num_ops=2, num_rows=4):
        self.width = width
        self.num_ops = num_ops
        self.num_rows = num_rows
        # bits needed to hold a row index in 0..num_rows-1.  Integer
        # arithmetic: the previous int(log(n)/log(2)) rounded *down*, which
        # made mid too narrow for the encoder output whenever num_rows was
        # not a power of two (and relied on float rounding when it was).
        self.mmax = (num_rows - 1).bit_length()
        self.mid = Signal(self.mmax, reset_less=True)  # multiplex id
        self.rs = Array([FPGetSyncOpsMod(width, num_ops)
                         for _ in range(num_rows)])

        self.out_op = FPOps(width, num_ops)

    def elaborate(self, platform):
        m = Module()

        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe
        m.submodules.out_op = self.out_op
        m.submodules += self.rs

        # connect priority encoder: one input bit per row's "ready"
        in_ready = [self.rs[i].ready for i in range(self.num_rows)]
        m.d.comb += pe.i.eq(Cat(*in_ready))

        active = Signal(reset_less=True)
        out_en = Signal(reset_less=True)
        m.d.comb += active.eq(~pe.n)  # encoder active
        m.d.comb += out_en.eq(active & self.out_op.trigger)

        # encoder active: ack relevant input, record MID, pass output
        with m.If(out_en):
            rs = self.rs[pe.o]
            m.d.sync += self.mid.eq(pe.o)
            m.d.sync += rs.ack.eq(0)
            m.d.sync += self.out_op.stb.eq(0)
            for j in range(self.num_ops):
                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
        with m.Else():
            m.d.sync += self.out_op.stb.eq(1)
            # nothing being transferred: raise ack on every row
            for i in range(self.num_rows):
                m.d.sync += self.rs[i].ack.eq(1)

        return m

    def ports(self):
        """ returns out_op's ports, each row's in_op and stb, then mid
        """
        res = []
        for i in range(self.num_rows):
            inop = self.rs[i]
            res += inop.in_op + [inop.stb]
        return self.out_op.ports() + res + [self.mid]
141
142
class FPGetOpMod:
    """ Combinatorial operand fetch: when the incoming FPOp has both ack
        and stb set, out_decode goes high and out_op follows in_op.v.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op
        m.d.comb += self.out_decode.eq((self.in_op.ack) & (self.in_op.stb))
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
159
160
class FPGetOp(FPState):
    """ gets operand: FSM state that waits for FPGetOpMod to decode an
        operand, latches it into out_op and moves on to out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs

            the module is registered as a submodule under this state's name
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ on decode: drop ack, latch the operand, go to out_state.
            otherwise keep ack raised.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
189
190
class FPNumBase2Ops:
    """ Data record: two FPNumBase operands (a, b) plus a multiplex id.
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying a, b and mid from i
        """
        pairs = ((self.a, i.a), (self.b, i.b), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]

    def ports(self):
        return [self.a, self.b, self.mid]
203
204
class FPADDBaseData:
    """ Data record for the adder input: two raw operands (a, b), each
        width bits, plus a multiplex id of id_wid bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying a, b and mid from i
        """
        pairs = ((self.a, i.a), (self.b, i.b), (self.mid, i.mid))
        return [dst.eq(src) for dst, src in pairs]

    def ports(self):
        return [self.a, self.b, self.mid]
219
220
class FPGet2OpMod(Trigger):
    """ Passes a pair of operands (FPADDBaseData) from self.i to self.o,
        combinatorially, while the inherited trigger is active.
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (same as the input format)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
245
246
class FPGet2Op(FPState):
    """ gets operands: FSM state wrapping FPGet2OpMod.  When the module
        triggers, both operands (and mid) are latched into self.o and
        the FSM moves on to out_state.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ links module to inputs and outputs

            in_stb drives the module's stb; the module's ack is reported
            on both self.out_ack and the caller's in_ack
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.i.eq(i),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        """ on trigger: drop ack, latch the data, go to out_state.
            otherwise keep ack raised.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
279
280
class FPSCData:
    """ Data record produced by the special-cases stage:

        * a, b     : decoded operands (FPNumBase)
        * z        : result number (FPNumOut)
        * oz       : raw width-bit result value
        * out_do_z : flag set by the special-cases logic when it has
                     produced a result
        * mid      : multiplex id
    """

    # copy order used by eq()
    _fields = ("z", "out_do_z", "oz", "a", "b", "mid")

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in self._fields]
294
295
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Input is FPADDBaseData (raw a, b, mid); output is FPSCData.
        When one of the special cases matches, o.out_do_z is set and the
        answer is placed in o.z; otherwise the decoded operands are
        passed through in o.a / o.b.  o.oz always mirrors o.z.v.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Module()
        out = self.o

        m.submodules.sc_out_z = out.z

        # decode: XXX really should move to separate stage
        a1 = FPNumIn(None, self.width)
        b1 = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = a1
        m.submodules.sc_decode_b = b1
        m.d.comb += [a1.decode(self.i.a), b1.decode(self.i.b)]

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a1.s != b1.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(a1.m == b1.m)

        # either operand NaN: result is NaN
        with m.If(a1.is_nan | b1.is_nan):
            m.d.comb += [out.out_do_z.eq(1), out.z.nan(0)]

        # XXX FP16 non-canonical NaN handling (zero + NaN returning a
        # modified copy of the NaN operand) used to sit here as four
        # extra branches; it is disabled and under review.

        # a is inf: result is inf with a's sign...
        with m.Elif(a1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(a1.s)]
            # ...unless b has the max exponent too and the signs differ,
            # in which case the result is NaN
            with m.If(b1.exp_128 & s_nomatch):
                m.d.comb += out.z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b1.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(b1.s)]

        # both zero: result is zero, negative only if both are negative
        with m.Elif(a1.is_zero & b1.is_zero):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.create(a1.s & b1.s, b1.e, b1.m[3:-1]),
            ]

        # a is zero: result is b
        with m.Elif(a1.is_zero):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.create(b1.s, b1.e, b1.m[3:-1]),
            ]

        # b is zero: result is a
        with m.Elif(b1.is_zero):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.create(a1.s, a1.e, a1.m[3:-1]),
            ]

        # a equal to -b: result is (+ve) zero
        with m.Elif(s_nomatch & m_match & (a1.e == b1.e)):
            m.d.comb += [out.out_do_z.eq(1), out.z.zero(0)]

        # no special case: denormalised number checks come next, so
        # pass the decoded a/b through
        with m.Else():
            m.d.comb += out.out_do_z.eq(0)
            m.d.comb += out.a.eq(a1)
            m.d.comb += out.b.eq(b1)

        m.d.comb += out.oz.eq(out.z.v)
        m.d.comb += out.mid.eq(self.i.mid)

        return m
414
415
class FPID:
    """ Optional multiplex-id carrier.

        When id_wid is non-zero, in_mid and out_mid are id_wid-bit
        Signals; when id_wid is 0 or None no signals are created and
        both attributes are None.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ adds a sync copy of in_mid to out_mid, if the id is in use.

            Uses the same truthiness test as __init__: the previous
            "is not None" test let id_wid=0 through, which then failed
            on None.eq().
        """
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
429
430
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper around FPAddSpecialCasesMod: goes to "put_z" when
        the module flags a special-case result, else to "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup only takes (m, i); its special-case flag is
        # part of its output record, so wire it up here
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        # NOTE(review): idsync is not defined by FPState in this file;
        # presumably inherited via FPBase - confirm
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
456
457
class FPAddSpecialCasesDeNorm(FPState, UnbufferedPipeline):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Chains FPAddSpecialCasesMod into FPAddDeNormMod and presents
        the pair as one UnbufferedPipeline stage (the object acts as
        its own stage: ispec/ospec/setup/process).
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        UnbufferedPipeline.__init__(self, self)  # pipe is its own stage
        self.o = self.ospec()

    def ispec(self):
        """ input format: that of the special-cases module
        """
        return self.smod.ispec()

    def ospec(self):
        """ output format: that of the denormalise module
        """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # special-cases feeds denormalise.  there is no early-out
        # (split pipeline) here: out_do_z simply travels with the data
        self.smod.setup(m, i)
        self.dmod.setup(m, self.smod.o)
        m.d.comb += self.o.eq(self.dmod.o)

    def process(self, i):
        return self.o

    def action(self, m):
        # always continues to alignment (no branch to "put_z")
        m.next = "align"
502
503
class FPAddDeNormMod(FPState):
    """ Denormalisation step, FPSCData in and FPSCData out.

        Unless out_do_z is set, each operand is copied through and then
        either has its exponent replaced by N126 (when exp_n127 is set)
        or has its top mantissa bit set.  mid, z, out_do_z and oz are
        always passed through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # identical treatment for a, then b
            for src, dst in ((self.i.a, self.o.a), (self.i.b, self.o.b)):
                m.d.comb += dst.eq(src)
                with m.If(src.exp_n127):
                    m.d.comb += dst.e.eq(src.N126)  # limit exponent
                with m.Else():
                    m.d.comb += dst.m[-1].eq(1)  # set top mantissa bit

        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
551
552
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: registers the denormalised
        operands into out_a / out_b and always moves on to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's results live in its output record (mod.o)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
572
573
class FPAddAlignMultiMod(FPState):
    """ One alignment step: compares the exponents of in_a and in_b and
        applies shift_down to whichever operand has the smaller one.
        exp_eq is raised when the exponents are equal.

        NOTE: this does *not* do single-cycle multi-shifting; per the
        original author's note the FSM *STAYS* in the align state until
        the exponents match.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # defaults: not yet equal, operands passed through unchanged
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
614
615
class FPAddAlignMulti(FPState):
    """ FSM wrapper around FPAddAlignMultiMod: registers the module's
        outputs each cycle and advances to "add_0" only once the module
        reports that the exponents are equal.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.eq(in_a),
            self.mod.in_b.eq(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        # outputs are registered (sync), not combinatorial
        m.d.sync += [
            self.out_a.eq(self.mod.out_a),
            self.out_b.eq(self.mod.out_b),
        ]

    def action(self, m):
        # no m.next when exponents differ: remain in this state
        with m.If(self.exp_eq):
            m.next = "add_0"
640
641
class FPNumIn2Ops:
    """ Data record output by alignment: same fields as FPSCData, but
        with a and b held as FPNumIn.
    """

    # copy order used by eq()
    _fields = ("z", "out_do_z", "oz", "a", "b", "mid")

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in self._fields]
655
656
class FPAddAlignSingleMod:
    """ Single-cycle alignment: FPSCData in, FPNumIn2Ops out.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        inp, out = self.i, self.o

        m.submodules.align_in_a = inp.a
        m.submodules.align_in_b = inp.b
        m.submodules.align_out_a = out.a
        m.submodules.align_out_b = out.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(inp.a.e), True)
        msr = MultiShiftRMerge(inp.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences (both directions) and comparisons
        m.d.comb += [
            ediff.eq(inp.a.e - inp.b.e),
            ediffr.eq(inp.b.e - inp.a.e),
            elz.eq(inp.a.e < inp.b.e),
            egz.eq(inp.a.e > inp.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += out.a.eq(inp.a)
        m.d.comb += out.b.eq(inp.b)

        # only one shifter (muxed); skipped when a result already exists
        with m.If(~inp.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [
                    t_inp.eq(inp.b),
                    tdiff.eq(ediff),
                    out.b.eq(t_out),
                    out.b.s.eq(inp.b.s),  # keep b's sign
                ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [
                    t_inp.eq(inp.a),
                    tdiff.eq(ediffr),
                    out.a.eq(t_out),
                    out.a.s.eq(inp.a.s),  # keep a's sign
                ]

        # pass-through fields
        m.d.comb += [
            out.mid.eq(inp.mid),
            out.z.eq(inp.z),
            out.out_do_z.eq(inp.out_do_z),
            out.oz.eq(inp.oz),
        ]

        return m
750
751
class FPAddAlignSingle(FPState):
    """ FSM wrapper around FPAddAlignSingleMod: registers the aligned
        operands into out_a / out_b and always moves on to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's results live in its output record (mod.o)
        # NOTE: could be done as comb
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
771
772
class FPAddAlignSingleAdd(FPState, UnbufferedPipeline):
    """ Pipeline stage chaining alignment, add stage 0 and add stage 1.

        Takes FPSCData and produces FPAddStage1Data.  The object acts as
        its own UnbufferedPipeline stage (ispec/ospec/setup/process).
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        UnbufferedPipeline.__init__(self, self)  # pipeline is its own stage
        self.a1o = self.ospec()

    def ispec(self):
        """ input format, as used by FPAddAlignSingleMod's ispec
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format, as used by FPAddStage1Mod's ospec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # chain AddAlignSingle, AddStage0 and AddStage1
        stages = [
            FPAddAlignSingleMod(self.width, self.id_wid),
            FPAddStage0Mod(self.width, self.id_wid),
            FPAddStage1Mod(self.width, self.id_wid),
        ]
        StageChain(stages).setup(m, i)

        # expose the last stage's output
        m.d.comb += self.a1o.eq(stages[-1].o)

    def process(self, i):
        return self.a1o

    def action(self, m):
        m.next = "normalise_1"
808
809
class FPAddStage0Data:
    """ Data record output by add stage 0:

        * z        : partial result (sign and exponent), FPNumBase
        * tot      : mantissa sum/difference, z.m_width + 4 bits
        * out_do_z : pass-through flag
        * oz       : pass-through raw result value
        * mid      : multiplex id
    """

    # copy order used by eq()
    _fields = ("z", "out_do_z", "oz", "tot", "mid")

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in self._fields]
822
823
class FPAddStage0Mod:
    """ First stage of add, combinatorial: adds the mantissas when the
        signs match, otherwise subtracts the smaller mantissa from the
        larger.  Output is FPAddStage0Data.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        a, b = self.i.a, self.i.b
        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = self.o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs equal
        mge = Signal(reset_less=True)  # a mantissa >= b mantissa
        am0 = Signal(len(a.m)+1, reset_less=True)
        bm0 = Signal(len(b.m)+1, reset_less=True)
        m.d.comb += [
            seq.eq(a.s == b.s),
            mge.eq(a.m >= b.m),
            am0.eq(Cat(a.m, 0)),
            bm0.eq(Cat(b.m, 0)),
        ]

        # nothing to compute when a result has already been produced
        with m.If(~self.i.out_do_z):
            m.d.comb += self.o.z.e.eq(a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += [self.o.tot.eq(am0 + bm0),
                             self.o.z.s.eq(a.s)]
            # a mantissa greater than (or equal to) b, use a
            with m.Elif(mge):
                m.d.comb += [self.o.tot.eq(am0 - bm0),
                             self.o.z.s.eq(a.s)]
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += [self.o.tot.eq(bm0 - am0),
                             self.o.z.s.eq(b.s)]

        # pass-through fields
        m.d.comb += [
            self.o.oz.eq(self.i.oz),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.mid.eq(self.i.mid),
        ]
        return m
888
889
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper around FPAddStage0Mod: registers the module's output
        into self.o and always moves on to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
911
912
class FPAddStage1Data:
    """ Data record output by add stage 1:

        * z        : result number (FPNumBase)
        * of       : Overflow record (m0, guard, round_bit, sticky)
        * out_do_z : pass-through flag
        * oz       : pass-through raw result value
        * mid      : multiplex id
    """

    # copy order used by eq()
    _fields = ("z", "out_do_z", "oz", "of", "mid")

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in self._fields]
925
926
927
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot acts as a
        carry bit) and splits tot into mantissa plus overflow bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format
        """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z_out, of_out = self.o.z, self.o.of

        # default: result number passed through (overridden below)
        m.d.comb += z_out.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows.
            # shift result down and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += [
                    z_out.m.eq(tot[4:]),
                    of_out.m0.eq(tot[4]),
                    of_out.guard.eq(tot[3]),
                    of_out.round_bit.eq(tot[2]),
                    of_out.sticky.eq(tot[1] | tot[0]),
                    z_out.e.eq(self.i.z.e + 1),
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    z_out.m.eq(tot[3:]),
                    of_out.m0.eq(tot[3]),
                    of_out.guard.eq(tot[2]),
                    of_out.round_bit.eq(tot[1]),
                    of_out.sticky.eq(tot[0]),
                ]

        # pass-through fields
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
989
990
class FPAddStage1(FPState):
    """ FSM wrapper around FPAddStage1Mod: registers the module's result
        number and overflow record into out_z / out_of and always moves
        on to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # the module's results live in its output record (mod.o)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): issued in the same scope as the eq(0) above, so
        # this later assignment always takes priority - confirm whether
        # it was meant to live in action() instead
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
1013
1014
class FPNormaliseModSingle:
    """ Single-cycle normalisation (exponent-decrease direction only):
        when the input mantissa's MSB is zero, counts leading zeros,
        shifts the mantissa up by that amount and lowers the exponent
        to match.

        * in_z / out_z   : number in / out (FPNumBase)
        * in_of / out_of : Overflow record in / out
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # elaborate() reads in_of and drives out_of; they were never
        # created before, so elaboration raised AttributeError
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input number format
        """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output number format
        """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs

            only the number is connected here (this class has in_z, not
            a combined "i" record); in_of is left for the caller to drive
        """
        m.submodules.normalise = self
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of
        m.submodules.norm1_out_overflow = self.out_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): the shifter is instantiated but never connected
        # in this module - kept so the submodule hierarchy is unchanged
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # bit-reversed
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1081
class FPNorm1Data:
    """ Data record output by normalisation:

        * z        : result number (FPNumBase)
        * roundz   : single-bit rounding flag
        * out_do_z : pass-through flag
        * oz       : pass-through raw result value
        * mid      : multiplex id
    """

    # copy order used by eq()
    _fields = ("z", "out_do_z", "oz", "roundz", "mid")

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in self._fields]
1094
1095
class FPNorm1ModSingle:
    """ Combinatorial, single-pass normalisation.

    When the input's out_do_z flag is clear:

    * if the mantissa MSB is zero and exp_gt_n126 holds, the mantissa is
      shifted up (leading zeros counted with a PriorityEncoder) and the
      exponent decreased by the same amount;
    * otherwise, if exp_lt_n126 holds, the mantissa is shifted down
      through a MultiShiftRMerge and the exponent increased.

    mid, out_do_z and oz are always passed straight through.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to the input and output data records
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input data record (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data record (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        """ builds the normalisation logic and returns the Module
        """
        m = Module()

        # mantissa plus two extra (guard and round) bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input, registered as submodules
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the value fed to msr.inp below is mwid+1 bits wide
        # while the shifter is created with mwid - confirm against
        # MultiShiftRMerge that no mantissa bit is lost.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come from the shifter output.
                    # (they used to be read from temp_s, which is only
                    # driven in the "decrease" branch and is therefore
                    # always zero here.)
                    of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),      # guard
                    of.round_bit.eq(msr.m[1]),  # round
                    of.sticky.eq(msr.m[0]),     # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1202
1203
class FPNorm1ModMulti:
    """ One normalisation step (a shift by a single bit) per evaluation.

    in_select chooses between the fresh input (in_z/in_of) and the
    values presented on temp_z/temp_of.  out_norm is set whenever either
    of the two shift conditions holds.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: passed on to each FPNumBase
            * single_cycle: accepted but not used by this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven by elaborate(): it was never created here before, so
        # elaborate() failed with an AttributeError
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        """ builds the single-step normalisation logic, returns the Module
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1269
1270
class FPNorm1Single(FPState):
    """ FSM state ("normalise_1") wrapping an FPNorm1ModSingle.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: passed to the module and to out_z
            * id_wid: bit-width of the identifier, passed to the module
            * single_cycle: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires (width, id_wid): id_wid used to be
        # left out, which raised a TypeError on construction.
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ unconditionally moves the FSM on to the "round" state
        """
        m.next = "round"
1293
1294
class FPNorm1Multi(FPState):
    """ FSM state ("normalise_1") that repeats an FPNorm1ModMulti step
    until out_norm goes low, then moves on to "round".
    """

    def __init__(self, width, id_wid):
        """ * width: passed to the module and to the FPNumBase registers
            * id_wid: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as defined in this file, has no
        # setup() method - confirm where this call is meant to resolve.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default: ack is zero whenever the normalise_1 state is inactive
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ adds the statements that apply while in the normalise_1 state
        """
        # accept when the strobe is high and ack is still low
        accept = (~self.ack) & (self.stb)
        m.d.comb += self.in_accept.eq(accept)
        # latch the module outputs for use in the next step
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1335
1336
class FPNormToPack(FPState, UnbufferedPipeline):
    """ Normalise, round, correct and pack, chained into one stage.

    The object is both an FSM state ("normalise_1") and an
    UnbufferedPipeline that uses itself as its stage.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to every chained module
            * id_wid: bit-width of the identifier
        """
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width
        # pipeline is its own stage
        UnbufferedPipeline.__init__(self, self)

    def ispec(self):
        """ input format: the same as the FPNorm1ModSingle ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: the same as the FPPackMod ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        wid, idw = self.width, self.id_wid

        # Normalisation, Rounding Corrections, Pack - in a chain
        stages = [FPNorm1ModSingle(wid, idw),
                  FPRoundMod(wid, idw),
                  FPCorrectionsMod(wid, idw),
                  FPPackMod(wid, idw)]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()
        m.d.comb += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z),      # outputs packed result
        ]

    def process(self, i):
        """ returns the packed output record (the argument is not used)
        """
        return self.out_z

    def action(self, m):
        """ unconditionally moves the FSM on to "pack_put_z"
        """
        m.next = "pack_put_z"
1372
1373
class FPRoundData:
    """ Data record: a number (z), the out_do_z flag with its oz value,
    and the mid identifier.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPNumBase for z; also the bit-width of oz
            * id_wid: bit-width of mid
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns a list of assignments copying each field of i into self
        """
        fields = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1385
1386
class FPRoundMod:
    """ Combinatorial rounding.

    When out_do_z is clear and roundz is set, the mantissa is
    incremented; if the mantissa was all 1s the exponent is incremented
    as well.  Otherwise the input is passed through unchanged.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to the input and output data records
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data record (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ registers this module as "roundz" and connects its input
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the rounding logic and returns the Module
        """
        m = Module()
        src, dst = self.i, self.out_z

        # default: copies mid, z, out_do_z
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # mantissa up
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                # all 1s: the increment carries, so exponent up
                with m.If(src.z.m == src.z.m1s):
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1418
1419
class FPRound(FPState):
    """ FSM state ("round") wrapping an FPRoundMod, with a registered
    copy of the module's output in out_z.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to the module
            * id_wid: bit-width of the identifier, passed on to the module
        """
        FPState.__init__(self, "round")
        # FPRoundMod requires (width, id_wid): id_wid used to be left
        # out, which raised a TypeError on construction.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync() is not defined on FPState in this file;
        # presumably inherited from FPBase - confirm.
        self.idsync(m)
        # FPRoundData.eq() copies mid along with z, out_do_z and oz, so
        # no separate mid assignment is needed.  (The old extra line
        # read self.mod.o.mid, but FPRoundMod has out_z, not o.)
        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        """ unconditionally moves the FSM on to "corrections"
        """
        m.next = "corrections"
1444
1445
class FPCorrectionsMod:
    """ Combinatorial exponent correction.

    Passes the input through, except that when out_do_z is clear and
    the number reports is_denormalised, the exponent is replaced by the
    number's N127 value.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to the input and output data records
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ builds the correction logic and returns the Module
        """
        m = Module()
        src, dst = self.i, self.out_z

        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        # default: copies mid, z, out_do_z
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1478
1479
class FPCorrections(FPState):
    """ FSM state ("corrections") wrapping an FPCorrectionsMod, with a
    registered copy of the module's output in out_z.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to the module
            * id_wid: bit-width of the identifier, passed on to the module
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires (width, id_wid): id_wid used to be
        # left out, which raised a TypeError on construction.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPRoundData.eq() copies mid along with z, out_do_z and oz, so
        # no separate mid assignment is needed.  (The old extra line
        # read self.mod.o.mid, but FPCorrectionsMod has out_z, not o.)
        m.d.sync += self.out_z.eq(self.mod.out_z)

    def action(self, m):
        """ unconditionally moves the FSM on to "pack"
        """
        m.next = "pack"
1503
1504
class FPPackData:
    """ Data record: a width-bit value z plus the mid identifier.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of z
            * id_wid: bit-width of mid
        """
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments that copy z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's fields as a list: z, then mid
        """
        fields = [self.z]
        fields.append(self.mid)
        return fields
1516
1517
class FPPackMod:
    """ Combinatorial packing of a number into its output value.

    With out_do_z clear, the output is built from the input number
    (inf() if it reports is_overflowed, create() otherwise).  With
    out_do_z set, the input's oz value is output as-is.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to the data records and to FPNumOut
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input data record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data record (FPPackData)
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ builds the packing logic and returns the Module
        """
        m = Module()
        z = FPNumOut(self.width, False)
        src = self.i

        m.submodules.pack_in_z = src.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_overflowed):
                m.d.comb += z.inf(src.z.s)
            with m.Else():
                m.d.comb += z.create(src.z.s, src.z.e, src.z.m)
        with m.Else():
            # pre-computed result: output it unmodified
            m.d.comb += z.v.eq(src.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1556
1557
class FPPack(FPState):
    """ FSM state ("pack") wrapping an FPPackMod, with a registered copy
    of the module's output in out_z.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to the module
            * id_wid: bit-width of the identifier, passed on to the module
        """
        FPState.__init__(self, "pack")
        # FPPackMod requires (width, id_wid): id_wid used to be left
        # out, which raised a TypeError on construction.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod exposes its result as "o" (an FPPackData, with
        # fields z and mid).  The previous code read out_z.v, which
        # exists on neither the module nor the data record.
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ unconditionally moves the FSM on to "pack_put_z"
        """
        m.next = "pack_put_z"
1581
1582
class FPPutZ(FPState):
    """ FSM state that places a value on out_z.z and holds the strobe
    high until strobe and ack are seen together.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: the value to output
            * out_z: object whose .z member has v, stb and ack
            * in_mid, out_mid: identifier in/out (in_mid may be None)
            * to_state: state to move to afterwards (default "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ adds the statements that apply while in this state
        """
        dest = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z)
        with m.If(dest.stb & dest.ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1606
1607
class FPPutZIdx(FPState):
    """ FSM state that places a value on the output selected by in_mid
    and holds that output's strobe high until strobe and ack are seen
    together.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: object whose .v is the value to output
            * out_zs: indexable collection of outputs (v, stb, ack)
            * in_mid: index selecting which output to use
            * to_state: state to move to afterwards (default "get_ops")
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ adds the statements that apply while in this state
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # the output chosen by the identifier
        chosen = self.out_zs[self.in_mid]
        m.d.comb += [
            outz_stb.eq(chosen.stb),
            outz_ack.eq(chosen.ack),
        ]
        m.d.sync += chosen.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop the strobe and move on
            m.d.sync += chosen.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += chosen.stb.eq(1)
1633
class FPOpData:
    """ Data record: an FPOp (z) plus the mid identifier.
    """

    def __init__(self, width, id_wid):
        """ * width: passed to FPOp
            * id_wid: bit-width of mid
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments that copy z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns the record's fields as a list: z, then mid
        """
        fields = [self.z]
        fields.append(self.mid)
        return fields
1644
1645
class FPADDBaseMod:
    """ FSM-based floating-point adder.

    States are collected with add_state() by one of the two
    "fragment" builders, then emitted as an FSM by get_fragment().
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ returns a new input data record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data record (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ records state for inclusion in the FSM, and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with the chosen set of stages
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered stage
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ registers one state per individual stage

        NOTE(review): this path reads self.in_mid, self.out_z and
        self.out_mid, none of which are set in __init__, and several of
        the setup() calls do not match the signatures of the classes
        visible in this file - it looks stale; confirm before use.
        """
        wid, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(wid, idw))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(wid, idw))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-cycle or multi-cycle variant, wired alike
        if self.single_cycle:
            align_cls = FPAddAlignSingle
        else:
            align_cls = FPAddAlignMulti
        alm = self.add_state(align_cls(wid, idw))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(wid, idw))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(wid, idw))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle variant also takes a strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(wid, idw))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(wid, idw))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(wid, idw))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(wid, idw))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(wid, idw))
        pa.setup(m, cor.out_z, rn.in_mid)

        # two output states, both writing to out_z
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ registers the reduced set of states, each one combining
        several stages
        """
        wid, idw = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", wid, idw))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(wid, idw))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(wid, idw))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(wid, idw))
        n1.setup(m, alm.a1o)

        # only "pack_put_z" is registered here: no separate "put_z" state
        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))
1765
1766
class FPADDBase(FPState):
    """ FSM state ("fpadd") that hands an operand pair to an
    FPADDBaseMod submodule and waits for its result.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input data format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output data format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ connects the wrapped module and registers it as "fpadd"

        * m: the Module being built
        * i: input data, copied into self.i and on into the module
        * add_stb: incoming strobe, registered into self.add_stb
        * in_mid: accepted but not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ adds the statements that apply while in the "fpadd" state

        A block of statements used to follow an unconditional return
        at the end of this method.  It could never run, and it referred
        to attributes (in_mid, out_mid, out_z) that this class does not
        create, so it has been removed.
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1856
1857
class FPADDBasePipe(ControlBase):
    """ Three pipeline stages connected in sequence:
    FPAddSpecialCasesDeNorm, FPAddAlignSingleAdd, FPNormToPack.
    """

    def __init__(self, width, id_wid):
        """ * width: passed on to each stage
            * id_wid: bit-width of the identifier, passed on to each stage
        """
        ControlBase.__init__(self)
        self.pipe1 = FPAddSpecialCasesDeNorm(width, id_wid)
        self.pipe2 = FPAddAlignSingleAdd(width, id_wid)
        self.pipe3 = FPNormToPack(width, id_wid)

        # the connecting statements are kept for use in elaborate()
        chain = [self.pipe1, self.pipe2, self.pipe3]
        self._eqs = self.connect(chain)

    def elaborate(self, platform):
        """ registers the three stages and applies the connections
        """
        m = Module()
        m.submodules.scnorm = self.pipe1
        m.submodules.addalign = self.pipe2
        m.submodules.normpack = self.pipe3
        m.d.comb += self._eqs
        return m
1874
1875
class FPADDInMuxPipe(PriorityCombMuxInPipe):
    """ PriorityCombMuxInPipe with num_rows inputs, each carrying an
    FPADDBaseData through a PassThroughStage.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to FPADDBaseData
            * num_rows: number of inputs (given as p_len)
        """
        self.num_rows = num_rows

        def iospec():
            return FPADDBaseData(width, id_wid)

        stage = PassThroughStage(iospec)
        PriorityCombMuxInPipe.__init__(self, stage, p_len=self.num_rows)

    def ports(self):
        """ returns a flat list: valid/ready and data ports of every
        input, followed by ready/valid and data ports of the output
        """
        res = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            res += [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        nxt = self.n
        res += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return res
1891
1892
class FPADDMuxOutPipe(CombMuxOutPipe):
    """ CombMuxOutPipe with num_rows outputs, each carrying an
    FPPackData through a PassThroughStage.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed to FPPackData
            * num_rows: number of outputs (given as n_len)
        """
        self.num_rows = num_rows

        def iospec():
            return FPPackData(width, id_wid)

        stage = PassThroughStage(iospec)
        CombMuxOutPipe.__init__(self, stage, n_len=self.num_rows)
1899
1900
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

    An FPADDInMuxPipe (fan-in) feeds an FPADDBasePipe, which in turn
    feeds an FPADDMuxOutPipe (fan-out).
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width, id_wid: passed on to all three pipes
            * num_rows: number of rows of the fan-in and fan-out pipes
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # use pipe in/out as this class in/out
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        # port list computed once, returned by ports()
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ registers the three pipes and connects them in sequence
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.fpadd = self.fpadd
        m.submodules.outpipe = self.outpipe

        m.d.comb += [
            self.inpipe.n.connect_to_next(self.fpadd.p),
            self.fpadd.connect_to_next(self.outpipe),
        ]

        return m

    def ports(self):
        """ returns the port list built in __init__
        """
        return self._ports
1929
1930
class ResArray:
    """ An Array of FPOp outputs ("out_z_<n>"), plus an input FPOp and
    an input identifier.
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: passed to each FPOp
            * id_wid: bit-width of in_mid
            * rs_sz: number of entries in the array.  This used to be
              read from a name that is not defined in this scope
              (NameError); it is now a parameter.  The default of 2
              matches the default used by FPADD.
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ connects in_z and in_mid to this object's inputs
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment: registers the input and all
        array entries as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every array entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1962
1963
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

        * width: bit-width of IEEE754.  supported: 16, 32, 64
        * id_wid: an identifier that is sync-connected to the input
        * single_cycle: True indicates each stage to complete in 1 clock
        * rs_sz: number of (in_a, in_b) input pairs and of outputs
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # input operand pairs, named in_a_<n> / in_b_<n>
        pairs = []
        for idx in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % idx
            in_b.name = "in_b_%d" % idx
            pairs.append((in_a, in_b))
        self.rs = Array(pairs)

        # results, named out_z_<n>
        outs = []
        for idx in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % idx
            outs.append(out_z)
        self.res = Array(outs)

        self.states = []

    def add_state(self, state):
        """ records state for inclusion in the FSM, and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up here
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        # create an input spec object for FPADDBase
        abd = ab.ispec()
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        # one FSM state per registered stage
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2053
2054
if __name__ == "__main__":
    # hand-edited switch: the first branch builds the full FPADD, the
    # second builds an FPADDBase on its own.
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports() +
                         alu.rs[0][1].ports() +
                         alu.res[0].ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))
2068
2069
2070 # works... but don't use, just do "python fname.py convert -t v"
2071 #print (verilog.convert(alu, ports=[
2072 # ports=alu.in_a.ports() + \
2073 # alu.in_b.ports() + \
2074 # alu.out_z.ports())