move classes to before use
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
13 from multipipe import CombMultiOutPipeline
14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
15
16 #from fpbase import FPNumShiftMultiRight
17
18
class FPState(FPBase):
    """ common base for the named states in this file.

        stores the state name handed in by the subclass and provides
        helpers that publish a dict of inputs/outputs as attributes.
    """

    def __init__(self, state_from):
        # name of the state (also used by FPGetOp as its submodule name)
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ keeps the dict and exposes every entry as self.<key>
        """
        self.inputs = inputs
        for name in inputs:
            setattr(self, name, inputs[name])

    def set_outputs(self, outputs):
        """ keeps the dict and exposes every entry as self.<key>
        """
        self.outputs = outputs
        for name in outputs:
            setattr(self, name, outputs[name])
32
33
class FPGetSyncOpsMod:
    """ passes num_ops operands through together (combinatorially).

        ready is raised when every bit of stb is set.  out_decode is
        ready AND ack; only while out_decode is high is each in_op
        copied onto the matching out_op.
    """

    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        self.in_op = []
        self.out_op = []
        # one input/output signal pair per operand
        for _ in range(num_ops):
            self.in_op.append(Signal(width, reset_less=True))
            self.out_op.append(Signal(width, reset_less=True))
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # all-ones constant, num_ops bits wide, unsigned
        all_strobes = Const(-1, (self.num_ops, False))
        m.d.comb += [
            self.ready.eq(self.stb == all_strobes),
            self.out_decode.eq(self.ack & self.ready),
        ]
        with m.If(self.out_decode):
            m.d.comb += [dst.eq(src)
                         for src, dst in zip(self.in_op, self.out_op)]
        return m

    def ports(self):
        """ returns in_op signals, out_op signals, then stb and ack
        """
        return [*self.in_op, *self.out_op, self.stb, self.ack]
63
64
class FPOps(Trigger):
    """ a Trigger carrying an Array (self.v) of num_ops operand signals
    """

    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops

        operands = []
        for _ in range(num_ops):
            operands.append(Signal(width))
        self.v = Array(operands)

    def ports(self):
        """ returns the operand signals followed by ack and stb
        """
        operands = [self.v[idx] for idx in range(self.num_ops)]
        return operands + [self.ack, self.stb]
83
84
class InputGroup:
    """ selects one of num_rows operand sources with a PriorityEncoder.

        each row is an FPGetSyncOpsMod.  the row index chosen by the
        encoder is recorded in self.mid ("multiplex id") and that row's
        operands are registered into self.out_op.

        * width:    bit-width of each operand
        * num_ops:  number of operands per row
        * num_rows: number of rows (sources) to choose from
    """

    def __init__(self, width, num_ops=2, num_rows=4):
        self.width = width
        self.num_ops = num_ops
        self.num_rows = num_rows
        # number of bits needed to hold any row index (0..num_rows-1).
        # integer arithmetic: int(log(n)/log(2)) floors, which made mid
        # too narrow for a non-power-of-two num_rows, and it also
        # depended on floating-point rounding.
        self.mmax = max(num_rows - 1, 0).bit_length()
        self.rs = []
        self.mid = Signal(self.mmax, reset_less=True) # multiplex id
        for i in range(num_rows):
            self.rs.append(FPGetSyncOpsMod(width, num_ops))
        self.rs = Array(self.rs)

        self.out_op = FPOps(width, num_ops)

    def elaborate(self, platform):
        m = Module()

        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe
        m.submodules.out_op = self.out_op
        m.submodules += self.rs

        # connect priority encoder: one "ready" bit per row
        in_ready = [self.rs[i].ready for i in range(self.num_rows)]
        m.d.comb += pe.i.eq(Cat(*in_ready))

        active = Signal(reset_less=True)
        out_en = Signal(reset_less=True)
        m.d.comb += active.eq(~pe.n) # encoder active
        m.d.comb += out_en.eq(active & self.out_op.trigger)

        # encoder active: ack relevant input, record MID, pass output
        with m.If(out_en):
            rs = self.rs[pe.o]
            m.d.sync += self.mid.eq(pe.o)
            m.d.sync += rs.ack.eq(0)
            m.d.sync += self.out_op.stb.eq(0)
            for j in range(self.num_ops):
                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
        with m.Else():
            m.d.sync += self.out_op.stb.eq(1)
            # no transfer this cycle: ack is set to 1 on every row
            for i in range(self.num_rows):
                m.d.sync += self.rs[i].ack.eq(1)

        return m

    def ports(self):
        """ returns out_op ports, each row's in_op and stb, then mid
        """
        res = []
        for i in range(self.num_rows):
            inop = self.rs[i]
            res += inop.in_op + [inop.stb]
        return self.out_op.ports() + res + [self.mid]
140
141
class FPGetOpMod:
    """ combinatorial operand fetch.

        out_decode is high while in_op.ack and in_op.stb are both high;
        only then is out_op driven from in_op.v.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op
        handshake = self.in_op.ack & self.in_op.stb
        m.d.comb += self.out_decode.eq(handshake)
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
158
159
class FPGetOp(FPState):
    """ state wrapper around FPGetOpMod: fetches a single operand
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the module (under the state name) and wires it up
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ on out_decode: latch the operand, drop ack, go to out_state.
            otherwise: raise ack on the input operand.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
188
189
class FPNumBase2Ops:
    """ data bundle: two FPNumBase operands (a, b) plus a mid signal
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        fields = ("a", "b", "mid")
        return [getattr(self, f).eq(getattr(i, f)) for f in fields]

    def ports(self):
        return [getattr(self, f) for f in ("a", "b", "mid")]
202
203
class FPADDBaseData:
    """ data bundle: two raw operand signals (a, b) plus a mid signal
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        fields = ("a", "b", "mid")
        return [getattr(self, f).eq(getattr(i, f)) for f in fields]

    def ports(self):
        return [getattr(self, f) for f in ("a", "b", "mid")]
218
219
class FPGet2OpMod(Trigger):
    """ decodes two raw operands into FPNum form when triggered.

        input is an FPADDBaseData, output an FPNumBase2Ops.
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        return FPNumBase2Ops(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.o.a
        m.submodules.get_op2_out = self.o.b
        out_op1 = FPNumIn(None, self.width)
        out_op2 = FPNumIn(None, self.width)
        # decode both operands, then copy them (and mid) to the output
        decode = [out_op1.decode(self.i.a), out_op2.decode(self.i.b)]
        transfer = [self.o.a.eq(out_op1), self.o.b.eq(out_op2),
                    self.o.mid.eq(self.i.mid)]
        with m.If(self.trigger):
            m.d.comb += decode + transfer
        return m
252
253
class FPGet2Op(FPState):
    """ state wrapper around FPGet2OpMod: fetches both operands
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ registers the module and connects data, stb, ack and trigger
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.i.eq(i),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        """ on out_decode: latch the module output, drop ack, move on.
            otherwise: raise the module's ack.
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
286
287
class FPSCData:
    """ data bundle passed on from the special-cases stage: operands
        a and b, candidate result z, its raw value oz, the out_do_z
        flag and mid.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, f).eq(getattr(i, f)) for f in fields]
301
302
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        when a special case matches, out_do_z is set and the result is
        placed in o.z (its value is also copied to o.oz).  otherwise
        out_do_z is cleared and a/b are passed through unmodified.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPNumBase2Ops(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        return self.o

    def elaborate(self, platform):
        m = Module()

        # short-hand references (plain python aliases, no new signals)
        a, b = self.i.a, self.i.b
        o = self.o
        z = o.z

        m.submodules.sc_in_a = a
        m.submodules.sc_in_b = b
        m.submodules.sc_out_z = z

        # signs differ / mantissas identical
        s_nomatch = Signal()
        m_match = Signal()
        m.d.comb += s_nomatch.eq(a.s != b.s)
        m.d.comb += m_match.eq(a.m == b.m)

        # the tests below are a priority chain: first match wins

        # either operand NaN: result is NaN
        with m.If(a.is_nan | b.is_nan):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.nan(0)

        # XXX FP16 non-canonical NaN handling (zero combined with NaN
        # returning a sign-adjusted copy of the NaN operand) was here
        # as commented-out code and is still under review.

        # a is inf: result is inf with a's sign...
        with m.Elif(a.is_inf):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.inf(a.s)
            # ...unless b.exp_128 is set and the signs differ: NaN
            with m.If(b.exp_128 & s_nomatch):
                m.d.comb += z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b.is_inf):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.inf(b.s)

        # both zero: sign is the AND of both signs
        with m.Elif(a.is_zero & b.is_zero):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.create(a.s & b.s, b.e, b.m[3:-1])

        # only a is zero: result is b
        with m.Elif(a.is_zero):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.create(b.s, b.e, b.m[3:-1])

        # only b is zero: result is a
        with m.Elif(b.is_zero):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.create(a.s, a.e, a.m[3:-1])

        # a == -b: result is (+ve) zero
        with m.Elif(s_nomatch & m_match & (a.e == b.e)):
            m.d.comb += o.out_do_z.eq(1)
            m.d.comb += z.zero(0)

        # no special case: denormalised checks come next, so hand the
        # operands on untouched
        with m.Else():
            m.d.comb += o.out_do_z.eq(0)
            m.d.comb += o.a.eq(a)
            m.d.comb += o.b.eq(b)

        m.d.comb += o.oz.eq(z.v)
        m.d.comb += o.mid.eq(self.i.mid)

        return m
418
419
class FPID:
    """ optional "multiplex id" pair (in_mid / out_mid).

        * id_wid: width of the id signals.  a falsy value (None or 0)
          means "no id": in_mid and out_mid are None and idsync does
          nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid on the sync domain (if present)
        """
        # same truthiness test as __init__: with "is not None" an id_wid
        # of 0 would reach here with out_mid set to None and crash.
        if self.id_wid:
            m.d.sync += self.out_mid.eq(self.in_mid)
433
434
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        state wrapper around FPAddSpecialCasesMod: registers the
        special-case result and branches to "put_z" when one matched,
        otherwise to "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # FPAddSpecialCasesMod.setup takes (m, i) only; the do-z flag is
        # read back from the module's output bundle instead.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        # NOTE(review): idsync is not defined on FPState in this file;
        # presumably it is inherited from FPBase - confirm.
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
460
461
class FPAddSpecialCasesDeNorm(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        chains FPAddSpecialCasesMod into FPAddDeNormMod and registers
        the de-normalisation output in self.o.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        self.o = self.ospec()

    def ispec(self):
        """ input format: that of the special-cases module
        """
        return self.smod.ispec()

    def ospec(self):
        """ output format: that of the de-normalisation module
        """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # special-cases feeds straight (combinatorially) into denorm
        self.smod.setup(m, i)
        self.dmod.setup(m, self.smod.o)

        # an early-out (split pipeline) would additionally need its own
        # out_do_z and out_z taken from smod.o here; not done.

        # register the combined result
        m.d.sync += self.o.eq(self.dmod.o)

    def process(self, i):
        return self.o

    def action(self, m):
        # no early-out branch to "put_z": always continue to align
        m.next = "align"
505
506
class FPAddDeNormMod(FPState):
    """ de-normalisation stage (combinatorial).

        unless out_do_z is set, each operand is copied through and then
        either has its exponent replaced with N126 (when exp_n127 is
        set) or has its top mantissa bit set.  mid, z, out_do_z and oz
        are always passed through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        def denorm(src, dst):
            # identical treatment for a and for b
            m.d.comb += dst.eq(src)
            with m.If(src.exp_n127):
                m.d.comb += dst.e.eq(src.N126) # limit exponent
            with m.Else():
                m.d.comb += dst.m[-1].eq(1) # set top mantissa bit

        with m.If(~self.i.out_do_z):
            denorm(self.i.a, self.o.a)
            denorm(self.i.b, self.o.b)

        # pass-through of everything else
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
554
555
class FPAddDeNorm(FPState):
    """ state wrapper around FPAddDeNormMod: registers the
        de-normalised operands in out_a / out_b, then moves to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module's results live in its output bundle (mod.o)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
575
576
class FPAddAlignMultiMod(FPState):
    """ alignment step (combinatorial).

        compares the two exponents: the operand with the smaller
        exponent goes through shift_down, the other is passed through.
        exp_eq is raised only when the exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting:
        # the user *STAYS* in the align state until exponents match

        # defaults (overridden below): not equal, operands untouched
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        # a's exponent is larger: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # b's exponent is larger: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: flag that the next stage may proceed
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
617
618
class FPAddAlignMulti(FPState):
    """ state wrapper around FPAddAlignMultiMod: stays in "align" until
        the module reports exp_eq, then moves on to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        # inputs and the exp_eq flag are combinatorial...
        m.d.comb += [
            self.mod.in_a.eq(in_a),
            self.mod.in_b.eq(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        # ...the operands themselves are registered
        m.d.sync += [
            self.out_a.eq(self.mod.out_a),
            self.out_b.eq(self.mod.out_b),
        ]

    def action(self, m):
        with m.If(self.exp_eq):
            m.next = "add_0"
643
644
class FPNumIn2Ops:
    """ data bundle: two FPNumIn operands (a, b), candidate result z,
        its raw value oz, the out_do_z flag and mid.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, f).eq(getattr(i, f)) for f in fields]
658
659
class FPAddAlignSingleMod:
    """ single-cycle alignment of a against b (or b against a).

        input is an FPSCData, output an FPNumIn2Ops.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        i, o = self.i, self.o

        m.submodules.align_in_a = i.a
        m.submodules.align_in_b = i.b
        m.submodules.align_out_a = o.a
        m.submodules.align_out_b = o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(i.a.e), True)
        msr = MultiShiftRMerge(i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # the one shared shifter works on t_inp's mantissa by tdiff
        # places and the result lands in t_out; exponent rises by tdiff
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences (both directions) and comparisons
        m.d.comb += [
            ediff.eq(i.a.e - i.b.e),
            ediffr.eq(i.b.e - i.a.e),
            elz.eq(i.a.e < i.b.e),
            egz.eq(i.a.e > i.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += o.a.eq(i.a)
        m.d.comb += o.b.eq(i.b)

        with m.If(~i.out_do_z):
            # a's exponent is larger: route b through the shifter
            with m.If(egz):
                m.d.comb += [
                    t_inp.eq(i.b),
                    tdiff.eq(ediff),
                    o.b.eq(t_out),
                    o.b.s.eq(i.b.s), # sign must be carried across too
                ]
            # b's exponent is larger: route a through the shifter
            with m.Elif(elz):
                m.d.comb += [
                    t_inp.eq(i.a),
                    tdiff.eq(ediffr),
                    o.a.eq(t_out),
                    o.a.s.eq(i.a.s), # sign must be carried across too
                ]

        # pass-through of everything else
        m.d.comb += [
            o.mid.eq(i.mid),
            o.z.eq(i.z),
            o.out_do_z.eq(i.out_do_z),
            o.oz.eq(i.oz),
        ]

        return m
753
754
class FPAddAlignSingle(FPState):
    """ state wrapper around FPAddAlignSingleMod: registers the aligned
        operands in out_a / out_b, then moves to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module's results live in its output bundle (mod.o)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
774
775
class FPAddAlignSingleAdd(FPState):
    """ chains align, add-stage-0 and add-stage-1 combinatorially and
        registers the final (FPAddStage1Data) result in self.a1o.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        self.a1o = self.ospec()

    def ispec(self):
        # must match FPAddAlignSingleMod.ispec (first stage of the
        # chain): its eq() reads z, oz and out_do_z, which
        # FPNumBase2Ops does not have.
        return FPSCData(self.width, self.id_wid) # AlignSingle ispec

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # chain AddAlignSingle, AddStage0 and AddStage1
        mod = FPAddAlignSingleMod(self.width, self.id_wid)
        a0mod = FPAddStage0Mod(self.width, self.id_wid)
        a1mod = FPAddStage1Mod(self.width, self.id_wid)

        chain = StageChain([mod, a0mod, a1mod])
        chain.setup(m, i)

        # register the output of the last stage
        m.d.sync += self.a1o.eq(a1mod.o)

    def process(self, i):
        return self.a1o

    def action(self, m):
        m.next = "normalise_1"
809
810
class FPAddStage0Data:
    """ data bundle produced by add stage 0: partial result z, the
        mantissa total tot (z.m_width + 4 bits), oz, out_do_z and mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, f).eq(getattr(i, f)) for f in fields]
823
824
class FPAddStage0Mod:
    """ first stage of add (combinatorial): adds the mantissas when the
        signs match, otherwise subtracts the smaller from the larger.

        input is an FPSCData, output an FPAddStage0Data.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        a, b = self.i.a, self.i.b
        o = self.o

        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = o.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(a.m)+1, reset_less=True)
        bm0 = Signal(len(b.m)+1, reset_less=True)
        m.d.comb += seq.eq(a.s == b.s)     # signs equal
        m.d.comb += mge.eq(a.m >= b.m)     # a's mantissa >= b's
        m.d.comb += am0.eq(Cat(a.m, 0))    # one extra (zero) top bit
        m.d.comb += bm0.eq(Cat(b.m, 0))

        with m.If(~self.i.out_do_z):
            m.d.comb += o.z.e.eq(a.e)
            # same sign (both negative or both positive): add mantissas
            with m.If(seq):
                m.d.comb += o.tot.eq(am0 + bm0)
                m.d.comb += o.z.s.eq(a.s)
            # a's mantissa is the larger (or equal): a - b, a's sign
            with m.Elif(mge):
                m.d.comb += o.tot.eq(am0 - bm0)
                m.d.comb += o.z.s.eq(a.s)
            # b's mantissa is the larger: b - a, b's sign
            with m.Else():
                m.d.comb += o.tot.eq(bm0 - am0)
                m.d.comb += o.z.s.eq(b.s)

        # pass-through of everything else
        m.d.comb += [
            o.oz.eq(self.i.oz),
            o.out_do_z.eq(self.i.out_do_z),
            o.mid.eq(self.i.mid),
        ]
        return m
889
890
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        state wrapper around FPAddStage0Mod: registers the module's
        output in self.o, then moves to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
912
913
class FPAddStage1Data:
    """ data bundle produced by add stage 1: result z, the Overflow
        bits (of), oz, out_do_z and mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, f).eq(getattr(i, f)) for f in fields]
926
927
928
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when the tot sum is too big (the top bit of tot is
        kinda a carry bit) and splits tot into the result mantissa
        plus the m0 / guard / round / sticky overflow bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z, of = self.o.z, self.o.of

        # start from the incoming z; mantissa/exponent overridden below
        m.d.comb += z.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # tot[-1] (MSB) set: the sum overflowed.  take the result
            # one bit higher up and bump the exponent
            with m.If(tot[-1]):
                m.d.comb += z.m.eq(tot[4:])
                m.d.comb += of.m0.eq(tot[4])
                m.d.comb += of.guard.eq(tot[3])
                m.d.comb += of.round_bit.eq(tot[2])
                m.d.comb += of.sticky.eq(tot[1] | tot[0])
                m.d.comb += z.e.eq(self.i.z.e + 1)
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += z.m.eq(tot[3:])
                m.d.comb += of.m0.eq(tot[3])
                m.d.comb += of.guard.eq(tot[2])
                m.d.comb += of.round_bit.eq(tot[1])
                m.d.comb += of.sticky.eq(tot[0])

        # pass-through of everything else
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
990
991
class FPAddStage1(FPState):
    """ state wrapper around FPAddStage1Mod: registers the module's z
        and overflow outputs, then moves to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module's results live in its output bundle (mod.o)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): both norm_stb assignments are unconditional
        # here, so presumably this later one always takes effect -
        # confirm that is intended.
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
1014
1015
class FPNormaliseModSingle:
    """ single-cycle normalisation (exponent-decrease direction only).

        when the input mantissa's MSB is zero, the leading zeros are
        counted with a PriorityEncoder, the mantissa (with guard and
        round bits re-attached) is shifted up by that amount and the
        exponent is reduced to match.

        in_z / out_z are FPNumBase, in_of / out_of are Overflow.
        NOTE(review): setup() only connects in_z; in_of is presumably
        to be driven by the user of this module - confirm.
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # elaborate() reads in_of and drives out_of: they must exist
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        return FPNumBase(self.width, False)

    def ospec(self):
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input attribute of this module is in_z (there is no self.i)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of
        m.submodules.norm1_out_overflow = self.out_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): the shifter is instantiated but none of its
        # ports are connected in this method.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1082
class FPNorm1Data:
    """ data bundle produced by normalisation: the roundz flag, result
        z, oz, out_do_z and mid.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying every field from i
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, f).eq(getattr(i, f)) for f in fields]
1095
1096
class FPNorm1ModSingle:
    """Single-cycle (combinatorial) normalisation module.

    Takes the output of the add stage (ispec) and produces a
    FPNorm1Data record (ospec).  When ``out_do_z`` is clear the
    mantissa is shifted up (exponent decreased) or shifted down
    (exponent increased) in one go; ``mid``, ``out_do_z`` and ``oz``
    are always passed straight through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record format
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ the result of this stage is always the output record
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # two extra bits: guard and round are cat'ed onto the mantissa
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input record
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): msr is created mwid wide but is fed the (mwid+1)-bit
        # temp_m in the "increase" branch below - confirm the widths.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # FIX: these used to read temp_s, which is only driven
                    # in the "decrease" branch (so was always zero here).
                    # The shifted value lives in msr.m, laid out like temp_m.
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # pass-through fields
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1203
1204
class FPNorm1ModMulti:
    """Multi-cycle normalisation module: moves by one bit per pass.

    ``in_select`` chooses between the fresh input (in_z / in_of) and the
    intermediate value (temp_z / temp_of).  ``out_norm`` is raised while
    a further increase or decrease is still needed.
    """

    def __init__(self, width, single_cycle=True):
        # NOTE: single_cycle is accepted but not used by this class
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # FIX: elaborate() drives out_norm, so it has to exist
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),            # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1),           # shift mantissa UP
                # order matters: this overrides bit 0 of the shift above
                self.out_z.m[0].eq(in_of.guard),        # steal guard
                self.out_of.guard.eq(in_of.round_bit),  # round becomes guard
                self.out_of.round_bit.eq(0),            # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),            # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1),           # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1270
1271
class FPNorm1Single(FPState):
    """FSM state "normalise_1", backed by FPNorm1ModSingle.

    Always moves on to the "round" state.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        # NOTE: single_cycle is accepted but not used by this class
        FPState.__init__(self, "normalise_1")
        # FIX: FPNorm1ModSingle requires id_wid as well as width
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input record format (same as the module's)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (same as the module's)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"
1294
1295
class FPNorm1Multi(FPState):
    """FSM state "normalise_1", backed by FPNorm1ModMulti.

    Stays in this state while out_norm is raised, feeding the module's
    output back in through temp_z / temp_of; moves to "round" once
    out_norm drops.
    """

    def __init__(self, width, id_wid):
        # NOTE: id_wid is accepted but not used here
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti as defined in this file has no
        # setup() method, so this call looks stale - confirm.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        m.d.sync += self.ack.eq(0) # sets to zero when not in normalise_1 state

    def action(self, m):
        # accept new input on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # store the module's output for the next pass
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += [
                    self.ack.eq(1),
                ]
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += self.ack.eq(1)
            m.d.sync += self.out_roundz.eq(self.mod.out_of.roundz)
1336
1337
class FPNormToPack(FPState):
    """FSM state "normalise_1" covering normalise through to pack.

    Chains the normalisation, rounding, corrections and pack modules
    and registers the packed result; next state is "pack_put_z".
    """

    def __init__(self, width, id_wid):
        super().__init__("normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        """ input format: same as FPNorm1ModSingle's ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: same as FPPackMod's ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        # the last stage (pack) provides the result
        pack = stages[-1]
        self.out_z = pack.ospec()

        m.d.sync += [
            self.out_z.mid.eq(pack.o.mid),
            self.out_z.z.eq(pack.o.z),  # outputs packed result
        ]

    def process(self, i):
        return self.out_z

    def action(self, m):
        m.next = "pack_put_z"
1372
1373
class FPRoundData:
    """Record passed between the round, corrections and pack modules.

    Fields: z (the number), out_do_z (1 bit), oz (width bits) and
    mid (id_wid bits).
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments that copy every field of i into self
        """
        copy_z = self.z.eq(i.z)
        copy_do_z = self.out_do_z.eq(i.out_do_z)
        copy_oz = self.oz.eq(i.oz)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_do_z, copy_oz, copy_mid]
1385
1386
class FPRoundMod:
    """Combinatorial rounding module.

    Copies the input to the output; when out_do_z is clear and roundz
    is set, the mantissa is incremented, and the exponent too if the
    mantissa was all ones.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record format
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.out_z
        m.d.comb += dst.eq(src)  # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # mantissa up
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                with m.If(src.z.m == src.z.m1s):  # all 1s
                    # exponent up
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1418
1419
class FPRound(FPState):
    """FSM state "round": registers the output of FPRoundMod.

    Next state is "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FIX: FPRoundMod requires id_wid as well as width
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (same as the module's)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (same as the module's)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in the visible part of this
        # file - presumably inherited via FPState/FPBase; confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FIX: FPRoundMod has no "o" attribute, its output is out_z
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1444
1445
class FPCorrectionsMod:
    """Combinatorial corrections module.

    Copies the input to the output; when out_do_z is clear and the
    number reports is_denormalised, the exponent is replaced with the
    number's N127 constant.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record format (same as the input format)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        m.d.comb += dst.eq(src)  # copies mid, z, out_do_z
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1478
1479
class FPCorrections(FPState):
    """FSM state "corrections": registers the output of FPCorrectionsMod.

    Next state is "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FIX: FPCorrectionsMod requires id_wid as well as width
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (same as the module's)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (same as the module's)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FIX: FPCorrectionsMod has no "o" attribute, its output is out_z
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
1503
1504
class FPPackData:
    """Record holding a packed result: z (width bits) and mid (id_wid bits)."""

    def __init__(self, width, id_wid):
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments that copy z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
1513
1514
class FPPackMod:
    """Combinatorial pack module.

    When out_do_z is clear, the input number is packed (or replaced by
    infinity if it reports is_overflowed); otherwise the pass-through
    value oz is used.  The result is placed in o.z, with mid copied.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input record format
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output record format
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        z = FPNumOut(self.width, False)
        m.submodules.pack_in_z = src.z
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_overflowed):
                m.d.comb += z.inf(src.z.s)
            with m.Else():
                m.d.comb += z.create(src.z.s, src.z.e, src.z.m)
        with m.Else():
            # use the pass-through value instead
            m.d.comb += z.v.eq(src.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1553
1554
class FPPack(FPState):
    """FSM state "pack": registers the output of FPPackMod.

    Next state is "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FIX: FPPackMod requires id_wid as well as width
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format (same as the module's)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (same as the module's)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FIX: both records are FPPackData (fields z and mid) and the
        # module's output is "o": there is no out_z.v / mod.out_z
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
1578
1579
class FPPutZ(FPState):
    """FSM state that writes in_z to out_z.z and handshakes it out.

    * state:    name of this state
    * in_z:     value assigned to out_z.z.v
    * out_z:    object whose ``z`` has v / stb / ack
    * in_mid:   identifier to copy (skipped when None)
    * out_mid:  destination for the identifier
    * to_state: state to move to afterwards (default "get_ops")
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        dest = self.out_z.z
        m.d.sync += [
            dest.v.eq(self.in_z)
        ]
        # strobe is held high until acknowledged, then dropped
        with m.If(dest.stb & dest.ack):
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1603
1604
class FPPutZIdx(FPState):
    """FSM state that writes in_z.v to out_zs[in_mid] and handshakes it.

    * state:    name of this state
    * in_z:     object whose ``v`` is written out
    * out_zs:   indexable collection of outputs (each with v / stb / ack)
    * in_mid:   index selecting which output to use
    * to_state: state to move to afterwards (default "get_ops")
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # the output selected by the identifier
        selected = self.out_zs[self.in_mid]
        m.d.comb += [outz_stb.eq(selected.stb),
                     outz_ack.eq(selected.ack),
                    ]
        m.d.sync += [
            selected.v.eq(self.in_z.v)
        ]
        # strobe is held high until acknowledged, then dropped
        with m.If(outz_stb & outz_ack):
            m.d.sync += selected.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += selected.stb.eq(1)
1630
class FPOpData:
    """Record pairing an FPOp ``z`` with an ``id_wid``-bit identifier ``mid``."""

    def __init__(self, width, id_wid):
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments that copy z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns z and mid themselves, as a two-element list
        """
        return [self.z, self.mid]
1641
1642
class FPADDBaseMod:
    """FSM-based FP adder: builds a list of states and runs them in an FSM.

    States are collected with add_state() by one of the two
    get_*_fragment() methods; get_fragment() then emits one FSM state
    per entry, keyed by each state's ``state_from`` name.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        # FSM states, in the order they were added
        self.states = []

    def ispec(self):
        """ input record format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record format
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ records a state for the FSM and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-stage version of the adder
        """
        # NOTE(review): this method reads self.in_mid, self.out_z and
        # self.out_mid, none of which are set in __init__, and calls
        # several setup() methods with argument lists that differ from
        # the definitions visible in this file - looks stale; confirm
        # before using compact=False.

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced-stage version of the adder

            each state's setup() is given the output of the previous one
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        # final state: hand the packed result and its id to the output
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1762
1763
class FPADDBase(FPState):
    """FSM state "fpadd" that wraps a whole FPADDBaseMod.

    Forwards the operands and the in/out handshakes to the wrapped
    module and moves to "put_z" once the module's output triggers.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input record format (same as the module's)
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format (same as the module's)
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            NOTE: in_mid is accepted but not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"

        # (the statements that used to follow an unconditional "return"
        #  here could never run, and read attributes this class never
        #  sets; they have been removed)
1853
1854
class FPADDStageOut:
    """Pipeline output record: z (width bits) and mid (id_wid bits)."""

    def __init__(self, width, id_wid):
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments that copy z and mid from i
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ returns z and mid as a two-element list
        """
        return [self.z, self.mid]
1865
1866
# matches the format of FPADDStageOut, allows eq function to do assignments
class PlaceHolder:
    """Empty attribute container: fields are attached by the user."""
1869
1870
class FPAddBaseStage:
    """Pipeline stage whose result is ``i.a + i.b``, with ``mid`` passed on."""

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ input record format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record format
        """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ returns an object with z = i.a + i.b and mid = i.mid
        """
        result = PlaceHolder()
        result.mid = i.mid
        result.z = i.a + i.b
        return result
1887
1888
class FPADDBasePipe1(UnbufferedPipeline):
    """UnbufferedPipeline built around a single FPAddBaseStage."""

    def __init__(self, width, id_wid):
        super().__init__(FPAddBaseStage(width, id_wid))
1893
1894
class FPADDBasePipe(ControlBase):
    """ControlBase wrapper around one FPADDBasePipe1.

    The assignments returned by connect() are stored at construction
    time and added to the comb domain in elaborate().
    """

    def __init__(self, width, id_wid):
        super().__init__()
        self.pipe1 = FPADDBasePipe1(width, id_wid)
        self._eqs = self.connect([self.pipe1])

    def elaborate(self, platform):
        module = Module()
        module.submodules.pipe1 = self.pipe1
        module.d.comb += self._eqs
        return module
1906
1907
class PriorityCombPipeline(CombMultiInPipeline):
    """CombMultiInPipeline that uses an InputPriorityArbiter as its p_mux."""

    def __init__(self, stage, p_len):
        # the arbiter is created first, then handed to the base class
        arbiter = InputPriorityArbiter(self, p_len)
        super().__init__(stage, p_len=p_len, p_mux=arbiter)

    def ports(self):
        """ returns the ports of the input multiplexer
        """
        return self.p_mux.ports()
1915
1916
class FPAddInPassThruStage:
    """Pass-through stage: input and output are both FPADDBaseData."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input record format
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output record format (a fresh ispec)
        """
        return self.ispec()

    def process(self, i):
        """ returns the input unchanged
        """
        return i
1923
1924
class FPADDInMuxPipe(PriorityCombPipeline):
    """Fan-in pipeline: num_rows inputs through a pass-through stage."""

    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        passthru = FPAddInPassThruStage(width, id_width)
        super().__init__(passthru, p_len=self.num_rows)
        #self.p.i_data = stage.ispec()
        #self.n.o_data = stage.ospec()

    def ports(self):
        """ returns valid/ready/data ports of every input, then the output
        """
        res = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            res += [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        nxt = self.n
        res += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return res
1941
1942
class MuxCombPipeline(CombMultiOutPipeline):
    """CombMultiOutPipeline where the stage doubles as the n-way mux."""

    def __init__(self, stage, n_len):
        # HACK: stage is also the n-way multiplexer
        super().__init__(stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        stage.m_id = self.p.i_data.mid

    def ports(self):
        # NOTE(review): this reads p_mux although the constructor passes
        # n_mux - confirm that the base class provides a p_mux attribute.
        return self.p_mux.ports()
1953
1954
class FPAddOutPassThruStage:
    """Pass-through stage: input and output are both FPADDStageOut."""

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input record format
        """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ output record format (a fresh ispec)
        """
        return self.ispec()

    def process(self, i):
        """ returns the input unchanged
        """
        return i
1961
1962
class FPADDMuxOutPipe(MuxCombPipeline):
    """Fan-out pipeline: a pass-through stage feeding num_rows outputs."""

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        super().__init__(passthru, n_len=self.num_rows)
        #self.p.i_data = stage.ispec()
        #self.n.o_data = stage.ospec()

    def ports(self):
        """ returns valid/ready/data ports of the input, then every output
        """
        prev = self.p
        res = [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            res += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return res
1978
1979
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        Three pipelines connected in series: a fan-in (inpipe), the
        add stage (fpadd) and a fan-out (outpipe).  This object's own
        p and n are those of the fan-in and fan-out respectively.
    """
    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # use pipe in/out as this class in/out
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        # port list is worked out once, here
        in_ports = self.inpipe.ports()
        out_ports = self.outpipe.ports()
        self._ports = in_ports + out_ports

    def elaborate(self, platform):
        module = Module()
        module.submodules.inpipe = self.inpipe
        module.submodules.fpadd = self.fpadd
        module.submodules.outpipe = self.outpipe

        # inpipe -> fpadd -> outpipe
        module.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        module.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return module

    def ports(self):
        """ returns the port list built at construction time
        """
        return self._ports
2008
2009
class ResArray:
    """Array of ``rs_sz`` result FPOps plus an input FPOp and identifier."""

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width of each FPOp
            * id_wid: bit-width of the identifier
            * rs_sz: number of result entries.  FIX: this used to be read
              as an undefined name; it is now a parameter whose default
              matches FPADD's rs_sz default.
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ connects the given value and identifier to in_z / in_mid
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for the result array
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every result entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2041
2042
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand-pair / result entries to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        def named_op(name):
            # creates an FPOp and labels it
            op = FPOp(width)
            op.name = name
            return op

        # operand pairs (a, b), one per entry
        self.rs = Array([(named_op("in_a_%d" % i), named_op("in_b_%d" % i))
                         for i in range(rs_sz)])

        # results, one per entry
        self.res = Array([named_op("out_z_%d" % i) for i in range(rs_sz)])

        self.states = []

    def add_state(self, state):
        """ records a state for the FSM and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected up
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec()  # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(a), abd.b.eq(b), abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)
        o = ab.o

        # result goes to the entry selected by o.mid, then back to "get_a"
        self.add_state(FPPutZIdx("put_z", o.z, self.res, o.mid, "get_a"))

        # one FSM state per recorded state, in the order they were added
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2132
2133
if __name__ == "__main__":
    # the first branch is hard-wired on; the second is kept for reference
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        # ports of the first operand pair and the first result, plus ids
        port_list = alu.rs[0][0].ports()
        port_list = port_list + alu.rs[0][1].ports()
        port_list = port_list + alu.res[0].ports()
        port_list = port_list + [alu.ids.in_mid, alu.ids.out_mid]
        main(alu, ports=port_list)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = [alu.in_a, alu.in_b]
        port_list = port_list + alu.in_t.ports()
        port_list = port_list + alu.out_z.ports()
        port_list = port_list + [alu.in_mid, alu.out_mid]
        main(alu, ports=port_list)
2147
2148
2149 # works... but don't use, just do "python fname.py convert -t v"
2150 #print (verilog.convert(alu, ports=[
2151 # ports=alu.in_a.ports() + \
2152 # alu.in_b.ports() + \
2153 # alu.out_z.ports())