complicated. change ControlBase.connect API to return list of eq statements
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
13 from multipipe import CombMultiOutPipeline
14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
15
16 #from fpbase import FPNumShiftMultiRight
17
18
class FPState(FPBase):
    """ Base class for one named state of the adder.

        state_from is stored as-is; subclasses in this file pass the
        state's name (e.g. "align") and some use it as a submodule name.
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ keeps the inputs mapping and mirrors every entry onto self
            as an attribute of the same name
        """
        self.inputs = inputs
        for attr_name, attr_value in inputs.items():
            setattr(self, attr_name, attr_value)

    def set_outputs(self, outputs):
        """ keeps the outputs mapping and mirrors every entry onto self
            as an attribute of the same name
        """
        self.outputs = outputs
        for attr_name, attr_value in outputs.items():
            setattr(self, attr_name, attr_value)
32
33
class FPGetSyncOpsMod:
    """ Passes a row of num_ops operands through once every operand has
        been strobed and the row is acknowledged.

        in_op / out_op: lists of num_ops Signals, each `width` bits
        stb: one strobe bit per operand
        ack: acknowledge input
        ready: high when every bit of stb is set
        out_decode: ack & ready
    """

    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        operands_in = []
        operands_out = []
        # input/output signals are created pairwise, one pair per operand
        for _ in range(num_ops):
            operands_in.append(Signal(width, reset_less=True))
            operands_out.append(Signal(width, reset_less=True))
        self.in_op = operands_in
        self.out_op = operands_out
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # Const(-1) as an unsigned num_ops-wide value is "all strobes set"
        all_strobed = Const(-1, (self.num_ops, False))
        m.d.comb += [
            self.ready.eq(self.stb == all_strobed),
            self.out_decode.eq(self.ack & self.ready),
        ]
        with m.If(self.out_decode):
            for dst, src in zip(self.out_op, self.in_op):
                m.d.comb += dst.eq(src)
        return m

    def ports(self):
        """ all inputs, then all outputs, then stb and ack
        """
        return [*self.in_op, *self.out_op, self.stb, self.ack]
63
64
class FPOps(Trigger):
    """ A Trigger carrying num_ops operand Signals (each `width` bits)
        in the Array self.v.
    """

    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops

        operands = [Signal(width) for _ in range(num_ops)]
        self.v = Array(operands)

    def ports(self):
        """ operand signals in index order, followed by ack then stb
        """
        operands = [self.v[idx] for idx in range(self.num_ops)]
        return operands + [self.ack, self.stb]
83
84
class InputGroup:
    """ Multiplexes num_rows rows of operands onto a single FPOps output.

        Each row is an FPGetSyncOpsMod.  A PriorityEncoder over the rows'
        ready flags selects one row; when the output triggers, that row's
        operands are latched into out_op and its index recorded in mid.

        width: operand width in bits
        num_ops: operands per row
        num_rows: number of input rows (must be >= 1)
    """

    def __init__(self, width, num_ops=2, num_rows=4):
        self.width = width
        self.num_ops = num_ops
        self.num_rows = num_rows
        # was int(log(num_rows) / log(2)): that truncates for row counts
        # that are not powers of two (mid too narrow for the encoder
        # output) and relies on floating-point rounding.
        self.mmax = self._mid_width(num_rows)
        self.rs = []
        self.mid = Signal(self.mmax, reset_less=True) # multiplex id
        for i in range(num_rows):
            self.rs.append(FPGetSyncOpsMod(width, num_ops))
        self.rs = Array(self.rs)

        self.out_op = FPOps(width, num_ops)

    @staticmethod
    def _mid_width(num_rows):
        """ bits needed to hold any row index in 0..num_rows-1

            identical to the old log2 formula for powers of two; raises
            ValueError for num_rows < 1 (as the log-based formula did).
        """
        if num_rows < 1:
            raise ValueError("num_rows must be at least 1")
        return (int(num_rows) - 1).bit_length()

    def elaborate(self, platform):
        m = Module()

        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe
        m.submodules.out_op = self.out_op
        m.submodules += self.rs

        # connect priority encoder: one input bit per row's ready flag
        in_ready = []
        for i in range(self.num_rows):
            in_ready.append(self.rs[i].ready)
        m.d.comb += pe.i.eq(Cat(*in_ready))

        active = Signal(reset_less=True)
        out_en = Signal(reset_less=True)
        m.d.comb += active.eq(~pe.n) # encoder active
        m.d.comb += out_en.eq(active & self.out_op.trigger)

        # encoder active: ack relevant input, record MID, pass output
        with m.If(out_en):
            rs = self.rs[pe.o]
            m.d.sync += self.mid.eq(pe.o)
            m.d.sync += rs.ack.eq(0)
            m.d.sync += self.out_op.stb.eq(0)
            for j in range(self.num_ops):
                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
        with m.Else():
            m.d.sync += self.out_op.stb.eq(1)
            # no transfer this cycle: (re)assert ack on every input row
            for i in range(self.num_rows):
                m.d.sync += self.rs[i].ack.eq(1)

        return m

    def ports(self):
        """ output ports, then each row's operands and strobe, then mid
        """
        res = []
        for i in range(self.num_rows):
            inop = self.rs[i]
            res += inop.in_op + [inop.stb]
        return self.out_op.ports() + res + [self.mid]
140
141
class FPGetOpMod:
    """ Combinatorially forwards a single operand when it is both
        strobed and acknowledged.

        in_op: FPOp input (stb/ack handshake plus value v)
        out_op: `width`-bit output, driven from in_op.v while out_decode
        out_decode: in_op.ack & in_op.stb
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        handshake = self.in_op.ack & self.in_op.stb
        m.d.comb += self.out_decode.eq(handshake)
        # only the input is registered as a submodule (out_op is a
        # plain Signal)
        m.submodules.get_op_in = self.in_op
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
158
159
class FPGetOp(FPState):
    """ State that fetches one operand through an FPGetOpMod and then
        moves on to out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the module (named after this state) and wires up
            its input and decode flag
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ on decode: drop ack, latch the operand, go to out_state;
            otherwise keep ack raised
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
188
189
class FPGet2OpMod(Trigger):
    """ Decodes two raw operands into FP number form when triggered.

        i: FPADDBaseData (raw a, b and mid)
        o: FPNumBase2Ops (decoded a, b and mid)
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPNumBase2Ops(self.width, self.id_wid)

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.o.a
        m.submodules.get_op2_out = self.o.b
        # temporaries that perform the decode of each raw operand
        out_op1 = FPNumIn(None, self.width)
        out_op2 = FPNumIn(None, self.width)
        with m.If(self.trigger):
            m.d.comb += out_op1.decode(self.i.a)
            m.d.comb += out_op2.decode(self.i.b)
            m.d.comb += self.o.a.eq(out_op1)
            m.d.comb += self.o.b.eq(out_op2)
            m.d.comb += self.o.mid.eq(self.i.mid)
        return m
219
220
class FPGet2Op(FPState):
    """ State that fetches both operands through an FPGet2OpMod and
        then moves on to out_state.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ registers the module, feeds it the data and strobe, and
            exposes its ack / trigger (ack is also copied to in_ack)
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.i.eq(i),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        """ on decode: drop ack, latch the module output, go to
            out_state; otherwise keep ack raised
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
253
254
class FPNumBase2Ops:
    """ Data bundle: two FPNumBase operands (a, b) plus a multiplexer
        id (mid, id_wid bits).
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying a, b and mid (in that order)
            from another bundle
        """
        fields = ("a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
264
265
class FPSCData:
    """ Data bundle produced by special-case handling: operands a and b,
        result z with its packed value oz, the out_do_z flag, and the
        multiplexer id mid.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from another bundle
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
279
280
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        i: FPNumBase2Ops (a, b, mid)
        o: FPSCData; out_do_z is set when z already holds the final
           result, otherwise a and b are passed through unchanged.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPNumBase2Ops(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as "specialcases" and drives its
            input from i
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the module's output bundle (i is not used) """
        return self.o

    def elaborate(self, platform):
        m = Module()

        a = self.i.a
        b = self.i.b
        o = self.o

        m.submodules.sc_in_a = a
        m.submodules.sc_in_b = b
        m.submodules.sc_out_z = o.z

        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a.s != b.s)

        m_match = Signal()
        m.d.comb += m_match.eq(a.m == b.m)

        # either operand NaN: result is NaN
        with m.If(a.is_nan | b.is_nan):
            m.d.comb += [o.out_do_z.eq(1), o.z.nan(0)]

        # XXX FP16 non-canonical NaN handling (one operand +/-zero, the
        # other NaN, returning a sign-adjusted copy of the NaN) was
        # prototyped at this point and is still under review; it is
        # deliberately not part of the chain.

        # a is inf: result is inf with a's sign...
        with m.Elif(a.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(a.s)]
            # ...unless b has the all-ones exponent too and the signs
            # differ, in which case the result is NaN
            with m.If(b.exp_128 & s_nomatch):
                m.d.comb += o.z.nan(0)

        # b is inf: result is inf with b's sign
        with m.Elif(b.is_inf):
            m.d.comb += [o.out_do_z.eq(1), o.z.inf(b.s)]

        # both zero: zero whose sign is the AND of the two signs
        with m.Elif(a.is_zero & b.is_zero):
            m.d.comb += [
                o.out_do_z.eq(1),
                o.z.create(a.s & b.s, b.e, b.m[3:-1]),
            ]

        # only a is zero: result is b
        with m.Elif(a.is_zero):
            m.d.comb += [
                o.out_do_z.eq(1),
                o.z.create(b.s, b.e, b.m[3:-1]),
            ]

        # only b is zero: result is a
        with m.Elif(b.is_zero):
            m.d.comb += [
                o.out_do_z.eq(1),
                o.z.create(a.s, a.e, a.m[3:-1]),
            ]

        # a == -b: result is +ve zero
        with m.Elif(s_nomatch & m_match & (a.e == b.e)):
            m.d.comb += [o.out_do_z.eq(1), o.z.zero(0)]

        # no special case: hand a/b on (denormalised checks come next)
        with m.Else():
            m.d.comb += [
                o.out_do_z.eq(0),
                o.a.eq(a),
                o.b.eq(b),
            ]

        m.d.comb += [
            o.oz.eq(o.z.v),
            o.mid.eq(self.i.mid),
        ]

        return m
396
397
class FPID:
    """ Optional multiplexer-id carrier.

        When id_wid is truthy, in_mid and out_mid are id_wid-bit Signals;
        otherwise (None or 0) both are None and idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid on the sync domain, if present
        """
        # test the signals themselves: the previous `id_wid is not None`
        # check disagreed with __init__ for id_wid == 0 and then called
        # .eq() on None.
        if self.in_mid is None or self.out_mid is None:
            return
        m.d.sync += self.out_mid.eq(self.in_mid)
411
412
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper around FPAddSpecialCasesMod: registers the packed
        result and mid, then branches on out_do_z.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # mod.setup() only takes (m, i); out_do_z is read back from the
        # module's output bundle instead of being passed in.
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v) # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        # NOTE(review): idsync() is not defined on FPState in this file;
        # presumably inherited via FPBase - confirm it exists.
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
438
439
class FPAddSpecialCasesDeNorm(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Combines FPAddSpecialCasesMod and FPAddDeNormMod back-to-back
        and registers the de-normalisation output in self.o.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        self.o = self.ospec()

    def ispec(self):
        """ input format is that of the special-cases module """
        return self.smod.ispec()

    def ospec(self):
        """ output format is that of the de-normalisation module """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # special-cases feeds straight into de-normalisation
        self.smod.setup(m, i)
        self.dmod.setup(m, self.smod.o)

        # An early-out (split pipeline) variant would additionally
        # capture smod.o.out_do_z combinatorially and register
        # smod.o.z.v / smod.o.mid into a separate out_z; that path is
        # not implemented here.

        # out_do_z=False path: register the de-normalised data
        m.d.sync += self.o.eq(self.dmod.o)

    def process(self, i):
        """ returns the registered output bundle (i is not used) """
        return self.o

    def action(self, m):
        # always proceeds to alignment (no early-out to "put_z")
        m.next = "align"
483
484
class FPAddDeNormMod(FPState):
    """ De-normalisation stage: unless out_do_z is set, copies a and b
        through, replacing an exponent flagged exp_n127 with N126 and
        otherwise setting the top mantissa bit.  z, oz, out_do_z and mid
        are always passed through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ registers this module as "denormalise" and drives its
            input from i
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # identical treatment for operand a, then operand b
            for src, dst in ((self.i.a, self.o.a), (self.i.b, self.o.b)):
                m.d.comb += dst.eq(src)
                with m.If(src.exp_n127):
                    m.d.comb += dst.e.eq(src.N126) # limit exponent
                with m.Else():
                    m.d.comb += dst.m[-1].eq(1) # set top mantissa bit

        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
532
533
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: registers the de-normalised
        operands into out_a / out_b and proceeds to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its operands on its output bundle (o.a/o.b)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
553
554
class FPAddAlignMultiMod(FPState):
    """ Multi-cycle alignment step.

        Compares the exponents of in_a and in_b; the operand with the
        smaller exponent is passed through shift_down(), the other is
        copied unchanged.  exp_eq is raised only when the exponents are
        already equal.
        NOTE(review): shift_down() is presumably a single-position
        shift, so the enclosing state repeats until exp_eq - confirm
        against fpbase.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # defaults: not yet equal, operands passed straight through
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]
        # a's exponent is larger: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # b's exponent is larger: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents match: signal that alignment is complete
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
595
596
class FPAddAlignMulti(FPState):
    """ FSM wrapper around FPAddAlignMultiMod: registers the module's
        operands each cycle and leaves the "align" state for "add_0"
        once exp_eq is raised.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.eq(in_a),
            self.mod.in_b.eq(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        # operands are registered (sync), not passed combinatorially
        m.d.sync += [
            self.out_a.eq(self.mod.out_a),
            self.out_b.eq(self.mod.out_b),
        ]

    def action(self, m):
        # no transition is requested until the exponents match
        with m.If(self.exp_eq):
            m.next = "add_0"
621
622
class FPNumIn2Ops:
    """ Data bundle with FPNumIn operands a and b, result z with its
        packed value oz, the out_do_z flag, and the multiplexer id mid.
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from another bundle
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
636
637
class FPAddAlignSingleMod:
    """ Single-cycle alignment of a against b (or b against a).

        i: FPSCData
        o: FPNumIn2Ops
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the module's output bundle (i is not used) """
        return self.o

    def setup(self, m, i):
        """ registers this module as "align" and drives its input
            from i
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns whichever operand has the smaller exponent against
            the other, in a *single* cycle, using one variable-width
            shifter (MultiShiftRMerge).

            The shifter is expensive in gates, so only one is
            instantiated: the operand to be shifted is muxed into a
            temporary (t_inp), shifted, and muxed back out (t_out).
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # muxed temporaries around the single shifter
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)
        ediffr = Signal(espec, reset_less=True)
        tdiff = Signal(espec, reset_less=True)
        elz = Signal(reset_less=True)
        egz = Signal(reset_less=True)

        # shifter wiring: mantissa in/out, shift amount tdiff; the
        # exponent is raised by the same amount, the sign carried over
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        # exponent differences in both directions, and their ordering
        m.d.comb += [
            ediff.eq(self.i.a.e - self.i.b.e),
            ediffr.eq(self.i.b.e - self.i.a.e),
            elz.eq(self.i.a.e < self.i.b.e),
            egz.eq(self.i.a.e > self.i.b.e),
        ]

        # default (exponents equal): both operands pass through
        m.d.comb += [
            self.o.a.eq(self.i.a),
            self.o.b.eq(self.i.b),
        ]

        with m.If(~self.i.out_do_z):
            # a's exponent is larger: b goes through the shifter
            with m.If(egz):
                m.d.comb += t_inp.eq(self.i.b)
                m.d.comb += tdiff.eq(ediff)
                m.d.comb += self.o.b.eq(t_out)
                m.d.comb += self.o.b.s.eq(self.i.b.s) # keep b's sign
            # b's exponent is larger: a goes through the shifter
            with m.Elif(elz):
                m.d.comb += t_inp.eq(self.i.a)
                m.d.comb += tdiff.eq(ediffr)
                m.d.comb += self.o.a.eq(t_out)
                m.d.comb += self.o.a.s.eq(self.i.a.s) # keep a's sign

        # pass-through fields
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
731
732
class FPAddAlignSingle(FPState):
    """ FSM wrapper around FPAddAlignSingleMod: registers the aligned
        operands into out_a / out_b and proceeds to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module exposes its operands on its output bundle (o.a/o.b)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
752
753
class FPAddAlignSingleAdd(FPState):
    """ Chains alignment, add stage 0 and add stage 1 combinatorially
        and registers the final (stage 1) output in a1o.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        self.a1o = self.ospec()

    def ispec(self):
        # must match FPAddAlignSingleMod.ispec() (the first stage in the
        # chain): its eq() reads z, oz and out_do_z, which
        # FPNumBase2Ops does not carry.
        return FPSCData(self.width, self.id_wid) # AlignSingle ispec

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid) # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # chain AddAlignSingle, AddStage0 and AddStage1
        mod = FPAddAlignSingleMod(self.width, self.id_wid)
        a0mod = FPAddStage0Mod(self.width, self.id_wid)
        a1mod = FPAddStage1Mod(self.width, self.id_wid)

        chain = StageChain([mod, a0mod, a1mod])
        chain.setup(m, i)

        # only the last stage's output is registered
        m.d.sync += self.a1o.eq(a1mod.o)

    def process(self, i):
        """ returns the registered output bundle (i is not used) """
        return self.a1o

    def action(self, m):
        m.next = "normalise_1"
787
788
class FPAddStage0Data:
    """ Output bundle of add stage 0: partial result z, the mantissa
        total tot (z.m_width + 4 bits), plus the pass-through fields
        out_do_z, oz and mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from another bundle
        """
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
801
802
class FPAddStage0Mod:
    """ First add stage: adds the aligned mantissas when the signs
        match, otherwise subtracts the smaller from the larger.

        i: FPSCData
        o: FPAddStage0Data
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the module's output bundle (i is not used) """
        return self.o

    def setup(self, m, i):
        """ registers this module as "add0" and drives its input
            from i
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.i.a
        m.submodules.add0_in_b = self.i.b
        m.submodules.add0_out_z = self.o.z

        # intermediate tests, and mantissas extended by one zero bit
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(self.i.a.m)+1, reset_less=True)
        bm0 = Signal(len(self.i.b.m)+1, reset_less=True)
        m.d.comb += seq.eq(self.i.a.s == self.i.b.s)
        m.d.comb += mge.eq(self.i.a.m >= self.i.b.m)
        m.d.comb += am0.eq(Cat(self.i.a.m, 0))
        m.d.comb += bm0.eq(Cat(self.i.b.m, 0))

        with m.If(~self.i.out_do_z):
            m.d.comb += self.o.z.e.eq(self.i.a.e)
            # same sign (both negative or both positive): add mantissas
            with m.If(seq):
                m.d.comb += self.o.tot.eq(am0 + bm0)
                m.d.comb += self.o.z.s.eq(self.i.a.s)
            # a's mantissa is the larger (or equal): a - b, a's sign
            with m.Elif(mge):
                m.d.comb += self.o.tot.eq(am0 - bm0)
                m.d.comb += self.o.z.s.eq(self.i.a.s)
            # b's mantissa is the larger: b - a, b's sign
            with m.Else():
                m.d.comb += self.o.tot.eq(bm0 - am0)
                m.d.comb += self.o.z.s.eq(self.i.b.s)

        # pass-through fields
        m.d.comb += [
            self.o.oz.eq(self.i.oz),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.mid.eq(self.i.mid),
        ]
        return m
867
868
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper around FPAddStage0Mod: registers the module output
        in self.o and proceeds to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
890
891
class FPAddStage1Data:
    """ Output bundle of add stage 1: result z, its Overflow bits (of),
        plus the pass-through fields out_do_z, oz and mid.
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from another bundle
        """
        fields = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
904
905
906
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        Splits the stage-0 total into mantissa and guard/round/sticky
        bits, shifting down by one extra place (and bumping the
        exponent) when the top bit of tot is set.

        i: FPAddStage0Data
        o: FPAddStage1Data
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the module's output bundle (i is not used) """
        return self.o

    def setup(self, m, i):
        """ registers this module as "add1" (and its output Overflow as
            "add1_out_overflow"), then drives the input from i
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        of = self.o.of

        # default: z copied from the input (m / e overridden below)
        m.d.comb += self.o.z.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            # MSB of tot set: the sum overflowed, so take the mantissa
            # one bit higher and increment the exponent
            with m.If(tot[-1]):
                m.d.comb += self.o.z.m.eq(tot[4:])
                m.d.comb += of.m0.eq(tot[4])
                m.d.comb += of.guard.eq(tot[3])
                m.d.comb += of.round_bit.eq(tot[2])
                m.d.comb += of.sticky.eq(tot[1] | tot[0])
                m.d.comb += self.o.z.e.eq(self.i.z.e + 1)
            # MSB of tot clear: no extra shift
            with m.Else():
                m.d.comb += self.o.z.m.eq(tot[3:])
                m.d.comb += of.m0.eq(tot[3])
                m.d.comb += of.guard.eq(tot[2])
                m.d.comb += of.round_bit.eq(tot[1])
                m.d.comb += of.sticky.eq(tot[0])

        # pass-through fields
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
968
969
class FPAddStage1(FPState):
    """ FSM wrapper around FPAddStage1Mod: registers the module's z and
        overflow outputs and proceeds to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): this is followed by an unconditional eq(1)
        # below; it only has an effect if setup() is called inside a
        # conditional (e.g. FSM state) context - confirm with caller.
        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module exposes its results on its output bundle (o.of/o.z)
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
992
993
class FPNormaliseModSingle:
    """ Single-cycle normalisation: when the mantissa's top bit is
        clear, shifts the mantissa up by its leading-zero count and
        decreases the exponent by the same amount.

        in_z / out_z: FPNumBase input and output
        in_of / out_of: Overflow bits read and driven by elaborate()
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # NOTE(review): elaborate() reads in_of and drives out_of but
        # neither was ever created, so elaboration raised
        # AttributeError.  They are declared here; nothing in this
        # class connects in_of to a source - confirm intended wiring.
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ input data format """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ output data format """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input attribute is in_z (there is no self.i on this class)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): the shifter is instantiated and registered but
        # never connected in this method - kept so the submodule
        # hierarchy is unchanged.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1060
class FPNorm1Data:
    """ Output bundle of the first normalisation stage: result z, the
        roundz flag, plus the pass-through fields out_do_z, oz and mid.
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from another bundle
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1073
1074
class FPNorm1ModSingle:
    """ Combinatorial single-pass normalisation module.

        Takes an FPAddStage1Data input (self.i) and produces an
        FPNorm1Data output (self.o).  When the input's out_do_z flag is
        clear, the mantissa is shifted up (exponent decreased) or down
        (exponent increased) in one step; mid, out_do_z and oz are always
        passed straight through.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output data specs
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input data object
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output data object (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        """ creates the HDL for the normalisation step and returns the Module
        """
        m = Module()

        # mantissa plus two extra bits (guard and round are cat'ed on below)
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input, used for all calculations below
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): temp_m in the "increase" branch is mwid+1 bits wide
        # but the shifter is created with width mwid - confirm that dropping
        # the top bit on msr.inp is intended.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits must come from the shifter output:
                    # temp_s belongs to the "decrease" branch and is never
                    # driven here, so reading it gave constant zeros.
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # pass-through of the id and the bypass flag/value
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1181
1182
class FPNorm1ModMulti:
    """ Combinatorial one-bit-per-pass normalisation module.

        Each evaluation moves the mantissa by a single bit position and
        raises out_norm while a further pass is still required.  The
        input is taken from in_z/in_of when in_select is set, otherwise
        from temp_z/temp_of.

        NOTE(review): FPNorm1Multi.setup calls self.mod.setup(...) but no
        setup method is defined on this class - confirm where the wiring
        is expected to come from.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: bit-width passed to FPNumBase
            * single_cycle: accepted but not used by this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # elaborate() drives this: without it an AttributeError is raised
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        """ creates the HDL for one normalisation pass and returns the Module
        """
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1248
1249
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModSingle; the next
        state is "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width passed to the module and to FPNumBase
            * id_wid: bit-width of the mid identifier
            * single_cycle: accepted but not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires id_wid as well as width
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ returns a new input data object (delegates to the module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output data object (delegates to the module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        """ FSM action: unconditionally move to the "round" state
        """
        m.next = "round"
1272
1273
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModMulti: stays in this
        state while out_norm is raised, then moves on to "round".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to the module and to FPNumBase
            * id_wid: accepted but not used by this class
        """
        super().__init__("normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        mod_args = (m, in_z, in_of, norm_stb,
                    self.in_accept, self.temp_z, self.temp_of,
                    self.out_z, self.out_norm)
        self.mod.setup(*mod_args)

        m.d.comb += self.stb.eq(norm_stb)
        # sets to zero when not in normalise_1 state
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ FSM action: latch the module outputs, and either acknowledge
            another pass or move to "round"
        """
        # accept when strobe is HIGH and ack is LOW
        accept = (~self.ack) & (self.stb)
        m.d.comb += self.in_accept.eq(accept)
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1314
1315
class FPNormToPack(FPState):
    """ FSM state "normalise_1" that chains normalisation, rounding,
        corrections and packing together; the next state is "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to every chained module
            * id_wid: bit-width of the mid identifier
        """
        super().__init__("normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        """ returns a new input data object (same as Norm1ModSingle ispec)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object (same as FPPackMod ospec)
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()

        # register the id and the packed result
        m.d.sync += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.v.eq(packer.o.z.v),
        ]

    def process(self, i):
        """ returns the registered output (the argument is not used)
        """
        return self.out_z

    def action(self, m):
        """ FSM action: unconditionally move to the "pack_put_z" state
        """
        m.next = "pack_put_z"
1350
1351
class FPRoundData:
    """ Record of the values passed out of the rounding stage: the
        number (z), the bypass flag and value (out_do_z / oz) and the
        identifier (mid).
    """

    # order in which eq() emits its assignments
    _FIELDS = ("z", "out_do_z", "oz", "mid")

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to FPNumBase and used for oz
            * id_wid: bit-width of the mid identifier
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns a list of assignments copying every field of i to self
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in self._FIELDS]
1363
1364
class FPRoundMod:
    """ Combinatorial rounding module: copies the input to the output and,
        when roundz is set (and out_do_z is clear), adds one to the
        mantissa, also bumping the exponent if the mantissa was all 1s.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output data specs
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data object
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output data object (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the HDL for the rounding step and returns the Module
        """
        m = Module()
        src = self.i
        dst = self.out_z
        # default: straight copy of mid, z, out_do_z
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # mantissa up
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                with m.If(src.z.m == src.z.m1s): # all 1s
                    # exponent up
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1396
1397
class FPRound(FPState):
    """ FSM state "round" wrapping FPRoundMod; the next state is
        "corrections".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the rounding module
            * id_wid: bit-width of the mid identifier
        """
        FPState.__init__(self, "round")
        # FPRoundMod requires id_wid as well as width
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data object (delegates to the module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output data object (delegates to the module)
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in the visible part of this
        # file - presumably inherited through FPState/FPBase; confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally move to the "corrections" state
        """
        m.next = "corrections"
1422
1423
class FPCorrectionsMod:
    """ Combinatorial corrections module: copies the input to the output
        and, when the number is flagged as denormalised (and out_do_z is
        clear), replaces the exponent with the number's N127 value.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output data specs
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data object
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output data object (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ creates the HDL for the corrections step and returns the Module
        """
        m = Module()
        src = self.i
        dst = self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        # default: straight copy of mid, z, out_do_z
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1456
1457
class FPCorrections(FPState):
    """ FSM state "corrections" wrapping FPCorrectionsMod; the next state
        is "pack".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the corrections module
            * id_wid: bit-width of the mid identifier
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires id_wid as well as width
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data object (delegates to the module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output data object (delegates to the module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        """ FSM action: unconditionally move to the "pack" state
        """
        m.next = "pack"
1481
1482
class FPPackData:
    """ Record of the values produced by the pack stage: the packed
        number (z) and the identifier (mid).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to FPNumOut
            * id_wid: bit-width of the mid identifier
        """
        self.z = FPNumOut(width, False)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns a list of assignments copying z and mid from i to self
        """
        copies = []
        copies.append(self.z.eq(i.z))
        copies.append(self.mid.eq(i.mid))
        return copies
1491
1492
class FPPackMod:
    """ Combinatorial pack module: builds the output value from the
        input's sign/exponent/mantissa (or infinity when the input is
        flagged as overflowed); when out_do_z is set the bypass value oz
        is used instead.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output data specs
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input data object
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output data object (the argument is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        """ creates the HDL for the pack step and returns the Module
        """
        m = Module()
        src = self.i
        packed = self.o.z
        m.submodules.pack_in_z = src.z
        m.d.comb += self.o.mid.eq(src.mid)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_overflowed):
                m.d.comb += packed.inf(src.z.s)
            with m.Else():
                m.d.comb += packed.create(src.z.s, src.z.e, src.z.m)
        with m.Else():
            # bypass: output the pre-computed value
            m.d.comb += packed.v.eq(src.oz)
        return m
1528
1529
class FPPack(FPState):
    """ FSM state "pack" wrapping FPPackMod; the next state is
        "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the pack module
            * id_wid: bit-width of the mid identifier
        """
        FPState.__init__(self, "pack")
        # FPPackMod requires id_wid as well as width
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input data object (delegates to the module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output data object (delegates to the module)
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # both self.out_z and self.mod.o are FPPackData: the packed value
        # lives at .z.v, and FPPackMod names its output "o" (not out_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        """ FSM action: unconditionally move to the "pack_put_z" state
        """
        m.next = "pack_put_z"
1553
1554
class FPPutZ(FPState):
    """ FSM state that writes in_z (and optionally in_mid) to the output,
        raising the output strobe until it is acknowledged.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z / out_z: source value and destination output
            * in_mid / out_mid: source and destination id (in_mid may be None)
            * to_state: state to move to once acknowledged
              (defaults to "get_ops")
        """
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy id and value, then handshake on stb/ack
        """
        dest = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z.v)
        with m.If(dest.stb & dest.ack):
            # acknowledged: drop the strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += dest.stb.eq(1)
1578
1579
class FPPutZIdx(FPState):
    """ FSM state that writes in_z to the entry of out_zs selected by
        in_mid, raising that entry's strobe until it is acknowledged.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source value
            * out_zs: indexable collection of outputs
            * in_mid: index into out_zs
            * to_state: state to move to once acknowledged
              (defaults to "get_ops")
        """
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy the value, then handshake on stb/ack
        """
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        selected = self.out_zs[self.in_mid]
        m.d.comb += [
            outz_stb.eq(selected.stb),
            outz_ack.eq(selected.ack),
        ]
        m.d.sync += selected.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # acknowledged: drop the strobe and move on
            m.d.sync += selected.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += selected.stb.eq(1)
1605
class FPADDBaseData:
    """ Input record for the adder: two operands (a, b) and an
        identifier (mid).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of each operand
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns a list of assignments copying a, b and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("a", "b", "mid")]
1617
1618
class FPOpData:
    """ Output record for the adder: an FPOp (z) and an identifier (mid).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to FPOp
            * id_wid: bit-width of the mid identifier
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns a list of assignments copying z and mid from i to self
        """
        copies = []
        copies.append(self.z.eq(i.z))
        copies.append(self.mid.eq(i.mid))
        return copies
1626
1627
class FPADDBaseMod:
    """ FSM-based floating-point adder: builds a list of FPState objects
        and runs each one's action inside its own FSM state.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

        self.states = []

    def ispec(self):
        """ returns a new input data object
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with either the short or the long pipeline
        if self.compact:
            build_states = self.get_compact_fragment
        else:
            build_states = self.get_longer_fragment
        build_states(m, platform)

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ adds the full set of states (one per processing step)

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which __init__ sets on this class -
            it looks stale; confirm before using compact=False.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases", width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: one-shot or bit-at-a-time, same wiring either way
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: the multi-cycle version also takes the strobe
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output states: one for the packed result, one for special cases
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ adds the reduced set of states, each fed by the previous one
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      width, id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(width, id_wid))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))
1747
1748
class FPADDBase(FPState):
    """ FSM state "fpadd" that owns an FPADDBaseMod and handshakes with
        it: raises the module's input strobe when an operand pair is
        accepted and moves to "put_z" once the module's output triggers.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ returns a new input data object (delegates to the module)
        """
        return self.mod.ispec()

    def ospec(self):
        """ returns a new output data object (delegates to the module)
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * i: input data, copied to self.i and on to the module
            * add_stb: incoming strobe, registered into self.add_stb
            * in_mid: accepted but not used by this method
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: accept an operand pair, wait for the module to
            finish, then move to "put_z"
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1838
1839
class FPADDStageIn:
    """ Pipeline input record: two operands (a, b) and an identifier (mid).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of each operand
            * id_wid: bit-width of the mid identifier
        """
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns a list of assignments copying a, b and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("a", "b", "mid")]

    def ports(self):
        """ returns the record's members as a list
        """
        members = [self.a, self.b]
        members.append(self.mid)
        return members
1851
1852
class FPADDStageOut:
    """ Pipeline output record: a result (z) and an identifier (mid).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the result
            * id_wid: bit-width of the mid identifier
        """
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns a list of assignments copying z and mid from i to self
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns the record's members as a list
        """
        members = [self.z]
        members.append(self.mid)
        return members
1863
1864
# matches the format of FPADDStageOut, allows eq function to do assignments
class PlaceHolder:
    """ Empty attribute container: callers attach fields (such as z and
        mid) to an instance after creating it.
    """
1867
1868
class FPAddBaseStage:
    """ Pipeline stage whose process step is a plain "a + b" expression.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width handed to the input/output specs
            * id_wid: bit-width of the mid identifier
        """
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ returns a new input data object
        """
        return FPADDStageIn(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object
        """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ returns an object with z set to i.a + i.b and mid set to i.mid
        """
        result = PlaceHolder()
        result.z = i.a + i.b
        result.mid = i.mid
        return result
1885
1886
class FPADDBasePipe1(UnbufferedPipeline):
    """ UnbufferedPipeline built around a single FPAddBaseStage.
    """

    def __init__(self, width, id_wid):
        """ * width / id_wid: passed on to FPAddBaseStage
        """
        super().__init__(FPAddBaseStage(width, id_wid))
1891
1892
class FPADDBasePipe(ControlBase):
    """ ControlBase wrapper around one FPADDBasePipe1: the statements
        returned by connect() are stored and added to the comb domain at
        elaboration time.
    """

    def __init__(self, width, id_wid):
        """ * width / id_wid: passed on to FPADDBasePipe1
        """
        super().__init__()
        self.pipe1 = FPADDBasePipe1(width, id_wid)
        # connect() returns the list of eq statements: keep it for later
        self._eqs = self.connect([self.pipe1])

    def elaborate(self, platform):
        """ creates the Module: registers pipe1 and applies the stored
            connection statements
        """
        module = Module()
        module.submodules.pipe1 = self.pipe1
        module.d.comb += self._eqs
        return module
1904
1905
class PriorityCombPipeline(CombMultiInPipeline):
    """ CombMultiInPipeline whose input multiplexer is an
        InputPriorityArbiter.
    """

    def __init__(self, stage, p_len):
        """ * stage: the stage handed to CombMultiInPipeline
            * p_len: number of inputs
        """
        arbiter = InputPriorityArbiter(self, p_len)
        super().__init__(stage, p_len=p_len, p_mux=arbiter)

    def ports(self):
        """ returns the ports of the input multiplexer
        """
        return self.p_mux.ports()
1913
1914
class FPAddInPassThruStage:
    """ Pass-through stage using FPADDStageIn for both input and output.
    """

    def __init__(self, width, id_wid):
        """ * width / id_wid: passed on to FPADDStageIn
        """
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new input data object
        """
        return FPADDStageIn(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object (same type as the input)
        """
        return self.ispec()

    def process(self, i):
        """ returns the input unchanged
        """
        return i
1921
1922
class FPADDInMuxPipe(PriorityCombPipeline):
    """ Fan-in pipeline: num_rows inputs, one output, with a pass-through
        stage.
    """

    def __init__(self, width, id_width, num_rows):
        """ * width / id_width: passed on to FPAddInPassThruStage
            * num_rows: number of inputs
        """
        self.num_rows = num_rows
        passthru = FPAddInPassThruStage(width, id_width)
        super().__init__(passthru, p_len=self.num_rows)

    def ports(self):
        """ returns the handshake and data ports of every input followed
            by those of the output
        """
        collected = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            collected += [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        nxt = self.n
        collected += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return collected
1939
1940
class MuxCombPipeline(CombMultiOutPipeline):
    """ CombMultiOutPipeline in which the stage object doubles as the
        n-way multiplexer.
    """

    def __init__(self, stage, n_len):
        """ * stage: used both as the stage and as n_mux
            * n_len: number of outputs
        """
        # HACK: stage is also the n-way multiplexer
        super().__init__(stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        stage.m_id = self.p.i_data.mid

    def ports(self):
        """ returns the ports of self.p_mux

            NOTE(review): the constructor passes n_mux, not p_mux -
            confirm that the base class provides a p_mux attribute.
        """
        return self.p_mux.ports()
1951
1952
class FPAddOutPassThruStage:
    """ Pass-through stage using FPADDStageOut for both input and output.
    """

    def __init__(self, width, id_wid):
        """ * width / id_wid: passed on to FPADDStageOut
        """
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new input data object
        """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output data object (same type as the input)
        """
        return self.ispec()

    def process(self, i):
        """ returns the input unchanged
        """
        return i
1959
1960
class FPADDMuxOutPipe(MuxCombPipeline):
    """ Fan-out pipeline: one input, num_rows outputs, with a pass-through
        stage.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width / id_wid: passed on to FPAddOutPassThruStage
            * num_rows: number of outputs
        """
        self.num_rows = num_rows
        passthru = FPAddOutPassThruStage(width, id_wid)
        super().__init__(passthru, n_len=self.num_rows)

    def ports(self):
        """ returns the handshake and data ports of the input followed by
            those of every output
        """
        prev = self.p
        collected = [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            collected += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return collected
1976
1977
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        A fan-in pipe feeds the add stage, which in turn feeds a fan-out
        pipe.  The fan-in pipe's inputs and the fan-out pipe's outputs are
        exposed as this object's p and n.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width / id_wid: passed on to each of the three pipes
            * num_rows: number of inputs of the fan-in pipe and of
              outputs of the fan-out pipe
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # use pipe in/out as this class in/out
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        """ creates the Module: registers the three pipes and connects
            them in sequence
        """
        module = Module()
        module.submodules.inpipe = self.inpipe
        module.submodules.fpadd = self.fpadd
        module.submodules.outpipe = self.outpipe

        links = [
            self.inpipe.n.connect_to_next(self.fpadd.p),
            self.fpadd.connect_to_next(self.outpipe),
        ]
        for link in links:
            module.d.comb += link

        return module

    def ports(self):
        """ returns the port list gathered at construction time
        """
        return self._ports
2006
2007
class ResArray:
    """ Array of rs_sz result outputs (FPOp), plus an input value and an
        input identifier.
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width passed to each FPOp
            * id_wid: bit-width of the in_mid identifier
            * rs_sz: number of result entries.  The body used this name
              without it being a parameter; it is now a keyword argument
              (default 2, matching FPADD's rs_sz default).
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ links the input value and identifier to this object's copies
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment: registers in_z and every result
            entry as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every result entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2039
2040
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand pairs and of result entries
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        self.ids = FPID(id_wid)

        # one (a, b) operand pair per row
        operand_rows = []
        for row in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % row
            in_b.name = "in_b_%d" % row
            operand_rows.append((in_a, in_b))
        self.rs = Array(operand_rows)

        # one result per row
        result_rows = []
        for row in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % row
            result_rows.append(out_z)
        self.res = Array(result_rows)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up here
        first_row = self.rs[0]
        in_a = first_row[0]
        in_b = first_row[1]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        # create an input spec object for FPADDBase
        abd = ab.ispec()
        m.d.sync += [
            abd.a.eq(geta.out_op),
            abd.b.eq(getb.out_op),
            abd.mid.eq(self.ids.in_mid),
        ]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2130
2131
if __name__ == "__main__":
    # Command-line entry point: builds a 32-bit FPADD and hands it to
    # nmigen's main() together with the first operand pair, the first
    # result and the id signals as ports.  (The former "if True/else"
    # toggle is gone: its else branch could never run and read
    # attributes that FPADDBase does not set.)
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=alu.rs[0][0].ports() + \
                    alu.rs[0][1].ports() + \
                    alu.res[0].ports() + \
                    [alu.ids.in_mid, alu.ids.out_mid])
2145
2146
2147 # works... but don't use, just do "python fname.py convert -t v"
2148 #print (verilog.convert(alu, ports=[
2149 # ports=alu.in_a.ports() + \
2150 # alu.in_b.ports() + \
2151 # alu.out_z.ports())