add process function to FPGet2OpMod
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
13 from multipipe import CombMultiOutPipeline
14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
15
16 #from fpbase import FPNumShiftMultiRight
17
18
class FPState(FPBase):
    """ base class for the per-state helper objects in this file.

        state_from is stored as given; subclasses in this file pass a
        state name string (e.g. "align").
    """

    def __init__(self, state_from):
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ keeps the inputs mapping and mirrors each entry as an attribute
        """
        self.inputs = inputs
        for attr_name, value in inputs.items():
            setattr(self, attr_name, value)

    def set_outputs(self, outputs):
        """ keeps the outputs mapping and mirrors each entry as an attribute
        """
        self.outputs = outputs
        for attr_name, value in outputs.items():
            setattr(self, attr_name, value)
32
33
class FPGetSyncOpsMod:
    """ forwards a group of num_ops operands once all of them are strobed.

        * ready is set when every bit of stb is set
        * out_decode is ready qualified by ack
        * out_op[i] is driven from in_op[i] only while out_decode is set
    """

    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        self.in_op = []
        self.out_op = []
        # one input/output signal pair per operand
        for _ in range(num_ops):
            self.in_op.append(Signal(width, reset_less=True))
            self.out_op.append(Signal(width, reset_less=True))
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # unsigned constant of num_ops bits, all set
        all_strobed = Const(-1, (self.num_ops, False))
        m.d.comb += [
            self.ready.eq(self.stb == all_strobed),
            self.out_decode.eq(self.ack & self.ready),
        ]
        with m.If(self.out_decode):
            m.d.comb += [dst.eq(src)
                         for dst, src in zip(self.out_op, self.in_op)]
        return m

    def ports(self):
        """ returns the operand inputs, operand outputs, then stb and ack
        """
        return [*self.in_op, *self.out_op, self.stb, self.ack]
63
64
class FPOps(Trigger):
    """ a group of num_ops operand Signals (self.v), on top of Trigger.

        ports() also lists self.ack and self.stb; those are not created
        in this class (presumably they come from Trigger.__init__).
    """

    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops
        self.v = Array([Signal(width) for _ in range(num_ops)])

    def ports(self):
        """ returns the operand signals followed by ack and stb
        """
        operands = [self.v[idx] for idx in range(self.num_ops)]
        return operands + [self.ack, self.stb]
83
84
class InputGroup:
    """ selects one of num_rows operand groups using a priority encoder.

        Each row is an FPGetSyncOpsMod.  The rows' ready signals feed a
        PriorityEncoder; when the encoder is active and out_op.trigger is
        set, the selected row's operands are registered into out_op and
        the encoder output is registered into mid (the multiplex id).

        width:    bit-width of each operand
        num_ops:  number of operands per row
        num_rows: number of rows (must be at least 1)
    """

    def __init__(self, width, num_ops=2, num_rows=4):
        if num_rows < 1:
            raise ValueError("num_rows must be at least 1")
        self.width = width
        self.num_ops = num_ops
        self.num_rows = num_rows
        # number of bits needed to hold a row index 0..num_rows-1.
        # computed with integer arithmetic: the previous
        # int(log(n)/log(2)) relied on float rounding and truncated
        # for non-power-of-two row counts, making mid too narrow.
        self.mmax = (num_rows - 1).bit_length()
        self.mid = Signal(self.mmax, reset_less=True)  # multiplex id
        self.rs = Array([FPGetSyncOpsMod(width, num_ops)
                         for _ in range(num_rows)])

        self.out_op = FPOps(width, num_ops)

    def elaborate(self, platform):
        m = Module()

        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe
        m.submodules.out_op = self.out_op
        m.submodules += self.rs

        # connect priority encoder: one input bit per row's ready
        in_ready = [self.rs[i].ready for i in range(self.num_rows)]
        m.d.comb += pe.i.eq(Cat(*in_ready))

        active = Signal(reset_less=True)
        out_en = Signal(reset_less=True)
        m.d.comb += active.eq(~pe.n)  # encoder active
        m.d.comb += out_en.eq(active & self.out_op.trigger)

        # encoder active: ack relevant input, record MID, pass output
        with m.If(out_en):
            rs = self.rs[pe.o]
            m.d.sync += self.mid.eq(pe.o)
            m.d.sync += rs.ack.eq(0)
            m.d.sync += self.out_op.stb.eq(0)
            for j in range(self.num_ops):
                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
        with m.Else():
            m.d.sync += self.out_op.stb.eq(1)
            # no transfer this cycle: every row's ack is set to 1
            for i in range(self.num_rows):
                m.d.sync += self.rs[i].ack.eq(1)

        return m

    def ports(self):
        """ returns out_op's ports, each row's operands and stb, then mid
        """
        res = []
        for i in range(self.num_rows):
            inop = self.rs[i]
            res += inop.in_op + [inop.stb]
        return self.out_op.ports() + res + [self.mid]
140
141
class FPGetOpMod:
    """ combinatorial operand fetch.

        out_decode is set when both ack and stb of in_op are set; out_op
        is driven from in_op.v only while out_decode is set.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op
        handshake = self.in_op.ack & self.in_op.stb
        m.d.comb += self.out_decode.eq(handshake)
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
158
159
class FPGetOp(FPState):
    """ gets operand
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs

            the module is registered as a submodule named after this
            state (self.state_from)
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ on decode: latch the operand, clear ack and go to out_state;
            otherwise keep ack set
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            m.d.sync += self.in_op.ack.eq(1)
188
189
class FPGet2OpMod(Trigger):
    """ decodes the two raw operands of the input into the a/b numbers
        of the output, and passes mid through, while trigger is set.
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: raw operands plus mid
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output format: two decoded numbers plus mid
        """
        return FPNumBase2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the module's output (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.o.a
        m.submodules.get_op2_out = self.o.b
        # temporaries that receive the decoded operands
        decoded_a = FPNumIn(None, self.width)
        decoded_b = FPNumIn(None, self.width)
        with m.If(self.trigger):
            m.d.comb += decoded_a.decode(self.i.a)
            m.d.comb += decoded_b.decode(self.i.b)
            m.d.comb += self.o.a.eq(decoded_a)
            m.d.comb += self.o.b.eq(decoded_b)
            m.d.comb += self.o.mid.eq(self.i.mid)
        return m
222
223
class FPGet2Op(FPState):
    """ gets operands
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ links module to inputs and outputs

            in_stb drives the module's stb; the module's ack is copied
            to both self.out_ack and the caller's in_ack
        """
        mod = self.mod
        m.submodules.get_ops = mod
        m.d.comb += [
            mod.i.eq(i),
            mod.stb.eq(in_stb),
            self.out_ack.eq(mod.ack),
            self.out_decode.eq(mod.trigger),
            in_ack.eq(mod.ack),
        ]

    def action(self, m):
        """ on decode: latch the module output, clear ack and go to
            out_state; otherwise keep ack set
        """
        with m.If(self.out_decode):
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
256
257
class FPNumBase2Ops:
    """ data bundle: two FPNumBase operands (a, b) and a mid Signal
    """

    _FIELDS = ("a", "b", "mid")

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying a, b and mid from i (in order)
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in self._FIELDS]
267
268
class FPSCData:
    """ data bundle produced by the special-cases stage: operands a and b,
        result number z, raw result oz, the out_do_z flag, and mid
    """

    # order in which eq() emits its assignments
    _FIELDS = ("z", "out_do_z", "oz", "a", "b", "mid")

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in self._FIELDS]
282
283
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Combinatorial module.  Input (self.i) is an FPNumBase2Ops, output
        (self.o) is an FPSCData.  The cases are tested as one priority
        chain (If/Elif/.../Else): every special case sets o.out_do_z and
        writes the result into o.z; only the final Else clears out_do_z
        and copies a and b through to the output.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format: two operands plus mid
        """
        return FPNumBase2Ops(self.width, self.id_wid)

    def ospec(self):
        """ output format: operands, result z/oz, out_do_z flag, mid
        """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the module's output (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        m.submodules.sc_in_a = self.i.a
        m.submodules.sc_in_b = self.i.b
        m.submodules.sc_out_z = self.o.z

        # set when the signs of a and b differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(self.i.a.s != self.i.b.s)

        # set when the mantissas of a and b are equal
        m_match = Signal()
        m.d.comb += m_match.eq(self.i.a.m == self.i.b.m)

        # if a is NaN or b is NaN return NaN
        with m.If(self.i.a.is_nan | self.i.b.is_nan):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.nan(0)

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(self.i.a.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(self.i.a.s)
            # if b has exp_128 set and the signs don't match, return NaN
            # (this later assignment overrides the inf result above)
            with m.If(self.i.b.exp_128 & s_nomatch):
                m.d.comb += self.o.z.nan(0)

        # if b is inf return inf
        with m.Elif(self.i.b.is_inf):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.inf(self.i.b.s)

        # if a is zero and b zero return zero: the sign is the AND of
        # both signs, exponent and mantissa are taken from b
        with m.Elif(self.i.a.is_zero & self.i.b.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(self.i.a.s & self.i.b.s,
                                        self.i.b.e,
                                        self.i.b.m[3:-1])

        # if a is zero return b
        with m.Elif(self.i.a.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(self.i.b.s, self.i.b.e,
                                        self.i.b.m[3:-1])

        # if b is zero return a
        with m.Elif(self.i.b.is_zero):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.create(self.i.a.s, self.i.a.e,
                                        self.i.a.m[3:-1])

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (self.i.a.e == self.i.b.e)):
            m.d.comb += self.o.out_do_z.eq(1)
            m.d.comb += self.o.z.zero(0)

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += self.o.out_do_z.eq(0)
            m.d.comb += self.o.a.eq(self.i.a)
            m.d.comb += self.o.b.eq(self.i.b)

        # unconditional: raw result value and mid are always forwarded
        m.d.comb += self.o.oz.eq(self.o.z.v)
        m.d.comb += self.o.mid.eq(self.i.mid)

        return m
399
400
class FPID:
    """ optional multiplex-id pair (in_mid, out_mid).

        id_wid: width of the id signals.  When falsy (None or 0) no
                signals are created and in_mid/out_mid are None.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ registers in_mid into out_mid; does nothing when there is no id

            the guard tests the signals themselves rather than
            "id_wid is not None": with id_wid == 0 the constructor
            leaves both as None, which previously crashed here.
        """
        if self.in_mid is None or self.out_mid is None:
            return
        m.d.sync += self.out_mid.eq(self.in_mid)
414
415
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM wrapper around FPAddSpecialCasesMod: goes to "put_z" when the
        module flags a special-case result (out_do_z), otherwise to
        "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # FPAddSpecialCasesMod requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # the module's setup takes (m, i) only and publishes its results
        # on self.mod.o, so out_do_z is picked up from there
        self.mod.setup(m, i)
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        # mid is already registered in setup(); this class has no
        # in_mid/out_mid pair, so there is no separate id sync here
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
441
442
class FPAddSpecialCasesDeNorm(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Feeds the special-cases module into the denormalise module and
        registers the denormalise output into self.o.  There is no
        early-out: the next state is always "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        self.o = self.ospec()

    def ispec(self):
        """ input format is that of the special-cases module
        """
        return self.smod.ispec()

    def ospec(self):
        """ output format is that of the denormalise module
        """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        special, denorm = self.smod, self.dmod
        special.setup(m, i)
        denorm.setup(m, special.o)
        # the whole denormalise output (including out_do_z and oz) is
        # latched; a split (early-out) pipeline would only need z and mid
        m.d.sync += self.o.eq(denorm.o)

    def process(self, i):
        """ returns the registered output (the argument is not used)
        """
        return self.o

    def action(self, m):
        m.next = "align"
486
487
class FPAddDeNormMod(FPState):
    """ denormalised-number handling for both operands.

        While out_do_z is clear, each operand is copied through and then
        adjusted: when its exp_n127 is set the exponent is replaced by
        N126, otherwise the top mantissa bit is set.  mid, z, out_do_z
        and oz are always passed through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.i.a
        m.submodules.denorm_in_b = self.i.b
        m.submodules.denorm_out_a = self.o.a
        m.submodules.denorm_out_b = self.o.b

        with m.If(~self.i.out_do_z):
            # identical treatment for a, then b
            for src, dst in ((self.i.a, self.o.a), (self.i.b, self.o.b)):
                m.d.comb += dst.eq(src)
                with m.If(src.exp_n127):
                    m.d.comb += dst.e.eq(src.N126)  # limit the exponent
                with m.Else():
                    m.d.comb += dst.m[-1].eq(1)  # set top mantissa bit

        # pass-through fields
        m.d.comb += [
            self.o.mid.eq(self.i.mid),
            self.o.z.eq(self.i.z),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
        ]

        return m
535
536
class FPAddDeNorm(FPState):
    """ FSM wrapper around FPAddDeNormMod: registers the denormalised
        operands into out_a/out_b and always moves on to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # FPAddDeNormMod requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module publishes its operands on its output bundle (mod.o)
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
556
557
class FPAddAlignMultiMod(FPState):
    """ one alignment step, for use by a state that repeats until done.

        By default the outputs copy the inputs and exp_eq is clear.
        When the exponents differ, shift_down is applied to the operand
        with the smaller exponent; when they are equal exp_eq is set.
        NOTE(review): shift_down is presumably a single-position shift,
        which is why the caller stays in "align" until exp_eq - confirm
        against FPNumIn.shift_down.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # exponent comparison results
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)

        # defaults (overridden by the branches below)
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        with m.If(agtb):
            # exponent of a greater than b: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # exponent of b greater than a: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: move to next stage.
            m.d.comb += self.exp_eq.eq(1)
        return m
598
599
class FPAddAlignMulti(FPState):
    """ FSM wrapper around FPAddAlignMultiMod: registers the module's
        outputs every cycle and only leaves "align" (for "add_0") once
        the module reports exp_eq.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        mod = self.mod
        m.submodules.align = mod
        m.d.comb += [
            mod.in_a.eq(in_a),
            mod.in_b.eq(in_b),
            self.exp_eq.eq(mod.exp_eq),
        ]
        # operands are registered (sync), not combinatorial
        m.d.sync += [
            self.out_a.eq(mod.out_a),
            self.out_b.eq(mod.out_b),
        ]

    def action(self, m):
        with m.If(self.exp_eq):
            m.next = "add_0"
624
625
class FPNumIn2Ops:
    """ data bundle: FPNumIn operands a and b, result number z, raw
        result oz, the out_do_z flag, and mid
    """

    # order in which eq() emits its assignments
    _FIELDS = ("z", "out_do_z", "oz", "a", "b", "mid")

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in self._FIELDS]
639
640
class FPAddAlignSingleMod:
    """ single-cycle operand alignment.

        Input (self.i) is an FPSCData, output (self.o) is an FPNumIn2Ops.
        One shared shifter (MultiShiftRMerge) is muxed onto whichever
        operand has the smaller exponent; mid, z, out_do_z and oz are
        passed straight through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input format
        """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ output format
        """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ returns the module's output (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.i.a
        m.submodules.align_in_b = self.i.b
        m.submodules.align_out_a = self.o.a
        m.submodules.align_out_b = self.o.b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        # signed shape with the same width as the exponent
        espec = (len(self.i.a.e), True)
        msr = MultiShiftRMerge(self.i.a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)   # a.e - b.e
        ediffr = Signal(espec, reset_less=True)  # b.e - a.e
        tdiff = Signal(espec, reset_less=True)   # shift amount in use
        elz = Signal(reset_less=True)            # a.e < b.e
        egz = Signal(reset_less=True)            # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += msr.inp.eq(t_inp.m)
        m.d.comb += msr.diff.eq(tdiff)
        m.d.comb += t_out.m.eq(msr.m)
        # the shifted operand's exponent is raised by the shift amount
        m.d.comb += t_out.e.eq(t_inp.e + tdiff)
        m.d.comb += t_out.s.eq(t_inp.s)

        m.d.comb += ediff.eq(self.i.a.e - self.i.b.e)
        m.d.comb += ediffr.eq(self.i.b.e - self.i.a.e)
        m.d.comb += elz.eq(self.i.a.e < self.i.b.e)
        m.d.comb += egz.eq(self.i.a.e > self.i.b.e)

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += self.o.a.eq(self.i.a)
        m.d.comb += self.o.b.eq(self.i.b)
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        # alignment is skipped entirely when out_do_z is set
        with m.If(~self.i.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [t_inp.eq(self.i.b),
                             tdiff.eq(ediff),
                             self.o.b.eq(t_out),
                             self.o.b.s.eq(self.i.b.s), # whoops forgot sign
                            ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [t_inp.eq(self.i.a),
                             tdiff.eq(ediffr),
                             self.o.a.eq(t_out),
                             self.o.a.s.eq(self.i.a.s), # whoops forgot sign
                            ]

        # pass-through fields (unconditional)
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.z.eq(self.i.z)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
734
735
class FPAddAlignSingle(FPState):
    """ FSM wrapper around FPAddAlignSingleMod: registers the aligned
        operands into out_a/out_b and always moves on to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module publishes its operands on its output bundle (mod.o);
        # it has no out_a/out_b attributes of its own
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
755
756
class FPAddAlignSingleAdd(FPState):
    """ combined state: chains the align, add-stage-0 and add-stage-1
        modules and registers the final result into self.a1o.  The next
        state is always "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        self.a1o = self.ospec()

    def ispec(self):
        # must match the first chained module: FPAddAlignSingleMod takes
        # an FPSCData (its eq() reads z, out_do_z and oz, which
        # FPNumBase2Ops does not have)
        return FPSCData(self.width, self.id_wid)  # AlignSingle ispec

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid)  # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # chain AddAlignSingle, AddStage0 and AddStage1
        mod = FPAddAlignSingleMod(self.width, self.id_wid)
        a0mod = FPAddStage0Mod(self.width, self.id_wid)
        a1mod = FPAddStage1Mod(self.width, self.id_wid)

        chain = StageChain([mod, a0mod, a1mod])
        chain.setup(m, i)

        # only the last stage's output is registered
        m.d.sync += self.a1o.eq(a1mod.o)

    def process(self, i):
        """ returns the registered output (the argument is not used)
        """
        return self.a1o

    def action(self, m):
        m.next = "normalise_1"
790
791
class FPAddStage0Data:
    """ data bundle produced by add stage 0: result number z, the
        mantissa total tot (4 bits wider than z's mantissa), raw result
        oz, the out_do_z flag, and mid
    """

    # order in which eq() emits its assignments
    _FIELDS = ("z", "out_do_z", "oz", "tot", "mid")

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in self._FIELDS]
804
805
class FPAddStage0Mod:
    """ first stage of add (combinatorial).

        While out_do_z is clear: the result exponent is a's exponent; if
        the signs match the mantissas are added, otherwise the smaller
        mantissa is subtracted from the larger and the result takes the
        sign of the larger.  oz, out_do_z and mid are passed through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the module's output (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        a, b = self.i.a, self.i.b
        z = self.o.z
        m.submodules.add0_in_a = a
        m.submodules.add0_in_b = b
        m.submodules.add0_out_z = z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)
        mge = Signal(reset_less=True)
        am0 = Signal(len(a.m)+1, reset_less=True)
        bm0 = Signal(len(b.m)+1, reset_less=True)
        m.d.comb += seq.eq(a.s == b.s)
        m.d.comb += mge.eq(a.m >= b.m)
        m.d.comb += am0.eq(Cat(a.m, 0))
        m.d.comb += bm0.eq(Cat(b.m, 0))

        with m.If(~self.i.out_do_z):
            m.d.comb += z.e.eq(a.e)
            with m.If(seq):
                # same-sign (both negative or both positive): add
                m.d.comb += self.o.tot.eq(am0 + bm0)
                m.d.comb += z.s.eq(a.s)
            with m.Elif(mge):
                # a mantissa greater than (or equal to) b: a - b, sign of a
                m.d.comb += self.o.tot.eq(am0 - bm0)
                m.d.comb += z.s.eq(a.s)
            with m.Else():
                # b mantissa greater than a: b - a, sign of b
                m.d.comb += self.o.tot.eq(bm0 - am0)
                m.d.comb += z.s.eq(b.s)

        # pass-through fields
        m.d.comb += [
            self.o.oz.eq(self.i.oz),
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.mid.eq(self.i.mid),
        ]
        return m
870
871
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.

        FSM wrapper around FPAddStage0Mod: registers the module output
        into self.o and always moves on to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # FPAddStage0Mod requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
893
894
class FPAddStage1Data:
    """ data bundle produced by add stage 1: result number z, the
        Overflow bits (of), raw result oz, the out_do_z flag, and mid
    """

    # order in which eq() emits its assignments
    _FIELDS = ("z", "out_do_z", "oz", "of", "mid")

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in self._FIELDS]
907
908
909
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (the MSB of tot acts as a
        carry bit) and splits tot into mantissa and overflow bits.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ returns the module's output (the argument is not used)
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        tot = self.i.tot
        z_out, of = self.o.z, self.o.of

        # default: z copied from the input (fields overridden below)
        m.d.comb += z_out.eq(self.i.z)

        with m.If(~self.i.out_do_z):
            with m.If(tot[-1]):
                # MSB set: the sum overflowed.  take the mantissa one
                # bit higher, fold the two lowest bits into sticky and
                # bump the exponent
                m.d.comb += z_out.m.eq(tot[4:])
                m.d.comb += of.m0.eq(tot[4])
                m.d.comb += of.guard.eq(tot[3])
                m.d.comb += of.round_bit.eq(tot[2])
                m.d.comb += of.sticky.eq(tot[1] | tot[0])
                m.d.comb += z_out.e.eq(self.i.z.e + 1)
            with m.Else():
                # MSB clear: no exponent adjustment
                m.d.comb += z_out.m.eq(tot[3:])
                m.d.comb += of.m0.eq(tot[3])
                m.d.comb += of.guard.eq(tot[2])
                m.d.comb += of.round_bit.eq(tot[1])
                m.d.comb += of.sticky.eq(tot[0])

        # pass-through fields
        m.d.comb += [
            self.o.out_do_z.eq(self.i.out_do_z),
            self.o.oz.eq(self.i.oz),
            self.o.mid.eq(self.i.mid),
        ]

        return m
971
972
class FPAddStage1(FPState):
    """ FSM wrapper around FPAddStage1Mod: registers the module's z and
        overflow outputs and always moves on to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # FPAddStage1Mod requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0) # sets to zero when not in add1 state

        # the module publishes its results on its output bundle (mod.o);
        # it has no out_of/out_z attributes of its own
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        # NOTE(review): this assignment is in the same unconditional
        # context as the eq(0) above, so it takes precedence - confirm
        # that is the intent
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
995
996
class FPNormaliseModSingle:
    """ single-cycle normalisation (exponent decrease only).

        When the input mantissa's MSB is zero (in_z.m_msbzero), the
        leading zeros of mantissa+guard+round are counted with a
        PriorityEncoder, the mantissa is shifted up by that amount and
        the exponent decreased to match.  Otherwise the input is passed
        through unchanged.

        in_z / out_z:   number in / out (FPNumBase)
        in_of / out_of: overflow bits in / out (Overflow)
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # elaborate() reads in_of and drives out_of, so both must exist
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        return FPNumBase(self.width, False)

    def ospec(self):
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # this module's number input is in_z (there is no self.i)
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): the shifter is instantiated but never connected
        # in this module; kept so the submodule hierarchy is unchanged
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1063
class FPNorm1Data:
    """ data bundle produced by normalisation: result number z, the
        roundz flag, raw result oz, the out_do_z flag, and mid
    """

    # order in which eq() emits its assignments
    _FIELDS = ("z", "out_do_z", "oz", "roundz", "mid")

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the assignments copying every field from i
        """
        return [getattr(self, field).eq(getattr(i, field))
                for field in self._FIELDS]
1076
1077
class FPNorm1ModSingle:
    """ Combinatorial, single-pass normalisation module.

        Takes an FPAddStage1Data record (number z plus overflow bits "of")
        and produces an FPNorm1Data record.  Depending on the input
        exponent/mantissa flags the mantissa is either shifted up
        (exponent decreased) or shifted down (exponent increased) in one
        step.  mid, out_do_z and oz are passed straight through.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input record (FPAddStage1Data)
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # mantissa plus two extra bits (guard and round) are examined
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        # local copy of the input record
        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the overflow bits come from the low bits of the
                    # shifter output.  (temp_s is only driven in the
                    # "decrease" branch above, so reading it here always
                    # yielded zero.)
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # pass-through fields
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1184
1185
class FPNorm1ModMulti:
    """ Combinatorial normalisation module that moves by ONE bit per pass.

        in_select chooses between the fresh input (in_z / in_of) and the
        fed-back intermediate (temp_z / temp_of).  out_norm is raised
        while a further shift (up or down) is still being applied.

        NOTE(review): FPNorm1Multi calls self.mod.setup(...) but no setup
        method is defined on this class in the visible code - confirm.
    """

    def __init__(self, width, single_cycle=True):
        """ * width: bit-width of the number
            * single_cycle: not used by this class
        """
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        # driven in elaborate(); was previously never created, which made
        # elaborate() fail with AttributeError
        self.out_norm = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1251
1252
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping FPNorm1ModSingle.

        Always moves on to the "round" state.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        """ * width: bit-width of the number
            * id_wid: bit-width of the mid identifier
            * single_cycle: not used by this class
        """
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both width and id_wid: omitting
        # id_wid raised TypeError on construction.
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input record format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"
1275
1276
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1", multi-cycle variant.

        Stays in this state while out_norm is raised, feeding the
        module's output back through temp_z / temp_of each clock, then
        moves on to "round".

        NOTE(review): relies on self.mod.setup(...), which the visible
        FPNorm1ModMulti does not define - confirm against the full file.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number
            * id_wid: accepted but not used by this class
        """
        super().__init__("normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        feedback = (self.in_accept, self.temp_z, self.temp_of)
        results = (self.out_z, self.out_norm)
        self.mod.setup(m, in_z, in_of, norm_stb, *feedback, *results)

        m.d.comb += self.stb.eq(norm_stb)
        # default: ack is zero whenever the normalise_1 state is not active
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        """ per-clock behaviour while in the normalise_1 state
        """
        # accept new input on incoming strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq((~self.ack) & (self.stb))
        # latch this pass's result as next pass's input
        m.d.sync += [
            self.temp_of.eq(self.mod.out_of),
            self.temp_z.eq(self.out_z),
        ]
        with m.If(self.out_norm):
            # still normalising: ack follows in_accept
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1317
1318
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact form): normalise, round,
        correct and pack chained together combinatorially, with the
        packed result registered on the clock.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number
            * id_wid: bit-width of the mid identifier
        """
        super().__init__("normalise_1")
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input record: same as FPNorm1ModSingle's ispec
        """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output record: same as FPPackMod's ospec
        """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        # Normalisation, Rounding Corrections, Pack - in a chain
        stage_classes = (FPNorm1ModSingle, FPRoundMod,
                         FPCorrectionsMod, FPPackMod)
        stages = [cls(self.width, self.id_wid) for cls in stage_classes]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()

        # register the id and the packed result
        m.d.sync += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z),
        ]

    def process(self, i):
        """ returns the registered output record (argument not used)
        """
        return self.out_z

    def action(self, m):
        m.next = "pack_put_z"
1353
1354
class FPRoundData:
    """ Record passed between the round, corrections and pack stages:
        the number z, the bypass flag/value (out_do_z / oz) and the
        mid identifier.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number (also used for oz)
            * id_wid: bit-width of the mid identifier
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z, out_do_z, oz and
            mid from i (any other attribute of i is ignored)
        """
        field_names = ("z", "out_do_z", "oz", "mid")
        return [getattr(self, name).eq(getattr(i, name))
                for name in field_names]
1366
1367
class FPRoundMod:
    """ Combinatorial rounding module.

        Copies the input record to the output, then (unless out_do_z is
        set) adds one to the mantissa when roundz is raised, also adding
        one to the exponent if the input mantissa equals m1s.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input record (FPNorm1Data)
        """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z
        # default: straight copy (z, out_do_z, oz, mid)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.roundz):
                # mantissa up
                m.d.comb += dst.z.m.eq(src.z.m + 1)
                with m.If(src.z.m == src.z.m1s):
                    # mantissa was all 1s: exponent up
                    m.d.comb += dst.z.e.eq(src.z.e + 1)

        return m
1399
1400
class FPRound(FPState):
    """ FSM state "round" wrapping FPRoundMod; the module's output is
        registered into out_z.  Always moves on to "corrections".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number
            * id_wid: bit-width of the mid identifier
        """
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid: omitting id_wid
        # raised TypeError on construction.
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined on FPState in this file;
        # presumably inherited from FPBase - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its output record as "out_z" (it has no
        # "o" attribute), so take mid from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1425
1426
class FPCorrectionsMod:
    """ Combinatorial exponent-correction module.

        Copies the input record to the output; unless out_do_z is set,
        the exponent is replaced with N127 when the input number reports
        is_denormalised.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ returns a new input record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src = self.i
        dst = self.out_z
        m.submodules.corr_in_z = src.z
        m.submodules.corr_out_z = dst.z
        # default: straight copy (z, out_do_z, oz, mid)
        m.d.comb += dst.eq(src)
        with m.If(~src.out_do_z):
            with m.If(src.z.is_denormalised):
                m.d.comb += dst.z.e.eq(src.z.N127)
        return m
1459
1460
class FPCorrections(FPState):
    """ FSM state "corrections" wrapping FPCorrectionsMod; the module's
        output is registered into out_z.  Always moves on to "pack".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number
            * id_wid: bit-width of the mid identifier
        """
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid: omitting
        # id_wid raised TypeError on construction.
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its output record as "out_z" (it has
        # no "o" attribute), so take mid from there.
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
1484
1485
class FPPackData:
    """ Output record of the pack stage: the packed value z (a
        width-bit signal) and the mid identifier.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the packed value
            * id_wid: bit-width of the mid identifier
        """
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]
1494
1495
class FPPackMod:
    """ Combinatorial pack module: turns an FPRoundData record into a
        packed width-bit value (FPPackData).

        When out_do_z is set the bypass value oz is output unchanged;
        otherwise the value is built from the input number (infinity
        if it reports is_overflowed).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ returns a new input record (FPRoundData)
        """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (FPPackData)
        """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ returns the output record (the argument is not used)
        """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        num = self.i.z
        packed = FPNumOut(self.width, False)
        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = packed
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            # normal path: build the result from sign/exponent/mantissa
            with m.If(num.is_overflowed):
                m.d.comb += packed.inf(num.s)
            with m.Else():
                m.d.comb += packed.create(num.s, num.e, num.m)
        with m.Else():
            # bypass path: use the pre-computed value
            m.d.comb += packed.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(packed.v)
        return m
1534
1535
class FPPack(FPState):
    """ FSM state "pack" wrapping FPPackMod; the packed value and id
        are registered into out_z.  Always moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the number
            * id_wid: bit-width of the mid identifier
        """
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid: omitting id_wid
        # raised TypeError on construction.
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input record format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod's output record is "o" (an FPPackData), whose packed
        # value is the signal "z": neither a "v" field nor a mod.out_z
        # attribute exists.
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
1559
1560
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z.z using a stb/ack
        handshake, then moves to to_state.
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: value to output
            * out_z: destination; its "z" attribute must provide
                     v, stb and ack
            * in_mid / out_mid: id source / destination (in_mid may
                     be None, in which case the id is not copied)
            * to_state: next state; "get_ops" when None
        """
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ per-clock behaviour while in this state
        """
        port = self.out_z.z
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += port.v.eq(self.in_z)
        with m.If(port.stb & port.ack):
            # handshake complete: drop strobe and move on
            m.d.sync += port.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += port.stb.eq(1)
1584
1585
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on the output selected from
        out_zs by in_mid, using a stb/ack handshake, then moves to
        to_state.
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source; its "v" attribute is the value to output
            * out_zs: indexable collection of outputs (each with
                      v, stb and ack)
            * in_mid: index into out_zs
            * to_state: next state; "get_ops" when None
        """
        super().__init__(state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ per-clock behaviour while in this state
        """
        selected = self.out_zs[self.in_mid]
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # local copies of the selected output's handshake signals
        m.d.comb += [
            outz_stb.eq(selected.stb),
            outz_ack.eq(selected.ack),
        ]
        m.d.sync += selected.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # handshake complete: drop strobe and move on
            m.d.sync += selected.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += selected.stb.eq(1)
1611
class FPADDBaseData:
    """ Input record for the adder: operands a and b (width bits each)
        plus the mid identifier (id_wid bits).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of each operand
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying a, b and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("a", "b", "mid")]

    def ports(self):
        """ returns the record's signals as a list: a, b, mid
        """
        return [getattr(self, name) for name in ("a", "b", "mid")]
1626
class FPOpData:
    """ Output record for the adder: result z (an FPOp) plus the mid
        identifier (id_wid bits).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width passed to FPOp
            * id_wid: bit-width of the mid identifier
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns the record's members as a list: z, mid
        """
        return [getattr(self, name) for name in ("z", "mid")]
1637
1638
class FPADDBaseMod:
    """ FSM-based floating-point adder: builds a list of FPState objects
        and runs each state's action() inside one nMigen FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()       # input stb/ack handshake
        self.i = self.ispec()       # input record (a, b, mid)
        self.o = self.ospec()       # output record (z, mid)

        self.states = []            # FPState objects, in order of creation

    def ispec(self):
        """ returns a new input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (FPOpData)
        """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t
        # populate self.states (and wire up the stages)
        if self.compact:
            self.get_compact_fragment(m, platform)
        else:
            self.get_longer_fragment(m, platform)

        # one FSM state per registered FPState, keyed on its state_from
        with m.FSM() as fsm:

            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the one-state-per-stage version of the adder.

            NOTE(review): this path reads self.in_mid, self.out_z and
            self.out_mid, none of which __init__ creates, and several
            setup() calls pass more arguments than the setup() methods
            visible in this file accept (e.g. FPNorm1Single.setup,
            FPRound.setup, FPCorrections.setup, FPPack.setup).  It looks
            stale relative to get_compact_fragment - confirm before use.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(self.width, self.id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(self.width, self.id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single-pass or multi-cycle
        if self.single_cycle:
            alm = self.add_state(FPAddAlignSingle(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)
        else:
            alm = self.add_state(FPAddAlignMulti(self.width, self.id_wid))
            alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(self.width, self.id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(self.width, self.id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single-pass or multi-cycle
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(self.width, self.id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(self.width, self.id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(self.width, self.id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(self.width, self.id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output state for the normal (packed) result
        ppz = self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

        # output state for the special-cases result
        pz = self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                                    pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the reduced-state version of the adder: get operands,
            special-cases + denormalise, align + add, normalise-to-pack,
            then output.
        """

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.width, self.id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(self.width, self.id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(self.width, self.id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(self.width, self.id_wid))
        n1.setup(m, alm.a1o)

        # hand the packed value and id to the output handshake state
        ppz = self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                                    n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1758
1759
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps an FPADDBaseMod (itself an FSM) and
        bridges its input/output handshakes to the enclosing FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input record format: that of the wrapped module
        """
        return self.mod.ispec()

    def ospec(self):
        """ output record format: that of the wrapped module
        """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links module to inputs and outputs

            * i: input record, copied to self.i and on to the module
            * add_stb: incoming strobe (registered into self.add_stb)
            * in_mid: accepted but not used here
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ per-clock behaviour while in the "fpadd" state
        """

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
        # (statements that used to follow an unconditional "return" here
        # were unreachable and referenced attributes this class never
        # creates; they have been removed.)
1849
1850
class FPADDStageOut:
    """ Output record of the pipelined add stage: result z (width bits)
        plus the mid identifier (id_wid bits).
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the result
            * id_wid: bit-width of the mid identifier
        """
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying z and mid from i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("z", "mid")]

    def ports(self):
        """ returns the record's signals as a list: z, mid
        """
        return [getattr(self, name) for name in ("z", "mid")]
1861
1862
class PlaceHolder:
    """ Empty attribute container.  Given z and mid attributes it matches
        the format of FPADDStageOut, which allows that class's eq
        function to do assignments from it.
    """
1865
1866
class FPAddBaseStage:
    """ Pipeline stage description: input FPADDBaseData, output
        FPADDStageOut, with process() producing z = a + b.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the operands and result
            * id_wid: bit-width of the mid identifier
        """
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ returns a new input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (FPADDStageOut)
        """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ returns a PlaceHolder whose z is i.a + i.b and whose mid
            is i.mid
        """
        result = PlaceHolder()
        result.z, result.mid = i.a + i.b, i.mid
        return result
1883
1884
class FPADDBasePipe1(UnbufferedPipeline):
    """ UnbufferedPipeline built around a single FPAddBaseStage.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the operands and result
            * id_wid: bit-width of the mid identifier
        """
        super().__init__(FPAddBaseStage(width, id_wid))
1889
1890
class FPADDBasePipe(ControlBase):
    """ ControlBase wrapper around one FPADDBasePipe1.  The connection
        statements returned by connect() are stored at construction and
        added to the comb domain in elaborate().
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the operands and result
            * id_wid: bit-width of the mid identifier
        """
        super().__init__()
        self.pipe1 = FPADDBasePipe1(width, id_wid)
        self._eqs = self.connect([self.pipe1])

    def elaborate(self, platform):
        module = Module()
        module.submodules.pipe1 = self.pipe1
        module.d.comb += self._eqs
        return module
1902
1903
class PriorityCombPipeline(CombMultiInPipeline):
    """ CombMultiInPipeline whose input multiplexer is an
        InputPriorityArbiter.
    """

    def __init__(self, stage, p_len):
        """ * stage: the pipeline stage
            * p_len: number of inputs
        """
        # the arbiter is given a reference to this pipeline, and is
        # created before the base class is initialised
        arbiter = InputPriorityArbiter(self, p_len)
        super().__init__(stage, p_len=p_len, p_mux=arbiter)

    def ports(self):
        """ returns the ports of the input multiplexer
        """
        return self.p_mux.ports()
1911
1912
class FPAddInPassThruStage:
    """ Pass-through stage: input and output are both FPADDBaseData and
        process() returns its argument unchanged.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the operands
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new input record (FPADDBaseData)
        """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (same format as the input)
        """
        return self.ispec()

    def process(self, i):
        """ returns i unchanged
        """
        return i
1919
1920
class FPADDInMuxPipe(PriorityCombPipeline):
    """ Fan-in pipeline: num_rows inputs to one output, using a
        pass-through stage.
    """

    def __init__(self, width, id_width, num_rows):
        """ * width: bit-width of the operands
            * id_width: bit-width of the mid identifier
            * num_rows: number of inputs
        """
        self.num_rows = num_rows
        PriorityCombPipeline.__init__(
            self, FPAddInPassThruStage(width, id_width),
            p_len=self.num_rows)

    def ports(self):
        """ returns, for every input, its i_valid, o_ready and data
            ports, followed by the output's i_ready, o_valid and data
            ports
        """
        collected = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            collected += [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        nxt = self.n
        collected += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return collected
1937
1938
class MuxCombPipeline(CombMultiOutPipeline):
    """ CombMultiOutPipeline in which the stage object doubles as the
        n-way output multiplexer.
    """

    def __init__(self, stage, n_len):
        """ * stage: the pipeline stage (also used as n_mux)
            * n_len: number of outputs
        """
        # HACK: stage is also the n-way multiplexer
        super().__init__(stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        stage.m_id = self.p.i_data.mid

    def ports(self):
        """ returns the ports of self.p_mux

            NOTE(review): this class passes n_mux (not p_mux) to its
            base; whether a p_mux attribute exists here depends on
            CombMultiOutPipeline - confirm.
        """
        return self.p_mux.ports()
1949
1950
class FPAddOutPassThruStage:
    """ Pass-through stage: input and output are both FPADDStageOut and
        process() returns its argument unchanged.
    """

    def __init__(self, width, id_wid):
        """ * width: bit-width of the result
            * id_wid: bit-width of the mid identifier
        """
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ returns a new input record (FPADDStageOut)
        """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ returns a new output record (same format as the input)
        """
        return self.ispec()

    def process(self, i):
        """ returns i unchanged
        """
        return i
1957
1958
class FPADDMuxOutPipe(MuxCombPipeline):
    """ Fan-out pipeline: one input to num_rows outputs, using a
        pass-through stage.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width: bit-width of the result
            * id_wid: bit-width of the mid identifier
            * num_rows: number of outputs
        """
        self.num_rows = num_rows
        MuxCombPipeline.__init__(
            self, FPAddOutPassThruStage(width, id_wid),
            n_len=self.num_rows)

    def ports(self):
        """ returns the input's i_valid, o_ready and data ports,
            followed by, for every output, its i_ready, o_valid and
            data ports
        """
        prev = self.p
        collected = [prev.i_valid, prev.o_ready] + prev.i_data.ports()
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            collected += [nxt.i_ready, nxt.o_valid] + nxt.o_data.ports()
        return collected
1974
1975
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        num_rows inputs fan in (FPADDInMuxPipe) to a single add
        pipeline (FPADDBasePipe), whose result fans out
        (FPADDMuxOutPipe) to num_rows outputs.
    """

    def __init__(self, width, id_wid, num_rows):
        """ * width: bit-width of the operands and result
            * id_wid: bit-width of the mid identifier
            * num_rows: number of inputs and of outputs
        """
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # use pipe in/out as this class in/out
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        module = Module()
        module.submodules.inpipe = self.inpipe
        module.submodules.fpadd = self.fpadd
        module.submodules.outpipe = self.outpipe

        # inpipe -> fpadd -> outpipe
        module.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        module.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return module

    def ports(self):
        """ returns the port list computed at construction
        """
        return self._ports
2004
2005
class ResArray:
    """ Array of rs_sz result outputs (FPOp, named out_z_0, out_z_1, ...)
        together with one input (in_z) and an input id (in_mid).
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width passed to each FPOp
            * id_wid: bit-width of the in_mid identifier
            * rs_sz: number of result outputs.  (Previously read from an
                     undefined name, raising NameError; the default
                     matches FPADD's rs_sz default.)
        """
        self.width = width
        self.id_wid = id_wid
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ links the input value and id to this object's in_z / in_mid
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every result output
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2037
2038
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand pairs / result outputs created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        # operand pairs: in_a_N / in_b_N
        operand_pairs = []
        for idx in range(rs_sz):
            op_a = FPOp(width)
            op_b = FPOp(width)
            op_a.name = "in_a_%d" % idx
            op_b.name = "in_b_%d" % idx
            operand_pairs.append((op_a, op_b))
        self.rs = Array(operand_pairs)

        # results: out_z_N
        results = []
        for idx in range(rs_sz):
            result = FPOp(width)
            result.name = "out_z_%d" % idx
            results.append(result)
        self.res = Array(results)

        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(
            FPADDBase(self.width, self.id_wid, self.single_cycle))
        # create an input spec object for FPADDBase
        abd = ab.ispec()
        m.d.sync += [
            abd.a.eq(geta.out_op),
            abd.b.eq(getb.out_op),
            abd.mid.eq(self.ids.in_mid),
        ]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        # one FSM state per registered FPState, keyed on its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2128
2129
if __name__ == "__main__":
    # Build the full FPADD and hand it to the nmigen command-line driver.
    # (A dead "else" alternative that built FPADDBase directly was
    # removed: it could never run, and it read attributes - in_a, in_b,
    # out_z, in_mid, out_mid - that FPADDBase does not create.)
    alu = FPADD(width=32, id_wid=5, single_cycle=True)
    main(alu, ports=alu.rs[0][0].ports() + \
                    alu.rs[0][1].ports() + \
                    alu.res[0].ports() + \
                    [alu.ids.in_mid, alu.ids.out_mid])
2143
2144
2145 # works... but don't use, just do "python fname.py convert -t v"
2146 #print (verilog.convert(alu, ports=[
2147 # ports=alu.in_a.ports() + \
2148 # alu.in_b.ports() + \
2149 # alu.out_z.ports())