move decode from FPNumOp to SpecialCases
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 from singlepipe import (ControlBase, StageChain, UnbufferedPipeline)
13 from multipipe import CombMultiOutPipeline
14 from multipipe import CombMultiInPipeline, InputPriorityArbiter
15
16 #from fpbase import FPNumShiftMultiRight
17
18
class FPState(FPBase):
    """ common base for the FSM state classes in this file.  keeps the
        name of the state and publishes named inputs / outputs as
        attributes of the instance.
    """

    def __init__(self, state_from):
        # name identifying this state
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the dict as self.inputs, then mirrors every entry
            onto the instance as an attribute of the same name
        """
        self.inputs = inputs
        for name in inputs:
            setattr(self, name, inputs[name])

    def set_outputs(self, outputs):
        """ stores the dict as self.outputs, then mirrors every entry
            onto the instance as an attribute of the same name
        """
        self.outputs = outputs
        for name in outputs:
            setattr(self, name, outputs[name])
32
33
class FPGetSyncOpsMod:
    """ holds num_ops input operands and num_ops output operands.
        "ready" is raised once every bit of stb is set; while both ack
        and ready are set (out_decode) the inputs are copied
        combinatorially onto the outputs.
    """

    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        # one (input, output) pair per operand, created in that order
        pairs = [(Signal(width, reset_less=True),
                  Signal(width, reset_less=True))
                 for _ in range(num_ops)]
        self.in_op = [pair[0] for pair in pairs]
        self.out_op = [pair[1] for pair in pairs]
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # unsigned all-ones constant, num_ops bits wide
        all_strobes = Const(-1, (self.num_ops, False))
        m.d.comb += self.ready.eq(self.stb == all_strobes)
        m.d.comb += self.out_decode.eq(self.ack & self.ready)
        with m.If(self.out_decode):
            for src, dst in zip(self.in_op, self.out_op):
                m.d.comb += dst.eq(src)
        return m

    def ports(self):
        """ inputs first, then outputs, then stb and ack
        """
        return [*self.in_op, *self.out_op, self.stb, self.ack]
63
64
class FPOps(Trigger):
    """ a Trigger carrying num_ops operand Signals (each "width" bits)
        in the Array self.v
    """

    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops

        self.v = Array([Signal(width) for _ in range(num_ops)])

    def ports(self):
        """ operands in index order, followed by ack and stb
        """
        operands = [self.v[idx] for idx in range(self.num_ops)]
        return operands + [self.ack, self.stb]
83
84
class InputGroup:
    """ a group of num_rows FPGetSyncOpsMod rows feeding one FPOps
        output.  a PriorityEncoder over the rows' "ready" flags picks
        the row whose operands are copied to out_op; the index of the
        chosen row is recorded in self.mid (multiplex id).

        * width:    bit-width of each operand
        * num_ops:  operands per row
        * num_rows: number of rows being multiplexed
    """

    def __init__(self, width, num_ops=2, num_rows=4):
        self.width = width
        self.num_ops = num_ops
        self.num_rows = num_rows
        # number of bits needed to hold the highest row index.  integer
        # arithmetic: identical to int(log(n)/log(2)) for powers of two,
        # but not subject to float rounding and not too narrow when
        # num_rows is not a power of two.
        self.mmax = (num_rows - 1).bit_length()
        self.mid = Signal(self.mmax, reset_less=True)  # multiplex id
        self.rs = Array([FPGetSyncOpsMod(width, num_ops)
                         for _ in range(num_rows)])

        self.out_op = FPOps(width, num_ops)

    def elaborate(self, platform):
        m = Module()

        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe
        m.submodules.out_op = self.out_op
        m.submodules += self.rs

        # connect priority encoder: one input bit per row's ready flag
        in_ready = [self.rs[i].ready for i in range(self.num_rows)]
        m.d.comb += pe.i.eq(Cat(*in_ready))

        active = Signal(reset_less=True)
        out_en = Signal(reset_less=True)
        m.d.comb += active.eq(~pe.n)  # encoder active
        m.d.comb += out_en.eq(active & self.out_op.trigger)

        # encoder active: ack relevant input, record MID, pass output
        with m.If(out_en):
            rs = self.rs[pe.o]
            m.d.sync += self.mid.eq(pe.o)
            m.d.sync += rs.ack.eq(0)
            m.d.sync += self.out_op.stb.eq(0)
            for j in range(self.num_ops):
                m.d.sync += self.out_op.v[j].eq(rs.out_op[j])
        with m.Else():
            m.d.sync += self.out_op.stb.eq(1)
            # nothing transferred this cycle: set ack on every row
            for i in range(self.num_rows):
                m.d.sync += self.rs[i].ack.eq(1)

        return m

    def ports(self):
        """ out_op ports, then each row's operands and stb, then mid
        """
        res = []
        for i in range(self.num_rows):
            inop = self.rs[i]
            res += inop.in_op + [inop.stb]
        return self.out_op.ports() + res + [self.mid]
140
141
class FPGetOpMod:
    """ passes in_op.v through to out_op (combinatorially) while both
        ack and stb of in_op are set; out_decode flags that condition
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        handshake = self.in_op.ack & self.in_op.stb
        m.d.comb += self.out_decode.eq(handshake)
        m.submodules.get_op_in = self.in_op
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
158
159
class FPGetOp(FPState):
    """ gets operand: FSM state wrapping an FPGetOpMod.  once the
        module signals out_decode the operand is latched into out_op
        and the FSM moves to out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ links module to inputs and outputs.  the module is added
            as a submodule under the name of this state.
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.eq(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        with m.If(self.out_decode):
            # operand accepted: drop ack, capture value, move on
            m.next = self.out_state
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
        with m.Else():
            # still waiting: keep ack raised
            m.d.sync += self.in_op.ack.eq(1)
188
189
class FPNumBase2Ops:
    """ data record: two FPNumBase operands (a, b) plus a multiplex id
    """

    def __init__(self, width, id_wid, m_extra=True):
        self.a = FPNumBase(width, m_extra)
        self.b = FPNumBase(width, m_extra)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying a, b and mid (in that order)
            from record i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("a", "b", "mid")]

    def ports(self):
        return [getattr(self, name) for name in ("a", "b", "mid")]
202
203
class FPADDBaseData:
    """ data record: two raw operand Signals (a, b), each "width" bits,
        plus a multiplex id of id_wid bits
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.a = Signal(width)
        self.b = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying a, b and mid (in that order)
            from record i
        """
        return [getattr(self, name).eq(getattr(i, name))
                for name in ("a", "b", "mid")]

    def ports(self):
        return [getattr(self, name) for name in ("a", "b", "mid")]
218
219
class FPGet2OpMod(Trigger):
    """ Trigger-controlled pass-through of an FPADDBaseData record:
        while self.trigger is set, o is driven (combinatorially) from i
    """

    def __init__(self, width, id_wid):
        Trigger.__init__(self)
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ a fresh input record """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ a fresh output record """
        return FPADDBaseData(self.width, self.id_wid)

    def process(self, i):
        """ the argument is not used: the module's own output is returned
        """
        return self.o

    def elaborate(self, platform):
        m = Trigger.elaborate(self, platform)
        with m.If(self.trigger):
            m.d.comb += self.o.eq(self.i)
        return m
244
245
class FPGet2Op(FPState):
    """ gets operands: FSM state wrapping an FPGet2OpMod.  when the
        module triggers, its output is latched into self.o and the FSM
        moves to out_state.
    """

    def __init__(self, in_state, out_state, width, id_wid):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width, id_wid)
        self.o = self.mod.ospec()
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, i, in_stb, in_ack):
        """ links module to inputs and outputs
        """
        mod = self.mod
        m.submodules.get_ops = mod
        m.d.comb += [
            mod.i.eq(i),
            mod.stb.eq(in_stb),
            self.out_ack.eq(mod.ack),
            self.out_decode.eq(mod.trigger),
            in_ack.eq(mod.ack),
        ]

    def action(self, m):
        with m.If(self.out_decode):
            # operands accepted: drop ack, capture them, move on
            m.next = self.out_state
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.o.eq(self.mod.o)
        with m.Else():
            # still waiting: keep ack raised
            m.d.sync += self.mod.ack.eq(1)
278
279
class FPSCData:
    """ data record produced by the special-cases stage: decoded
        operands a and b, result number z, packed result oz, the
        out_do_z flag and the multiplex id
    """

    def __init__(self, width, id_wid):
        self.a = FPNumBase(width, True)
        self.b = FPNumBase(width, True)
        self.z = FPNumOut(width, False)
        self.oz = Signal(width, reset_less=True)
        self.out_do_z = Signal(reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from record i
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
293
294
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        decodes the two raw operands, then either produces the result
        directly in o.z / o.oz (o.out_do_z set) or passes the decoded
        operands on in o.a / o.b (o.out_do_z clear).
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ a fresh input record (raw operands) """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ a fresh output record """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.specialcases = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ the argument is not used: the module's own output is returned
        """
        return self.o

    def elaborate(self, platform):
        m = Module()
        out = self.o

        m.submodules.sc_out_z = out.z

        # decode: XXX really should move to separate stage
        dec_a = FPNumIn(None, self.width)
        dec_b = FPNumIn(None, self.width)
        m.submodules.sc_decode_a = dec_a
        m.submodules.sc_decode_b = dec_b
        m.d.comb += dec_a.decode(self.i.a)
        m.d.comb += dec_b.decode(self.i.b)

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(dec_a.s != dec_b.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(dec_a.m == dec_b.m)

        # if a is NaN or b is NaN return NaN
        with m.If(dec_a.is_nan | dec_b.is_nan):
            m.d.comb += [out.out_do_z.eq(1), out.z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling
        # under review

        ## if a is zero and b is NaN return -b
        #with m.If(a.is_zero & (a.s==0) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(b.s, b.e, Cat(b.m[3:-2], ~b.m[0]))

        ## if b is zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==0) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s, a.e, Cat(a.m[3:-2], ~a.m[0]))

        ## if a is -zero and b is NaN return -b
        #with m.Elif(a.is_zero & (a.s==1) & b.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, b.e, Cat(b.m[3:-2], 1))

        ## if b is -zero and a is NaN return -a
        #with m.Elif(b.is_zero & (b.s==1) & a.is_nan):
        #    m.d.comb += self.o.out_do_z.eq(1)
        #    m.d.comb += z.create(a.s & b.s, a.e, Cat(a.m[3:-2], 1))

        # if a is inf return inf (or NaN)
        with m.Elif(dec_a.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(dec_a.s)]
            # if a is inf and signs don't match return NaN
            with m.If(dec_b.exp_128 & s_nomatch):
                m.d.comb += out.z.nan(0)

        # if b is inf return inf
        with m.Elif(dec_b.is_inf):
            m.d.comb += [out.out_do_z.eq(1), out.z.inf(dec_b.s)]

        # if a is zero and b zero return signed-a/b
        with m.Elif(dec_a.is_zero & dec_b.is_zero):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.create(dec_a.s & dec_b.s, dec_b.e, dec_b.m[3:-1]),
            ]

        # if a is zero return b
        with m.Elif(dec_a.is_zero):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.create(dec_b.s, dec_b.e, dec_b.m[3:-1]),
            ]

        # if b is zero return a
        with m.Elif(dec_b.is_zero):
            m.d.comb += [
                out.out_do_z.eq(1),
                out.z.create(dec_a.s, dec_a.e, dec_a.m[3:-1]),
            ]

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (dec_a.e == dec_b.e)):
            m.d.comb += [out.out_do_z.eq(1), out.z.zero(0)]

        # Denormalised Number checks next, so pass a/b data through
        with m.Else():
            m.d.comb += [
                out.out_do_z.eq(0),
                out.a.eq(dec_a),
                out.b.eq(dec_b),
            ]

        # packed result and multiplex id are always driven
        m.d.comb += out.oz.eq(out.z.v)
        m.d.comb += out.mid.eq(self.i.mid)

        return m
413
414
class FPID:
    """ optional multiplex-id pair.  when id_wid is non-zero, in_mid
        and out_mid are Signals of that width; otherwise both are None
        and idsync() does nothing.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ copies in_mid to out_mid in the sync domain (if present).

            tests the signal itself rather than "id_wid is not None":
            the constructor also leaves the signals as None for
            id_wid == 0, which the old test let through to None.eq()
        """
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
428
429
class FPAddSpecialCases(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        FSM state wrapping FPAddSpecialCasesMod: goes to "put_z" when
        the module produced the result itself (out_do_z), otherwise to
        "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        # the module requires both width and id_wid
        self.mod = FPAddSpecialCasesMod(width, id_wid)
        self.out_z = self.mod.ospec()
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)
        # early-out flag comes straight from the module's output record
        m.d.comb += self.out_do_z.eq(self.mod.o.out_do_z)
        m.d.sync += self.out_z.z.v.eq(self.mod.o.z.v)  # only take the output
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)  # (and mid)

    def action(self, m):
        # the multiplex id is already latched in out_z.mid by setup(),
        # so there is no separate id-sync step here
        with m.If(self.out_do_z):
            m.next = "put_z"
        with m.Else():
            m.next = "denormalise"
455
456
class FPAddSpecialCasesDeNorm(FPState):
    """ special cases: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        chains FPAddSpecialCasesMod into FPAddDeNormMod and latches the
        de-normalisation output into self.o.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        self.smod = FPAddSpecialCasesMod(width, id_wid)
        self.dmod = FPAddDeNormMod(width, id_wid)
        self.o = self.ospec()

    def ispec(self):
        """ input record: that of the special-cases module """
        return self.smod.ispec()

    def ospec(self):
        """ output record: that of the de-normalisation module """
        return self.dmod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        special, denorm = self.smod, self.dmod

        # these only needed for break-out (early-out)
        # out_z = self.smod.ospec()
        # out_do_z = Signal(reset_less=True)
        special.setup(m, i)
        denorm.setup(m, special.o)
        #m.d.comb += out_do_z.eq(self.smod.o.out_do_z)

        # out_do_z=True, only needed for early-out (split pipeline)
        #m.d.sync += out_z.z.v.eq(self.smod.o.z.v) # only take output
        #m.d.sync += out_z.mid.eq(self.smod.o.mid) # (and mid)

        # out_do_z=False
        m.d.sync += self.o.eq(denorm.o)

    def process(self, i):
        """ the argument is not used: the latched output is returned
        """
        return self.o

    def action(self, m):
        # early-out to "put_z" on out_do_z is currently disabled:
        #with m.If(self.out_do_z):
        #    m.next = "put_z"
        #with m.Else():
        m.next = "align"
500
501
class FPAddDeNormMod(FPState):
    """ de-normalisation module.  unless out_do_z is set, a and b are
        copied through with one adjustment each: when exp_n127 is set
        the exponent is replaced by N126, otherwise the top mantissa
        bit is set.  z, oz, out_do_z and mid always pass through.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ a fresh input record """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ a fresh output record """
        return FPSCData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.denormalise = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.o
        m.submodules.denorm_in_a = src.a
        m.submodules.denorm_in_b = src.b
        m.submodules.denorm_out_a = dst.a
        m.submodules.denorm_out_b = dst.b

        with m.If(~src.out_do_z):
            # identical treatment for operand a, then operand b
            for num_in, num_out in ((src.a, dst.a), (src.b, dst.b)):
                m.d.comb += num_out.eq(num_in)
                with m.If(num_in.exp_n127):
                    # limit exponent
                    m.d.comb += num_out.e.eq(num_in.N126)
                with m.Else():
                    # set top mantissa bit
                    m.d.comb += num_out.m[-1].eq(1)

        m.d.comb += [
            dst.mid.eq(src.mid),
            dst.z.eq(src.z),
            dst.out_do_z.eq(src.out_do_z),
            dst.oz.eq(src.oz),
        ]

        return m
549
550
class FPAddDeNorm(FPState):
    """ FSM state wrapping FPAddDeNormMod: latches the module's
        de-normalised operands into out_a / out_b, then moves to "align"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        # the module requires both width and id_wid
        self.mod = FPAddDeNormMod(width, id_wid)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # the module exposes its results through its output record "o"
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        # Denormalised Number checks
        m.next = "align"
570
571
class FPAddAlignMultiMod(FPState):
    """ alignment step: compares the exponents of in_a and in_b.  the
        operand with the smaller exponent goes through shift_down();
        the other is copied unchanged.  exp_eq is raised when the
        exponents are equal.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting,
        #       it *STAYS* in the align state until exponents match

        # defaults: exponents not (yet) equal, operands passed through
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.eq(self.in_a),
            self.out_b.eq(self.in_b),
        ]
        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]
        # exponent of a greater than b: shift b down
        with m.If(agtb):
            m.d.comb += self.out_b.shift_down(self.in_b)
        # exponent of b greater than a: shift a down
        with m.Elif(altb):
            m.d.comb += self.out_a.shift_down(self.in_a)
        # exponents equal: move to next stage.
        with m.Else():
            m.d.comb += self.exp_eq.eq(1)
        return m
612
613
class FPAddAlignMulti(FPState):
    """ FSM state wrapping FPAddAlignMultiMod: latches the module's
        outputs each clock and only moves on to "add_0" once the
        module reports equal exponents
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b):
        """ links module to inputs and outputs
        """
        mod = self.mod
        m.submodules.align = mod
        m.d.comb += [
            mod.in_a.eq(in_a),
            mod.in_b.eq(in_b),
            #self.out_a.eq(mod.out_a),
            #self.out_b.eq(mod.out_b),
            self.exp_eq.eq(mod.exp_eq),
        ]
        m.d.sync += [
            self.out_a.eq(mod.out_a),
            self.out_b.eq(mod.out_b),
        ]

    def action(self, m):
        # no transition is requested until the exponents are equal
        with m.If(self.exp_eq):
            m.next = "add_0"
638
639
class FPNumIn2Ops:
    """ data record: operands a and b (as FPNumIn), result number z,
        the out_do_z flag, packed result oz and the multiplex id
    """

    def __init__(self, width, id_wid):
        self.a = FPNumIn(None, width)
        self.b = FPNumIn(None, width)
        self.z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from record i
        """
        fields = ("z", "out_do_z", "oz", "a", "b", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
653
654
class FPAddAlignSingleMod:
    """ single-cycle alignment module: see elaborate()
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ a fresh input record """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ a fresh output record """
        return FPNumIn2Ops(self.width, self.id_wid)

    def process(self, i):
        """ the argument is not used: the module's own output is returned
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.align = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()
        src, dst = self.i, self.o

        m.submodules.align_in_a = src.a
        m.submodules.align_in_b = src.b
        m.submodules.align_out_a = dst.a
        m.submodules.align_out_b = dst.b

        # temporary (muxed) input and output to be shifted
        shift_in = FPNumBase(self.width)
        shift_out = FPNumIn(None, self.width)
        espec = (len(src.a.e), True)
        msr = MultiShiftRMerge(src.a.m_width, espec)
        m.submodules.align_t_in = shift_in
        m.submodules.align_t_out = shift_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)   # a.e - b.e
        ediffr = Signal(espec, reset_less=True)  # b.e - a.e
        tdiff = Signal(espec, reset_less=True)   # difference fed to shifter
        elz = Signal(reset_less=True)            # a.e < b.e
        egz = Signal(reset_less=True)            # a.e > b.e

        # connect multi-shifter to temporaries' mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(shift_in.m),
            msr.diff.eq(tdiff),
            shift_out.m.eq(msr.m),
            shift_out.e.eq(shift_in.e + tdiff),
            shift_out.s.eq(shift_in.s),
        ]

        m.d.comb += [
            ediff.eq(src.a.e - src.b.e),
            ediffr.eq(src.b.e - src.a.e),
            elz.eq(src.a.e < src.b.e),
            egz.eq(src.a.e > src.b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [dst.a.eq(src.a), dst.b.eq(src.b)]
        # only one shifter (muxed)
        #m.d.comb += t_out.shift_down_multi(tdiff, t_inp)
        with m.If(~src.out_do_z):
            # exponent of a greater than b: shift b down
            with m.If(egz):
                m.d.comb += [
                    shift_in.eq(src.b),
                    tdiff.eq(ediff),
                    dst.b.eq(shift_out),
                    dst.b.s.eq(src.b.s),  # whoops forgot sign
                ]
            # exponent of b greater than a: shift a down
            with m.Elif(elz):
                m.d.comb += [
                    shift_in.eq(src.a),
                    tdiff.eq(ediffr),
                    dst.a.eq(shift_out),
                    dst.a.s.eq(src.a.s),  # whoops forgot sign
                ]

        # everything else passes straight through
        m.d.comb += [
            dst.mid.eq(src.mid),
            dst.z.eq(src.z),
            dst.out_do_z.eq(src.out_do_z),
            dst.oz.eq(src.oz),
        ]

        return m
748
749
class FPAddAlignSingle(FPState):
    """ FSM state wrapping FPAddAlignSingleMod: latches the aligned
        operands into out_a / out_b, then moves to "add_0"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.mod = FPAddAlignSingleMod(width, id_wid)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: could be done as comb
        # the module exposes its results through its output record "o"
        m.d.sync += self.out_a.eq(self.mod.o.a)
        m.d.sync += self.out_b.eq(self.mod.o.b)

    def action(self, m):
        m.next = "add_0"
769
770
class FPAddAlignSingleAdd(FPState):
    """ chains the single-cycle align, add-stage-0 and add-stage-1
        modules combinatorially and latches the final result in a1o
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        self.width = width
        self.id_wid = id_wid
        self.a1o = self.ospec()

    def ispec(self):
        # must match FPAddAlignSingleMod.ispec(): the first module of the
        # chain copies z, out_do_z and oz as well as a, b and mid
        return FPSCData(self.width, self.id_wid)  # AlignSingle ispec

    def ospec(self):
        return FPAddStage1Data(self.width, self.id_wid)  # AddStage1 ospec

    def setup(self, m, i):
        """ links module to inputs and outputs
        """

        # chain AddAlignSingle, AddStage0 and AddStage1
        mod = FPAddAlignSingleMod(self.width, self.id_wid)
        a0mod = FPAddStage0Mod(self.width, self.id_wid)
        a1mod = FPAddStage1Mod(self.width, self.id_wid)

        chain = StageChain([mod, a0mod, a1mod])
        chain.setup(m, i)

        m.d.sync += self.a1o.eq(a1mod.o)

    def process(self, i):
        """ the argument is not used: the latched output is returned
        """
        return self.a1o

    def action(self, m):
        m.next = "normalise_1"
804
805
class FPAddStage0Data:
    """ data record produced by add stage 0: result number z, the
        out_do_z flag, packed result oz, the mantissa total "tot"
        (z.m_width + 4 bits) and the multiplex id
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.tot = Signal(self.z.m_width + 4, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from record i
        """
        fields = ("z", "out_do_z", "oz", "tot", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
818
819
class FPAddStage0Mod:
    """ add stage 0: unless out_do_z is set, adds the mantissas when
        the signs are equal, otherwise subtracts the smaller mantissa
        from the larger and takes the sign of the larger operand
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ a fresh input record """
        return FPSCData(self.width, self.id_wid)

    def ospec(self):
        """ a fresh output record """
        return FPAddStage0Data(self.width, self.id_wid)

    def process(self, i):
        """ the argument is not used: the module's own output is returned
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add0 = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.o
        m.submodules.add0_in_a = src.a
        m.submodules.add0_in_b = src.b
        m.submodules.add0_out_z = dst.z

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs equal
        mge = Signal(reset_less=True)  # a mantissa >= b mantissa
        am0 = Signal(len(src.a.m)+1, reset_less=True)
        bm0 = Signal(len(src.b.m)+1, reset_less=True)
        m.d.comb += [
            seq.eq(src.a.s == src.b.s),
            mge.eq(src.a.m >= src.b.m),
            am0.eq(Cat(src.a.m, 0)),
            bm0.eq(Cat(src.b.m, 0)),
        ]
        with m.If(~src.out_do_z):
            m.d.comb += dst.z.e.eq(src.a.e)
            # same-sign (both negative or both positive) add mantissas
            with m.If(seq):
                m.d.comb += dst.tot.eq(am0 + bm0)
                m.d.comb += dst.z.s.eq(src.a.s)
            # a mantissa greater than b, use a
            with m.Elif(mge):
                m.d.comb += dst.tot.eq(am0 - bm0)
                m.d.comb += dst.z.s.eq(src.a.s)
            # b mantissa greater than a, use b
            with m.Else():
                m.d.comb += dst.tot.eq(bm0 - am0)
                m.d.comb += dst.z.s.eq(src.b.s)

        # pass-through fields
        m.d.comb += [
            dst.oz.eq(src.oz),
            dst.out_do_z.eq(src.out_do_z),
            dst.mid.eq(src.mid),
        ]
        return m
884
885
class FPAddStage0(FPState):
    """ First stage of add.  covers same-sign (add) and subtract
        special-casing when mantissas are greater or equal, to
        give greatest accuracy.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        # the module requires both width and id_wid
        self.mod = FPAddStage0Mod(width, id_wid)
        self.o = self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += self.o.eq(self.mod.o)

    def action(self, m):
        m.next = "add_1"
907
908
class FPAddStage1Data:
    """ data record produced by add stage 1: result number z, the
        out_do_z flag, packed result oz, an Overflow record "of" and
        the multiplex id
    """

    def __init__(self, width, id_wid):
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.of = Overflow()
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from record i
        """
        fields = ("z", "out_do_z", "oz", "of", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
921
922
923
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.
        detects when tot sum is too big (tot[27] is kinda a carry bit)
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ a fresh input record """
        return FPAddStage0Data(self.width, self.id_wid)

    def ospec(self):
        """ a fresh output record """
        return FPAddStage1Data(self.width, self.id_wid)

    def process(self, i):
        """ the argument is not used: the module's own output is returned
        """
        return self.o

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.o.of

        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        src, dst = self.i, self.o
        tot = src.tot

        # default: z copied through (mantissa / exponent overridden below)
        m.d.comb += dst.z.eq(src.z)
        with m.If(~src.out_do_z):
            # tot[-1] (MSB) gets set when the sum overflows. shift result down
            with m.If(tot[-1]):
                m.d.comb += [
                    dst.z.m.eq(tot[4:]),
                    dst.of.m0.eq(tot[4]),
                    dst.of.guard.eq(tot[3]),
                    dst.of.round_bit.eq(tot[2]),
                    dst.of.sticky.eq(tot[1] | tot[0]),
                    dst.z.e.eq(src.z.e + 1),
                ]
            # tot[-1] (MSB) zero case
            with m.Else():
                m.d.comb += [
                    dst.z.m.eq(tot[3:]),
                    dst.of.m0.eq(tot[3]),
                    dst.of.guard.eq(tot[2]),
                    dst.of.round_bit.eq(tot[1]),
                    dst.of.sticky.eq(tot[0]),
                ]

        # pass-through fields
        m.d.comb += [
            dst.out_do_z.eq(src.out_do_z),
            dst.oz.eq(src.oz),
            dst.mid.eq(src.mid),
        ]

        return m
985
986
class FPAddStage1(FPState):
    """ FSM state wrapping FPAddStage1Mod: latches the module's z and
        overflow outputs, then moves to "normalise_1"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        # the module requires both width and id_wid
        self.mod = FPAddStage1Mod(width, id_wid)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        m.d.sync += self.norm_stb.eq(0)  # sets to zero when not in add1 state

        # the module exposes its results through its output record "o"
        m.d.sync += self.out_of.eq(self.mod.o.of)
        m.d.sync += self.out_z.eq(self.mod.o.z)
        m.d.sync += self.norm_stb.eq(1)

    def action(self, m):
        m.next = "normalise_1"
1009
1010
class FPNormaliseModSingle:
    """ single-cycle normalisation: when the mantissa MSB of the input
        is zero, counts leading zeros (with a PriorityEncoder over the
        bit-reversed mantissa), shifts the mantissa up by that amount
        and decreases the exponent to match
    """

    def __init__(self, width):
        self.width = width
        self.in_z = self.ispec()
        self.out_z = self.ospec()
        # overflow records read and driven by elaborate(): previously
        # referenced there without ever being created
        self.in_of = Overflow()
        self.out_of = Overflow()

    def ispec(self):
        """ a fresh input number """
        return FPNumBase(self.width, False)

    def ospec(self):
        """ a fresh output number """
        return FPNumBase(self.width, False)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise = self
        # the input of this module is in_z (there is no "self.i")
        m.d.comb += self.in_z.eq(i)

    def elaborate(self, platform):
        m = Module()

        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_in_z = self.in_z

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        # NOTE(review): registered as a submodule but not connected
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.eq(self.in_z)
        m.d.comb += in_of.eq(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation decrease condition
        decrease = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # inverted
                clz.eq(pe.o),                   # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
            ]

        return m
1077
class FPNorm1Data:
    """ data record produced by normalisation: the roundz flag, result
        number z, the out_do_z flag, packed result oz and the
        multiplex id
    """

    def __init__(self, width, id_wid):
        self.roundz = Signal(reset_less=True)
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ list of assignments copying every field from record i
        """
        fields = ("z", "out_do_z", "oz", "roundz", "mid")
        return [getattr(self, name).eq(getattr(i, name)) for name in fields]
1090
1091
class FPNorm1ModSingle:
    """ Combinatorial (single-pass) normalisation module.

    Takes an FPAddStage1Data record and produces an FPNorm1Data record.
    When the input mantissa has leading zeros the mantissa is shifted up
    and the exponent decreased; when the exponent is below the minimum
    the mantissa is shifted down (through MultiShiftRMerge) and the
    exponent increased.  Nothing is altered when ``out_do_z`` is set.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPNorm1Data(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.normalise_1 = self
        m.d.comb += self.i.eq(i)

    def process(self, i):
        """ the result is always available on self.o """
        return self.o

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.o.z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        of = Overflow()
        m.d.comb += self.o.roundz.eq(of.roundz)

        m.submodules.norm1_out_z = self.o.z
        m.submodules.norm1_out_overflow = of
        m.submodules.norm1_in_z = self.i.z
        m.submodules.norm1_in_overflow = self.i.of

        i = self.ispec()
        m.submodules.norm1_insel_z = i.z
        m.submodules.norm1_insel_overflow = i.of

        espec = (len(i.z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += i.eq(self.i)
        # initialise out from in (overridden below)
        m.d.comb += self.o.z.eq(i.z)
        m.d.comb += of.eq(i.of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(i.z.m_msbzero & i.z.exp_gt_n126)
        m.d.comb += increase.eq(i.z.exp_lt_n126)
        # decrease exponent
        with m.If(~self.i.out_do_z):
            with m.If(decrease):
                # *sigh* not entirely obvious: count leading zeros (clz)
                # with a PriorityEncoder: to find from the MSB
                # we reverse the order of the bits.
                temp_m = Signal(mwid, reset_less=True)
                temp_s = Signal(mwid+1, reset_less=True)
                clz = Signal((len(i.z.e), True), reset_less=True)
                # make sure that the amount to decrease by does NOT
                # go below the minimum non-INF/NaN exponent
                limclz = Mux(i.z.exp_sub_n126 > pe.o, pe.o,
                             i.z.exp_sub_n126)
                m.d.comb += [
                    # cat round and guard bits back into the mantissa
                    temp_m.eq(Cat(i.of.round_bit, i.of.guard, i.z.m)),
                    pe.i.eq(temp_m[::-1]),          # inverted
                    clz.eq(limclz),                 # count zeros from MSB down
                    temp_s.eq(temp_m << clz),       # shift mantissa UP
                    self.o.z.e.eq(i.z.e - clz),     # DECREASE exponent
                    self.o.z.m.eq(temp_s[2:]),      # exclude bits 0&1
                    of.m0.eq(temp_s[2]),            # copy of mantissa[0]
                    # overflow in bits 0..1: got shifted too (leave sticky)
                    of.guard.eq(temp_s[1]),         # guard
                    of.round_bit.eq(temp_s[0]),     # round
                ]
            # increase exponent
            with m.Elif(increase):
                temp_m = Signal(mwid+1, reset_less=True)
                m.d.comb += [
                    temp_m.eq(Cat(i.of.sticky, i.of.round_bit, i.of.guard,
                                  i.z.m)),
                    ediff_n126.eq(i.z.N126 - i.z.e),
                    # connect multi-shifter to inp/out mantissa (and ediff)
                    msr.inp.eq(temp_m),
                    msr.diff.eq(ediff_n126),
                    self.o.z.m.eq(msr.m[3:]),
                    # the shifted value lives in msr.m: temp_s belongs to
                    # the "decrease" branch and is not driven here, so the
                    # overflow bits must be taken from the shifter output.
                    of.m0.eq(msr.m[3]),             # copy of mantissa[0]
                    # overflow in bits 0..2: got shifted too
                    of.guard.eq(msr.m[2]),          # guard
                    of.round_bit.eq(msr.m[1]),      # round
                    of.sticky.eq(msr.m[0]),         # sticky
                    self.o.z.e.eq(i.z.e + ediff_n126),
                ]

        # pass-through of the id and of the special-case bypass data
        m.d.comb += self.o.mid.eq(self.i.mid)
        m.d.comb += self.o.out_do_z.eq(self.i.out_do_z)
        m.d.comb += self.o.oz.eq(self.i.oz)

        return m
1198
1199
class FPNorm1ModMulti:
    """ One-bit-per-pass normalisation module.

    Each pass shifts the mantissa by a single bit (up or down) and
    adjusts the exponent by one.  ``out_norm`` is raised while a further
    pass is still required.  ``in_select`` chooses between the fresh
    input (in_z/in_of) and the previous pass result (temp_z/temp_of).
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        # driven in elaborate(): must exist before elaboration
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += in_z.eq(self.in_z)
            m.d.comb += in_of.eq(self.in_of)
        with m.Else():
            m.d.comb += in_z.eq(self.temp_z)
            m.d.comb += in_of.eq(self.temp_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.eq(in_z)
        m.d.comb += self.out_of.eq(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase) # loop-end
        # decrease exponent
        with m.If(decrease):
            m.d.comb += [
                self.out_z.e.eq(in_z.e - 1),  # DECREASE exponent
                self.out_z.m.eq(in_z.m << 1), # shift mantissa UP
                self.out_z.m[0].eq(in_of.guard), # steal guard (was tot[2])
                self.out_of.guard.eq(in_of.round_bit), # round (was tot[1])
                self.out_of.round_bit.eq(0),        # reset round bit
                self.out_of.m0.eq(in_of.guard),
            ]
        # increase exponent
        with m.Elif(increase):
            m.d.comb += [
                self.out_z.e.eq(in_z.e + 1),  # INCREASE exponent
                self.out_z.m.eq(in_z.m >> 1), # shift mantissa DOWN
                self.out_of.guard.eq(in_z.m[0]),
                self.out_of.m0.eq(in_z.m[1]),
                self.out_of.round_bit.eq(in_of.guard),
                self.out_of.sticky.eq(in_of.sticky | in_of.round_bit)
            ]

        return m
1265
1266
class FPNorm1Single(FPState):
    """ FSM state "normalise_1" wrapping the single-pass normaliser.

    Moves unconditionally on to the "round" state.
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPState.__init__(self, "normalise_1")
        # FPNorm1ModSingle requires both width and id_wid
        self.mod = FPNorm1ModSingle(width, id_wid)
        self.o = self.ospec()
        self.out_z = FPNumBase(width, False)
        self.out_roundz = Signal(reset_less=True)

    def ispec(self):
        """ input format: that of the wrapped module """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

    def action(self, m):
        m.next = "round"
1289
1290
class FPNorm1Multi(FPState):
    """ FSM state "normalise_1" wrapping the multi-pass normaliser.

    Stays in this state while ``out_norm`` is raised, then moves on to
    "round", latching the rounding flag.
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb):
        """ links module to inputs and outputs
        """
        # NOTE(review): FPNorm1ModMulti, as visible in this file section,
        # does not define setup() - confirm where this is provided.
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # default: ack is cleared whenever the state does not set it
        m.d.sync += self.ack.eq(0)

    def action(self, m):
        # accept new input on strobe HIGH and ack LOW
        m.d.comb += self.in_accept.eq(self.stb & ~self.ack)
        # remember this pass's result for the next pass
        m.d.sync += self.temp_of.eq(self.mod.out_of)
        m.d.sync += self.temp_z.eq(self.out_z)
        with m.If(self.out_norm):
            # another pass needed: ack only on the accepting cycle
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done).
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1331
1332
class FPNormToPack(FPState):
    """ FSM state "normalise_1" (compact variant).

    Chains normalisation, rounding, corrections and packing together
    combinatorially, registers the packed result, and then moves on to
    "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "normalise_1")
        self.id_wid = id_wid
        self.width = width

    def ispec(self):
        """ input format: same as FPNorm1ModSingle's ispec """
        return FPAddStage1Data(self.width, self.id_wid)

    def ospec(self):
        """ output format: same as FPPackMod's ospec """
        return FPPackData(self.width, self.id_wid)

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        args = (self.width, self.id_wid)

        # Normalisation, Rounding Corrections, Pack - in a chain
        stages = [
            FPNorm1ModSingle(*args),
            FPRoundMod(*args),
            FPCorrectionsMod(*args),
            FPPackMod(*args),
        ]
        StageChain(stages).setup(m, i)

        packer = stages[-1]
        self.out_z = packer.ospec()

        # register the id and the packed result
        m.d.sync += [
            self.out_z.mid.eq(packer.o.mid),
            self.out_z.z.eq(packer.o.z),
        ]

    def process(self, i):
        """ the registered, packed result """
        return self.out_z

    def action(self, m):
        m.next = "pack_put_z"
1367
1368
class FPRoundData:
    """ Data record passed between the round, corrections and pack stages.
    """

    def __init__(self, width, id_wid):
        """ Create the record's signals.

        * width: bit-width passed to FPNumBase and used for ``oz``
        * id_wid: bit-width of the ``mid`` identifier
        """
        self.z = FPNumBase(width, False)
        self.out_do_z = Signal(reset_less=True)
        self.oz = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ Return the list of assignments copying z, out_do_z, oz and mid.
        """
        assigns = [self.z.eq(i.z)]
        assigns.append(self.out_do_z.eq(i.out_do_z))
        assigns.append(self.oz.eq(i.oz))
        assigns.append(self.mid.eq(i.mid))
        return assigns
1380
1381
class FPRoundMod:
    """ Combinatorial rounding module.

    When ``roundz`` is set (and the result is not bypassed through
    ``out_do_z``) the mantissa is incremented; an all-ones mantissa
    additionally increments the exponent.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format """
        return FPNorm1Data(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ the result is always available on self.out_z """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.roundz = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.out_z

        # default: copies mid, z, out_do_z (overridden below on round-up)
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.roundz):
                # mantissa rounds up
                m.d.comb += out.z.m.eq(inp.z.m + 1)
                # mantissa was all 1s: exponent goes up as well
                with m.If(inp.z.m == inp.z.m1s):
                    m.d.comb += out.z.e.eq(inp.z.e + 1)

        return m
1413
1414
class FPRound(FPState):
    """ FSM state "round": registers the output of FPRoundMod and then
    moves on to "corrections".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        # FPRoundMod requires both width and id_wid
        self.mod = FPRoundMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: that of the wrapped module """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module """
        return self.mod.ospec()

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, i)

        # NOTE(review): idsync is not defined in FPState; presumably
        # inherited from FPBase - confirm.
        self.idsync(m)
        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPRoundMod exposes its result as out_z (it has no "o" attribute)
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "corrections"
1439
1440
class FPCorrectionsMod:
    """ Combinatorial exponent-correction module.

    Copies the input through; when the number is flagged as
    denormalised (and not bypassed through ``out_do_z``) the exponent
    is replaced with the number's N127 value.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.out_z = self.ospec()

    def ispec(self):
        """ input data format """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format (identical to the input format) """
        return FPRoundData(self.width, self.id_wid)

    def process(self, i):
        """ the result is always available on self.out_z """
        return self.out_z

    def setup(self, m, i):
        """ links module to inputs and outputs
        """
        m.submodules.corrections = self
        m.d.comb += self.i.eq(i)

    def elaborate(self, platform):
        m = Module()
        inp, out = self.i, self.out_z

        m.submodules.corr_in_z = inp.z
        m.submodules.corr_out_z = out.z
        # default: copies mid, z, out_do_z
        m.d.comb += out.eq(inp)
        with m.If(~inp.out_do_z):
            with m.If(inp.z.is_denormalised):
                m.d.comb += out.z.e.eq(inp.z.N127)
        return m
1473
1474
class FPCorrections(FPState):
    """ FSM state "corrections": registers the output of
    FPCorrectionsMod and then moves on to "pack".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        # FPCorrectionsMod requires both width and id_wid
        self.mod = FPCorrectionsMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: that of the wrapped module """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        m.d.sync += self.out_z.eq(self.mod.out_z)
        # FPCorrectionsMod exposes its result as out_z (it has no "o")
        m.d.sync += self.out_z.mid.eq(self.mod.out_z.mid)

    def action(self, m):
        m.next = "pack"
1498
1499
class FPPackData:
    """ Packed result record: the value ``z`` plus the ``mid`` identifier.
    """

    def __init__(self, width, id_wid):
        """ Create the record's signals.

        * width: bit-width of ``z``
        * id_wid: bit-width of the ``mid`` identifier
        """
        self.z = Signal(width, reset_less=True)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ Return the list of assignments copying z and mid from ``i``.
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]
1508
1509
class FPPackMod:
    """ Combinatorial packing module.

    Produces the packed value from the unpacked number: infinity on
    overflow, otherwise the number created from sign/exponent/mantissa.
    When ``out_do_z`` is set the pre-computed ``oz`` value is passed
    through instead.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPRoundData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPPackData(self.width, self.id_wid)

    def process(self, i):
        """ the result is always available on self.o """
        return self.o

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        m.submodules.pack = self
        m.d.comb += self.i.eq(in_z)

    def elaborate(self, platform):
        m = Module()
        z = FPNumOut(self.width, False)
        num = self.i.z

        m.submodules.pack_in_z = num
        m.submodules.pack_out_z = z
        m.d.comb += self.o.mid.eq(self.i.mid)
        with m.If(~self.i.out_do_z):
            # normal path: pack the number (or infinity on overflow)
            with m.If(num.is_overflowed):
                m.d.comb += z.inf(num.s)
            with m.Else():
                m.d.comb += z.create(num.s, num.e, num.m)
        with m.Else():
            # bypass path: result was already determined earlier
            m.d.comb += z.v.eq(self.i.oz)
        m.d.comb += self.o.z.eq(z.v)
        return m
1548
1549
class FPPack(FPState):
    """ FSM state "pack": registers the output of FPPackMod and then
    moves on to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        # FPPackMod requires both width and id_wid
        self.mod = FPPackMod(width, id_wid)
        self.out_z = self.ospec()

    def ispec(self):
        """ input format: that of the wrapped module """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module """
        return self.mod.ospec()

    def setup(self, m, in_z):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z)

        # FPPackMod's output record is "o" (an FPPackData), whose packed
        # value is the plain signal "z"
        m.d.sync += self.out_z.z.eq(self.mod.o.z)
        m.d.sync += self.out_z.mid.eq(self.mod.o.mid)

    def action(self, m):
        m.next = "pack_put_z"
1573
1574
class FPPutZ(FPState):
    """ FSM state which writes a result (and optionally its id) out and
    performs the stb/ack handshake on ``out_z.z`` before moving to
    ``to_state`` ("get_ops" unless specified).
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        dest = self.out_z.z

        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += dest.v.eq(self.in_z)
        with m.If(dest.stb & dest.ack):
            # receiver acknowledged: drop strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep the strobe raised until acknowledged
            m.d.sync += dest.stb.eq(1)
1598
1599
class FPPutZIdx(FPState):
    """ FSM state which writes a result to the output selected by
    ``in_mid`` out of ``out_zs``, performing the stb/ack handshake on
    that output before moving to ``to_state`` ("get_ops" by default).
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        # the output selected by the id
        dest = self.out_zs[self.in_mid]

        m.d.comb += [
            outz_stb.eq(dest.stb),
            outz_ack.eq(dest.ack),
        ]
        m.d.sync += dest.v.eq(self.in_z.v)
        with m.If(outz_stb & outz_ack):
            # receiver acknowledged: drop strobe and move on
            m.d.sync += dest.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            # keep the strobe raised until acknowledged
            m.d.sync += dest.stb.eq(1)
1625
class FPOpData:
    """ Output record: an FPOp ``z`` plus the ``mid`` identifier.
    """

    def __init__(self, width, id_wid):
        """ Create the record.

        * width: bit-width passed to FPOp
        * id_wid: bit-width of the ``mid`` identifier
        """
        self.z = FPOp(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ Return the list of assignments copying z and mid from ``i``.
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ Return z and mid as a two-element list.
        """
        members = [self.z]
        members.append(self.mid)
        return members
1636
1637
class FPADDBaseMod:
    """ FSM-based IEEE754 adder: a list of FPState objects, each of
    which becomes one state of a single FSM.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle
        self.compact = compact

        self.states = []

        self.in_t = Trigger()
        self.i = self.ispec()
        self.o = self.ospec()

    def ispec(self):
        """ input data format """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPOpData(self.width, self.id_wid)

    def add_state(self, state):
        """ Register ``state`` with the FSM and hand it back.
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.o.z
        m.submodules.in_t = self.in_t

        # populate self.states with either the compact or the long chain
        if self.compact:
            build = self.get_compact_fragment
        else:
            build = self.get_longer_fragment
        build(m, platform)

        # one FSM state per registered FPState
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ Build the long (one state per operation) chain of states.

            NOTE(review): this path references attributes (self.in_mid,
            self.out_z, self.out_mid) that __init__ does not create, and
            setup() signatures that differ from the classes visible in
            this file - confirm before use.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      width))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # alignment: single- or multi-cycle variant, wired identically
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # normalisation: single- or multi-cycle variant
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # output of the normal (packed) result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # output of the special-cases result
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ Build the compact chain: get ops, special-cases/denorm,
            align/add, normalise-to-pack, put result.
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      width, id_wid))
        get.setup(m, self.i, self.in_t.stb, self.in_t.ack)

        sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
        sc.setup(m, get.o)

        alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
        alm.setup(m, sc.o)

        n1 = self.add_state(FPNormToPack(width, id_wid))
        n1.setup(m, alm.a1o)

        self.add_state(FPPutZ("pack_put_z", n1.out_z.z, self.o,
                              n1.out_z.mid, self.o.mid))

        #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
        #                            sc.o.mid, self.o.mid))
1757
1758
class FPADDBase(FPState):
    """ FSM state "fpadd": wraps FPADDBaseMod (which itself contains an
    FSM), feeding it operands through ``in_t`` and waiting for its
    result to be signalled through ``z_done``.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)
        self.o = self.ospec()

        self.in_t = Trigger()
        self.i = self.ispec()

        self.z_done = Signal(reset_less=True) # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def ispec(self):
        """ input format: that of the wrapped module """
        return self.mod.ispec()

    def ospec(self):
        """ output format: that of the wrapped module """
        return self.mod.ospec()

    def setup(self, m, i, add_stb, in_mid):
        """ links the wrapped module to this state's inputs and outputs
        """
        m.d.comb += [self.i.eq(i),
                     self.mod.i.eq(self.i),
                     self.z_done.eq(self.mod.o.z.trigger),
                     #self.add_stb.eq(add_stb),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.o.mid.eq(self.mod.o.mid),
                     self.o.z.v.eq(self.mod.o.z.v),
                     self.o.z.stb.eq(self.mod.o.z.stb),
                     self.mod.o.z.ack.eq(self.o.z.ack),
                    ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0) # sets to zero when not in active state
        m.d.sync += self.o.z.ack.eq(0) # likewise
        #m.d.sync += self.in_t.stb.eq(0)

        m.submodules.fpadd = self.mod

    def action(self, m):

        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        #with m.If(self.in_t.ack):
        #    m.d.sync += self.in_t.stb.eq(0)
        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1), # acknowledge receipt...
                    self.in_t.stb.eq(1), # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.o.z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and write out id and value
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1848
1849
class FPADDStageOut:
    """ Pipeline output record: the result ``z`` plus the ``mid`` id.
    """

    def __init__(self, width, id_wid):
        """ Create the record's signals.

        * width: bit-width of ``z``
        * id_wid: bit-width of the ``mid`` identifier
        """
        self.z = Signal(width)
        self.mid = Signal(id_wid, reset_less=True)

    def eq(self, i):
        """ Return the list of assignments copying z and mid from ``i``.
        """
        copy_z = self.z.eq(i.z)
        copy_mid = self.mid.eq(i.mid)
        return [copy_z, copy_mid]

    def ports(self):
        """ Return z and mid as a two-element list.
        """
        members = [self.z]
        members.append(self.mid)
        return members
1860
1861
# matches the format of FPADDStageOut, allows eq function to do assignments
class PlaceHolder:
    """ Empty attribute container: fields are attached after creation.
    """
    pass
1864
1865
class FPAddBaseStage:
    """ Pipeline stage whose result is ``i.a + i.b``, with the id
    passed through unchanged.
    """

    def __init__(self, width, id_wid):
        self.width, self.id_wid = width, id_wid

    def ispec(self):
        """ input data format """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format """
        return FPADDStageOut(self.width, self.id_wid)

    def process(self, i):
        """ Return an object carrying ``z`` (the sum of i.a and i.b) and
            ``mid`` (copied from i).
        """
        result = PlaceHolder()
        result.z = i.a + i.b
        result.mid = i.mid
        return result
1882
1883
class FPADDBasePipe1(UnbufferedPipeline):
    """ UnbufferedPipeline built around a single FPAddBaseStage.
    """

    def __init__(self, width, id_wid):
        UnbufferedPipeline.__init__(self, FPAddBaseStage(width, id_wid))
1888
1889
class FPADDBasePipe(ControlBase):
    """ ControlBase wrapper around one FPADDBasePipe1.

    The assignments returned by connect() are kept at construction
    time and applied combinatorially in elaborate().
    """

    def __init__(self, width, id_wid):
        ControlBase.__init__(self)
        self.pipe1 = FPADDBasePipe1(width, id_wid)
        self._eqs = self.connect([self.pipe1])

    def elaborate(self, platform):
        module = Module()
        module.submodules.pipe1 = self.pipe1
        module.d.comb += self._eqs
        return module
1901
1902
class PriorityCombPipeline(CombMultiInPipeline):
    """ CombMultiInPipeline using an InputPriorityArbiter as its
    input multiplexer.
    """

    def __init__(self, stage, p_len):
        arbiter = InputPriorityArbiter(self, p_len)
        CombMultiInPipeline.__init__(self, stage, p_len=p_len, p_mux=arbiter)

    def ports(self):
        """ Return the ports of the input multiplexer.
        """
        return self.p_mux.ports()
1910
1911
class FPAddInPassThruStage:
    """ Pass-through stage using the FPADDBaseData format on both sides.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input data format """
        return FPADDBaseData(self.width, self.id_wid)

    def ospec(self):
        """ output data format: identical to the input format """
        return self.ispec()

    def process(self, i):
        """ Return the input unchanged.
        """
        return i
1918
1919
class FPADDInMuxPipe(PriorityCombPipeline):
    """ Fan-in pipeline: ``num_rows`` inputs in FPADDBaseData format,
    one output.
    """

    def __init__(self, width, id_width, num_rows):
        self.num_rows = num_rows
        PriorityCombPipeline.__init__(
            self, FPAddInPassThruStage(width, id_width),
            p_len=self.num_rows)
        #self.p.i_data = stage.ispec()
        #self.n.o_data = stage.ospec()

    def ports(self):
        """ Return, for every input, its valid/ready pair and data
            ports, followed by the same for the single output.
        """
        collected = []
        for idx in range(len(self.p)):
            prev = self.p[idx]
            collected.extend([prev.i_valid, prev.o_ready] +
                             prev.i_data.ports())
        collected.extend([self.n.i_ready, self.n.o_valid] +
                         self.n.o_data.ports())
        return collected
1936
1937
class MuxCombPipeline(CombMultiOutPipeline):
    """ CombMultiOutPipeline in which the stage doubles up as the
    n-way output multiplexer.
    """

    def __init__(self, stage, n_len):
        # HACK: stage is also the n-way multiplexer
        CombMultiOutPipeline.__init__(self, stage, n_len=n_len, n_mux=stage)

        # HACK: n-mux is also the stage... so set the muxid equal to input mid
        stage.m_id = self.p.i_data.mid

    def ports(self):
        """ Return the ports of the multiplexer.
        """
        # NOTE(review): the constructor passes n_mux, yet this reads
        # p_mux - confirm which attribute CombMultiOutPipeline provides.
        return self.p_mux.ports()
1948
1949
class FPAddOutPassThruStage:
    """ Pass-through stage using the FPADDStageOut format on both sides.
    """

    def __init__(self, width, id_wid):
        self.width = width
        self.id_wid = id_wid

    def ispec(self):
        """ input data format """
        return FPADDStageOut(self.width, self.id_wid)

    def ospec(self):
        """ output data format: identical to the input format """
        return self.ispec()

    def process(self, i):
        """ Return the input unchanged.
        """
        return i
1956
1957
class FPADDMuxOutPipe(MuxCombPipeline):
    """ Fan-out pipeline: one input in FPADDStageOut format,
    ``num_rows`` outputs.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        MuxCombPipeline.__init__(
            self, FPAddOutPassThruStage(width, id_wid),
            n_len=self.num_rows)
        #self.p.i_data = stage.ispec()
        #self.n.o_data = stage.ospec()

    def ports(self):
        """ Return the input's valid/ready pair and data ports,
            followed by the same for every output.
        """
        collected = [self.p.i_valid, self.p.o_ready] + \
                    self.p.i_data.ports()
        for idx in range(len(self.n)):
            nxt = self.n[idx]
            collected.extend([nxt.i_ready, nxt.o_valid] +
                             nxt.o_data.ports())
        return collected
1973
1974
class FPADDMuxInOut:
    """ Reservation-Station version of FPADD pipeline.

        Three pipelines connected in series: a fan-in (FPADDInMuxPipe),
        the add stage (FPADDBasePipe) and a fan-out (FPADDMuxOutPipe).
        The fan-in's inputs and the fan-out's outputs serve as this
        class's own ``p`` and ``n``.
    """

    def __init__(self, width, id_wid, num_rows):
        self.num_rows = num_rows
        self.inpipe = FPADDInMuxPipe(width, id_wid, num_rows)   # fan-in
        self.fpadd = FPADDBasePipe(width, id_wid)               # add stage
        self.outpipe = FPADDMuxOutPipe(width, id_wid, num_rows) # fan-out

        # use pipe in/out as this class in/out
        self.p = self.inpipe.p
        self.n = self.outpipe.n
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def elaborate(self, platform):
        module = Module()
        module.submodules.inpipe = self.inpipe
        module.submodules.fpadd = self.fpadd
        module.submodules.outpipe = self.outpipe

        # fan-in -> add stage -> fan-out
        module.d.comb += self.inpipe.n.connect_to_next(self.fpadd.p)
        module.d.comb += self.fpadd.connect_to_next(self.outpipe)

        return module

    def ports(self):
        """ Return the port list gathered at construction time.
        """
        return self._ports
2003
2004
class ResArray:
    """ Array of ``rs_sz`` result FPOps (named out_z_0, out_z_1, ...)
    plus one input FPOp and its id.
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ Create the result array.

            * width: bit-width passed to each FPOp
            * id_wid: bit-width of the ``in_mid`` identifier
            * rs_sz: number of result entries (default 2, as in FPADD)
        """
        self.width = width
        self.id_wid = id_wid
        self.rs_sz = rs_sz
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ connects the given value and id to in_z and in_mid
        """
        m.d.comb += [self.in_z.eq(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ Return the concatenated ports of every result entry.
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
2036
2037
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of operand pairs / results to create
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        #self.out_z = FPOp(width)
        self.ids = FPID(id_wid)

        self.states = []

        # operand pairs: in_a_N / in_b_N
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # results: out_z_N
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

    def add_state(self, state):
        """ Register ``state`` with the FSM and hand it back.
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is wired up
        in_a, in_b = self.rs[0]

        geta = self.add_state(FPGetOp("get_a", "get_b",
                                      in_a, self.width))
        geta.setup(m, in_a)

        getb = self.add_state(FPGetOp("get_b", "fpadd",
                                      in_b, self.width))
        getb.setup(m, in_b)

        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        abd = ab.ispec() # create an input spec object for FPADDBase
        m.d.sync += [abd.a.eq(geta.out_op),
                     abd.b.eq(getb.out_op),
                     abd.mid.eq(self.ids.in_mid)]
        ab.setup(m, abd, getb.out_decode, self.ids.in_mid)

        self.add_state(FPPutZIdx("put_z", ab.o.z, self.res,
                                 ab.o.mid, "get_a"))

        # one FSM state per registered FPState
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
2127
2128
if __name__ == "__main__":
    # flip the constant below to build the bare FPADDBase instead
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        port_list = (alu.rs[0][0].ports() +
                     alu.rs[0][1].ports() +
                     alu.res[0].ports() +
                     [alu.ids.in_mid, alu.ids.out_mid])
        main(alu, ports=port_list)
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        port_list = ([alu.in_a, alu.in_b] +
                     alu.in_t.ports() +
                     alu.out_z.ports() +
                     [alu.in_mid, alu.out_mid])
        main(alu, ports=port_list)
2142
2143
2144 # works... but don't use, just do "python fname.py convert -t v"
2145 #print (verilog.convert(alu, ports=[
2146 # ports=alu.in_a.ports() + \
2147 # alu.in_b.ports() + \
2148 # alu.out_z.ports())