291b56feb3e369636d91c04284abca7b35de2bca
[ieee754fpu.git] / src / add / nmigen_add_experiment.py
1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
3 # 2013-12-12
4
5 from nmigen import Module, Signal, Cat, Mux, Array, Const
6 from nmigen.lib.coding import PriorityEncoder
7 from nmigen.cli import main, verilog
8 from math import log
9
10 from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPNumBase
11 from fpbase import MultiShiftRMerge, Trigger
12 #from fpbase import FPNumShiftMultiRight
13
14
class FPState(FPBase):
    """ Common base for the per-state helper objects in this file.

        Remembers the state name it was constructed with (subclasses
        pass names such as "align" or "add_0") and can publish
        dictionaries of inputs / outputs as attributes of the object.
    """

    def __init__(self, state_from):
        # name of the state this object is associated with
        self.state_from = state_from

    def set_inputs(self, inputs):
        """ stores the inputs dict and exposes every entry as self.<key>
        """
        self.inputs = inputs
        for name in inputs:
            setattr(self, name, inputs[name])

    def set_outputs(self, outputs):
        """ stores the outputs dict and exposes every entry as self.<key>
        """
        self.outputs = outputs
        for name in outputs:
            setattr(self, name, outputs[name])
28
29
class FPGetSyncOpsMod:
    """ Passes a group of operands through once all of them are strobed.

        * in_op / out_op: lists of num_ops signals, each width bits
        * stb: one strobe bit per operand
        * ready: high when every bit of stb is set
        * out_decode: high when ready and ack are both high; only then
          are the inputs copied (combinatorially) onto the outputs
    """

    def __init__(self, width, num_ops=2):
        self.width = width
        self.num_ops = num_ops
        # create the signals pairwise (input, then output) per operand
        pairs = [(Signal(width, reset_less=True),
                  Signal(width, reset_less=True))
                 for _ in range(num_ops)]
        self.in_op = [pair[0] for pair in pairs]
        self.out_op = [pair[1] for pair in pairs]
        self.stb = Signal(num_ops)
        self.ack = Signal()
        self.ready = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        # unsigned -1 of num_ops bits is the all-ones pattern
        all_strobed = Const(-1, (self.num_ops, False))
        m.d.comb += [
            self.ready.eq(self.stb == all_strobed),
            self.out_decode.eq(self.ack & self.ready),
        ]
        with m.If(self.out_decode):
            m.d.comb += [dst.eq(src)
                         for src, dst in zip(self.in_op, self.out_op)]
        return m

    def ports(self):
        """ returns inputs, then outputs, then stb and ack
        """
        return [*self.in_op, *self.out_op, self.stb, self.ack]
59
60
class FPOps(Trigger):
    """ A group of num_ops operand signals (each width bits) combined
        with the Trigger base class.
    """

    def __init__(self, width, num_ops):
        Trigger.__init__(self)
        self.width = width
        self.num_ops = num_ops

        # operands are held in an Array
        self.v = Array([Signal(width) for _ in range(num_ops)])

    def ports(self):
        """ returns the operand signals followed by ack and stb
            (ack / stb are not set here; presumably provided by Trigger)
        """
        operands = [self.v[idx] for idx in range(self.num_ops)]
        return operands + [self.ack, self.stb]
79
80
class InputGroup:
    """ Selects one of num_rows operand groups and latches it on out_op.

        Each row is an FPGetSyncOpsMod.  The rows' ready flags feed a
        PriorityEncoder; while at least one row is ready, the encoder's
        chosen row index is stored in mid (the "multiplex id") and that
        row's operands are stored in out_op.v on the sync domain.

        * width:    bit-width of each operand
        * num_ops:  operands per row
        * num_rows: number of rows to choose between (must be >= 1)
    """

    def __init__(self, width, num_ops=2, num_rows=4):
        if num_rows < 1:
            raise ValueError("num_rows must be at least 1")
        self.width = width
        self.num_ops = num_ops
        self.num_rows = num_rows
        # number of bits needed to index any row.  exact integer maths:
        # identical to log2 for powers of two, but rounds *up* otherwise
        # so that every row index fits (floor would alias the top rows)
        self.mmax = (num_rows - 1).bit_length()
        self.mid = Signal(self.mmax, reset_less=True)  # multiplex id
        self.rs = [FPGetSyncOpsMod(width, num_ops) for _ in range(num_rows)]

        self.out_op = FPOps(width, num_ops)

    def elaborate(self, platform):
        m = Module()

        pe = PriorityEncoder(self.num_rows)
        m.submodules.selector = pe
        m.submodules.out_op = self.out_op
        m.submodules += self.rs

        # connect priority encoder: one request bit per row
        m.d.comb += pe.i.eq(Cat(*[row.ready for row in self.rs]))

        # PriorityEncoder.n is the "invalid" flag: it is high when *no*
        # input bit is asserted.  the encoder is therefore active (and
        # pe.o meaningful) only when n is low.
        active = ~pe.n
        m.d.comb += self.out_op.stb.eq(active)  # strobe-out when active

        with m.If(active):
            m.d.sync += self.mid.eq(pe.o)
            for i in range(self.num_rows):
                with m.If(pe.o == Const(i, (self.mmax, False))):
                    for j in range(self.num_ops):
                        m.d.sync += self.out_op.v[j].eq(self.rs[i].out_op[j])
        return m

    def ports(self):
        """ returns out_op's ports, then each row's inputs and strobe,
            then the multiplex id.

            (this class has no ack / stb of its own: the handshake
            signals are already part of out_op.ports())
        """
        res = []
        for row in self.rs:
            res += row.in_op + [row.stb]
        return self.out_op.ports() + res + [self.mid]
123
124
class FPGetOpMod:
    """ Forwards the value of an incoming FPOp when it is both strobed
        and acknowledged.

        out_decode is high while in_op.ack and in_op.stb are both high;
        only then is in_op.v copied (combinatorially) to out_op.
    """

    def __init__(self, width):
        self.in_op = FPOp(width)
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.get_op_in = self.in_op
        handshake = (self.in_op.ack) & (self.in_op.stb)
        m.d.comb += self.out_decode.eq(handshake)
        with m.If(self.out_decode):
            m.d.comb += self.out_op.eq(self.in_op.v)
        return m
141
142
class FPGetOp(FPState):
    """ State that fetches a single operand.

        Waits (raising in_op.ack) until the FPGetOpMod reports a
        completed handshake, then captures the operand into out_op and
        moves to out_state.
    """

    def __init__(self, in_state, out_state, in_op, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGetOpMod(width)
        self.in_op = in_op
        self.out_op = Signal(width)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op):
        """ registers the module (under this state's name) and links
            it to the incoming operand
        """
        setattr(m.submodules, self.state_from, self.mod)
        m.d.comb += [
            self.mod.in_op.copy(in_op),
            self.out_decode.eq(self.mod.out_decode),
        ]

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        with m.If(self.out_decode):
            # operand received: drop ack, latch the value, move on
            m.d.sync += self.in_op.ack.eq(0)
            m.d.sync += self.out_op.eq(self.mod.out_op)
            m.next = self.out_state
        with m.Else():
            # still waiting: keep acknowledging
            m.d.sync += self.in_op.ack.eq(1)
172
173
class FPGet2OpMod(Trigger):
    """ Decodes two raw operands into FPNumIn form while triggered.

        in_op1 / in_op2 are plain width-bit signals; out_op1 / out_op2
        are FPNumIn objects that receive the decoded values whenever
        self.trigger is high.
    """

    def __init__(self, width):
        Trigger.__init__(self)
        self.in_op1 = Signal(width, reset_less=True)
        self.in_op2 = Signal(width, reset_less=True)
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)

    def elaborate(self, platform):
        # start from the module that the Trigger base class builds
        m = Trigger.elaborate(self, platform)
        m.submodules.get_op1_out = self.out_op1
        m.submodules.get_op2_out = self.out_op2
        with m.If(self.trigger):
            m.d.comb += self.out_op1.decode(self.in_op1)
            m.d.comb += self.out_op2.decode(self.in_op2)
        return m
193
194
class FPGet2Op(FPState):
    """ State that fetches both operands at once.

        Keeps the module's ack raised until the module's trigger
        fires, then latches the two decoded operands into
        out_op1 / out_op2 and moves to out_state.
    """

    def __init__(self, in_state, out_state, in_op1, in_op2, width):
        FPState.__init__(self, in_state)
        self.out_state = out_state
        self.mod = FPGet2OpMod(width)
        self.in_op1 = in_op1
        self.in_op2 = in_op2
        self.out_op1 = FPNumIn(None, width)
        self.out_op2 = FPNumIn(None, width)
        self.in_stb = Signal(reset_less=True)
        self.out_ack = Signal(reset_less=True)
        self.out_decode = Signal(reset_less=True)

    def setup(self, m, in_op1, in_op2, in_stb, in_ack):
        """ registers the module as "get_ops" and wires it up:
            operands and strobe go in; the module's ack is mirrored to
            both out_ack and the caller's in_ack; its trigger becomes
            out_decode.
        """
        m.submodules.get_ops = self.mod
        m.d.comb += [
            self.mod.in_op1.eq(in_op1),
            self.mod.in_op2.eq(in_op2),
            self.mod.stb.eq(in_stb),
            self.out_ack.eq(self.mod.ack),
            self.out_decode.eq(self.mod.trigger),
            in_ack.eq(self.mod.ack),
        ]

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        with m.If(self.out_decode):
            # both operands available: drop ack and capture them
            m.d.sync += self.mod.ack.eq(0)
            m.d.sync += self.out_op1.copy(self.mod.out_op1)
            m.d.sync += self.out_op2.copy(self.mod.out_op2)
            m.next = self.out_state
        with m.Else():
            m.d.sync += self.mod.ack.eq(1)
234
235
class FPAddSpecialCasesMod:
    """ special cases: NaNs, infs, zeros and exact cancellation.
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Purely combinatorial.  out_do_z is set to 1 when one of the
        special cases applies, and out_z then carries the result;
        otherwise out_do_z is 0.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, out_do_z):
        """ registers this module as "specialcases", feeds it the two
            operands and mirrors its out_do_z onto the caller's signal
        """
        m.submodules.specialcases = self
        m.d.comb += [
            self.in_a.copy(in_a),
            self.in_b.copy(in_b),
            out_do_z.eq(self.out_do_z),
        ]

    def elaborate(self, platform):
        m = Module()

        # short aliases for the operands and the result
        a, b, z = self.in_a, self.in_b, self.out_z

        m.submodules.sc_in_a = a
        m.submodules.sc_in_b = b
        m.submodules.sc_out_z = z

        # signs differ
        s_nomatch = Signal()
        m.d.comb += s_nomatch.eq(a.s != b.s)

        # mantissas identical
        m_match = Signal()
        m.d.comb += m_match.eq(a.m == b.m)

        # if a is NaN or b is NaN return NaN
        with m.If(a.is_nan | b.is_nan):
            m.d.comb += [self.out_do_z.eq(1), z.nan(0)]

        # XXX WEIRDNESS for FP16 non-canonical NaN handling is under
        # review: four extra cases (+/-zero combined with a NaN,
        # returning a sign-adjusted copy of the NaN operand) used to be
        # sketched here and are currently disabled.

        # if a is inf return inf (or NaN)
        with m.Elif(a.is_inf):
            m.d.comb += [self.out_do_z.eq(1), z.inf(a.s)]
            # b's exp_128 flag set and signs don't match: override with NaN
            with m.If(b.exp_128 & s_nomatch):
                m.d.comb += z.nan(0)

        # if b is inf return inf
        with m.Elif(b.is_inf):
            m.d.comb += [self.out_do_z.eq(1), z.inf(b.s)]

        # if a is zero and b zero return signed-a/b
        with m.Elif(a.is_zero & b.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(a.s & b.s, b.e, b.m[3:-1]),
            ]

        # if a is zero return b
        with m.Elif(a.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(b.s, b.e, b.m[3:-1]),
            ]

        # if b is zero return a
        with m.Elif(b.is_zero):
            m.d.comb += [
                self.out_do_z.eq(1),
                z.create(a.s, a.e, a.m[3:-1]),
            ]

        # if a equal to -b return zero (+ve zero)
        with m.Elif(s_nomatch & m_match & (a.e == b.e)):
            m.d.comb += [self.out_do_z.eq(1), z.zero(0)]

        # nothing special: the normal add path handles it
        with m.Else():
            m.d.comb += self.out_do_z.eq(0)

        return m
339
340
class FPID:
    """ Mix-in carrying an optional "multiplex id" alongside the data.

        * id_wid: width in bits of the id.  Any falsy value (None or 0)
          disables the id entirely: in_mid and out_mid are then None.
    """

    def __init__(self, id_wid):
        self.id_wid = id_wid
        if self.id_wid:
            self.in_mid = Signal(id_wid, reset_less=True)
            self.out_mid = Signal(id_wid, reset_less=True)
        else:
            self.in_mid = None
            self.out_mid = None

    def idsync(self, m):
        """ copies in_mid to out_mid on the sync domain (if ids are in use)

            The test is on the signal itself rather than on id_wid, so
            that it agrees with __init__ for *every* falsy id_wid
            (id_wid=0 leaves out_mid as None, which has no eq()).
        """
        if self.out_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
354
355
class FPAddSpecialCases(FPState, FPID):
    """ "special_cases" state: NaNs, infs, zeros, denormalised
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        When the module flags a special case the result is latched and
        the next state is "put_z"; otherwise it is "denormalise".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)
        self.mod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b, self.out_do_z)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
            # only the packed output value is taken from the module
            m.d.sync += self.out_z.v.eq(self.mod.out_z.v)
        with m.Else():
            m.next = "denormalise"
383
384
class FPAddSpecialCasesDeNorm(FPState, FPID):
    """ "special_cases" state with the denormalise step folded in.
        NOTE: some of these are unique to add.  see "Special Operations"
        https://steve.hollasch.net/cgindex/coding/ieeefloat.html

        Both the special-cases module and the denormalise module are fed
        the same operands.  A special case goes straight to "put_z";
        otherwise the denormalised operands are latched and the next
        state is "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "special_cases")
        FPID.__init__(self, id_wid)

        # special-cases part
        self.smod = FPAddSpecialCasesMod(width)
        self.out_z = FPNumOut(width, False)
        self.out_do_z = Signal(reset_less=True)

        # denormalise part
        self.dmod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links both modules to inputs and outputs
        """
        self.smod.setup(m, in_a, in_b, self.out_do_z)
        self.dmod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        self.idsync(m)
        with m.If(self.out_do_z):
            m.next = "put_z"
            # only the packed output value is taken from the module
            m.d.sync += self.out_z.v.eq(self.smod.out_z.v)
        with m.Else():
            m.next = "align"
            m.d.sync += [
                self.out_a.copy(self.dmod.out_a),
                self.out_b.copy(self.dmod.out_b),
            ]
419
420
class FPAddDeNormMod(FPState):
    """ Combinatorial denormalisation of both operands.

        Each output starts as a copy of its input.  If the input's
        exp_n127 flag is set the output exponent is replaced by the
        input's N126 constant; otherwise the top mantissa bit of the
        output is set.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b):
        """ registers this module as "denormalise" and feeds it the
            two operands
        """
        m.submodules.denormalise = self
        m.d.comb += [self.in_a.copy(in_a), self.in_b.copy(in_b)]

    def elaborate(self, platform):
        m = Module()
        m.submodules.denorm_in_a = self.in_a
        m.submodules.denorm_in_b = self.in_b
        m.submodules.denorm_out_a = self.out_a
        m.submodules.denorm_out_b = self.out_b

        # identical treatment for a and then b
        for num_in, num_out in ((self.in_a, self.out_a),
                                (self.in_b, self.out_b)):
            m.d.comb += num_out.copy(num_in)
            with m.If(num_in.exp_n127):
                m.d.comb += num_out.e.eq(num_in.N126)  # limit exponent
            with m.Else():
                m.d.comb += num_out.m[-1].eq(1)  # set top mantissa bit

        return m
456
457
class FPAddDeNorm(FPState, FPID):
    """ "denormalise" state: latches the denormalised operands produced
        by FPAddDeNormMod and always moves on to "align".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "denormalise")
        FPID.__init__(self, id_wid)
        self.mod = FPAddDeNormMod(width)
        self.out_a = FPNumBase(width)
        self.out_b = FPNumBase(width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        self.idsync(m)
        # capture the module's (combinatorial) results
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        m.next = "align"
480
481
class FPAddAlignMultiMod(FPState):
    """ One step of operand alignment.

        Outputs default to copies of the inputs.  Whichever operand has
        the smaller exponent gets shift_down applied; when the exponents
        are equal nothing is shifted and exp_eq is raised instead.
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # NOTE: this does *not* do single-cycle multi-shifting:
        # the user of this module *STAYS* in the align state until
        # exp_eq reports that the exponents match

        # defaults: not yet equal, operands passed through untouched
        m.d.comb += [
            self.exp_eq.eq(0),
            self.out_a.copy(self.in_a),
            self.out_b.copy(self.in_b),
        ]

        agtb = Signal(reset_less=True)
        altb = Signal(reset_less=True)
        m.d.comb += [
            agtb.eq(self.in_a.e > self.in_b.e),
            altb.eq(self.in_a.e < self.in_b.e),
        ]

        with m.If(agtb):
            # exponent of a greater than b: shift b down
            m.d.comb += self.out_b.shift_down(self.in_b)
        with m.Elif(altb):
            # exponent of b greater than a: shift a down
            m.d.comb += self.out_a.shift_down(self.in_a)
        with m.Else():
            # exponents equal: move to next stage.
            m.d.comb += self.exp_eq.eq(1)
        return m
522
523
class FPAddAlignMulti(FPState, FPID):
    """ "align" state (multi-cycle variant): latches the module's
        outputs every cycle and only advances to "add_0" once the
        module reports equal exponents.
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "align")
        self.mod = FPAddAlignMultiMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)
        self.exp_eq = Signal(reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        m.submodules.align = self.mod
        m.d.comb += [
            self.mod.in_a.copy(in_a),
            self.mod.in_b.copy(in_b),
            self.exp_eq.eq(self.mod.exp_eq),
        ]
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        self.idsync(m)
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        # no m.next unless the exponents match: remain in this state
        with m.If(self.exp_eq):
            m.next = "add_0"
552
553
class FPAddAlignSingleMod:
    """ Single-cycle alignment of two operands using one shared shifter.
    """

    def __init__(self, width):
        self.width = width
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b):
        """ registers this module as "align" and feeds it the operands
        """
        m.submodules.align = self
        m.d.comb += [self.in_a.copy(in_a), self.in_b.copy(in_b)]

    def elaborate(self, platform):
        """ Aligns A against B or B against A, depending on which has the
            greater exponent.  This is done in a *single* cycle using
            variable-width bit-shift

            the shifter used here is quite expensive in terms of gates.
            Mux A or B in (and out) into temporaries, as only one of them
            needs to be aligned against the other
        """
        m = Module()

        m.submodules.align_in_a = self.in_a
        m.submodules.align_in_b = self.in_b
        m.submodules.align_out_a = self.out_a
        m.submodules.align_out_b = self.out_b

        # temporary (muxed) input and output to be shifted
        t_inp = FPNumBase(self.width)
        t_out = FPNumIn(None, self.width)
        espec = (len(self.in_a.e), True)  # signed, same width as exponent
        msr = MultiShiftRMerge(self.in_a.m_width, espec)
        m.submodules.align_t_in = t_inp
        m.submodules.align_t_out = t_out
        m.submodules.multishift_r = msr

        ediff = Signal(espec, reset_less=True)   # a.e - b.e
        ediffr = Signal(espec, reset_less=True)  # b.e - a.e
        tdiff = Signal(espec, reset_less=True)   # whichever is selected
        elz = Signal(reset_less=True)            # a.e < b.e
        egz = Signal(reset_less=True)            # a.e > b.e

        # connect multi-shifter to t_inp/out mantissa (and tdiff)
        m.d.comb += [
            msr.inp.eq(t_inp.m),
            msr.diff.eq(tdiff),
            t_out.m.eq(msr.m),
            t_out.e.eq(t_inp.e + tdiff),
            t_out.s.eq(t_inp.s),
        ]

        m.d.comb += [
            ediff.eq(self.in_a.e - self.in_b.e),
            ediffr.eq(self.in_b.e - self.in_a.e),
            elz.eq(self.in_a.e < self.in_b.e),
            egz.eq(self.in_a.e > self.in_b.e),
        ]

        # default: A-exp == B-exp, A and B untouched (fall through)
        m.d.comb += [
            self.out_a.copy(self.in_a),
            self.out_b.copy(self.in_b),
        ]

        # only one shifter (muxed)
        with m.If(egz):
            # exponent of a greater than b: shift b down
            m.d.comb += t_inp.copy(self.in_b)
            m.d.comb += tdiff.eq(ediff)
            m.d.comb += self.out_b.copy(t_out)
            m.d.comb += self.out_b.s.eq(self.in_b.s)  # keep b's sign
        with m.Elif(elz):
            # exponent of b greater than a: shift a down
            m.d.comb += t_inp.copy(self.in_a)
            m.d.comb += tdiff.eq(ediffr)
            m.d.comb += self.out_a.copy(t_out)
            m.d.comb += self.out_a.s.eq(self.in_a.s)  # keep a's sign
        return m
633
634
class FPAddAlignSingle(FPState, FPID):
    """ "align" state (single-cycle variant): latches the aligned
        operands from FPAddAlignSingleMod and always moves to "add_0".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        self.idsync(m)
        # NOTE: could be done as comb
        m.d.sync += [
            self.out_a.copy(self.mod.out_a),
            self.out_b.copy(self.mod.out_b),
        ]
        m.next = "add_0"
657
658
class FPAddAlignSingleAdd(FPState, FPID):
    """ "align" state that chains align, add-stage-0 and add-stage-1
        combinatorially.

        The three modules are connected back-to-back with comb
        assignments in setup(); only the outputs of the final module
        (add stage 1) are latched, after which the next state is
        "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "align")
        FPID.__init__(self, id_wid)

        # alignment
        self.mod = FPAddAlignSingleMod(width)
        self.out_a = FPNumIn(None, width)
        self.out_b = FPNumIn(None, width)

        # add stage 0.  (a0_out_z is created exactly once: a second,
        # identical construction would only leave an unused object behind)
        self.a0mod = FPAddStage0Mod(width)
        self.a0_out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.a0_out_z.m_width + 4, reset_less=True)

        # add stage 1
        self.a1mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_a, in_b, in_mid):
        """ links the three modules together, and to inputs and outputs
        """
        # align: operands in, aligned operands out (combinatorial)
        self.mod.setup(m, in_a, in_b)
        m.d.comb += self.out_a.copy(self.mod.out_a)
        m.d.comb += self.out_b.copy(self.mod.out_b)

        # add stage 0 consumes the aligned operands
        self.a0mod.setup(m, self.out_a, self.out_b)
        m.d.comb += self.a0_out_z.copy(self.a0mod.out_z)
        m.d.comb += self.out_tot.eq(self.a0mod.out_tot)

        # add stage 1 consumes the stage-0 total and partial result
        self.a1mod.setup(m, self.out_tot, self.a0_out_z)

        if self.in_mid is not None:
            m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        self.idsync(m)
        m.d.sync += self.out_of.copy(self.a1mod.out_of)
        m.d.sync += self.out_z.copy(self.a1mod.out_z)
        m.next = "normalise_1"
698
699
class FPAddStage0Mod:
    """ First stage of add (combinatorial).

        The result exponent is taken from a.  Mantissas are extended by
        one zero bit at the top and then:
        * same signs: added, result sign is a's
        * different signs, a.m >= b.m: a - b, result sign is a's
        * different signs, a.m <  b.m: b - a, result sign is b's
    """

    def __init__(self, width):
        self.in_a = FPNumBase(width)
        self.in_b = FPNumBase(width)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b):
        """ registers this module as "add0" and feeds it the operands
        """
        m.submodules.add0 = self
        m.d.comb += [self.in_a.copy(in_a), self.in_b.copy(in_b)]

    def elaborate(self, platform):
        m = Module()
        m.submodules.add0_in_a = self.in_a
        m.submodules.add0_in_b = self.in_b
        m.submodules.add0_out_z = self.out_z

        m.d.comb += self.out_z.e.eq(self.in_a.e)

        # store intermediate tests (and zero-extended mantissas)
        seq = Signal(reset_less=True)  # signs equal
        mge = Signal(reset_less=True)  # a mantissa >= b mantissa
        am0 = Signal(len(self.in_a.m)+1, reset_less=True)
        bm0 = Signal(len(self.in_b.m)+1, reset_less=True)
        m.d.comb += seq.eq(self.in_a.s == self.in_b.s)
        m.d.comb += mge.eq(self.in_a.m >= self.in_b.m)
        m.d.comb += am0.eq(Cat(self.in_a.m, 0))
        m.d.comb += bm0.eq(Cat(self.in_b.m, 0))

        with m.If(seq):
            # same-sign (both negative or both positive) add mantissas
            m.d.comb += self.out_tot.eq(am0 + bm0)
            m.d.comb += self.out_z.s.eq(self.in_a.s)
        with m.Elif(mge):
            # a mantissa greater than (or equal to) b, use a
            m.d.comb += self.out_tot.eq(am0 - bm0)
            m.d.comb += self.out_z.s.eq(self.in_a.s)
        with m.Else():
            # b mantissa greater than a, use b
            m.d.comb += self.out_tot.eq(bm0 - am0)
            m.d.comb += self.out_z.s.eq(self.in_b.s)
        return m
753
754
class FPAddStage0(FPState, FPID):
    """ "add_0" state: first stage of add.  covers same-sign (add) and
        subtract, special-casing when mantissas are greater or equal,
        to give greatest accuracy.

        Latches the module's partial result and total, then moves on
        to "add_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_0")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage0Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_tot = Signal(self.out_z.m_width + 4, reset_less=True)

    def setup(self, m, in_a, in_b, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_a, in_b)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        self.idsync(m)
        # NOTE: these could be done as combinatorial (merge add0+add1)
        m.d.sync += [
            self.out_z.copy(self.mod.out_z),
            self.out_tot.eq(self.mod.out_tot),
        ]
        m.next = "add_1"
781
782
class FPAddStage1Mod(FPState):
    """ Second stage of add: preparation for normalisation.

        Detects when the total is too big (the top bit of in_tot acts
        as a kind of carry bit).  In that case the mantissa is taken one
        bit higher up, the exponent is incremented and the two lowest
        bits of the total are OR-ed into sticky.
    """

    def __init__(self, width):
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_tot = Signal(self.in_z.m_width + 4, reset_less=True)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_tot, in_z):
        """ registers this module as "add1" (and its overflow record as
            "add1_out_overflow"), then feeds it the total and partial
            result
        """
        m.submodules.add1 = self
        m.submodules.add1_out_overflow = self.out_of

        m.d.comb += [self.in_z.copy(in_z), self.in_tot.eq(in_tot)]

    def elaborate(self, platform):
        m = Module()
        tot = self.in_tot
        of = self.out_of

        # default: result is the input (mantissa etc. overridden below)
        m.d.comb += self.out_z.copy(self.in_z)

        with m.If(tot[-1]):
            # top bit set: the sum overflowed.  shift result down
            m.d.comb += self.out_z.m.eq(tot[4:])
            m.d.comb += of.m0.eq(tot[4])
            m.d.comb += of.guard.eq(tot[3])
            m.d.comb += of.round_bit.eq(tot[2])
            m.d.comb += of.sticky.eq(tot[1] | tot[0])
            m.d.comb += self.out_z.e.eq(self.in_z.e + 1)
        with m.Else():
            # top bit zero: take the mantissa as it stands
            m.d.comb += self.out_z.m.eq(tot[3:])
            m.d.comb += of.m0.eq(tot[3])
            m.d.comb += of.guard.eq(tot[2])
            m.d.comb += of.round_bit.eq(tot[1])
            m.d.comb += of.sticky.eq(tot[0])
        return m
831
832
class FPAddStage1(FPState, FPID):
    """ "add_1" state: latches the result and overflow record of
        FPAddStage1Mod, raises norm_stb and moves to "normalise_1".
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "add_1")
        FPID.__init__(self, id_wid)
        self.mod = FPAddStage1Mod(width)
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()
        self.norm_stb = Signal()

    def setup(self, m, in_tot, in_z, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_tot, in_z)

        # default assignment: norm_stb is zero when not in add1 state
        # (action() sets it to one)
        m.d.sync += self.norm_stb.eq(0)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        self.idsync(m)
        m.d.sync += [
            self.out_of.copy(self.mod.out_of),
            self.out_z.copy(self.mod.out_z),
            self.norm_stb.eq(1),
        ]
        m.next = "normalise_1"
859
860
class FPNorm1ModSingle:
    """ Single-cycle normalisation (combinatorial).

        * "decrease": mantissa MSB is zero and the exponent is above the
          minimum (in_z.exp_gt_n126).  Leading zeros are counted with a
          PriorityEncoder on the bit-reversed mantissa, the mantissa
          (with guard and round bits appended) is shifted up by that
          amount (limited by in_z.exp_sub_n126) and the exponent is
          decreased to match.
        * "increase": the exponent is below the minimum
          (in_z.exp_lt_n126).  The mantissa (with guard, round and
          sticky appended) is shifted down through a MultiShiftRMerge
          by N126 - e and the exponent is raised by the same amount.
        * otherwise the input is passed through unchanged.
    """

    def __init__(self, width):
        self.width = width
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def setup(self, m, in_z, in_of, out_z):
        """ registers this module as "normalise_1", feeds it the number
            and overflow record, and drives the caller's out_z from the
            module's result
        """
        m.submodules.normalise_1 = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_of.copy(in_of)

        m.d.comb += out_z.copy(self.out_z)

    def elaborate(self, platform):
        m = Module()

        # mantissa plus guard and round bits
        mwid = self.out_z.m_width+2
        pe = PriorityEncoder(mwid)
        m.submodules.norm_pe = pe

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        espec = (len(in_z.e), True)
        ediff_n126 = Signal(espec, reset_less=True)
        # NOTE(review): the value fed to msr.inp below is mwid+1 bits wide
        # while the shifter is built with width mwid - confirm against
        # MultiShiftRMerge that the top mantissa bit is not dropped.
        msr = MultiShiftRMerge(mwid, espec)
        m.submodules.multishift_r = msr

        m.d.comb += in_z.copy(self.in_z)
        m.d.comb += in_of.copy(self.in_of)
        # initialise out from in (overridden below)
        m.d.comb += self.out_z.copy(in_z)
        m.d.comb += self.out_of.copy(in_of)
        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        # decrease exponent
        with m.If(decrease):
            # *sigh* not entirely obvious: count leading zeros (clz)
            # with a PriorityEncoder: to find from the MSB
            # we reverse the order of the bits.
            temp_m = Signal(mwid, reset_less=True)
            temp_s = Signal(mwid+1, reset_less=True)
            clz = Signal((len(in_z.e), True), reset_less=True)
            # make sure that the amount to decrease by does NOT
            # go below the minimum non-INF/NaN exponent
            limclz = Mux(in_z.exp_sub_n126 > pe.o, pe.o,
                         in_z.exp_sub_n126)
            m.d.comb += [
                # cat round and guard bits back into the mantissa
                temp_m.eq(Cat(in_of.round_bit, in_of.guard, in_z.m)),
                pe.i.eq(temp_m[::-1]),          # bit-reversed
                clz.eq(limclz),                 # count zeros from MSB down
                temp_s.eq(temp_m << clz),       # shift mantissa UP
                self.out_z.e.eq(in_z.e - clz),  # DECREASE exponent
                self.out_z.m.eq(temp_s[2:]),    # exclude bits 0&1
                self.out_of.m0.eq(temp_s[2]),   # copy of mantissa[0]
                # overflow in bits 0..1: got shifted too (leave sticky)
                self.out_of.guard.eq(temp_s[1]),      # guard
                self.out_of.round_bit.eq(temp_s[0]),  # round
            ]
        # increase exponent
        with m.Elif(increase):
            temp_m = Signal(mwid+1, reset_less=True)
            m.d.comb += [
                # bit 0: sticky, bit 1: round, bit 2: guard, 3 up: mantissa
                temp_m.eq(Cat(in_of.sticky, in_of.round_bit, in_of.guard,
                              in_z.m)),
                ediff_n126.eq(in_z.N126 - in_z.e),
                # connect multi-shifter to inp/out mantissa (and ediff)
                msr.inp.eq(temp_m),
                msr.diff.eq(ediff_n126),
                self.out_z.m.eq(msr.m[3:]),
                # the overflow bits come from the *shifter output*, using
                # the same layout as temp_m.  (temp_s belongs to the
                # decrease branch only and is never driven in this one.)
                self.out_of.m0.eq(msr.m[3]),         # copy of mantissa[0]
                self.out_of.guard.eq(msr.m[2]),      # guard
                self.out_of.round_bit.eq(msr.m[1]),  # round
                self.out_of.sticky.eq(msr.m[0]),     # sticky
                self.out_z.e.eq(in_z.e + ediff_n126),
            ]

        return m
958
959
class FPNorm1ModMulti:
    """ One-bit-per-evaluation normalisation step (combinatorial).

        in_select chooses the source: the fresh input (in_z / in_of)
        when high, the fed-back temporaries (temp_z / temp_of) when low.
        The selected value is shifted by a single bit up or down with
        the exponent adjusted by one; out_norm is high whenever such a
        shift was applied.
    """

    def __init__(self, width, single_cycle=True):
        self.width = width
        self.in_select = Signal(reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.in_of = Overflow()
        self.temp_z = FPNumBase(width, False)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width, False)
        self.out_of = Overflow()

    def elaborate(self, platform):
        m = Module()

        m.submodules.norm1_out_z = self.out_z
        m.submodules.norm1_out_overflow = self.out_of
        m.submodules.norm1_temp_z = self.temp_z
        m.submodules.norm1_temp_of = self.temp_of
        m.submodules.norm1_in_z = self.in_z
        m.submodules.norm1_in_overflow = self.in_of

        in_z = FPNumBase(self.width, False)
        in_of = Overflow()
        m.submodules.norm1_insel_z = in_z
        m.submodules.norm1_insel_overflow = in_of

        # select which of temp or in z/of to use
        with m.If(self.in_select):
            m.d.comb += [in_z.copy(self.in_z), in_of.copy(self.in_of)]
        with m.Else():
            m.d.comb += [in_z.copy(self.temp_z), in_of.copy(self.temp_of)]

        # initialise out from in (overridden below)
        m.d.comb += [self.out_z.copy(in_z), self.out_of.copy(in_of)]

        # normalisation increase/decrease conditions
        decrease = Signal(reset_less=True)
        increase = Signal(reset_less=True)
        m.d.comb += decrease.eq(in_z.m_msbzero & in_z.exp_gt_n126)
        m.d.comb += increase.eq(in_z.exp_lt_n126)
        m.d.comb += self.out_norm.eq(decrease | increase)  # loop-end

        with m.If(decrease):
            # decrease exponent, shift mantissa UP by one
            m.d.comb += self.out_z.e.eq(in_z.e - 1)
            m.d.comb += self.out_z.m.eq(in_z.m << 1)
            # bit 0 of the shifted mantissa steals the guard bit
            m.d.comb += self.out_z.m[0].eq(in_of.guard)
            # round moves up into guard, and is itself reset
            m.d.comb += self.out_of.guard.eq(in_of.round_bit)
            m.d.comb += self.out_of.round_bit.eq(0)
            m.d.comb += self.out_of.m0.eq(in_of.guard)
        with m.Elif(increase):
            # increase exponent, shift mantissa DOWN by one
            m.d.comb += self.out_z.e.eq(in_z.e + 1)
            m.d.comb += self.out_z.m.eq(in_z.m >> 1)
            m.d.comb += self.out_of.guard.eq(in_z.m[0])
            m.d.comb += self.out_of.m0.eq(in_z.m[1])
            m.d.comb += self.out_of.round_bit.eq(in_of.guard)
            m.d.comb += self.out_of.sticky.eq(in_of.sticky |
                                              in_of.round_bit)

        return m
1026
1027
class FPNorm1Single(FPState, FPID):
    """ "normalise_1" state (single-cycle variant).

        out_z is driven combinatorially by FPNorm1ModSingle (see
        setup); the action latches the module's roundz flag and moves
        on to "round".
    """

    def __init__(self, width, id_wid, single_cycle=True):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModSingle(width)
        self.out_norm = Signal(reset_less=True)
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, in_mid):
        """ links module to inputs and outputs
        """
        self.mod.setup(m, in_z, in_of, self.out_z)

        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ what to do while the state machine is in this state
        """
        self.idsync(m)
        roundz = self.mod.out_of.roundz
        m.d.sync += self.out_roundz.eq(roundz)
        m.next = "round"
1050
1051
class FPNorm1Multi(FPState, FPID):
    """ FSM state "normalise_1" built around FPNorm1ModMulti.

        The state is re-entered while out_norm is set, feeding the
        module's outputs back through temp_z/temp_of on every clock.
        Once out_norm clears, roundz is latched and the FSM moves on
        to "round".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.mod = FPNorm1ModMulti(width)
        # strobe/acknowledge pair
        self.stb = Signal(reset_less=True)
        self.ack = Signal(reset=0, reset_less=True)
        self.out_norm = Signal(reset_less=True)
        self.in_accept = Signal(reset_less=True)
        # feedback storage, written by action() on every visit
        self.temp_z = FPNumBase(width)
        self.temp_of = Overflow()
        self.out_z = FPNumBase(width)
        self.out_roundz = Signal(reset_less=True)

    def setup(self, m, in_z, in_of, norm_stb, in_mid):
        """ connects the normalisation module, the strobe and
            (when an ID is in use) the incoming ID
        """
        self.mod.setup(m, in_z, in_of, norm_stb,
                       self.in_accept, self.temp_z, self.temp_of,
                       self.out_z, self.out_norm)

        m.d.comb += self.stb.eq(norm_stb)
        # sets ack to zero when not in the normalise_1 state
        m.d.sync += self.ack.eq(0)

        # in_mid is None when no ID is being tracked
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: run one step, loop or move on to "round"
        """
        self.idsync(m)
        # accept: strobe is high and nothing has been acknowledged yet
        m.d.comb += self.in_accept.eq(self.stb & ~self.ack)
        # store this step's results
        m.d.sync += self.temp_of.copy(self.mod.out_of)
        m.d.sync += self.temp_z.copy(self.out_z)
        with m.If(self.out_norm):
            # another step is required: stay in this state
            with m.If(self.in_accept):
                m.d.sync += self.ack.eq(1)
            with m.Else():
                m.d.sync += self.ack.eq(0)
        with m.Else():
            # normalisation not required (or done)
            m.next = "round"
            m.d.sync += [
                self.ack.eq(1),
                self.out_roundz.eq(self.mod.out_of.roundz),
            ]
1097
1098
class FPNormToPack(FPState, FPID):
    """ FSM state "normalise_1" that chains normalisation, rounding,
        corrections and packing combinatorially.

        The packed value is registered into out_z.v by action() and
        the FSM then moves to "pack_put_z".
    """

    def __init__(self, width, id_wid):
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "normalise_1")
        self.width = width

    def setup(self, m, in_z, in_of, in_mid):
        """ builds the norm -> round -> corrections -> pack chain
            and (when an ID is in use) connects the incoming ID
        """
        width = self.width

        # stage 1: normalisation, fed directly from in_z + in_of
        nmod = FPNorm1ModSingle(width)
        n_out_z = FPNumBase(width)
        n_out_roundz = Signal(reset_less=True)
        nmod.setup(m, in_z, in_of, n_out_z)

        # stage 2: rounding, fed from the normalisation outputs
        rmod = FPRoundMod(width)
        r_out_z = FPNumBase(width)
        rmod.setup(m, n_out_z, n_out_roundz)
        m.d.comb += n_out_roundz.eq(nmod.out_of.roundz)
        m.d.comb += r_out_z.copy(rmod.out_z)

        # stage 3: corrections, fed from the rounded value
        cmod = FPCorrectionsMod(width)
        c_out_z = FPNumBase(width)
        cmod.setup(m, r_out_z)
        m.d.comb += c_out_z.copy(cmod.out_z)

        # stage 4: pack, fed from the corrected value.  the pack
        # module is kept on self because action() reads its output
        self.pmod = FPPackMod(width)
        self.out_z = FPNumBase(width)
        self.pmod.setup(m, c_out_z)

        # multiplex ID (in_mid is None when no ID is being tracked)
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: sync the ID, register the packed result
        """
        self.idsync(m)  # copies incoming ID to outgoing
        packed = self.pmod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)  # outputs packed result
        m.next = "pack_put_z"
1142
1143
class FPRoundMod:
    """ combinatorial rounding: adds one to the mantissa when
        in_roundz is set
    """

    def __init__(self, width):
        self.in_roundz = Signal(reset_less=True)
        self.in_z = FPNumBase(width, False)
        self.out_z = FPNumBase(width, False)

    def setup(self, m, in_z, roundz):
        """ registers this module as submodule "roundz" of m and
            connects its inputs
        """
        m.submodules.roundz = self

        m.d.comb += self.in_z.copy(in_z)
        m.d.comb += self.in_roundz.eq(roundz)

    def elaborate(self, platform):
        """ out_z defaults to in_z and is only altered when rounding
        """
        m = Module()
        src, dst = self.in_z, self.out_z
        m.d.comb += dst.copy(src)
        with m.If(self.in_roundz):
            # mantissa rounds up
            m.d.comb += dst.m.eq(src.m + 1)
            # mantissa was all 1s: the exponent goes up as well
            with m.If(src.m == src.m1s):
                m.d.comb += dst.e.eq(src.e + 1)
        return m
1165
1166
class FPRound(FPState, FPID):
    """ FSM state "round": registers the output of FPRoundMod and
        moves on to "corrections"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "round")
        FPID.__init__(self, id_wid)
        self.mod = FPRoundMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, roundz, in_mid):
        """ connects the rounding module and (when an ID is in use)
            the incoming ID
        """
        self.mod.setup(m, in_z, roundz)

        # in_mid is None when no ID is being tracked
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: sync the ID, register the rounded value
        """
        self.idsync(m)
        rounded = self.mod.out_z
        m.d.sync += self.out_z.copy(rounded)
        m.next = "corrections"
1187
1188
class FPCorrectionsMod:
    """ combinatorial exponent correction: when in_z reports
        is_denormalised the output exponent is replaced by N127
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers this module as submodule "corrections" of m
            and connects its input
        """
        m.submodules.corrections = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ out_z defaults to in_z; only the exponent may be changed
        """
        m = Module()
        src, dst = self.in_z, self.out_z
        m.submodules.corr_in_z = src
        m.submodules.corr_out_z = dst
        m.d.comb += dst.copy(src)
        with m.If(src.is_denormalised):
            m.d.comb += dst.e.eq(src.N127)
        return m
1209
1210
class FPCorrections(FPState, FPID):
    """ FSM state "corrections": registers the output of
        FPCorrectionsMod and moves on to "pack"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "corrections")
        FPID.__init__(self, id_wid)
        self.mod = FPCorrectionsMod(width)
        self.out_z = FPNumBase(width)

    def setup(self, m, in_z, in_mid):
        """ connects the corrections module and (when an ID is in
            use) the incoming ID
        """
        self.mod.setup(m, in_z)
        # in_mid is None when no ID is being tracked
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: sync the ID, register the corrected value
        """
        self.idsync(m)
        corrected = self.mod.out_z
        m.d.sync += self.out_z.copy(corrected)
        m.next = "pack"
1230
1231
class FPPackMod:
    """ combinatorial packing of sign/exponent/mantissa into out_z,
        substituting infinity when in_z reports is_overflowed
    """

    def __init__(self, width):
        self.in_z = FPNumOut(width, False)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z):
        """ registers this module as submodule "pack" of m and
            connects its input
        """
        m.submodules.pack = self
        m.d.comb += self.in_z.copy(in_z)

    def elaborate(self, platform):
        """ selects between out_z.inf() and out_z.create()
        """
        m = Module()
        src = self.in_z
        m.submodules.pack_in_z = src
        with m.If(src.is_overflowed):
            # overflow: emit infinity carrying the input's sign
            m.d.comb += self.out_z.inf(src.s)
        with m.Else():
            m.d.comb += self.out_z.create(src.s, src.e, src.m)
        return m
1252
1253
class FPPack(FPState, FPID):
    """ FSM state "pack": registers the packed value produced by
        FPPackMod and moves on to "pack_put_z"
    """

    def __init__(self, width, id_wid):
        FPState.__init__(self, "pack")
        FPID.__init__(self, id_wid)
        self.mod = FPPackMod(width)
        self.out_z = FPNumOut(width, False)

    def setup(self, m, in_z, in_mid):
        """ connects the pack module and (when an ID is in use) the
            incoming ID
        """
        self.mod.setup(m, in_z)
        # in_mid is None when no ID is being tracked
        if self.in_mid is None:
            return
        m.d.comb += self.in_mid.eq(in_mid)

    def action(self, m):
        """ FSM action: sync the ID, register the packed value
        """
        self.idsync(m)
        packed = self.mod.out_z.v
        m.d.sync += self.out_z.v.eq(packed)
        m.next = "pack_put_z"
1273
1274
class FPPutZ(FPState):
    """ FSM state that presents in_z on out_z with a stb/ack
        handshake, then moves to to_state
    """

    def __init__(self, state, in_z, out_z, in_mid, out_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z / out_z: source and destination of the value
            * in_mid / out_mid: source and destination ID
              (in_mid may be None, in which case no ID is copied)
            * to_state: state entered once the value has been
              acknowledged.  defaults to "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_z = out_z
        self.in_mid = in_mid
        self.out_mid = out_mid

    def action(self, m):
        """ FSM action: copy ID and value, run the output handshake
        """
        if self.in_mid is not None:
            m.d.sync += self.out_mid.eq(self.in_mid)
        m.d.sync += self.out_z.v.eq(self.in_z.v)

        # strobe is held high until it is seen together with ack
        acknowledged = self.out_z.stb & self.out_z.ack
        with m.If(acknowledged):
            m.d.sync += self.out_z.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += self.out_z.stb.eq(1)
1298
1299
class FPPutZIdx(FPState):
    """ FSM state that presents in_z on the entry of out_zs selected
        by in_mid, with a stb/ack handshake, then moves to to_state
    """

    def __init__(self, state, in_z, out_zs, in_mid, to_state=None):
        """ * state: name of this FSM state
            * in_z: source of the value
            * out_zs: indexable collection of outputs
            * in_mid: index into out_zs
            * to_state: state entered once the value has been
              acknowledged.  defaults to "get_ops"
        """
        FPState.__init__(self, state)
        self.to_state = "get_ops" if to_state is None else to_state
        self.in_z = in_z
        self.out_zs = out_zs
        self.in_mid = in_mid

    def action(self, m):
        """ FSM action: copy the value to the selected output and
            run the handshake on that output
        """
        # the output addressed by the current ID
        target = self.out_zs[self.in_mid]

        outz_stb = Signal(reset_less=True)
        outz_ack = Signal(reset_less=True)
        m.d.comb += outz_stb.eq(target.stb)
        m.d.comb += outz_ack.eq(target.ack)

        m.d.sync += target.v.eq(self.in_z.v)

        # strobe is held high until it is seen together with ack
        with m.If(outz_stb & outz_ack):
            m.d.sync += target.stb.eq(0)
            m.next = self.to_state
        with m.Else():
            m.d.sync += target.stb.eq(1)
1325
1326
class FPADDBaseMod(FPID):
    """ the FP adder proper: a list of FPState objects that is turned
        into a single FSM by get_fragment()
    """

    def __init__(self, width, id_wid=None, single_cycle=False, compact=True):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * compact: True indicates a reduced number of stages
        """
        FPID.__init__(self, id_wid)
        self.width = width
        self.single_cycle = single_cycle
        self.compact = compact

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)
        self.out_z = FPOp(width)

        # FSM states, in the order in which they were added
        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules.out_z = self.out_z
        m.submodules.in_t = self.in_t

        # populate self.states with the selected pipeline
        build = (self.get_compact_fragment if self.compact
                 else self.get_longer_fragment)
        build(m, platform)

        # one FSM state per entry, named after its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m

    def get_longer_fragment(self, m, platform=None):
        """ builds the pipeline with one FSM state per stage
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCases(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        dn = self.add_state(FPAddDeNorm(width, id_wid))
        dn.setup(m, a, b, sc.in_mid)

        # both alignment variants are wired up identically
        align_cls = FPAddAlignSingle if self.single_cycle else FPAddAlignMulti
        alm = self.add_state(align_cls(width, id_wid))
        alm.setup(m, dn.out_a, dn.out_b, dn.in_mid)

        add0 = self.add_state(FPAddStage0(width, id_wid))
        add0.setup(m, alm.out_a, alm.out_b, alm.in_mid)

        add1 = self.add_state(FPAddStage1(width, id_wid))
        add1.setup(m, add0.out_tot, add0.out_z, add0.in_mid)

        # NOTE(review): the ID is taken from add0 rather than add1
        # here - confirm this is intended
        if self.single_cycle:
            n1 = self.add_state(FPNorm1Single(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add0.in_mid)
        else:
            n1 = self.add_state(FPNorm1Multi(width, id_wid))
            n1.setup(m, add1.out_z, add1.out_of, add1.norm_stb, add0.in_mid)

        rn = self.add_state(FPRound(width, id_wid))
        rn.setup(m, n1.out_z, n1.out_roundz, n1.in_mid)

        cor = self.add_state(FPCorrections(width, id_wid))
        cor.setup(m, rn.out_z, rn.in_mid)

        # NOTE(review): the ID is taken from rn rather than cor
        # here - confirm this is intended
        pa = self.add_state(FPPack(width, id_wid))
        pa.setup(m, cor.out_z, rn.in_mid)

        # "pack_put_z" outputs the packed result
        self.add_state(FPPutZ("pack_put_z", pa.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

        # "put_z" outputs sc.out_z
        # NOTE(review): the ID is taken from pa here, whereas the
        # compact pipeline uses sc.in_mid - confirm
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              pa.in_mid, self.out_mid))

    def get_compact_fragment(self, m, platform=None):
        """ builds the pipeline with a reduced number of stages
        """
        width, id_wid = self.width, self.id_wid

        get = self.add_state(FPGet2Op("get_ops", "special_cases",
                                      self.in_a, self.in_b, width))
        get.setup(m, self.in_a, self.in_b, self.in_t.stb, self.in_t.ack)
        a = get.out_op1
        b = get.out_op2

        sc = self.add_state(FPAddSpecialCasesDeNorm(width, id_wid))
        sc.setup(m, a, b, self.in_mid)

        alm = self.add_state(FPAddAlignSingleAdd(width, id_wid))
        alm.setup(m, sc.out_a, sc.out_b, sc.in_mid)

        n1 = self.add_state(FPNormToPack(width, id_wid))
        n1.setup(m, alm.out_z, alm.out_of, alm.in_mid)

        # "pack_put_z" outputs the packed result
        self.add_state(FPPutZ("pack_put_z", n1.out_z, self.out_z,
                              n1.in_mid, self.out_mid))

        # "put_z" outputs sc.out_z
        self.add_state(FPPutZ("put_z", sc.out_z, self.out_z,
                              sc.in_mid, self.out_mid))
1443
1444
class FPADDBase(FPState, FPID):
    """ FSM state "fpadd" that wraps an FPADDBaseMod submodule.

        setup() wires the submodule's operands, trigger, ID and result
        through to this object; action() raises the submodule's input
        strobe when an operand pair is accepted and moves to "put_z"
        once the submodule's output trigger (z_done) is seen.
    """

    def __init__(self, width, id_wid=None, single_cycle=False):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
        """
        FPID.__init__(self, id_wid)
        FPState.__init__(self, "fpadd")
        self.width = width
        self.single_cycle = single_cycle
        self.mod = FPADDBaseMod(width, id_wid, single_cycle)

        self.in_t = Trigger()
        self.in_a = Signal(width)
        self.in_b = Signal(width)

        self.z_done = Signal(reset_less=True)  # connects to out_z Strobe
        self.in_accept = Signal(reset_less=True)
        self.add_stb = Signal(reset_less=True)
        self.add_ack = Signal(reset=0, reset_less=True)

    def setup(self, m, a, b, add_stb, in_mid, out_z, out_mid):
        """ links the wrapped adder to its inputs and outputs

            * a, b: operands
            * add_stb: incoming strobe, registered into add_stb
            * in_mid: incoming ID (ignored when no ID is in use)
            * out_z: receives the adder's value and strobe, and
              supplies the acknowledge passed back to the adder
            * out_mid: receives the adder's outgoing ID
        """
        self.out_z = out_z
        self.out_mid = out_mid
        m.d.comb += [self.in_a.eq(a),
                     self.in_b.eq(b),
                     self.mod.in_a.eq(self.in_a),
                     self.mod.in_b.eq(self.in_b),
                     self.z_done.eq(self.mod.out_z.trigger),
                     self.mod.in_t.stb.eq(self.in_t.stb),
                     self.in_t.ack.eq(self.mod.in_t.ack),
                     self.out_z.v.eq(self.mod.out_z.v),
                     self.out_z.stb.eq(self.mod.out_z.stb),
                     self.mod.out_z.ack.eq(self.out_z.ack),
                    ]

        # ID wiring is only possible when an ID is in use: in_mid is
        # None otherwise, and calling .eq() on it would fail
        if self.in_mid is not None:
            m.d.comb += [self.in_mid.eq(in_mid),
                         self.mod.in_mid.eq(self.in_mid),
                         self.out_mid.eq(self.mod.out_mid),
                        ]

        m.d.sync += self.add_stb.eq(add_stb)
        m.d.sync += self.add_ack.eq(0)  # sets to zero when not in active state
        m.d.sync += self.out_z.ack.eq(0)  # likewise

        m.submodules.fpadd = self.mod

    def action(self, m):
        """ FSM action: hand operands to the adder, wait for z_done
        """
        # in_accept is set on incoming strobe HIGH and ack LOW.
        m.d.comb += self.in_accept.eq((~self.add_ack) & (self.add_stb))

        with m.If(~self.z_done):
            # not done: test for accepting an incoming operand pair
            with m.If(self.in_accept):
                m.d.sync += [
                    self.add_ack.eq(1),  # acknowledge receipt...
                    self.in_t.stb.eq(1),  # initiate add
                ]
            with m.Else():
                m.d.sync += [self.add_ack.eq(0),
                             self.in_t.stb.eq(0),
                             self.out_z.ack.eq(1),
                            ]
        with m.Else():
            # done: acknowledge, and move on to the output state.
            # the value and ID themselves are connected
            # combinatorially by setup()
            m.d.sync += [self.add_ack.eq(1),
                         self.in_t.stb.eq(0)
                        ]
            m.next = "put_z"
1536
class ResArray:
    """ an Array of FPOp results plus a single FPOp input and an ID
    """

    def __init__(self, width, id_wid, rs_sz=2):
        """ * width: bit-width passed to each FPOp
            * id_wid: bit-width of the in_mid signal
            * rs_sz: number of entries in the result array.  this
              used to be read from an undefined name, which made the
              constructor raise NameError; it is now a parameter
        """
        self.width = width
        self.id_wid = id_wid
        self.rs_sz = rs_sz
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)
        self.in_z = FPOp(width)
        self.in_mid = Signal(self.id_wid, reset_less=True)

    def setup(self, m, in_z, in_mid):
        """ connects the input value and ID combinatorially
        """
        m.d.comb += [self.in_z.copy(in_z),
                     self.in_mid.eq(in_mid)]

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for the result array: in_z
            and every array entry are added as submodules
        """
        m = Module()
        m.submodules.res_in_z = self.in_z
        m.submodules += self.res

        return m

    def ports(self):
        """ returns the concatenated ports of every array entry
        """
        res = []
        for z in self.res:
            res += z.ports()
        return res
1568
1569
class FPADD(FPID):
    """ FPADD: stages as follows:

        FPGetOp (a)
           |
        FPGetOp (b)
           |
        FPAddBase---> FPAddBaseMod
           |            |
        PutZ          GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ

        FPAddBase is tricky: it is both a stage and *has* stages.
        Connection to FPAddBaseMod therefore requires an in stb/ack
        and an out stb/ack.  Just as with Add1-Norm1 interaction, FPGetOp
        needs to be the thing that raises the incoming stb.
    """

    def __init__(self, width, id_wid=None, single_cycle=False, rs_sz=2):
        """ IEEE754 FP Add

            * width: bit-width of IEEE754.  supported: 16, 32, 64
            * id_wid: an identifier that is sync-connected to the input
            * single_cycle: True indicates each stage to complete in 1 clock
            * rs_sz: number of (in_a, in_b) operand pairs, and of
              out_z results, that are created
        """
        self.width = width
        self.id_wid = id_wid
        self.single_cycle = single_cycle

        # the ID signals live on a separate FPID object
        self.ids = FPID(id_wid)

        # operand pairs, named in_a_<n> / in_b_<n>
        rs = []
        for i in range(rs_sz):
            in_a = FPOp(width)
            in_b = FPOp(width)
            in_a.name = "in_a_%d" % i
            in_b.name = "in_b_%d" % i
            rs.append((in_a, in_b))
        self.rs = Array(rs)

        # results, named out_z_<n>
        res = []
        for i in range(rs_sz):
            out_z = FPOp(width)
            out_z.name = "out_z_%d" % i
            res.append(out_z)
        self.res = Array(res)

        # FSM states, in the order in which they were added
        self.states = []

    def add_state(self, state):
        """ appends state to the list of FSM states and returns it
        """
        self.states.append(state)
        return state

    def get_fragment(self, platform=None):
        """ creates the HDL code-fragment for FPAdd
        """
        m = Module()
        m.submodules += self.rs

        # only the first operand pair is connected
        in_a, in_b = self.rs[0][0], self.rs[0][1]

        out_z = FPOp(self.width)
        out_mid = Signal(self.id_wid, reset_less=True)
        m.submodules.out_z = out_z

        # "get_a" -> "get_b": fetch the first operand
        geta = self.add_state(FPGetOp("get_a", "get_b", in_a, self.width))
        geta.setup(m, in_a)
        a = geta.out_op

        # "get_b" -> "fpadd": fetch the second operand
        getb = self.add_state(FPGetOp("get_b", "fpadd", in_b, self.width))
        getb.setup(m, in_b)
        b = getb.out_op

        # "fpadd": the adder, strobed by getb.out_decode
        ab = self.add_state(FPADDBase(self.width, self.id_wid,
                                      self.single_cycle))
        ab.setup(m, a, b, getb.out_decode, self.ids.in_mid, out_z, out_mid)

        # "put_z" -> "get_a": route the result to res[out_mid]
        self.add_state(FPPutZIdx("put_z", ab.out_z, self.res,
                                 out_mid, "get_a"))

        # one FSM state per entry, named after its state_from
        with m.FSM():
            for state in self.states:
                with m.State(state.state_from):
                    state.action(m)

        return m
1661
1662
if __name__ == "__main__":
    # developer toggle: True builds the full FPADD, False builds
    # FPADDBase on its own
    if True:
        alu = FPADD(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=(alu.rs[0][0].ports() +
                         alu.rs[0][1].ports() +
                         alu.res[0].ports() +
                         [alu.ids.in_mid, alu.ids.out_mid]))
    else:
        alu = FPADDBase(width=32, id_wid=5, single_cycle=True)
        main(alu, ports=([alu.in_a, alu.in_b] +
                         alu.in_t.ports() +
                         alu.out_z.ports() +
                         [alu.in_mid, alu.out_mid]))
1676
1677
1678 # works... but don't use, just do "python fname.py convert -t v"
1679 #print (verilog.convert(alu, ports=[
1680 # ports=alu.in_a.ports() + \
1681 # alu.in_b.ports() + \
1682 # alu.out_z.ports())