1 # IEEE Floating Point Adder (Single Precision)
2 # Copyright (C) Jonathan P Dawson 2013
5 from nmigen
import Module
, Signal
, Cat
, Mux
, Array
, Const
6 from nmigen
.lib
.coding
import PriorityEncoder
7 from nmigen
.cli
import main
, verilog
10 from fpbase
import FPNumIn
, FPNumOut
, FPOp
, Overflow
, FPBase
, FPNumBase
11 from fpbase
import MultiShiftRMerge
, Trigger
12 from singlepipe
import (ControlBase
, StageChain
, UnbufferedPipeline
,
14 from multipipe
import CombMuxOutPipe
15 from multipipe
import PriorityCombMuxInPipe
17 from fpbase
import FPState
, FPID
18 from fpcommon
.getop
import (FPGetOpMod
, FPGetOp
, FPNumBase2Ops
, FPADDBaseData
,
19 FPGet2OpMod
, FPGet2Op
)
20 from fpcommon
.denorm
import (FPSCData
, FPAddDeNormMod
, FPAddDeNorm
)
21 from fpcommon
.postcalc
import FPAddStage1Data
22 from fpcommon
.postnormalise
import (FPNorm1Data
, FPNorm1ModSingle
,
23 FPNorm1ModMulti
, FPNorm1Single
, FPNorm1Multi
)
24 from fpcommon
.roundz
import (FPRoundData
, FPRoundMod
, FPRound
)
25 from fpcommon
.corrections
import (FPCorrectionsMod
, FPCorrections
)
26 from fpcommon
.pack
import (FPPackData
, FPPackMod
, FPPack
)
27 from fpcommon
.normtopack
import FPNormToPack
28 from fpcommon
.putz
import (FPPutZ
, FPPutZIdx
)
30 from fpadd
.specialcases
import (FPAddSpecialCasesMod
, FPAddSpecialCases
,
31 FPAddSpecialCasesDeNorm
)
32 from fpadd
.align
import (FPAddAlignMulti
, FPAddAlignMultiMod
, FPNumIn2Ops
,
33 FPAddAlignSingleMod
, FPAddAlignSingle
)
34 from fpadd
.add0
import (FPAddStage0Data
, FPAddStage0Mod
, FPAddStage0
)
37 class FPAddAlignSingleAdd(FPState
, UnbufferedPipeline
):
39 def __init__(self
, width
, id_wid
):
40 FPState
.__init
__(self
, "align")
43 UnbufferedPipeline
.__init
__(self
, self
) # pipeline is its own stage
44 self
.a1o
= self
.ospec()
47 return FPSCData(self
.width
, self
.id_wid
)
50 return FPAddStage1Data(self
.width
, self
.id_wid
) # AddStage1 ospec
52 def setup(self
, m
, i
):
53 """ links module to inputs and outputs
56 # chain AddAlignSingle, AddStage0 and AddStage1
57 mod
= FPAddAlignSingleMod(self
.width
, self
.id_wid
)
58 a0mod
= FPAddStage0Mod(self
.width
, self
.id_wid
)
59 a1mod
= FPAddStage1Mod(self
.width
, self
.id_wid
)
61 chain
= StageChain([mod
, a0mod
, a1mod
])
70 m
.d
.sync
+= self
.a1o
.eq(self
.process(None))
71 m
.next
= "normalise_1"
76 class FPAddStage1Mod(FPState
):
77 """ Second stage of add: preparation for normalisation.
78 detects when tot sum is too big (tot[27] is kinda a carry bit)
81 def __init__(self
, width
, id_wid
):
88 return FPAddStage0Data(self
.width
, self
.id_wid
)
91 return FPAddStage1Data(self
.width
, self
.id_wid
)
96 def setup(self
, m
, i
):
97 """ links module to inputs and outputs
99 m
.submodules
.add1
= self
100 m
.submodules
.add1_out_overflow
= self
.o
.of
102 m
.d
.comb
+= self
.i
.eq(i
)
104 def elaborate(self
, platform
):
106 m
.d
.comb
+= self
.o
.z
.eq(self
.i
.z
)
107 # tot[-1] (MSB) gets set when the sum overflows. shift result down
108 with m
.If(~self
.i
.out_do_z
):
109 with m
.If(self
.i
.tot
[-1]):
111 self
.o
.z
.m
.eq(self
.i
.tot
[4:]),
112 self
.o
.of
.m0
.eq(self
.i
.tot
[4]),
113 self
.o
.of
.guard
.eq(self
.i
.tot
[3]),
114 self
.o
.of
.round_bit
.eq(self
.i
.tot
[2]),
115 self
.o
.of
.sticky
.eq(self
.i
.tot
[1] | self
.i
.tot
[0]),
116 self
.o
.z
.e
.eq(self
.i
.z
.e
+ 1)
118 # tot[-1] (MSB) zero case
121 self
.o
.z
.m
.eq(self
.i
.tot
[3:]),
122 self
.o
.of
.m0
.eq(self
.i
.tot
[3]),
123 self
.o
.of
.guard
.eq(self
.i
.tot
[2]),
124 self
.o
.of
.round_bit
.eq(self
.i
.tot
[1]),
125 self
.o
.of
.sticky
.eq(self
.i
.tot
[0])
128 m
.d
.comb
+= self
.o
.out_do_z
.eq(self
.i
.out_do_z
)
129 m
.d
.comb
+= self
.o
.oz
.eq(self
.i
.oz
)
130 m
.d
.comb
+= self
.o
.mid
.eq(self
.i
.mid
)
135 class FPAddStage1(FPState
):
137 def __init__(self
, width
, id_wid
):
138 FPState
.__init
__(self
, "add_1")
139 self
.mod
= FPAddStage1Mod(width
)
140 self
.out_z
= FPNumBase(width
, False)
141 self
.out_of
= Overflow()
142 self
.norm_stb
= Signal()
144 def setup(self
, m
, i
):
145 """ links module to inputs and outputs
149 m
.d
.sync
+= self
.norm_stb
.eq(0) # sets to zero when not in add1 state
151 m
.d
.sync
+= self
.out_of
.eq(self
.mod
.out_of
)
152 m
.d
.sync
+= self
.out_z
.eq(self
.mod
.out_z
)
153 m
.d
.sync
+= self
.norm_stb
.eq(1)
156 m
.next
= "normalise_1"
162 def __init__(self
, width
, id_wid
):
164 self
.mid
= Signal(id_wid
, reset_less
=True)
167 return [self
.z
.eq(i
.z
), self
.mid
.eq(i
.mid
)]
170 return [self
.z
, self
.mid
]
175 def __init__(self
, width
, id_wid
=None, single_cycle
=False, compact
=True):
178 * width: bit-width of IEEE754. supported: 16, 32, 64
179 * id_wid: an identifier that is sync-connected to the input
180 * single_cycle: True indicates each stage to complete in 1 clock
181 * compact: True indicates a reduced number of stages
185 self
.single_cycle
= single_cycle
186 self
.compact
= compact
188 self
.in_t
= Trigger()
189 self
.i
= self
.ispec()
190 self
.o
= self
.ospec()
195 return FPADDBaseData(self
.width
, self
.id_wid
)
198 return FPOpData(self
.width
, self
.id_wid
)
200 def add_state(self
, state
):
201 self
.states
.append(state
)
204 def get_fragment(self
, platform
=None):
205 """ creates the HDL code-fragment for FPAdd
208 m
.submodules
.out_z
= self
.o
.z
209 m
.submodules
.in_t
= self
.in_t
211 self
.get_compact_fragment(m
, platform
)
213 self
.get_longer_fragment(m
, platform
)
217 for state
in self
.states
:
218 with m
.State(state
.state_from
):
223 def get_longer_fragment(self
, m
, platform
=None):
225 get
= self
.add_state(FPGet2Op("get_ops", "special_cases",
230 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
232 sc
= self
.add_state(FPAddSpecialCases(self
.width
, self
.id_wid
))
233 sc
.setup(m
, a
, b
, self
.in_mid
)
235 dn
= self
.add_state(FPAddDeNorm(self
.width
, self
.id_wid
))
236 dn
.setup(m
, a
, b
, sc
.in_mid
)
238 if self
.single_cycle
:
239 alm
= self
.add_state(FPAddAlignSingle(self
.width
, self
.id_wid
))
240 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
242 alm
= self
.add_state(FPAddAlignMulti(self
.width
, self
.id_wid
))
243 alm
.setup(m
, dn
.out_a
, dn
.out_b
, dn
.in_mid
)
245 add0
= self
.add_state(FPAddStage0(self
.width
, self
.id_wid
))
246 add0
.setup(m
, alm
.out_a
, alm
.out_b
, alm
.in_mid
)
248 add1
= self
.add_state(FPAddStage1(self
.width
, self
.id_wid
))
249 add1
.setup(m
, add0
.out_tot
, add0
.out_z
, add0
.in_mid
)
251 if self
.single_cycle
:
252 n1
= self
.add_state(FPNorm1Single(self
.width
, self
.id_wid
))
253 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add0
.in_mid
)
255 n1
= self
.add_state(FPNorm1Multi(self
.width
, self
.id_wid
))
256 n1
.setup(m
, add1
.out_z
, add1
.out_of
, add1
.norm_stb
, add0
.in_mid
)
258 rn
= self
.add_state(FPRound(self
.width
, self
.id_wid
))
259 rn
.setup(m
, n1
.out_z
, n1
.out_roundz
, n1
.in_mid
)
261 cor
= self
.add_state(FPCorrections(self
.width
, self
.id_wid
))
262 cor
.setup(m
, rn
.out_z
, rn
.in_mid
)
264 pa
= self
.add_state(FPPack(self
.width
, self
.id_wid
))
265 pa
.setup(m
, cor
.out_z
, rn
.in_mid
)
267 ppz
= self
.add_state(FPPutZ("pack_put_z", pa
.out_z
, self
.out_z
,
268 pa
.in_mid
, self
.out_mid
))
270 pz
= self
.add_state(FPPutZ("put_z", sc
.out_z
, self
.out_z
,
271 pa
.in_mid
, self
.out_mid
))
273 def get_compact_fragment(self
, m
, platform
=None):
275 get
= FPGet2Op("get_ops", "special_cases", self
.width
, self
.id_wid
)
276 sc
= FPAddSpecialCasesDeNorm(self
.width
, self
.id_wid
)
277 alm
= FPAddAlignSingleAdd(self
.width
, self
.id_wid
)
278 n1
= FPNormToPack(self
.width
, self
.id_wid
)
280 get
.trigger_setup(m
, self
.in_t
.stb
, self
.in_t
.ack
)
282 chainlist
= [get
, sc
, alm
, n1
]
283 chain
= StageChain(chainlist
, specallocate
=True)
284 chain
.setup(m
, self
.i
)
286 for mod
in chainlist
:
287 sc
= self
.add_state(mod
)
289 ppz
= self
.add_state(FPPutZ("pack_put_z", n1
.out_z
.z
, self
.o
,
290 n1
.out_z
.mid
, self
.o
.mid
))
292 #pz = self.add_state(FPPutZ("put_z", sc.out_z.z, self.o,
293 # sc.o.mid, self.o.mid))
296 class FPADDBase(FPState
):
298 def __init__(self
, width
, id_wid
=None, single_cycle
=False):
301 * width: bit-width of IEEE754. supported: 16, 32, 64
302 * id_wid: an identifier that is sync-connected to the input
303 * single_cycle: True indicates each stage to complete in 1 clock
305 FPState
.__init
__(self
, "fpadd")
307 self
.single_cycle
= single_cycle
308 self
.mod
= FPADDBaseMod(width
, id_wid
, single_cycle
)
309 self
.o
= self
.ospec()
311 self
.in_t
= Trigger()
312 self
.i
= self
.ispec()
314 self
.z_done
= Signal(reset_less
=True) # connects to out_z Strobe
315 self
.in_accept
= Signal(reset_less
=True)
316 self
.add_stb
= Signal(reset_less
=True)
317 self
.add_ack
= Signal(reset
=0, reset_less
=True)
320 return self
.mod
.ispec()
323 return self
.mod
.ospec()
325 def setup(self
, m
, i
, add_stb
, in_mid
):
326 m
.d
.comb
+= [self
.i
.eq(i
),
327 self
.mod
.i
.eq(self
.i
),
328 self
.z_done
.eq(self
.mod
.o
.z
.trigger
),
329 #self.add_stb.eq(add_stb),
330 self
.mod
.in_t
.stb
.eq(self
.in_t
.stb
),
331 self
.in_t
.ack
.eq(self
.mod
.in_t
.ack
),
332 self
.o
.mid
.eq(self
.mod
.o
.mid
),
333 self
.o
.z
.v
.eq(self
.mod
.o
.z
.v
),
334 self
.o
.z
.stb
.eq(self
.mod
.o
.z
.stb
),
335 self
.mod
.o
.z
.ack
.eq(self
.o
.z
.ack
),
338 m
.d
.sync
+= self
.add_stb
.eq(add_stb
)
339 m
.d
.sync
+= self
.add_ack
.eq(0) # sets to zero when not in active state
340 m
.d
.sync
+= self
.o
.z
.ack
.eq(0) # likewise
341 #m.d.sync += self.in_t.stb.eq(0)
343 m
.submodules
.fpadd
= self
.mod
347 # in_accept is set on incoming strobe HIGH and ack LOW.
348 m
.d
.comb
+= self
.in_accept
.eq((~self
.add_ack
) & (self
.add_stb
))
350 #with m.If(self.in_t.ack):
351 # m.d.sync += self.in_t.stb.eq(0)
352 with m
.If(~self
.z_done
):
353 # not done: test for accepting an incoming operand pair
354 with m
.If(self
.in_accept
):
356 self
.add_ack
.eq(1), # acknowledge receipt...
357 self
.in_t
.stb
.eq(1), # initiate add
360 m
.d
.sync
+= [self
.add_ack
.eq(0),
365 # done: acknowledge, and write out id and value
366 m
.d
.sync
+= [self
.add_ack
.eq(1),
373 if self
.in_mid
is not None:
374 m
.d
.sync
+= self
.out_mid
.eq(self
.mod
.out_mid
)
377 self
.out_z
.v
.eq(self
.mod
.out_z
.v
)
379 # move to output state on detecting z ack
380 with m
.If(self
.out_z
.trigger
):
381 m
.d
.sync
+= self
.out_z
.stb
.eq(0)
384 m
.d
.sync
+= self
.out_z
.stb
.eq(1)
387 class FPADDBasePipe(ControlBase
):
388 def __init__(self
, width
, id_wid
):
389 ControlBase
.__init
__(self
)
390 self
.pipe1
= FPAddSpecialCasesDeNorm(width
, id_wid
)
391 self
.pipe2
= FPAddAlignSingleAdd(width
, id_wid
)
392 self
.pipe3
= FPNormToPack(width
, id_wid
)
394 self
._eqs
= self
.connect([self
.pipe1
, self
.pipe2
, self
.pipe3
])
396 def elaborate(self
, platform
):
398 m
.submodules
.scnorm
= self
.pipe1
399 m
.submodules
.addalign
= self
.pipe2
400 m
.submodules
.normpack
= self
.pipe3
401 m
.d
.comb
+= self
._eqs
405 class FPADDInMuxPipe(PriorityCombMuxInPipe
):
406 def __init__(self
, width
, id_wid
, num_rows
):
407 self
.num_rows
= num_rows
408 def iospec(): return FPADDBaseData(width
, id_wid
)
409 stage
= PassThroughStage(iospec
)
410 PriorityCombMuxInPipe
.__init
__(self
, stage
, p_len
=self
.num_rows
)
413 class FPADDMuxOutPipe(CombMuxOutPipe
):
414 def __init__(self
, width
, id_wid
, num_rows
):
415 self
.num_rows
= num_rows
416 def iospec(): return FPPackData(width
, id_wid
)
417 stage
= PassThroughStage(iospec
)
418 CombMuxOutPipe
.__init
__(self
, stage
, n_len
=self
.num_rows
)
422 """ Reservation-Station version of FPADD pipeline.
424 * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
425 * 3-stage adder pipeline
426 * fan-out on outputs (an array of FPPackData: z,mid)
428 Fan-in and Fan-out are combinatorial.
430 def __init__(self
, width
, id_wid
, num_rows
):
431 self
.num_rows
= num_rows
432 self
.inpipe
= FPADDInMuxPipe(width
, id_wid
, num_rows
) # fan-in
433 self
.fpadd
= FPADDBasePipe(width
, id_wid
) # add stage
434 self
.outpipe
= FPADDMuxOutPipe(width
, id_wid
, num_rows
) # fan-out
436 self
.p
= self
.inpipe
.p
# kinda annoying,
437 self
.n
= self
.outpipe
.n
# use pipe in/out as this class in/out
438 self
._ports
= self
.inpipe
.ports() + self
.outpipe
.ports()
440 def elaborate(self
, platform
):
442 m
.submodules
.inpipe
= self
.inpipe
443 m
.submodules
.fpadd
= self
.fpadd
444 m
.submodules
.outpipe
= self
.outpipe
446 m
.d
.comb
+= self
.inpipe
.n
.connect_to_next(self
.fpadd
.p
)
447 m
.d
.comb
+= self
.fpadd
.connect_to_next(self
.outpipe
)
456 """ FPADD: stages as follows:
462 FPAddBase---> FPAddBaseMod
464 PutZ GetOps->Specials->Align->Add1/2->Norm->Round/Pack->PutZ
466 FPAddBase is tricky: it is both a stage and *has* stages.
467 Connection to FPAddBaseMod therefore requires an in stb/ack
468 and an out stb/ack. Just as with Add1-Norm1 interaction, FPGetOp
469 needs to be the thing that raises the incoming stb.
472 def __init__(self
, width
, id_wid
=None, single_cycle
=False, rs_sz
=2):
475 * width: bit-width of IEEE754. supported: 16, 32, 64
476 * id_wid: an identifier that is sync-connected to the input
477 * single_cycle: True indicates each stage to complete in 1 clock
481 self
.single_cycle
= single_cycle
483 #self.out_z = FPOp(width)
484 self
.ids
= FPID(id_wid
)
487 for i
in range(rs_sz
):
490 in_a
.name
= "in_a_%d" % i
491 in_b
.name
= "in_b_%d" % i
492 rs
.append((in_a
, in_b
))
496 for i
in range(rs_sz
):
498 out_z
.name
= "out_z_%d" % i
500 self
.res
= Array(res
)
504 def add_state(self
, state
):
505 self
.states
.append(state
)
508 def get_fragment(self
, platform
=None):
509 """ creates the HDL code-fragment for FPAdd
512 m
.submodules
+= self
.rs
517 geta
= self
.add_state(FPGetOp("get_a", "get_b",
522 getb
= self
.add_state(FPGetOp("get_b", "fpadd",
527 ab
= FPADDBase(self
.width
, self
.id_wid
, self
.single_cycle
)
528 ab
= self
.add_state(ab
)
529 abd
= ab
.ispec() # create an input spec object for FPADDBase
530 m
.d
.sync
+= [abd
.a
.eq(a
), abd
.b
.eq(b
), abd
.mid
.eq(self
.ids
.in_mid
)]
531 ab
.setup(m
, abd
, getb
.out_decode
, self
.ids
.in_mid
)
534 pz
= self
.add_state(FPPutZIdx("put_z", o
.z
, self
.res
,
539 for state
in self
.states
:
540 with m
.State(state
.state_from
):
546 if __name__
== "__main__":
548 alu
= FPADD(width
=32, id_wid
=5, single_cycle
=True)
549 main(alu
, ports
=alu
.rs
[0][0].ports() + \
550 alu
.rs
[0][1].ports() + \
551 alu
.res
[0].ports() + \
552 [alu
.ids
.in_mid
, alu
.ids
.out_mid
])
554 alu
= FPADDBase(width
=32, id_wid
=5, single_cycle
=True)
555 main(alu
, ports
=[alu
.in_a
, alu
.in_b
] + \
557 alu
.out_z
.ports() + \
558 [alu
.in_mid
, alu
.out_mid
])
561 # works... but don't use, just do "python fname.py convert -t v"
562 #print (verilog.convert(alu, ports=[
563 # ports=alu.in_a.ports() + \
564 # alu.in_b.ports() + \