switch to exact version of cython
[ieee754fpu.git] / src / ieee754 / fpdiv / pipeline.py
1 """IEEE Floating Point Divider Pipeline
2
3 Relevant bugreport: http://bugs.libre-riscv.org/show_bug.cgi?id=99
4
5 Stack looks like this:
6
7 scnorm - FPDIVSpecialCasesDeNorm ispec FPADDBaseData
8 ------ ospec FPSCData
9
10 StageChain: FPDIVSpecialCasesMod,
11 FPAddDeNormMod
12
13 pipediv0 - FPDivStagesSetup ispec FPSCData
14 -------- ospec DivPipeInterstageData
15
16 StageChain: FPDivStage0Mod,
17 DivPipeSetupStage,
18 DivPipeCalculateStage,
19 ...
20 DivPipeCalculateStage
21
22 pipediv1 - FPDivStagesIntermediate ispec DivPipeInterstageData
23 -------- ospec DivPipeInterstageData
24
25 StageChain: DivPipeCalculateStage,
26 ...
27 DivPipeCalculateStage
28 ...
29 ...
30
31 pipediv5 - FPDivStagesFinal ispec FPDivStage0Data
32 -------- ospec FPAddStage1Data
33
34 StageChain: DivPipeCalculateStage,
35 ...
36 DivPipeCalculateStage,
37 DivPipeFinalStage,
38 FPDivStage2Mod
39
40 normpack - FPNormToPack ispec FPAddStage1Data
41 -------- ospec FPPackData
42
43 StageChain: Norm1ModSingle,
44 RoundMod,
45 CorrectionsMod,
46 PackMod
47
48 the number of combinatorial StageChains (n_comb_stages) in
49 FPDivStages is an argument arranged to get the length of the whole
50 pipeline down to sane numbers. it specifies the number of "blocks"
51 that will be combinatorially chained together.
52
53 the reason for keeping the number of stages down is that for every
54 pipeline clock delay, a corresponding ReservationStation is needed.
55 if there are 24 pipeline stages, we need a whopping TWENTY FOUR
56 RS's. that's far too many. 6 is just about an acceptable number.
57 even 8 is starting to get alarmingly high.
58 """
59
60 from nmigen import Module
61 from nmigen.cli import main, verilog
62
63 from nmutil.singlepipe import ControlBase
64 from nmutil.concurrentunit import ReservationStations, num_bits
65
66 from ieee754.fpcommon.getop import FPADDBaseData
67 from ieee754.fpcommon.denorm import FPSCData
68 from ieee754.fpcommon.fpbase import FPFormat
69 from ieee754.fpcommon.pack import FPPackData
70 from ieee754.fpcommon.normtopack import FPNormToPack
71 from ieee754.fpdiv.specialcases import FPDIVSpecialCasesDeNorm
72 from ieee754.fpdiv.divstages import (FPDivStagesSetup,
73 FPDivStagesIntermediate,
74 FPDivStagesFinal)
75 from ieee754.pipeline import PipelineSpec
76 from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreConfig
77
78
class FPDIVBasePipe(ControlBase):
    """Floating-point divider built as a chain of pipeline segments.

    The chain is, in order: one FPDIVSpecialCasesDeNorm, one
    FPDivStagesSetup, zero or more FPDivStagesIntermediate, one
    FPDivStagesFinal, and one FPNormToPack.

    pspec must provide:

    * core_config.n_stages - total number of calculate stages to spread
      over the divider segments
    * n_comb_stages - maximum number of stages chained together
      combinatorially inside one segment (must be at least 1)
    """

    def __init__(self, pspec):
        """Build every pipeline segment and record how they connect.

        Raises ValueError if pspec.n_comb_stages is less than 1, because
        the segment-building loop could then never reach the final segment.
        """
        self.pspec = pspec
        ControlBase.__init__(self)

        # use as few pipeline segments as possible: every segment needs
        # a corresponding ReservationStation, and too many
        # ReservationStations means "big problems".

        # get number of stages, set up loop.
        n_stages = pspec.core_config.n_stages
        max_n_comb_stages = self.pspec.n_comb_stages
        if max_n_comb_stages < 1:
            raise ValueError("n_comb_stages must be at least 1, got %r"
                             % (max_n_comb_stages,))
        print("n_stages", n_stages)

        pipechain = []
        stage_idx = 0

        # the first segment is tracked with a flag rather than with
        # "stage_idx == 0": when n_comb_stages is 1 the setup segment is
        # given zero calculate stages, stage_idx stays at 0, and testing
        # the index would select the setup segment forever.
        # NOTE(review): with n_comb_stages == 1 FPDivStagesSetup is
        # created with zero calculate stages - confirm it accepts that.
        first = True
        end = False
        while not end:

            n_comb_stages = max_n_comb_stages

            # needs to convert input from pipestart ospec
            if first:
                first = False
                n_comb_stages -= 1
                kls = FPDivStagesSetup  # does n_comb_stages-1 calcs as well

            # needs to convert output to pipeend ispec
            elif stage_idx + n_comb_stages >= n_stages:
                kls = FPDivStagesFinal  # does n_comb_stages-1 calcs as well
                end = True
                # the final segment takes whatever stages are left over
                n_comb_stages = n_stages - stage_idx

            # intermediary stage
            else:
                kls = FPDivStagesIntermediate  # does n_comb_stages calcs

            # create (in each pipe) a StageChain n_comb_stages in length
            pipechain.append(kls(self.pspec, n_comb_stages, stage_idx))

            # increment so that each CalcStage gets a (correct) unique index
            stage_idx += n_comb_stages

        self.pipechain = pipechain

        # start and end: unpack/specialcases then normalisation/packing
        self.pipestart = pipestart = FPDIVSpecialCasesDeNorm(self.pspec)
        self.pipeend = pipeend = FPNormToPack(self.pspec)

        self._eqs = self.connect([pipestart] + pipechain + [pipeend])

    def elaborate(self, platform):
        """Return the module with every segment added as a submodule.

        The start segment is named "scnorm", the divider segments
        "pipediv0", "pipediv1", ... in chain order, and the end segment
        "normpack".
        """
        m = ControlBase.elaborate(self, platform)

        # add submodules
        m.submodules.scnorm = self.pipestart
        for i, p in enumerate(self.pipechain):
            setattr(m.submodules, "pipediv%d" % i, p)
        m.submodules.normpack = self.pipeend

        # ControlBase.connect creates the "eqs" needed to connect each pipe
        m.d.comb += self._eqs

        return m
140
141
def roundup(x, mod):
    """Return x unchanged if it is a multiple of mod, else the next one up.

    "Next one up" is x plus whatever is needed to cancel the remainder
    of x modulo mod.
    """
    remainder = x % mod
    if remainder == 0:
        return x
    return x + mod - remainder
144
145
class FPDIVMuxInOut(ReservationStations):
    """ Reservation-Station wrapper around the FPDIV pipeline.

        * inputs are fanned in (an array of FPADDBaseData: a,b,mid)
        * an N-stage divider pipeline sits in the middle
        * outputs are fanned out (an array of FPPackData: z,mid)

        Both the fan-in and the fan-out are combinatorial.

        :op_wid: - width of an operator field, which can then be used
                   to change the behaviour of the pipeline.
    """

    def __init__(self, width, num_rows, op_wid=2):
        """Create the pipeline spec, the divider ALU and the stations.

        width is passed to FPFormat.standard and to PipelineSpec;
        num_rows sets the id width (via num_bits) and is the argument
        given to ReservationStations.__init__.
        """
        self.id_wid = num_bits(num_rows)
        spec = PipelineSpec(width, self.id_wid, op_wid)
        self.pspec = spec

        # standard format for this width: its fraction width is the
        # starting point for the divider core's width
        fpformat = FPFormat.standard(width)

        # tested options so far: 1, 2 and 3.
        log2_radix = 3
        # 2 compute stages per pipeline stage
        comb_stages_per_pipe = 2

        # two extra bits are needed: guard + round
        # (sticky comes from remainder.bool())
        core_fraction_width = fpformat.fraction_width + 2

        # there is no need to round the width up to a multiple of
        # log2_radix: DivPipeCoreCalculateStage just internally reduces
        # log2_radix on the last stage
        core_config = DivPipeCoreConfig(fpformat.width,
                                        core_fraction_width,
                                        log2_radix)

        # everything the divider pipeline needs travels in the pspec
        spec.fpformat = fpformat
        spec.n_comb_stages = comb_stages_per_pipe
        spec.core_config = core_config

        # XXX TODO - a class (or function?) that takes the pspec (right
        # here) and creates... "something". that "something" MUST have
        # an eq function. the idea: deepcopy the pspec, set its opkls to
        # DivPipeCoreOperation, and build the ALU from the copy.
        self.alu = FPDIVBasePipe(spec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """Return a new FPADDBaseData built from the pipeline spec."""
        return FPADDBaseData(self.pspec)

    def o_specfn(self):
        """Return a new FPPackData built from the pipeline spec."""
        return FPPackData(self.pspec)