reduce n_comb_stages for fpdiv first setup
[ieee754fpu.git] / src / ieee754 / fpdiv / pipeline.py
1 """IEEE Floating Point Divider Pipeline
2
3 Relevant bugreport: http://bugs.libre-riscv.org/show_bug.cgi?id=99
4
5 Stack looks like this:
6
7 scnorm - FPDIVSpecialCasesDeNorm ispec FPADDBaseData
8 ------ ospec FPSCData
9
10 StageChain: FPDIVSpecialCasesMod,
11 FPAddDeNormMod
12
13 pipediv0 - FPDivStagesSetup ispec FPSCData
14 -------- ospec DivPipeInterstageData
15
16 StageChain: FPDivStage0Mod,
17 DivPipeSetupStage,
18 DivPipeCalculateStage,
19 ...
20 DivPipeCalculateStage
21
22 pipediv1 - FPDivStagesIntermediate ispec DivPipeInterstageData
23 -------- ospec DivPipeInterstageData
24
25 StageChain: DivPipeCalculateStage,
26 ...
27 DivPipeCalculateStage
28 ...
29 ...
30
31 pipediv5 - FPDivStagesFinal ispec FPDivStage0Data
32 -------- ospec FPAddStage1Data
33
34 StageChain: DivPipeCalculateStage,
35 ...
36 DivPipeCalculateStage,
37 DivPipeFinalStage,
38 FPDivStage2Mod
39
40 normpack - FPNormToPack ispec FPAddStage1Data
41 -------- ospec FPPackData
42
43 StageChain: Norm1ModSingle,
44 RoundMod,
45 CorrectionsMod,
46 PackMod
47
48 the number of combinatorial StageChains (n_comb_stages) in
49 FPDivStages is an argument arranged to get the length of the whole
50 pipeline down to sane numbers.
51
52 the reason for keeping the number of stages down is that for every
53 pipeline clock delay, a corresponding ReservationStation is needed.
54 if there are 24 pipeline stages, we need a whopping TWENTY FOUR
55 RS's. that's far too many. 6 is just about an acceptable number.
56 even 8 is starting to get alarmingly high.
57 """
58
59 from nmigen import Module
60 from nmigen.cli import main, verilog
61
62 from nmutil.singlepipe import ControlBase
63 from nmutil.concurrentunit import ReservationStations, num_bits
64
65 from ieee754.fpcommon.getop import FPADDBaseData
66 from ieee754.fpcommon.denorm import FPSCData
67 from ieee754.fpcommon.fpbase import FPFormat
68 from ieee754.fpcommon.pack import FPPackData
69 from ieee754.fpcommon.normtopack import FPNormToPack
70 from ieee754.fpdiv.specialcases import FPDIVSpecialCasesDeNorm
71 from ieee754.fpdiv.divstages import (FPDivStagesSetup,
72 FPDivStagesIntermediate,
73 FPDivStagesFinal)
74 from ieee754.pipeline import PipelineSpec
75 from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreConfig
76
77
class FPDIVBasePipe(ControlBase):
    """FP divider pipeline: special-cases, a chain of divide pipes, norm/pack.

    The pipeline is assembled as::

        FPDIVSpecialCasesDeNorm -> FPDivStagesSetup
                                -> FPDivStagesIntermediate (zero or more)
                                -> FPDivStagesFinal
                                -> FPNormToPack

    Each divide pipe is handed a count of calculate stages and the index of
    its first calculate stage.  ``pspec.n_comb_stages`` is the maximum count
    given to any one pipe; ``pspec.core_config.n_stages`` is the total that
    the loop distributes across the pipes.

    As the module docstring explains, the number of pipes is kept low
    because every pipeline clock delay needs a matching ReservationStation.

    :pspec: pipeline specification; must provide ``n_comb_stages`` and
            ``core_config.n_stages``.

    Raises ValueError if ``pspec.n_comb_stages`` is less than 1.
    """

    def __init__(self, pspec):
        self.pspec = pspec
        ControlBase.__init__(self)

        # get number of stages, set up loop.
        n_stages = pspec.core_config.n_stages
        max_n_comb_stages = self.pspec.n_comb_stages
        if max_n_comb_stages < 1:
            # zero or negative never advances stage_idx towards n_stages,
            # so the loop below would never terminate.
            raise ValueError("n_comb_stages must be >= 1, got %r"
                             % (max_n_comb_stages,))
        print("n_stages", n_stages)

        # how many pipes to create?  as few as possible: too many
        # ReservationStations means "big problems".
        pipechain = []
        stage_idx = 0

        end = False
        while not end:

            n_comb_stages = max_n_comb_stages

            # first pipe: needs to convert input from pipestart ospec.
            # it is given one fewer calculate stage than the maximum.
            # NOTE: "first" is detected by the chain being empty, NOT by
            # stage_idx == 0: with n_comb_stages == 1 the setup pipe gets
            # zero calculate stages, stage_idx stays at 0, and a test on
            # stage_idx would create setup pipes forever.
            if not pipechain:
                n_comb_stages -= 1
                kls = FPDivStagesSetup  # does n_comb_stages-1 calcs as well

            # last pipe: needs to convert output to pipeend ispec.
            # it takes however many calculate stages are left over.
            elif stage_idx + n_comb_stages >= n_stages:
                kls = FPDivStagesFinal  # does n_comb_stages-1 calcs as well
                end = True
                n_comb_stages = n_stages - stage_idx

            # intermediary stage
            else:
                kls = FPDivStagesIntermediate  # does n_comb_stages calcs

            # create (in each pipe) a StageChain n_comb_stages in length
            pipechain.append(kls(self.pspec, n_comb_stages, stage_idx))
            # increment so that each CalcStage gets a (correct) unique index
            stage_idx += n_comb_stages

        self.pipechain = pipechain

        # start and end: unpack/specialcases then normalisation/packing
        self.pipestart = pipestart = FPDIVSpecialCasesDeNorm(self.pspec)
        self.pipeend = pipeend = FPNormToPack(self.pspec)

        # connection equations are added to the comb domain in elaborate()
        self._eqs = self.connect([pipestart] + pipechain + [pipeend])

    def elaborate(self, platform):
        """Build the module: register every pipe as a submodule and wire them.

        Submodules are named ``scnorm``, ``pipediv0`` .. ``pipedivN`` (in
        chain order) and ``normpack``.  Returns the module produced by
        ``ControlBase.elaborate`` with those additions.
        """
        m = ControlBase.elaborate(self, platform)

        # add submodules
        m.submodules.scnorm = self.pipestart
        for i, p in enumerate(self.pipechain):
            setattr(m.submodules, "pipediv%d" % i, p)
        m.submodules.normpack = self.pipeend

        # ControlBase.connect creates the "eqs" needed to connect each pipe
        m.d.comb += self._eqs

        return m
139
def roundup(x, mod):
    """Round x up to the nearest multiple of mod.

    A value that is already a multiple of mod is returned unchanged.
    """
    remainder = x % mod
    if remainder == 0:
        return x
    # step past the partial multiple to reach the next whole one
    return x + mod - remainder
142
143
class FPDIVMuxInOut(ReservationStations):
    """ Reservation-Station version of FPDIV pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * N-stage divider pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.

        :width:    - floating-point width in bits: 16, 32 or 64.
        :num_rows: - passed straight through to ReservationStations.
        :op_wid:   - set this to the width of an operator which can
                     then be used to change the behaviour of the pipeline.

        Raises ValueError if width is not one of 16, 32 or 64.
    """

    def __init__(self, width, num_rows, op_wid=1):
        self.id_wid = num_bits(width)
        self.pspec = PipelineSpec(width, self.id_wid, op_wid)
        # get the standard mantissa width, store in the pspec HOWEVER...
        fmt = FPFormat.standard(width)
        log2_radix = 3     # tested options so far: 1, 2 and 3.
        n_comb_stages = 3  # TODO (depends on how many RS's we want)

        # ...extra bits on the mantissa: MSB is zero, MSB-1 is 1
        # then there is guard, round and sticky at the LSB end.
        # 16- and 64-bit use 5 extra bits, 32-bit uses 6.
        # NOTE(review): the reason 32-bit needs 6 is not visible here.
        extra_bits = {16: 5, 32: 6, 64: 5}
        if width not in extra_bits:
            # fail clearly rather than hit an unbound local further down
            raise ValueError("unsupported FP width %r (expected 16, 32 or 64)"
                             % (width,))
        # also: round up to nearest radix
        fmt.m_width = roundup(fmt.m_width + extra_bits[width], log2_radix)
        print("width", fmt.m_width)

        cfg = DivPipeCoreConfig(fmt.m_width, fmt.fraction_width, log2_radix)

        # everything the pipeline stages need travels in the pspec
        self.pspec.fpformat = fmt
        self.pspec.log2_radix = log2_radix
        self.pspec.n_comb_stages = n_comb_stages
        self.pspec.core_config = cfg

        # XXX TODO - a class (or function?) that takes the pspec (right here)
        # and creates... "something".  that "something" MUST have an eq function
        # new_pspec = deepcopy(self.pspec)
        # new_pspec.opkls = DivPipeCoreOperation
        # self.alu = FPDIVBasePipe(new_pspec)
        self.alu = FPDIVBasePipe(self.pspec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """Return a new input spec: an FPADDBaseData built from the pspec."""
        return FPADDBaseData(self.pspec)

    def o_specfn(self):
        """Return a new output spec: an FPPackData built from the pspec."""
        return FPPackData(self.pspec)