FP16 DIV seems to be working
[ieee754fpu.git] / src / ieee754 / fpdiv / pipeline.py
1 """IEEE Floating Point Divider Pipeline
2
3 Relevant bugreport: http://bugs.libre-riscv.org/show_bug.cgi?id=99
4
5 Stack looks like this:
6
7 scnorm - FPDIVSpecialCasesDeNorm ispec FPADDBaseData
8 ------ ospec FPSCData
9
10 StageChain: FPDIVSpecialCasesMod,
11 FPAddDeNormMod
12
13 pipediv0 - FPDivStagesSetup ispec FPSCData
14 -------- ospec DivPipeInterstageData
15
16 StageChain: FPDivStage0Mod,
17 DivPipeSetupStage,
18 DivPipeCalculateStage,
19 ...
20 DivPipeCalculateStage
21
22 pipediv1 - FPDivStagesIntermediate ispec DivPipeInterstageData
23 -------- ospec DivPipeInterstageData
24
25 StageChain: DivPipeCalculateStage,
26 ...
27 DivPipeCalculateStage
28 ...
29 ...
30
31 pipediv5 - FPDivStagesFinal ispec FPDivStage0Data
32 -------- ospec FPAddStage1Data
33
34 StageChain: DivPipeCalculateStage,
35 ...
36 DivPipeCalculateStage,
37 DivPipeFinalStage,
38 FPDivStage2Mod
39
40 normpack - FPNormToPack ispec FPAddStage1Data
41 -------- ospec FPPackData
42
43 StageChain: Norm1ModSingle,
44 RoundMod,
45 CorrectionsMod,
46 PackMod
47
48 the number of combinatorial StageChains (n_comb_stages) in
49 FPDivStages is an argument arranged to get the length of the whole
50 pipeline down to sane numbers.
51
52 the reason for keeping the number of stages down is that for every
53 pipeline clock delay, a corresponding ReservationStation is needed.
54 if there are 24 pipeline stages, we need a whopping TWENTY FOUR
55 RS's. that's far too many. 6 is just about an acceptable number.
56 even 8 is starting to get alarmingly high.
57 """
58
59 from nmigen import Module
60 from nmigen.cli import main, verilog
61
62 from nmutil.singlepipe import ControlBase
63 from nmutil.concurrentunit import ReservationStations, num_bits
64
65 from ieee754.fpcommon.getop import FPADDBaseData
66 from ieee754.fpcommon.denorm import FPSCData
67 from ieee754.fpcommon.fpbase import FPFormat
68 from ieee754.fpcommon.pack import FPPackData
69 from ieee754.fpcommon.normtopack import FPNormToPack
70 from ieee754.fpdiv.specialcases import FPDIVSpecialCasesDeNorm
71 from ieee754.fpdiv.divstages import (FPDivStagesSetup,
72 FPDivStagesIntermediate,
73 FPDivStagesFinal)
74 from ieee754.pipeline import PipelineSpec
75 from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreConfig
76
77
class FPDIVBasePipe(ControlBase):
    """Floating-point divide pipeline built from chained stage groups.

    The pipeline is: FPDIVSpecialCasesDeNorm, then a run of divide stage
    groups, then FPNormToPack.  The first divide group is an
    FPDivStagesSetup, the last one (when it is not also the first) is an
    FPDivStagesFinal, and every group in between is an
    FPDivStagesIntermediate.
    """

    def __init__(self, pspec):
        """Create all pipeline stages and the equations linking them.

        :pspec: pipeline specification; ``log2_radix`` and
                ``core_config.n_stages`` are read from it here, and it
                is handed unchanged to every stage constructor.
        """
        self.pspec = pspec
        ControlBase.__init__(self)

        # TODO (depends on how many RS's we want), to which the answer
        # "as few as possible" is required: too many ReservationStations
        # means "big problems".
        comb_per_group = 2

        # XXX BUG - subtracting 4 from number of stages stops assert
        # probably related to having to add 4 in FPDivMuxInOut
        radix = pspec.log2_radix  # read kept as-is; not used further down
        core_stages = pspec.core_config.n_stages
        n_stages = core_stages // comb_per_group
        print("n_stages", core_stages, n_stages)

        final_group = n_stages - 1
        groups = []
        for group in range(n_stages):
            if group == 0:
                # needs to convert input from pipestart ospec
                # (open question: reduce the count due to work done at start?)
                kls = FPDivStagesSetup
            elif group == final_group:
                # needs to convert output to pipeend ispec
                # (FIXME: reduce the count due to work done at end?)
                kls = FPDivStagesFinal
            else:
                # intermediary stage
                kls = FPDivStagesIntermediate

            # every group runs the same number of combinatorial stages, so
            # the starting index of each group is a simple multiple: this
            # gives each CalcStage a (correct) unique index
            first_idx = group * comb_per_group
            groups.append(kls(self.pspec, comb_per_group, first_idx))

        self.pipechain = groups

        # start and end: unpack/specialcases then normalisation/packing
        self.pipestart = FPDIVSpecialCasesDeNorm(self.pspec)
        self.pipeend = FPNormToPack(self.pspec)

        self._eqs = self.connect([self.pipestart, *groups, self.pipeend])

    def elaborate(self, platform):
        """Register every stage as a submodule and apply the connections.

        Returns the module produced by ControlBase.elaborate, with the
        start stage added as ``scnorm``, each divide group as
        ``pipediv<N>`` and the end stage as ``normpack``.
        """
        m = ControlBase.elaborate(self, platform)
        submodules = m.submodules

        submodules.scnorm = self.pipestart
        for idx, group in enumerate(self.pipechain):
            setattr(submodules, "pipediv%d" % idx, group)
        submodules.normpack = self.pipeend

        # ControlBase.connect created the "eqs" that join each pipe to
        # the next; they are combinatorial
        m.d.comb += self._eqs

        return m
138
def roundup(x, mod):
    """Return x unchanged if it is a multiple of mod, else the next one up.

    :x:   value to round
    :mod: step to round to
    """
    remainder = x % mod
    if remainder == 0:
        return x
    # top up by whatever is missing to reach the next multiple
    return x + mod - remainder
141
142
class FPDIVMuxInOut(ReservationStations):
    """ Reservation-Station version of FPDIV pipeline.

        * fan-in on inputs (an array of FPADDBaseData: a,b,mid)
        * N-stage divider pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.

        :op_wid: - set this to the width of an operator which can
                   then be used to change the behaviour of the pipeline.
    """

    def __init__(self, width, num_rows, op_wid=1):
        """Build the pipeline spec, the divider ALU and the stations.

        :width:    passed to num_bits, PipelineSpec and FPFormat.standard
        :num_rows: forwarded to ReservationStations.__init__
        :op_wid:   operator width, forwarded to PipelineSpec
        """
        self.id_wid = num_bits(width)
        pspec = PipelineSpec(width, self.id_wid, op_wid)
        self.pspec = pspec

        # start from the standard format for this width, store it in the
        # pspec, HOWEVER...
        fmt = FPFormat.standard(width)
        log2_radix = 2

        # ...the mantissa gets 5 extra bits: MSB is zero, MSB-1 is 1,
        # then there is guard, round and sticky at the LSB end.
        # the total is also rounded up to the nearest radix
        widened = fmt.m_width + 5
        fmt.m_width = roundup(widened, log2_radix)
        print("width", fmt.m_width)

        core_cfg = DivPipeCoreConfig(fmt.m_width,
                                     fmt.fraction_width,
                                     log2_radix)

        pspec.fpformat = fmt
        pspec.log2_radix = log2_radix
        pspec.core_config = core_cfg

        # XXX TODO - a class (or function?) that takes the pspec (right here)
        # and creates... "something".  that "something" MUST have an eq
        # function.  sketch of the idea:
        #   new_pspec = deepcopy(self.pspec)
        #   new_pspec.opkls = DivPipeCoreOperation
        #   self.alu = FPDIVBasePipe(new_pspec)
        self.alu = FPDIVBasePipe(pspec)
        ReservationStations.__init__(self, num_rows)

    def i_specfn(self):
        """Return a fresh FPADDBaseData built from this unit's pspec."""
        return FPADDBaseData(self.pspec)

    def o_specfn(self):
        """Return a fresh FPPackData built from this unit's pspec."""
        return FPPackData(self.pspec)