1 -- Floating-point unit for Microwatt
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
19 e_in : in Execute1toFPUType;
20 e_out : out FPUToExecute1Type;
22 w_out : out FPUToWritebackType
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
29 constant EXP_BITS : natural := 13;
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
47 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
69 NAN_RESULT, EXC_RESULT);
71 type reg_type is record
74 instr_done : std_ulogic;
78 insn : std_ulogic_vector(31 downto 0);
79 nia : std_ulogic_vector(63 downto 0);
80 instr_tag : instr_tag_t;
81 dest_fpr : gspr_index_t;
85 single_prec : std_ulogic;
86 fpscr : std_ulogic_vector(31 downto 0);
90 r : std_ulogic_vector(63 downto 0); -- 10.54 format
91 s : std_ulogic_vector(55 downto 0); -- extended fraction
93 p : std_ulogic_vector(63 downto 0); -- 8.56 format
94 y : std_ulogic_vector(63 downto 0); -- 8.56 format
95 result_sign : std_ulogic;
96 result_class : fp_number_class;
97 result_exp : signed(EXP_BITS-1 downto 0);
98 shift : signed(EXP_BITS-1 downto 0);
99 writing_back : std_ulogic;
100 int_result : std_ulogic;
101 cr_result : std_ulogic_vector(3 downto 0);
102 cr_mask : std_ulogic_vector(7 downto 0);
103 old_exc : std_ulogic_vector(4 downto 0);
104 update_fprf : std_ulogic;
105 quieten_nan : std_ulogic;
108 round_mode : std_ulogic_vector(2 downto 0);
109 is_subtract : std_ulogic;
110 exp_cmp : std_ulogic;
111 madd_cmp : std_ulogic;
112 add_bsmall : std_ulogic;
113 is_multiply : std_ulogic;
114 is_sqrt : std_ulogic;
116 count : unsigned(1 downto 0);
117 doing_ftdiv : std_ulogic_vector(1 downto 0);
118 opsel_a : std_ulogic_vector(1 downto 0);
122 invalid : std_ulogic;
124 longmask : std_ulogic;
127 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
129 signal r, rin : reg_type;
131 signal fp_result : std_ulogic_vector(63 downto 0);
132 signal opsel_b : std_ulogic_vector(1 downto 0);
133 signal opsel_r : std_ulogic_vector(1 downto 0);
134 signal opsel_s : std_ulogic_vector(1 downto 0);
135 signal opsel_ainv : std_ulogic;
136 signal opsel_mask : std_ulogic;
137 signal opsel_binv : std_ulogic;
138 signal in_a : std_ulogic_vector(63 downto 0);
139 signal in_b : std_ulogic_vector(63 downto 0);
140 signal result : std_ulogic_vector(63 downto 0);
141 signal carry_in : std_ulogic;
142 signal lost_bits : std_ulogic;
143 signal r_hi_nz : std_ulogic;
144 signal r_lo_nz : std_ulogic;
145 signal s_nz : std_ulogic;
146 signal misc_sel : std_ulogic_vector(3 downto 0);
147 signal f_to_multiply : MultiplyInputType;
148 signal multiply_to_f : MultiplyOutputType;
149 signal msel_1 : std_ulogic_vector(1 downto 0);
150 signal msel_2 : std_ulogic_vector(1 downto 0);
151 signal msel_add : std_ulogic_vector(1 downto 0);
152 signal msel_inv : std_ulogic;
153 signal inverse_est : std_ulogic_vector(18 downto 0);
-- Two-bit selector encodings used by the datapath multiplexers.
-- AIN_*: values for r.opsel_a (A-side input of the adder datapath).
156 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
157 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
158 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
159 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
-- BIN_*: presumably the values for opsel_b (TODO confirm against the B-input mux).
161 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
162 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
163 constant BIN_RND : std_ulogic_vector(1 downto 0) := "10";
164 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
-- RES_*: values driven onto opsel_r to choose the source of the result.
166 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
167 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
168 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
169 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
-- S_*: presumably the values for opsel_s (TODO confirm against the S-register mux).
171 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
172 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
173 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
174 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
-- MUL1_*: presumably the values for msel_1, the first multiplier operand (TODO confirm).
177 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
178 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
179 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
180 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
-- MUL2_*: presumably the values for msel_2, the second multiplier operand (TODO confirm).
182 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
183 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
184 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
185 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
-- MULADD_*: values driven onto msel_add to choose the multiplier's addend.
187 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
188 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
189 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
190 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
192 -- Inverse lookup table, indexed by the top 8 fraction bits
193 -- The first 256 entries are the reciprocal (1/x) lookup table,
194 -- and the remaining 768 entries are the reciprocal square root table.
195 -- Output range is [0.5, 1) in 0.19 format, though the top
196 -- bit isn't stored since it is always 1.
197 -- Each output value is the inverse of the center of the input
198 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
199 -- entry 1 is 1 / (1 + 3/512), etc.
200 signal inverse_table : lookup_table := (
202 -- Unit bit is assumed to be 1, so input range is [1, 2)
203 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
204 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
205 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
206 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
207 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
208 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
209 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
210 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
211 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
212 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
213 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
214 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
215 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
216 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
217 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
218 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
219 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
220 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
221 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
222 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
223 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
224 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
225 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
226 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
227 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
228 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
229 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
230 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
231 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
232 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
233 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
234 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
235 -- 1/sqrt(x) lookup table
236 -- Input is in the range [1, 4), i.e. two bits to the left of the
237 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
239 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
240 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
241 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
242 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
243 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
244 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
245 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
246 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
247 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
248 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
249 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
250 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
251 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
252 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
253 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
254 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
255 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
256 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
257 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
258 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
259 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
260 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
261 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
262 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
263 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
264 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
265 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
266 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
267 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
268 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
269 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
270 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
272 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
273 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
274 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
275 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
276 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
277 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
278 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
279 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
280 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
281 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
282 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
283 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
284 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
285 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
286 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
287 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
288 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
289 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
290 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
291 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
292 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
293 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
294 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
295 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
296 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
297 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
298 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
299 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
300 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
301 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
302 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
303 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
305 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
306 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
307 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
308 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
309 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
310 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
311 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
312 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
313 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
314 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
315 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
316 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
317 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
318 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
319 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
320 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
321 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
322 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
323 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
324 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
325 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
326 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
327 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
328 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
329 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
330 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
331 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
332 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
333 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
334 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
335 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
336 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
339 -- Left and right shifter with 120 bit input and 64 bit output.
340 -- Shifts inp left by shift bits and returns the upper 64 bits of
341 -- the result. The shift parameter is interpreted as a signed
342 -- number in the range -64..63, with negative values indicating
344 function shifter_64(inp: std_ulogic_vector(119 downto 0);
345 shift: std_ulogic_vector(6 downto 0))
346 return std_ulogic_vector is
-- Implementation overview: the shift is applied in three cascaded
-- selection stages that narrow the data from 120 to 95 to 71 to 64 bits.
--   Stage 1, on shift(6 downto 5): none, left 32, right 64 or right 32.
--   Stage 2, on shift(4 downto 3): left by 0, 8, 16 or 24.
--   Stage 3, on shift(2 downto 0): left by 0 to 7.
-- Right shifts are zero-filled from the left; the left-by-32 step
-- zero-fills from the right.
-- NOTE(review): the remarks on each assignment below describe that slice on
-- its own terms; which selector value picks which assignment should be
-- confirmed against the case alternatives.
347 variable s1 : std_ulogic_vector(94 downto 0); -- stage 1 output, 95 bits
348 variable s2 : std_ulogic_vector(70 downto 0); -- stage 2 output, 71 bits
349 variable result : std_ulogic_vector(63 downto 0); -- stage 3 output, 64 bits
351 case shift(6 downto 5) is
353 s1 := inp(119 downto 25); -- no coarse shift: top 95 bits of inp
355 s1 := inp(87 downto 0) & "0000000"; -- left by 32: inp(87) becomes the top bit
357 s1 := x"0000000000000000" & inp(119 downto 89); -- right by 64: 64 zero bits on the left
359 s1 := x"00000000" & inp(119 downto 57); -- right by 32: 32 zero bits on the left
361 case shift(4 downto 3) is
363 s2 := s1(94 downto 24); -- left by 0
365 s2 := s1(86 downto 16); -- left by 8
367 s2 := s1(78 downto 8); -- left by 16
369 s2 := s1(70 downto 0); -- left by 24
371 case shift(2 downto 0) is
373 result := s2(70 downto 7); -- left by 0
375 result := s2(69 downto 6); -- left by 1
377 result := s2(68 downto 5); -- left by 2
379 result := s2(67 downto 4); -- left by 3
381 result := s2(66 downto 3); -- left by 4
383 result := s2(65 downto 2); -- left by 5
385 result := s2(64 downto 1); -- left by 6
387 result := s2(63 downto 0); -- left by 7
392 -- Generate a mask with 0-bits on the left and 1-bits on the right which
393 -- selects the bits that will be lost in doing a right shift. The shift
394 -- parameter is the bottom 6 bits of a negative shift count,
395 -- indicating a right shift.
396 function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
397 variable result: std_ulogic_vector(63 downto 0); -- 64-bit mask under construction
399 result := (others => '0'); -- start with no bits selected
400 for i in 0 to 63 loop -- i counts bit positions from the left: i = 0 addresses bit 63
-- NOTE(review): presumably this assignment is guarded by a comparison of i
-- with shift, which is what makes the mask depend on the shift count; confirm.
402 result(63 - i) := '1';
408 -- Split a DP floating-point number into components and work out its class.
409 -- If is_int = 1, the input is considered an integer
410 function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
-- Returns an fpu_reg_type holding the sign, unbiased exponent, mantissa and
-- number class extracted from the 64-bit register image fpr.
411 variable r : fpu_reg_type; -- decoded value being assembled
412 variable exp_nz : std_ulogic; -- '1' if any exponent-field bit is set
413 variable exp_ao : std_ulogic; -- '1' if all exponent-field bits are set
414 variable frac_nz : std_ulogic; -- '1' if any fraction-field bit is set
415 variable cls : std_ulogic_vector(2 downto 0); -- exp_ao & exp_nz & frac_nz, used to select the class
417 r.negative := fpr(63); -- sign bit
418 exp_nz := or (fpr(62 downto 52)); -- 11-bit exponent field
419 exp_ao := and (fpr(62 downto 52));
420 frac_nz := or (fpr(51 downto 0)); -- 52-bit fraction field
-- Zero-extend the exponent field to EXP_BITS and remove the bias of 1023.
422 r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
-- NOTE(review): presumably this override applies when the exponent field is
-- zero (zero or denormalized input), giving such values the exponent -1022;
-- confirm against the guarding condition.
424 r.exponent := to_signed(-1022, EXP_BITS);
-- Mantissa layout, from the left: 9 zero bits, the unit bit (set exactly
-- when the exponent field is non-zero) at bit 54, the 52 fraction bits at
-- bits 53 downto 2, and 2 zero bits at the bottom.
426 r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";
427 cls := exp_ao & exp_nz & frac_nz;
429 when "000" => r.class := ZERO; -- exponent and fraction both zero
430 when "001" => r.class := FINITE; -- denormalized
431 when "010" => r.class := FINITE; -- ordinary exponent, zero fraction
432 when "011" => r.class := FINITE; -- ordinary exponent, non-zero fraction
433 when "110" => r.class := INFINITY; -- exponent all ones, zero fraction
434 when others => r.class := NAN; -- exponent all ones, non-zero fraction
-- NOTE(review): the statements below give a zero exponent and classify on
-- whether any bit of fpr is set; presumably this is the is_int = 1 path;
-- confirm against the enclosing condition.
438 r.exponent := (others => '0');
439 if (fpr(63) or exp_nz or frac_nz) = '1' then -- true when any of the 64 bits of fpr is set
448 -- Construct a DP floating-point result from components
449 function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
450 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
451 return std_ulogic_vector is
-- Builds a 64-bit value with the exponent field in bits 62 downto 52 and
-- the fraction in bits 51 downto 0. The mantissa argument is read with the
-- unit bit at bit 54 and the fraction in bits 53 downto 2.
452 variable result : std_ulogic_vector(63 downto 0); -- packed value being assembled
454 result := (others => '0'); -- every field defaults to zero
-- Unit bit set: write the exponent with the bias of 1023 added back,
-- truncated to 11 bits. Otherwise the exponent field is left at zero.
459 if mantissa(54) = '1' then
461 result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
463 result(51 downto 29) := mantissa(53 downto 31); -- upper 23 fraction bits, written for both precisions
464 if single_prec = '0' then
465 result(28 downto 0) := mantissa(30 downto 2); -- lower 29 fraction bits, double precision only
-- NOTE(review): the two all-ones exponent assignments below presumably
-- belong to the INFINITY and NAN alternatives respectively; confirm against
-- the case alternatives.
468 result(62 downto 52) := "11111111111";
470 result(62 downto 52) := "11111111111";
471 result(51) := quieten_nan or mantissa(53); -- top fraction bit is forced to 1 when quieten_nan is set
472 result(50 downto 29) := mantissa(52 downto 31); -- rest of the upper fraction bits are passed through
473 if single_prec = '0' then
474 result(28 downto 0) := mantissa(30 downto 2); -- lower 29 fraction bits, double precision only
480 -- Determine whether to increment when rounding
481 -- Returns rounding_inc & inexact
482 -- Assumes x includes the bottom 29 bits of the mantissa already
483 -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
484 function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
485 single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
487 return std_ulogic_vector is
488 variable grx : std_ulogic_vector(2 downto 0); -- the two mantissa bits below the result LSB, followed by x
489 variable ret : std_ulogic_vector(1 downto 0); -- return value: ret(1) = rounding_inc, ret(0) = inexact
490 variable lsb : std_ulogic; -- used to break ties in round-to-nearest
-- Double precision keeps mantissa bits down to bit 2, so the two bits
-- below the LSB are bits 1 downto 0. Single precision keeps bits down to
-- bit 31, so they are bits 30 downto 29.
492 if single_prec = '0' then
493 grx := mantissa(1 downto 0) & x;
496 grx := mantissa(30 downto 29) & x;
-- NOTE(review): rn(1 downto 0) selects the rounding mode; rn(2) = '1'
-- disables the tie-to-even special case below. Its exact meaning should be
-- confirmed at the call sites.
501 case rn(1 downto 0) is
502 when "00" => -- round to nearest
503 if grx = "100" and rn(2) = '0' then -- discarded part is exactly half an LSB
504 ret(1) := lsb; -- tie, round to even
508 when "01" => -- round towards zero
509 when others => -- round towards +/- inf
511 -- round towards greater magnitude
518 -- Determine result flags to write into the FPSCR
519 function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
520 return std_ulogic_vector is
-- Each visible return value is 5 bits wide.
-- NOTE(review): presumably the bits are, from the left, the class bit
-- followed by the less-than, greater-than, equal and unordered bits of the
-- FPSCR result-flags field; confirm against the FPSCR field constants.
-- NOTE(review): the three returns below presumably belong to the ZERO,
-- FINITE and INFINITY alternatives in that order; confirm against the case
-- alternatives.
524 return sign & "0010"; -- first bit carries the sign; only the fourth bit is otherwise set
526 return (not unitbit) & sign & (not sign) & "00"; -- first bit set when the unit bit is clear; then sign and its inverse
528 return '0' & sign & (not sign) & "01"; -- sign and its inverse, with the last bit set
535 fpu_multiply_0: entity work.multiply
538 m_in => f_to_multiply,
539 m_out => multiply_to_f
544 if rising_edge(clk) then
550 r.fpscr <= (others => '0');
551 r.writing_back <= '0';
553 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
559 -- synchronous reads from lookup table
-- On each rising clock edge, registers one entry of inverse_table into
-- inverse_est, addressed from the mantissa of operand B.
560 lut_access: process(clk)
561 variable addrhi : std_ulogic_vector(1 downto 0); -- upper 2 address bits: selects a block of 256 entries
562 variable addr : std_ulogic_vector(9 downto 0); -- full 10-bit index into the 1024-entry table
564 if rising_edge(clk) then
565 if r.is_sqrt = '1' then
566 addrhi := r.b.mantissa(55 downto 54); -- for square root, the two bits left of the binary point pick the block
570 addr := addrhi & r.b.mantissa(53 downto 46); -- low 8 address bits are the top 8 fraction bits
571 inverse_est <= '1' & inverse_table(to_integer(unsigned(addr))); -- prepend the always-1 top bit that the table does not store
575 e_out.busy <= r.busy; -- status back to execute1
576 e_out.exception <= r.fpscr(FPSCR_FEX); -- reflects the FEX bit of the FPSCR
-- Writeback interface: all fields are driven from the state register r.
578 w_out.valid <= r.instr_done and not r.do_intr; -- completion is not signalled as valid when an interrupt is being raised
579 w_out.instr_tag <= r.instr_tag;
580 w_out.write_enable <= r.writing_back;
581 w_out.write_reg <= r.dest_fpr;
582 w_out.write_data <= fp_result;
583 w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp); -- CR is written at completion when r.rc or r.is_cmp is set
584 w_out.write_cr_mask <= r.cr_mask;
-- The 4-bit CR result is replicated eight times to fill 32 bits.
-- NOTE(review): presumably write_cr_mask then picks the destination field; confirm.
585 w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
586 r.cr_result & r.cr_result & r.cr_result & r.cr_result;
587 w_out.interrupt <= r.do_intr;
588 w_out.intr_vec <= 16#700#; -- NOTE(review): presumably the program interrupt vector; confirm
-- Exactly one of two SRR1 bits is set: index 47-44 when r.illegal is set,
-- index 47-43 otherwise.
-- NOTE(review): presumably these are the illegal-instruction and
-- FP-enabled-exception indications; confirm against the ISA.
590 w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0');
593 variable v : reg_type;
594 variable adec : fpu_reg_type;
595 variable bdec : fpu_reg_type;
596 variable cdec : fpu_reg_type;
597 variable fpscr_mask : std_ulogic_vector(31 downto 0);
598 variable illegal : std_ulogic;
599 variable j, k : integer;
600 variable flm : std_ulogic_vector(7 downto 0);
601 variable int_input : std_ulogic;
602 variable mask : std_ulogic_vector(63 downto 0);
603 variable in_a0 : std_ulogic_vector(63 downto 0);
604 variable in_b0 : std_ulogic_vector(63 downto 0);
605 variable misc : std_ulogic_vector(63 downto 0);
606 variable shift_res : std_ulogic_vector(63 downto 0);
607 variable round : std_ulogic_vector(1 downto 0);
608 variable update_fx : std_ulogic;
609 variable arith_done : std_ulogic;
610 variable invalid : std_ulogic;
611 variable zero_divide : std_ulogic;
612 variable mant_nz : std_ulogic;
613 variable min_exp : signed(EXP_BITS-1 downto 0);
614 variable max_exp : signed(EXP_BITS-1 downto 0);
615 variable bias_exp : signed(EXP_BITS-1 downto 0);
616 variable new_exp : signed(EXP_BITS-1 downto 0);
617 variable exp_tiny : std_ulogic;
618 variable exp_huge : std_ulogic;
619 variable renormalize : std_ulogic;
620 variable clz : std_ulogic_vector(5 downto 0);
621 variable set_x : std_ulogic;
622 variable mshift : signed(EXP_BITS-1 downto 0);
623 variable need_check : std_ulogic;
624 variable msb : std_ulogic;
625 variable is_add : std_ulogic;
626 variable set_a : std_ulogic;
627 variable set_b : std_ulogic;
628 variable set_c : std_ulogic;
629 variable set_y : std_ulogic;
630 variable set_s : std_ulogic;
631 variable qnan_result : std_ulogic;
632 variable px_nz : std_ulogic;
633 variable pcmpb_eq : std_ulogic;
634 variable pcmpb_lt : std_ulogic;
635 variable pshift : std_ulogic;
636 variable renorm_sqrt : std_ulogic;
637 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
638 variable shiftin : std_ulogic;
639 variable mulexp : signed(EXP_BITS-1 downto 0);
640 variable maddend : std_ulogic_vector(127 downto 0);
641 variable sum : std_ulogic_vector(63 downto 0);
642 variable round_inc : std_ulogic_vector(63 downto 0);
649 -- capture incoming instruction
650 if e_in.valid = '1' then
654 v.instr_tag := e_in.itag;
655 v.fe_mode := or (e_in.fe_mode);
656 v.dest_fpr := e_in.frt;
657 v.single_prec := e_in.single;
658 v.longmask := e_in.single;
661 v.is_cmp := e_in.out_cr;
662 if e_in.out_cr = '0' then
663 v.cr_mask := num_to_fxm(1);
665 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
668 if e_in.op = OP_FPOP_I then
671 v.quieten_nan := '1';
674 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
675 v.is_subtract := '0';
676 v.is_multiply := '0';
679 v.doing_ftdiv := "00";
681 adec := decode_dp(e_in.fra, int_input);
682 bdec := decode_dp(e_in.frb, int_input);
683 cdec := decode_dp(e_in.frc, int_input);
689 if adec.exponent > bdec.exponent then
693 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
698 r_hi_nz <= or (r.r(55 downto 31));
699 r_lo_nz <= or (r.r(30 downto 2));
702 if r.single_prec = '0' then
703 if r.doing_ftdiv(1) = '0' then
704 max_exp := to_signed(1023, EXP_BITS);
706 max_exp := to_signed(1020, EXP_BITS);
708 if r.doing_ftdiv(0) = '0' then
709 min_exp := to_signed(-1022, EXP_BITS);
711 min_exp := to_signed(-1021, EXP_BITS);
713 bias_exp := to_signed(1536, EXP_BITS);
715 max_exp := to_signed(127, EXP_BITS);
716 min_exp := to_signed(-126, EXP_BITS);
717 bias_exp := to_signed(192, EXP_BITS);
719 new_exp := r.result_exp - r.shift;
722 if new_exp < min_exp then
725 if new_exp > max_exp then
729 -- Compare P with zero and with B
730 px_nz := or (r.p(57 downto 4));
732 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
736 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
740 v.writing_back := '0';
742 v.update_fprf := '0';
743 v.shift := to_signed(0, EXP_BITS);
754 fpscr_mask := (others => '1');
766 f_to_multiply.is_32bit <= '0';
767 f_to_multiply.valid <= '0';
770 msel_add <= MULADD_ZERO;
783 if e_in.valid = '1' then
784 case e_in.insn(5 downto 1) is
786 if e_in.insn(8) = '1' then
787 if e_in.insn(6) = '0' then
790 v.state := DO_FTSQRT;
792 elsif e_in.insn(7) = '1' then
799 if e_in.insn(10) = '0' then
800 if e_in.insn(8) = '0' then
803 v.state := DO_MTFSFI;
809 if e_in.insn(8) = '0' then
816 if e_in.insn(9 downto 8) /= "11" then
826 if int_input = '1' then
833 v.round_mode := "001";
838 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
842 when "10100" | "10101" =>
855 v.is_multiply := '1';
857 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
864 v.state := DO_FRSQRTE;
865 when "11100" | "11101" | "11110" | "11111" =>
866 if v.a.mantissa(54) = '0' then
868 elsif v.c.mantissa(54) = '0' then
879 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
883 j := to_integer(unsigned(insn_bfa(r.insn)));
887 v.cr_result := r.fpscr(k + 3 downto k);
888 fpscr_mask(k + 3 downto k) := "0000";
891 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
898 v.cr_result := "0000";
899 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
900 (r.b.class = FINITE and r.b.mantissa(53) = '0') then
901 v.cr_result(2) := '1';
903 if r.a.class = NAN or r.a.class = INFINITY or
904 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
905 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
906 v.cr_result(1) := '1';
908 v.doing_ftdiv := "11";
917 v.cr_result := "0000"; -- start with no flags set in the CR field result
-- Bit 2 is set when operand B is zero, infinite, or finite with
-- mantissa bit 53 clear.
918 if r.b.class = ZERO or r.b.class = INFINITY or
919 (r.b.class = FINITE and r.b.mantissa(53) = '0') then
920 v.cr_result(2) := '1';
-- Bit 1 is set when operand B is a NaN, infinite, zero, negative, or has an
-- exponent of -970 or below. The field was cleared to "0000" above, so
-- assigning '0' here would leave the flag permanently clear; it must be
-- '1', matching the corresponding check on the divide-test path.
922 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
923 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
924 v.cr_result(1) := '1';
933 v.result_exp := r.b.exponent;
934 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
935 (r.b.class = NAN and r.b.mantissa(53) = '0') then
937 v.fpscr(FPSCR_VXSNAN) := '1';
938 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
939 v.fpscr(FPSCR_VXVC) := '1';
942 v.cr_result := "0001"; -- unordered
943 elsif r.a.class = NAN or r.b.class = NAN then
944 if r.insn(6) = '1' then
946 v.fpscr(FPSCR_VXVC) := '1';
949 v.cr_result := "0001"; -- unordered
950 elsif r.a.class = ZERO and r.b.class = ZERO then
951 v.cr_result := "0010"; -- equal
952 elsif r.a.negative /= r.b.negative then
953 v.cr_result := r.a.negative & r.b.negative & "00";
954 elsif r.a.class = ZERO then
955 -- A and B are the same sign from here down
956 v.cr_result := not r.b.negative & r.b.negative & "00";
957 elsif r.a.class = INFINITY then
958 if r.b.class = INFINITY then
959 v.cr_result := "0010";
961 v.cr_result := r.a.negative & not r.a.negative & "00";
963 elsif r.b.class = ZERO then
964 -- A is finite from here down
965 v.cr_result := r.a.negative & not r.a.negative & "00";
966 elsif r.b.class = INFINITY then
967 v.cr_result := not r.b.negative & r.b.negative & "00";
968 elsif r.exp_cmp = '1' then
969 -- A and B are both finite from here down
970 v.cr_result := r.a.negative & not r.a.negative & "00";
971 elsif r.a.exponent /= r.b.exponent then
972 -- A exponent is smaller than B
973 v.cr_result := not r.a.negative & r.a.negative & "00";
975 -- Prepare to subtract mantissas, put B in R
976 v.cr_result := "0000";
981 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
985 j := to_integer(unsigned(insn_bt(r.insn)));
986 for i in 0 to 31 loop
988 v.fpscr(31 - i) := r.insn(6);
996 j := to_integer(unsigned(insn_bf(r.insn)));
997 if r.insn(16) = '0' then
1001 v.fpscr(k + 3 downto k) := insn_u(r.insn);
1005 v.instr_done := '1';
1010 opsel_r <= RES_MISC;
1011 misc_sel <= "01" & r.insn(8) & '0';
1012 v.int_result := '1';
1013 v.writing_back := '1';
1014 v.instr_done := '1';
1018 v.int_result := '1';
1019 v.writing_back := '1';
1020 opsel_r <= RES_MISC;
1021 case r.insn(20 downto 16) is
1026 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1027 when "10100" | "10101" =>
1028 -- mffscdrn[i] (but we don't implement DRN)
1029 fpscr_mask := x"000000FF";
1032 fpscr_mask := x"000000FF";
1033 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1034 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1037 fpscr_mask := x"000000FF";
1038 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1041 fpscr_mask := x"0007F0FF";
1045 v.instr_done := '1';
1049 if r.insn(25) = '1' then
1051 elsif r.insn(16) = '1' then
1054 flm := r.insn(24 downto 17);
1056 for i in 0 to 7 loop
1058 if flm(i) = '1' then
1059 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1062 v.instr_done := '1';
1066 -- r.opsel_a = AIN_B
1067 v.result_class := r.b.class;
1068 v.result_exp := r.b.exponent;
1069 v.quieten_nan := '0';
1070 if r.insn(9) = '1' then
1071 v.result_sign := '0'; -- fabs
1072 elsif r.insn(8) = '1' then
1073 v.result_sign := '1'; -- fnabs
1074 elsif r.insn(7) = '1' then
1075 v.result_sign := r.b.negative; -- fmr
1076 elsif r.insn(6) = '1' then
1077 v.result_sign := not r.b.negative; -- fneg
1079 v.result_sign := r.a.negative; -- fcpsgn
1081 v.writing_back := '1';
1082 v.instr_done := '1';
1085 when DO_FRI => -- fri[nzpm]
1086 -- r.opsel_a = AIN_B
1087 v.result_class := r.b.class;
1088 v.result_sign := r.b.negative;
1089 v.result_exp := r.b.exponent;
1090 v.fpscr(FPSCR_FR) := '0';
1091 v.fpscr(FPSCR_FI) := '0';
1092 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1094 v.fpscr(FPSCR_VXSNAN) := '1';
1097 if r.b.class = FINITE then
1098 if r.b.exponent >= to_signed(52, EXP_BITS) then
1099 -- integer already, no rounding required
1102 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1104 v.round_mode := '1' & r.insn(7 downto 6);
1111 -- r.opsel_a = AIN_B, r.shift = 0
1112 v.result_class := r.b.class;
1113 v.result_sign := r.b.negative;
1114 v.result_exp := r.b.exponent;
1115 v.fpscr(FPSCR_FR) := '0';
1116 v.fpscr(FPSCR_FI) := '0';
1117 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1119 v.fpscr(FPSCR_VXSNAN) := '1';
1123 if r.b.class = FINITE then
1124 if r.b.exponent < to_signed(-126, EXP_BITS) then
1125 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1126 v.state := ROUND_UFLOW;
1127 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1128 v.state := ROUND_OFLOW;
1130 v.state := ROUNDING;
1137 -- instr bit 9: 1=dword 0=word
1138 -- instr bit 8: 1=unsigned 0=signed
1139 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1140 -- r.opsel_a = AIN_B
1141 v.result_class := r.b.class;
1142 v.result_sign := r.b.negative;
1143 v.result_exp := r.b.exponent;
1144 v.fpscr(FPSCR_FR) := '0';
1145 v.fpscr(FPSCR_FI) := '0';
1146 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1148 v.fpscr(FPSCR_VXSNAN) := '1';
1152 v.int_result := '1';
1157 if r.b.exponent >= to_signed(64, EXP_BITS) or
1158 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1159 v.state := INT_OFLOW;
1160 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1161 -- integer already, no rounding required,
1162 -- shift into final position
1163 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1164 if r.insn(8) = '1' and r.b.negative = '1' then
1165 v.state := INT_OFLOW;
1167 v.state := INT_ISHIFT;
1170 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1171 v.state := INT_SHIFT;
1173 when INFINITY | NAN =>
1174 v.state := INT_OFLOW;
1178 -- r.opsel_a = AIN_B
1179 v.result_sign := '0';
1180 if r.insn(8) = '0' and r.b.negative = '1' then
1181 -- fcfid[s] with negative operand, set R = -B
1184 v.result_sign := '1';
1186 v.result_class := r.b.class;
1187 v.result_exp := to_signed(54, EXP_BITS);
1188 v.fpscr(FPSCR_FR) := '0';
1189 v.fpscr(FPSCR_FI) := '0';
1190 if r.b.class = ZERO then
1197 -- fadd[s] and fsub[s]
1198 -- r.opsel_a = AIN_A
1199 v.result_sign := r.a.negative;
1200 v.result_class := r.a.class;
1201 v.result_exp := r.a.exponent;
1202 v.fpscr(FPSCR_FR) := '0';
1203 v.fpscr(FPSCR_FI) := '0';
1206 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1207 if r.a.class = FINITE and r.b.class = FINITE then
1208 v.is_subtract := not is_add;
1209 v.add_bsmall := r.exp_cmp;
1211 if r.exp_cmp = '0' then
1212 v.shift := r.a.exponent - r.b.exponent;
1213 v.result_sign := r.b.negative xnor r.insn(1);
1214 if r.a.exponent = r.b.exponent then
1218 v.state := ADD_SHIFT;
1224 if r.a.class = NAN or r.b.class = NAN then
1225 v.state := NAN_RESULT;
1226 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1227 -- invalid operation, construct QNaN
1228 v.fpscr(FPSCR_VXISI) := '1';
1231 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1232 -- return -0 for rounding to -infinity
1233 v.result_sign := r.round_mode(1) and r.round_mode(0);
1235 elsif r.a.class = INFINITY or r.b.class = ZERO then
1238 v.state := EXC_RESULT;
1242 v.negate := not r.insn(1);
1243 v.state := EXC_RESULT;
1249 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1250 v.result_sign := r.a.negative xor r.c.negative;
1251 v.result_class := r.a.class;
1252 v.fpscr(FPSCR_FR) := '0';
1253 v.fpscr(FPSCR_FI) := '0';
1256 if r.a.class = FINITE and r.c.class = FINITE then
1257 v.result_exp := r.a.exponent + r.c.exponent;
1258 -- Renormalize denorm operands
1259 if r.a.mantissa(54) = '0' then
1260 v.state := RENORM_A;
1261 elsif r.c.mantissa(54) = '0' then
1262 v.state := RENORM_C;
1264 f_to_multiply.valid <= '1';
1268 if r.a.class = NAN or r.c.class = NAN then
1269 v.state := NAN_RESULT;
1270 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1271 (r.a.class = ZERO and r.c.class = INFINITY) then
1272 -- invalid operation, construct QNaN
1273 v.fpscr(FPSCR_VXIMZ) := '1';
1275 elsif r.a.class = ZERO or r.a.class = INFINITY then
1279 -- r.c.class is ZERO or INFINITY
1281 v.negate := r.a.negative;
1282 v.state := EXC_RESULT;
1287 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1288 v.result_class := r.a.class;
1289 v.fpscr(FPSCR_FR) := '0';
1290 v.fpscr(FPSCR_FI) := '0';
1293 v.result_sign := r.a.negative xor r.b.negative;
1294 v.result_exp := r.a.exponent - r.b.exponent;
1296 if r.a.class = FINITE and r.b.class = FINITE then
1297 -- Renormalize denorm operands
1298 if r.a.mantissa(54) = '0' then
1299 v.state := RENORM_A;
1300 elsif r.b.mantissa(54) = '0' then
1301 v.state := RENORM_B;
1307 if r.a.class = NAN or r.b.class = NAN then
1308 v.state := NAN_RESULT;
1309 elsif r.b.class = INFINITY then
1310 if r.a.class = INFINITY then
1311 v.fpscr(FPSCR_VXIDI) := '1';
1314 v.result_class := ZERO;
1317 elsif r.b.class = ZERO then
1318 if r.a.class = ZERO then
1319 v.fpscr(FPSCR_VXZDZ) := '1';
1322 if r.a.class = FINITE then
1325 v.result_class := INFINITY;
1328 else -- r.b.class = FINITE, result_class = r.a.class
1334 v.fpscr(FPSCR_FR) := '0';
1335 v.fpscr(FPSCR_FI) := '0';
1336 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1341 v.quieten_nan := '0';
1342 v.state := EXC_RESULT;
1345 -- r.opsel_a = AIN_B
1346 v.result_class := r.b.class;
1347 v.result_sign := r.b.negative;
1348 v.fpscr(FPSCR_FR) := '0';
1349 v.fpscr(FPSCR_FI) := '0';
1353 v.result_exp := r.b.exponent;
1354 if r.b.negative = '1' then
1355 v.fpscr(FPSCR_VXSQRT) := '1';
1357 elsif r.b.mantissa(54) = '0' then
1358 v.state := RENORM_B;
1359 elsif r.b.exponent(0) = '0' then
1362 v.shift := to_signed(1, EXP_BITS);
1363 v.state := RENORM_B2;
1366 v.state := NAN_RESULT;
1371 if r.b.negative = '1' then
1372 v.fpscr(FPSCR_VXSQRT) := '1';
1380 -- r.opsel_a = AIN_B
1381 v.result_class := r.b.class;
1382 v.result_sign := r.b.negative;
1383 v.fpscr(FPSCR_FR) := '0';
1384 v.fpscr(FPSCR_FI) := '0';
1388 v.result_exp := - r.b.exponent;
1389 if r.b.mantissa(54) = '0' then
1390 v.state := RENORM_B;
1395 v.state := NAN_RESULT;
1397 v.result_class := ZERO;
1400 v.result_class := INFINITY;
1406 -- r.opsel_a = AIN_B
1407 v.result_class := r.b.class;
1408 v.result_sign := r.b.negative;
1409 v.fpscr(FPSCR_FR) := '0';
1410 v.fpscr(FPSCR_FI) := '0';
1412 v.shift := to_signed(1, EXP_BITS);
1415 v.result_exp := r.b.exponent;
1416 if r.b.negative = '1' then
1417 v.fpscr(FPSCR_VXSQRT) := '1';
1419 elsif r.b.mantissa(54) = '0' then
1420 v.state := RENORM_B;
1421 elsif r.b.exponent(0) = '0' then
1424 v.state := RENORM_B2;
1427 v.state := NAN_RESULT;
1429 if r.b.negative = '1' then
1430 v.fpscr(FPSCR_VXSQRT) := '1';
1433 v.result_class := ZERO;
1437 v.result_class := INFINITY;
1443 -- fmadd, fmsub, fnmadd, fnmsub
1444 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1446 v.result_sign := r.a.negative;
1447 v.result_class := r.a.class;
1448 v.result_exp := r.a.exponent;
1449 v.fpscr(FPSCR_FR) := '0';
1450 v.fpscr(FPSCR_FI) := '0';
1454 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1455 if r.a.class = FINITE and r.c.class = FINITE and
1456 (r.b.class = FINITE or r.b.class = ZERO) then
1457 v.is_subtract := not is_add;
1458 mulexp := r.a.exponent + r.c.exponent;
1459 v.result_exp := mulexp;
1460 -- Make sure A and C are normalized
1461 if r.a.mantissa(54) = '0' then
1462 v.state := RENORM_A;
1463 elsif r.c.mantissa(54) = '0' then
1464 v.state := RENORM_C;
1465 elsif r.b.class = ZERO then
1466 -- no addend, degenerates to multiply
1467 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1468 f_to_multiply.valid <= '1';
1469 v.is_multiply := '1';
1471 elsif r.madd_cmp = '0' then
1472 -- addend is bigger, do multiply first
1473 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1474 f_to_multiply.valid <= '1';
1477 -- product is bigger, shift B right and use it as the
1478 -- addend to the multiplier
1479 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1480 -- for subtract, multiplier does B - A * C
1481 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1482 v.result_exp := r.b.exponent;
1486 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1487 v.state := NAN_RESULT;
1488 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1489 (r.a.class = INFINITY and r.c.class = ZERO) then
1490 -- invalid operation, construct QNaN
1491 v.fpscr(FPSCR_VXIMZ) := '1';
1493 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1494 if r.b.class = INFINITY and is_add = '0' then
1495 -- invalid operation, construct QNaN
1496 v.fpscr(FPSCR_VXISI) := '1';
1499 -- result is infinity
1500 v.result_class := INFINITY;
1501 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1505 -- Here A is zero, C is zero, or B is infinity
1506 -- Result is +/-B in all of those cases
1508 if r.b.class /= ZERO or is_add = '1' then
1509 v.negate := not (r.insn(1) xor r.insn(2));
1511 -- have to be careful about rule for 0 - 0 result sign
1512 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1514 v.state := EXC_RESULT;
1520 v.state := RENORM_A2;
1521 if r.insn(4) = '1' then
1528 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1530 v.result_exp := new_exp;
1531 if r.insn(4) = '1' then
1532 if r.c.mantissa(54) = '1' then
1533 if r.insn(3) = '0' or r.b.class = ZERO then
1538 if new_exp + 1 >= r.b.exponent then
1542 v.state := DO_FMADD;
1545 v.state := RENORM_C;
1548 if r.b.mantissa(54) = '1' then
1552 v.state := RENORM_B;
1558 renorm_sqrt := r.is_sqrt;
1559 v.state := RENORM_B2;
1563 if r.is_sqrt = '0' then
1564 v.result_exp := r.result_exp + r.shift;
1566 v.result_exp := new_exp;
1573 v.state := RENORM_C2;
1577 v.result_exp := new_exp;
1578 if r.insn(3) = '0' or r.b.class = ZERO then
1583 if new_exp + 1 >= r.b.exponent then
1587 v.state := DO_FMADD;
1591 -- transferring B to R
1592 v.shift := r.b.exponent - r.a.exponent;
1593 v.result_exp := r.b.exponent;
1595 v.state := ADD_SHIFT;
1598 -- r.shift = - exponent difference, r.longmask = 0
1599 opsel_r <= RES_SHIFT;
1602 v.longmask := r.single_prec;
1603 if r.add_bsmall = '1' then
1611 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1613 opsel_binv <= r.is_subtract;
1614 carry_in <= r.is_subtract and not r.x;
1615 v.shift := to_signed(-1, EXP_BITS);
1619 -- check for overflow or negative result (can't get both)
1621 if r.r(63) = '1' then
1622 -- result is opposite sign to expected
1623 v.result_sign := not r.result_sign;
1627 elsif r.r(55) = '1' then
1628 -- sum overflowed, shift right
1629 opsel_r <= RES_SHIFT;
1631 if exp_huge = '1' then
1632 v.state := ROUND_OFLOW;
1634 v.state := ROUNDING;
1636 elsif r.r(54) = '1' then
1638 v.state := ROUNDING;
1639 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1640 -- r.x must be zero at this point
1641 v.result_class := ZERO;
1642 if r.is_subtract = '1' then
1643 -- set result sign depending on rounding mode
1644 v.result_sign := r.round_mode(1) and r.round_mode(0);
1649 v.state := NORMALIZE;
1653 -- r.opsel_a = AIN_A
1660 if r.r(63) = '1' then
1661 -- A is smaller in magnitude
1662 v.cr_result := not r.a.negative & r.a.negative & "00";
1663 elsif (r_hi_nz or r_lo_nz) = '0' then
1664 v.cr_result := "0010";
1666 v.cr_result := r.a.negative & not r.a.negative & "00";
1668 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1669 v.instr_done := '1';
1673 f_to_multiply.valid <= r.first;
1674 opsel_r <= RES_MULT;
1675 if multiply_to_f.valid = '1' then
1680 -- Addend is bigger here
1681 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1682 -- note v.shift is at most -2 here
1683 v.shift := r.result_exp - r.b.exponent;
1684 opsel_r <= RES_MULT;
1687 f_to_multiply.valid <= r.first;
1688 if multiply_to_f.valid = '1' then
1690 v.state := ADD_SHIFT;
1694 -- Product is potentially bigger here
1695 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1698 v.shift := r.shift - to_signed(64, EXP_BITS);
1702 -- r.shift = addend exp - product exp
1703 opsel_r <= RES_SHIFT;
1708 msel_add <= MULADD_RS;
1709 f_to_multiply.valid <= r.first;
1710 msel_inv <= r.is_subtract;
1711 opsel_r <= RES_MULT;
1714 if multiply_to_f.valid = '1' then
1719 -- negate R:S:X if negative
1720 if r.r(63) = '1' then
1721 v.result_sign := not r.result_sign;
1723 carry_in <= not (s_nz or r.x);
1727 v.shift := to_signed(56, EXP_BITS);
1731 -- r.shift = 56 (or 0, but only if r is now nonzero)
1732 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1734 -- must be a subtraction, and r.x must be zero
1735 v.result_class := ZERO;
1736 v.result_sign := r.round_mode(1) and r.round_mode(0);
1739 -- R is all zeroes but there are non-zero bits in S
1740 -- so shift them into R and set S to 0
1741 opsel_r <= RES_SHIFT;
1743 -- stay in state FMADD_6
1745 elsif r.r(56 downto 54) = "001" then
1749 v.state := NORMALIZE;
1753 -- r.opsel_a = AIN_B
1754 -- wait one cycle for inverse_table[B] lookup
1756 if r.insn(4) = '0' then
1757 if r.insn(3) = '0' then
1762 elsif r.insn(2) = '0' then
1769 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1771 msel_add <= MULADD_CONST;
1780 f_to_multiply.valid <= r.first;
1781 if multiply_to_f.valid = '1' then
1783 v.count := r.count + 1;
1788 -- compute Y = P = P * Y
1791 f_to_multiply.valid <= r.first;
1793 if multiply_to_f.valid = '1' then
1803 -- compute R = P = A * Y (quotient)
1807 f_to_multiply.valid <= r.first;
1809 if multiply_to_f.valid = '1' then
1810 opsel_r <= RES_MULT;
1816 -- compute P = A - B * R (remainder)
1819 msel_add <= MULADD_A;
1821 f_to_multiply.valid <= r.first;
1822 if multiply_to_f.valid = '1' then
1827 -- test if remainder is 0 or >= B
1828 if pcmpb_lt = '1' then
1829 -- quotient is correct, set X if remainder non-zero
1830 v.x := r.p(58) or px_nz;
1832 -- quotient needs to be incremented by 1
1834 v.x := not pcmpb_eq;
1839 opsel_r <= RES_MISC;
1841 v.shift := to_signed(1, EXP_BITS);
1842 v.state := NORMALIZE;
1845 v.cr_result(1) := exp_tiny or exp_huge;
1846 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1847 v.instr_done := '1';
1850 v.shift := r.a.exponent;
1851 v.doing_ftdiv := "10";
1855 opsel_r <= RES_MISC;
1857 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1858 v.result_exp := - sqrt_exp;
1859 v.shift := to_signed(1, EXP_BITS);
1860 v.state := NORMALIZE;
1863 -- put invsqr[B] in R and compute P = invsqr[B] * B
1864 -- also transfer B (in R) to A
1866 opsel_r <= RES_MISC;
1870 f_to_multiply.valid <= '1';
1871 v.shift := to_signed(-1, EXP_BITS);
1876 -- shift R right one place
1877 -- not expecting multiplier result yet
1879 opsel_r <= RES_SHIFT;
1884 -- put R into Y, wait for product from multiplier
1888 if multiply_to_f.valid = '1' then
1889 -- put result into R
1890 opsel_r <= RES_MULT;
1896 -- compute 1.5 - Y * P
1899 msel_add <= MULADD_CONST;
1901 f_to_multiply.valid <= r.first;
1903 if multiply_to_f.valid = '1' then
1908 -- compute Y = Y * P
1911 f_to_multiply.valid <= '1';
1916 -- pipeline in R = R * P
1919 f_to_multiply.valid <= r.first;
1921 if multiply_to_f.valid = '1' then
1927 -- first multiply is done, put result in Y
1930 -- wait for second multiply (should be here already)
1932 if multiply_to_f.valid = '1' then
1933 -- put result into R
1934 opsel_r <= RES_MULT;
1936 v.count := r.count + 1;
1946 -- compute P = A - R * R, which can be +ve or -ve
1947 -- we arranged for B to be put into A earlier
1950 msel_add <= MULADD_A;
1953 f_to_multiply.valid <= r.first;
1954 if multiply_to_f.valid = '1' then
1960 -- compute P = P * Y
1961 -- since Y is an estimate of 1/sqrt(B), this makes P an
1962 -- estimate of the adjustment needed to R. Since the error
1963 -- could be negative and we have an unsigned multiplier, the
1964 -- upper bits can be wrong, but it turns out the lowest 8 bits
1965 -- are correct and are all we need (given 3 iterations through
1966 -- SQRT_4 to SQRT_7).
1970 f_to_multiply.valid <= r.first;
1971 if multiply_to_f.valid = '1' then
1976 -- Add the bottom 8 bits of P, sign-extended,
1977 -- divided by 4, onto R.
1978 -- The division by 4 is because R is 10.54 format
1979 -- whereas P is 8.56 format.
1981 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1982 v.result_exp := sqrt_exp;
1983 v.shift := to_signed(1, EXP_BITS);
1988 -- compute P = A - R * R (remainder)
1989 -- also put 2 * R + 1 into B for comparison with P
1992 msel_add <= MULADD_A;
1994 f_to_multiply.valid <= r.first;
1997 if multiply_to_f.valid = '1' then
2002 -- test if remainder is 0 or >= B = 2*R + 1
2003 if pcmpb_lt = '1' then
2004 -- square root is correct, set X if remainder non-zero
2005 v.x := r.p(58) or px_nz;
2007 -- square root needs to be incremented by 1
2009 v.x := not pcmpb_eq;
2014 -- r.shift = b.exponent - 52
2015 opsel_r <= RES_SHIFT;
2017 v.state := INT_ROUND;
2018 v.shift := to_signed(-2, EXP_BITS);
2022 opsel_r <= RES_SHIFT;
2023 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2024 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2025 -- Check for negative values that don't round to 0 for fcti*u*
2026 if r.insn(8) = '1' and r.result_sign = '1' and
2027 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2028 v.state := INT_OFLOW;
2030 v.state := INT_FINAL;
2034 -- r.shift = b.exponent - 54;
2035 opsel_r <= RES_SHIFT;
2036 v.state := INT_FINAL;
2039 -- Negate if necessary, and increment for rounding if needed
2040 opsel_ainv <= r.result_sign;
2041 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2042 -- Check for possible overflows
2043 case r.insn(9 downto 8) is
2044 when "00" => -- fctiw[z]
2045 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2046 when "01" => -- fctiwu[z]
2047 need_check := r.r(31);
2048 when "10" => -- fctid[z]
2049 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2050 when others => -- fctidu[z]
2051 need_check := r.r(63);
2053 if need_check = '1' then
2054 v.state := INT_CHECK;
2056 if r.fpscr(FPSCR_FI) = '1' then
2057 v.fpscr(FPSCR_XX) := '1';
2063 if r.insn(9) = '0' then
2068 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2069 if (r.insn(8) = '0' and msb /= r.result_sign) or
2070 (r.insn(8) = '1' and msb /= '1') then
2071 opsel_r <= RES_MISC;
2072 v.fpscr(FPSCR_VXCVI) := '1';
2075 if r.fpscr(FPSCR_FI) = '1' then
2076 v.fpscr(FPSCR_XX) := '1';
2082 opsel_r <= RES_MISC;
2083 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2084 if r.b.class = NAN then
2087 v.fpscr(FPSCR_VXCVI) := '1';
2092 -- r.shift = b.exponent - 52
2093 opsel_r <= RES_SHIFT;
2095 v.state := ROUNDING;
2098 if r.is_multiply = '1' and px_nz = '1' then
2101 if r.r(63 downto 54) /= "0000000001" then
2103 v.state := NORMALIZE;
2106 if exp_tiny = '1' then
2107 v.shift := new_exp - min_exp;
2108 v.state := ROUND_UFLOW;
2109 elsif exp_huge = '1' then
2110 v.state := ROUND_OFLOW;
2112 v.state := ROUNDING;
2117 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2118 -- r.shift = clz(r.r) - 9
2119 opsel_r <= RES_SHIFT;
2121 if exp_tiny = '1' then
2122 v.shift := new_exp - min_exp;
2123 v.state := ROUND_UFLOW;
2124 elsif exp_huge = '1' then
2125 v.state := ROUND_OFLOW;
2127 v.state := ROUNDING;
2131 -- r.shift = - amount by which exponent underflows
2133 if r.fpscr(FPSCR_UE) = '0' then
2134 -- disabled underflow exception case
2135 -- have to denormalize before rounding
2136 opsel_r <= RES_SHIFT;
2138 v.state := ROUNDING;
2140 -- enabled underflow exception case
2141 -- if denormalized, have to normalize before rounding
2142 v.fpscr(FPSCR_UX) := '1';
2143 v.result_exp := r.result_exp + bias_exp;
2144 if r.r(54) = '0' then
2146 v.state := NORMALIZE;
2148 v.state := ROUNDING;
2153 v.fpscr(FPSCR_OX) := '1';
2154 if r.fpscr(FPSCR_OE) = '0' then
2155 -- disabled overflow exception
2156 -- result depends on rounding mode
2157 v.fpscr(FPSCR_XX) := '1';
2158 v.fpscr(FPSCR_FI) := '1';
2159 if r.round_mode(1 downto 0) = "00" or
2160 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2161 v.result_class := INFINITY;
2162 v.fpscr(FPSCR_FR) := '1';
2164 v.fpscr(FPSCR_FR) := '0';
2166 -- construct largest representable number
2167 v.result_exp := max_exp;
2168 opsel_r <= RES_MISC;
2169 misc_sel <= "001" & r.single_prec;
2172 -- enabled overflow exception
2173 v.result_exp := r.result_exp - bias_exp;
2174 v.state := ROUNDING;
2179 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2180 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2181 if round(1) = '1' then
2182 -- increment the LSB for the precision
2184 v.shift := to_signed(-1, EXP_BITS);
2185 v.state := ROUNDING_2;
2187 if r.r(54) = '0' then
2188 -- result after masking could be zero, or could be a
2189 -- denormalized result that needs to be renormalized
2191 v.state := ROUNDING_3;
2196 if round(0) = '1' then
2197 v.fpscr(FPSCR_XX) := '1';
2198 if r.tiny = '1' then
2199 v.fpscr(FPSCR_UX) := '1';
2204 -- Check for overflow during rounding
2207 if r.r(55) = '1' then
2208 opsel_r <= RES_SHIFT;
2209 if exp_huge = '1' then
2210 v.state := ROUND_OFLOW;
2214 elsif r.r(54) = '0' then
2215 -- Do CLZ so we can renormalize the result
2217 v.state := ROUNDING_3;
2223 -- r.shift = clz(r.r) - 9
2224 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2225 if mant_nz = '0' then
2226 v.result_class := ZERO;
2227 if r.is_subtract = '1' then
2228 -- set result sign depending on rounding mode
2229 v.result_sign := r.round_mode(1) and r.round_mode(0);
2233 -- Renormalize result after rounding
2234 opsel_r <= RES_SHIFT;
2235 v.denorm := exp_tiny;
2236 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2237 if new_exp < to_signed(-1022, EXP_BITS) then
2245 -- r.shift = result_exp - -1022
2246 opsel_r <= RES_SHIFT;
2250 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2251 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2252 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2254 v.fpscr(FPSCR_VXSNAN) := '1';
2257 if r.use_a = '1' and r.a.class = NAN then
2259 elsif r.use_b = '1' and r.b.class = NAN then
2261 elsif r.use_c = '1' and r.c.class = NAN then
2264 v.state := EXC_RESULT;
2267 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2270 v.result_sign := r.b.negative xor r.negate;
2271 v.result_exp := r.b.exponent;
2272 v.result_class := r.b.class;
2274 v.result_sign := r.c.negative xor r.negate;
2275 v.result_exp := r.c.exponent;
2276 v.result_class := r.c.class;
2278 v.result_sign := r.a.negative xor r.negate;
2279 v.result_exp := r.a.exponent;
2280 v.result_class := r.a.class;
2286 if zero_divide = '1' then
2287 v.fpscr(FPSCR_ZX) := '1';
2289 if qnan_result = '1' then
2291 v.result_class := NAN;
2292 v.result_sign := '0';
2294 opsel_r <= RES_MISC;
2297 if invalid = '1' then
2300 if arith_done = '1' then
2301 -- Enabled invalid exception doesn't write result or FPRF
2302 -- Neither does enabled zero-divide exception
2303 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2304 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2305 v.writing_back := '1';
2306 v.update_fprf := '1';
2308 v.instr_done := '1';
2313 -- Multiplier and divide/square root data path
2316 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2318 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2320 f_to_multiply.data1 <= r.y;
2322 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2326 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2328 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2330 f_to_multiply.data2 <= r.p;
2332 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2334 maddend := (others => '0');
2336 when MULADD_CONST =>
2337 -- addend is 2.0 or 1.5 in 16.112 format
2338 if r.is_sqrt = '0' then
2339 maddend(113) := '1'; -- 2.0
2341 maddend(112 downto 111) := "11"; -- 1.5
2344 -- addend is A in 16.112 format
2345 maddend(121 downto 58) := r.a.mantissa;
2347 -- addend is concatenation of R and S in 16.112 format
2348 maddend := "000000" & r.r & r.s & "00";
2351 if msel_inv = '1' then
2352 f_to_multiply.addend <= not maddend;
2354 f_to_multiply.addend <= maddend;
2356 f_to_multiply.not_result <= msel_inv;
2358 v.y := f_to_multiply.data2;
2360 if multiply_to_f.valid = '1' then
2361 if pshift = '0' then
2362 v.p := multiply_to_f.result(63 downto 0);
2364 v.p := multiply_to_f.result(119 downto 56);
2369 -- This has A and B input multiplexers, an adder, a shifter,
2370 -- count-leading-zeroes logic, and a result mux.
2371 if r.longmask = '1' then
2372 mshift := r.shift + to_signed(-29, EXP_BITS);
2376 if mshift < to_signed(-64, EXP_BITS) then
2377 mask := (others => '1');
2378 elsif mshift >= to_signed(0, EXP_BITS) then
2379 mask := (others => '0');
2381 mask := right_mask(unsigned(mshift(5 downto 0)));
2387 in_a0 := r.a.mantissa;
2389 in_a0 := r.b.mantissa;
2391 in_a0 := r.c.mantissa;
2393 if (or (mask and in_a0)) = '1' and set_x = '1' then
2396 if opsel_ainv = '1' then
2402 in_b0 := (others => '0');
2406 round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
2409 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2410 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2412 if opsel_binv = '1' then
2416 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2417 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2418 std_ulogic_vector(r.shift(6 downto 0)));
2420 shift_res := (others => '0');
2422 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2423 if opsel_mask = '1' then
2424 sum(1 downto 0) := "00";
2425 if r.single_prec = '1' then
2426 sum(30 downto 2) := (others => '0');
2433 result <= shift_res;
2435 result <= multiply_to_f.result(121 downto 58);
2439 misc := x"00000000" & (r.fpscr and fpscr_mask);
2441 -- generated QNaN mantissa
2442 misc := x"0020000000000000";
2444 -- mantissa of max representable DP number
2445 misc := x"007ffffffffffffc";
2447 -- mantissa of max representable SP number
2448 misc := x"007fffff80000000";
2451 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2454 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2456 misc := 10x"000" & inverse_est & 35x"000000000";
2458 -- max positive result for fctiw[z]
2459 misc := x"000000007fffffff";
2461 -- max negative result for fctiw[z]
2462 misc := x"ffffffff80000000";
2464 -- max positive result for fctiwu[z]
2465 misc := x"00000000ffffffff";
2467 -- max negative result for fctiwu[z]
2468 misc := x"0000000000000000";
2470 -- max positive result for fctid[z]
2471 misc := x"7fffffffffffffff";
2473 -- max negative result for fctid[z]
2474 misc := x"8000000000000000";
2476 -- max positive result for fctidu[z]
2477 misc := x"ffffffffffffffff";
2479 -- max negative result for fctidu[z]
2480 misc := x"0000000000000000";
2482 misc := x"0000000000000000";
2490 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2492 v.s := multiply_to_f.result(57 downto 2);
2494 v.s := shift_res(63 downto 8);
2495 if shift_res(7 downto 0) /= x"00" then
2499 v.s := (others => '0');
2504 v.a.exponent := new_exp;
2505 v.a.mantissa := shift_res;
2508 v.b.exponent := new_exp;
2509 v.b.mantissa := shift_res;
2512 v.c.exponent := new_exp;
2513 v.c.mantissa := shift_res;
2516 if opsel_r = RES_SHIFT then
2517 v.result_exp := new_exp;
2520 if renormalize = '1' then
2521 clz := count_left_zeroes(r.r);
2522 if renorm_sqrt = '1' then
2523 -- make denormalized value end up with even exponent
2526 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2529 if r.int_result = '1' then
2532 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2533 r.single_prec, r.quieten_nan);
2535 if r.update_fprf = '1' then
2536 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2537 r.r(54) and not r.denorm);
2540 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2541 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2542 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2543 v.fpscr(FPSCR_VE downto FPSCR_XE));
2544 if update_fx = '1' and
2545 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2546 v.fpscr(FPSCR_FX) := '1';
2549 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2552 v.illegal := illegal;
2553 if illegal = '1' then
2554 v.instr_done := '0';
2556 v.writing_back := '0';
2560 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2561 if v.state /= IDLE or v.do_intr = '1' then
2569 end architecture behaviour;