1 -- Floating-point unit for Microwatt
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
19 e_in : in Execute1toFPUType;
20 e_out : out FPUToExecute1Type;
22 w_out : out FPUToWritebackType
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
29 constant EXP_BITS : natural := 13;
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
47 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
69 NAN_RESULT, EXC_RESULT);
71 type reg_type is record
74 instr_done : std_ulogic;
77 insn : std_ulogic_vector(31 downto 0);
78 dest_fpr : gspr_index_t;
82 single_prec : std_ulogic;
83 fpscr : std_ulogic_vector(31 downto 0);
87 r : std_ulogic_vector(63 downto 0); -- 10.54 format
88 s : std_ulogic_vector(55 downto 0); -- extended fraction
90 p : std_ulogic_vector(63 downto 0); -- 8.56 format
91 y : std_ulogic_vector(63 downto 0); -- 8.56 format
92 result_sign : std_ulogic;
93 result_class : fp_number_class;
94 result_exp : signed(EXP_BITS-1 downto 0);
95 shift : signed(EXP_BITS-1 downto 0);
96 writing_back : std_ulogic;
97 int_result : std_ulogic;
98 cr_result : std_ulogic_vector(3 downto 0);
99 cr_mask : std_ulogic_vector(7 downto 0);
100 old_exc : std_ulogic_vector(4 downto 0);
101 update_fprf : std_ulogic;
102 quieten_nan : std_ulogic;
105 round_mode : std_ulogic_vector(2 downto 0);
106 is_subtract : std_ulogic;
107 exp_cmp : std_ulogic;
108 madd_cmp : std_ulogic;
109 add_bsmall : std_ulogic;
110 is_multiply : std_ulogic;
111 is_sqrt : std_ulogic;
113 count : unsigned(1 downto 0);
114 doing_ftdiv : std_ulogic_vector(1 downto 0);
115 opsel_a : std_ulogic_vector(1 downto 0);
119 invalid : std_ulogic;
121 longmask : std_ulogic;
124 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
126 signal r, rin : reg_type;
128 signal fp_result : std_ulogic_vector(63 downto 0);
129 signal opsel_b : std_ulogic_vector(1 downto 0);
130 signal opsel_r : std_ulogic_vector(1 downto 0);
131 signal opsel_s : std_ulogic_vector(1 downto 0);
132 signal opsel_ainv : std_ulogic;
133 signal opsel_mask : std_ulogic;
134 signal opsel_binv : std_ulogic;
135 signal in_a : std_ulogic_vector(63 downto 0);
136 signal in_b : std_ulogic_vector(63 downto 0);
137 signal result : std_ulogic_vector(63 downto 0);
138 signal carry_in : std_ulogic;
139 signal lost_bits : std_ulogic;
140 signal r_hi_nz : std_ulogic;
141 signal r_lo_nz : std_ulogic;
142 signal s_nz : std_ulogic;
143 signal misc_sel : std_ulogic_vector(3 downto 0);
144 signal f_to_multiply : MultiplyInputType;
145 signal multiply_to_f : MultiplyOutputType;
146 signal msel_1 : std_ulogic_vector(1 downto 0);
147 signal msel_2 : std_ulogic_vector(1 downto 0);
148 signal msel_add : std_ulogic_vector(1 downto 0);
149 signal msel_inv : std_ulogic;
150 signal inverse_est : std_ulogic_vector(18 downto 0);
153 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
154 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
155 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
156 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
158 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
159 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
160 constant BIN_RND : std_ulogic_vector(1 downto 0) := "10";
161 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
163 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
164 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
165 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
166 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
168 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
169 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
170 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
171 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
174 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
175 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
176 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
177 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
179 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
180 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
181 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
182 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
184 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
185 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
186 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
187 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
189 -- Inverse lookup table, indexed by the top 8 fraction bits
190 -- The first 256 entries are the reciprocal (1/x) lookup table,
191 -- and the remaining 768 entries are the reciprocal square root table.
192 -- Output range is [0.5, 1) in 0.19 format, though the top
193 -- bit isn't stored since it is always 1.
194 -- Each output value is the inverse of the center of the input
195 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
196 -- entry 1 is 1 / (1 + 3/512), etc.
197 signal inverse_table : lookup_table := (
199 -- Unit bit is assumed to be 1, so input range is [1, 2)
200 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
201 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
202 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
203 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
204 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
205 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
206 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
207 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
208 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
209 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
210 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
211 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
212 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
213 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
214 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
215 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
216 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
217 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
218 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
219 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
220 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
221 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
222 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
223 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
224 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
225 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
226 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
227 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
228 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
229 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
230 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
231 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
232 -- 1/sqrt(x) lookup table
233 -- Input is in the range [1, 4), i.e. two bits to the left of the
234 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
236 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
237 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
238 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
239 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
240 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
241 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
242 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
243 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
244 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
245 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
246 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
247 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
248 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
249 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
250 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
251 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
252 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
253 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
254 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
255 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
256 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
257 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
258 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
259 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
260 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
261 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
262 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
263 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
264 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
265 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
266 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
267 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
269 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
270 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
271 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
272 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
273 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
274 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
275 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
276 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
277 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
278 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
279 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
280 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
281 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
282 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
283 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
284 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
285 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
286 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
287 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
288 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
289 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
290 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
291 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
292 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
293 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
294 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
295 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
296 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
297 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
298 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
299 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
300 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
302 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
303 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
304 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
305 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
306 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
307 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
308 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
309 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
310 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
311 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
312 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
313 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
314 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
315 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
316 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
317 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
318 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
319 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
320 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
321 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
322 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
323 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
324 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
325 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
326 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
327 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
328 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
329 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
330 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
331 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
332 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
333 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
336 -- Left and right shifter with 120 bit input and 64 bit output.
337 -- Shifts inp left by shift bits and returns the upper 64 bits of
338 -- the result. The shift parameter is interpreted as a signed
339 -- number in the range -64..63, with negative values indicating right shifts.
341 function shifter_64(inp: std_ulogic_vector(119 downto 0); -- 120-bit value to be shifted
342 shift: std_ulogic_vector(6 downto 0)) -- 7-bit shift count, consumed in three slices below
343 return std_ulogic_vector is
344 variable s1 : std_ulogic_vector(94 downto 0); -- 95-bit output of the stage selected by shift(6 downto 5)
345 variable s2 : std_ulogic_vector(70 downto 0); -- 71-bit output of the stage selected by shift(4 downto 3)
346 variable result : std_ulogic_vector(63 downto 0); -- 64-bit output of the stage selected by shift(2 downto 0)
348 case shift(6 downto 5) is -- coarse stage: the alternatives below are 95-bit windows 32 bit positions apart
350 s1 := inp(119 downto 25); -- window lies entirely inside inp
352 s1 := inp(87 downto 0) & "0000000"; -- window runs 7 bits past the bottom of inp, zero-filled
354 s1 := x"0000000000000000" & inp(119 downto 89); -- window starts 64 bits above the top of inp, zero-filled
356 s1 := x"00000000" & inp(119 downto 57); -- window starts 32 bits above the top of inp, zero-filled
358 case shift(4 downto 3) is -- middle stage: 71-bit windows of s1, 8 bit positions apart
360 s2 := s1(94 downto 24);
362 s2 := s1(86 downto 16);
364 s2 := s1(78 downto 8);
366 s2 := s1(70 downto 0);
368 case shift(2 downto 0) is -- fine stage: 64-bit windows of s2, 1 bit position apart
370 result := s2(70 downto 7);
372 result := s2(69 downto 6);
374 result := s2(68 downto 5);
376 result := s2(67 downto 4);
378 result := s2(66 downto 3);
380 result := s2(65 downto 2);
382 result := s2(64 downto 1);
384 result := s2(63 downto 0);
389 -- Generate a mask with 0-bits on the left and 1-bits on the right which
390 -- selects the bits that will be lost in doing a right shift. The shift
391 -- parameter is the bottom 6 bits of a negative shift count,
392 -- indicating a right shift.
393 function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is -- returns a 64-bit mask
394 variable result: std_ulogic_vector(63 downto 0); -- mask being built
396 result := (others => '0'); -- start with no bits selected
397 for i in 0 to 63 loop -- one iteration per mask bit
399 result(63 - i) := '1'; -- iteration i controls mask bit 63 - i
405 -- Split a DP floating-point number into components and work out its class.
406 -- If is_int = 1, the input is considered an integer
407 function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
408 variable r : fpu_reg_type; -- decoded operand being assembled
409 variable exp_nz : std_ulogic; -- '1' when any exponent field bit is set
410 variable exp_ao : std_ulogic; -- '1' when all exponent field bits are set
411 variable frac_nz : std_ulogic; -- '1' when any fraction bit is set
412 variable cls : std_ulogic_vector(2 downto 0); -- exp_ao & exp_nz & frac_nz, used to pick the class
414 r.negative := fpr(63); -- sign bit
415 exp_nz := or (fpr(62 downto 52)); -- 11-bit exponent field
416 exp_ao := and (fpr(62 downto 52));
417 frac_nz := or (fpr(51 downto 0)); -- 52-bit fraction field
419 r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS); -- remove the bias of 1023
421 r.exponent := to_signed(-1022, EXP_BITS); -- alternative fixed exponent value
423 r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00"; -- exp_nz lands in bit 54 as the unit bit, fraction in bits 53..2
424 cls := exp_ao & exp_nz & frac_nz;
426 when "000" => r.class := ZERO;
427 when "001" => r.class := FINITE; -- denormalized
428 when "010" => r.class := FINITE;
429 when "011" => r.class := FINITE;
430 when "110" => r.class := INFINITY; -- all-ones exponent, zero fraction
431 when others => r.class := NAN; -- remaining encodings, including all-ones exponent with non-zero fraction
435 r.exponent := (others => '0'); -- exponent forced to zero on this path
436 if (fpr(63) or exp_nz or frac_nz) = '1' then -- true when any bit of fpr is set
445 -- Construct a DP floating-point result from components
446 function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
447 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
448 return std_ulogic_vector is
449 variable result : std_ulogic_vector(63 downto 0); -- packed 64-bit image being built
451 result := (others => '0'); -- fields not assigned below stay zero
456 if mantissa(54) = '1' then -- bit 54 is where decode_dp places the unit bit
458 result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023); -- re-apply the bias of 1023
460 result(51 downto 29) := mantissa(53 downto 31); -- upper 23 fraction bits
461 if single_prec = '0' then -- low fraction bits are copied only when single_prec is clear
462 result(28 downto 0) := mantissa(30 downto 2); -- lower 29 fraction bits
465 result(62 downto 52) := "11111111111"; -- all-ones exponent field
467 result(62 downto 52) := "11111111111"; -- all-ones exponent field
468 result(51) := quieten_nan or mantissa(53); -- top fraction bit is forced to 1 when quieten_nan is set
469 result(50 downto 29) := mantissa(52 downto 31); -- remaining upper fraction bits
470 if single_prec = '0' then -- low fraction bits are copied only when single_prec is clear
471 result(28 downto 0) := mantissa(30 downto 2); -- lower 29 fraction bits
477 -- Determine whether to increment when rounding
478 -- Returns rounding_inc & inexact
479 -- Assumes x includes the bottom 29 bits of the mantissa already
480 -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
481 function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
482 single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
484 return std_ulogic_vector is
485 variable grx : std_ulogic_vector(2 downto 0); -- two mantissa bits followed by x; presumably guard, round and sticky -- TODO confirm
486 variable ret : std_ulogic_vector(1 downto 0); -- return value; the header comment gives it as rounding_inc & inexact
487 variable lsb : std_ulogic; -- value used to resolve a tie in round-to-nearest
489 if single_prec = '0' then
490 grx := mantissa(1 downto 0) & x; -- uses mantissa bits 1..0 when single_prec is clear
493 grx := mantissa(30 downto 29) & x; -- uses mantissa bits 30..29 otherwise
498 case rn(1 downto 0) is -- low two bits of rn select the rounding mode
499 when "00" => -- round to nearest
500 if grx = "100" and rn(2) = '0' then -- exact tie, and rn(2) is clear
501 ret(1) := lsb; -- tie, round to even
505 when "01" => -- round towards zero
506 when others => -- round towards +/- inf
508 -- round towards greater magnitude
515 -- Determine result flags to write into the FPSCR
516 function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
517 return std_ulogic_vector is -- every return below is 5 bits wide; presumably the FPSCR FPRF field -- TODO confirm
521 return sign & "0010"; -- top bit follows sign, low four bits fixed
523 return (not unitbit) & sign & (not sign) & "00"; -- top bit is set when unitbit is clear; next two bits encode the sign
525 return '0' & sign & (not sign) & "01"; -- top bit clear; next two bits encode the sign; lowest bit set
532 fpu_multiply_0: entity work.multiply
535 m_in => f_to_multiply,
536 m_out => multiply_to_f
541 if rising_edge(clk) then
547 r.fpscr <= (others => '0');
548 r.writing_back <= '0';
550 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
556 -- synchronous reads from lookup table
557 lut_access: process(clk) -- registers one inverse_table entry into inverse_est
558 variable addrhi : std_ulogic_vector(1 downto 0); -- top two address bits, selecting a 256-entry block
559 variable addr : std_ulogic_vector(9 downto 0); -- full 10-bit table index
561 if rising_edge(clk) then
562 if r.is_sqrt = '1' then -- block selection depends on r.is_sqrt
563 addrhi := r.b.mantissa(55 downto 54); -- taken from bits 55..54 of B's mantissa
567 addr := addrhi & r.b.mantissa(53 downto 46); -- low 8 index bits come from mantissa bits 53..46
568 inverse_est <= '1' & inverse_table(to_integer(unsigned(addr))); -- prepend the leading 1 that the table does not store
572 e_out.busy <= r.busy;
573 e_out.exception <= r.fpscr(FPSCR_FEX);
574 e_out.interrupt <= r.do_intr;
576 w_out.valid <= r.instr_done and not r.do_intr;
577 w_out.write_enable <= r.writing_back;
578 w_out.write_reg <= r.dest_fpr;
579 w_out.write_data <= fp_result;
580 w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
581 w_out.write_cr_mask <= r.cr_mask;
582 w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
583 r.cr_result & r.cr_result & r.cr_result & r.cr_result;
586 variable v : reg_type;
587 variable adec : fpu_reg_type;
588 variable bdec : fpu_reg_type;
589 variable cdec : fpu_reg_type;
590 variable fpscr_mask : std_ulogic_vector(31 downto 0);
591 variable illegal : std_ulogic;
592 variable j, k : integer;
593 variable flm : std_ulogic_vector(7 downto 0);
594 variable int_input : std_ulogic;
595 variable mask : std_ulogic_vector(63 downto 0);
596 variable in_a0 : std_ulogic_vector(63 downto 0);
597 variable in_b0 : std_ulogic_vector(63 downto 0);
598 variable misc : std_ulogic_vector(63 downto 0);
599 variable shift_res : std_ulogic_vector(63 downto 0);
600 variable round : std_ulogic_vector(1 downto 0);
601 variable update_fx : std_ulogic;
602 variable arith_done : std_ulogic;
603 variable invalid : std_ulogic;
604 variable zero_divide : std_ulogic;
605 variable mant_nz : std_ulogic;
606 variable min_exp : signed(EXP_BITS-1 downto 0);
607 variable max_exp : signed(EXP_BITS-1 downto 0);
608 variable bias_exp : signed(EXP_BITS-1 downto 0);
609 variable new_exp : signed(EXP_BITS-1 downto 0);
610 variable exp_tiny : std_ulogic;
611 variable exp_huge : std_ulogic;
612 variable renormalize : std_ulogic;
613 variable clz : std_ulogic_vector(5 downto 0);
614 variable set_x : std_ulogic;
615 variable mshift : signed(EXP_BITS-1 downto 0);
616 variable need_check : std_ulogic;
617 variable msb : std_ulogic;
618 variable is_add : std_ulogic;
619 variable set_a : std_ulogic;
620 variable set_b : std_ulogic;
621 variable set_c : std_ulogic;
622 variable set_y : std_ulogic;
623 variable set_s : std_ulogic;
624 variable qnan_result : std_ulogic;
625 variable px_nz : std_ulogic;
626 variable pcmpb_eq : std_ulogic;
627 variable pcmpb_lt : std_ulogic;
628 variable pshift : std_ulogic;
629 variable renorm_sqrt : std_ulogic;
630 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
631 variable shiftin : std_ulogic;
632 variable mulexp : signed(EXP_BITS-1 downto 0);
633 variable maddend : std_ulogic_vector(127 downto 0);
634 variable sum : std_ulogic_vector(63 downto 0);
635 variable round_inc : std_ulogic_vector(63 downto 0);
642 -- capture incoming instruction
643 if e_in.valid = '1' then
646 v.fe_mode := or (e_in.fe_mode);
647 v.dest_fpr := e_in.frt;
648 v.single_prec := e_in.single;
649 v.longmask := e_in.single;
652 v.is_cmp := e_in.out_cr;
653 if e_in.out_cr = '0' then
654 v.cr_mask := num_to_fxm(1);
656 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
659 if e_in.op = OP_FPOP_I then
662 v.quieten_nan := '1';
665 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
666 v.is_subtract := '0';
667 v.is_multiply := '0';
670 v.doing_ftdiv := "00";
672 adec := decode_dp(e_in.fra, int_input);
673 bdec := decode_dp(e_in.frb, int_input);
674 cdec := decode_dp(e_in.frc, int_input);
680 if adec.exponent > bdec.exponent then
684 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
689 r_hi_nz <= or (r.r(55 downto 31));
690 r_lo_nz <= or (r.r(30 downto 2));
693 if r.single_prec = '0' then
694 if r.doing_ftdiv(1) = '0' then
695 max_exp := to_signed(1023, EXP_BITS);
697 max_exp := to_signed(1020, EXP_BITS);
699 if r.doing_ftdiv(0) = '0' then
700 min_exp := to_signed(-1022, EXP_BITS);
702 min_exp := to_signed(-1021, EXP_BITS);
704 bias_exp := to_signed(1536, EXP_BITS);
706 max_exp := to_signed(127, EXP_BITS);
707 min_exp := to_signed(-126, EXP_BITS);
708 bias_exp := to_signed(192, EXP_BITS);
710 new_exp := r.result_exp - r.shift;
713 if new_exp < min_exp then
716 if new_exp > max_exp then
720 -- Compare P with zero and with B
721 px_nz := or (r.p(57 downto 4));
723 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
727 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
731 v.writing_back := '0';
733 v.update_fprf := '0';
734 v.shift := to_signed(0, EXP_BITS);
745 fpscr_mask := (others => '1');
757 f_to_multiply.is_32bit <= '0';
758 f_to_multiply.valid <= '0';
761 msel_add <= MULADD_ZERO;
774 if e_in.valid = '1' then
775 case e_in.insn(5 downto 1) is
777 if e_in.insn(8) = '1' then
778 if e_in.insn(6) = '0' then
781 v.state := DO_FTSQRT;
783 elsif e_in.insn(7) = '1' then
790 if e_in.insn(10) = '0' then
791 if e_in.insn(8) = '0' then
794 v.state := DO_MTFSFI;
800 if e_in.insn(8) = '0' then
807 if e_in.insn(9 downto 8) /= "11" then
817 if int_input = '1' then
824 v.round_mode := "001";
829 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
833 when "10100" | "10101" =>
846 v.is_multiply := '1';
848 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
855 v.state := DO_FRSQRTE;
856 when "11100" | "11101" | "11110" | "11111" =>
857 if v.a.mantissa(54) = '0' then
859 elsif v.c.mantissa(54) = '0' then
870 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
874 j := to_integer(unsigned(insn_bfa(r.insn)));
878 v.cr_result := r.fpscr(k + 3 downto k);
879 fpscr_mask(k + 3 downto k) := "0000";
882 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
889 v.cr_result := "0000"; -- start with every bit of the CR field result clear
890 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
891 (r.b.class = FINITE and r.b.mantissa(54) = '0') then -- bit 54 is the unit bit set by decode_dp; clear means B is denormalized (bit 53 is only the top fraction bit)
892 v.cr_result(2) := '1'; -- result bit 2; presumably fg_flag of ftdiv -- TODO confirm against the ISA
894 if r.a.class = NAN or r.a.class = INFINITY or
895 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
896 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
897 v.cr_result(1) := '1';
899 v.doing_ftdiv := "11";
908 v.cr_result := "0000"; -- start with every bit of the CR field result clear
909 if r.b.class = ZERO or r.b.class = INFINITY or
910 (r.b.class = FINITE and r.b.mantissa(54) = '0') then -- bit 54 is the unit bit set by decode_dp; clear means B is denormalized (bit 53 is only the top fraction bit)
911 v.cr_result(2) := '1'; -- result bit 2; presumably fg_flag of ftsqrt -- TODO confirm against the ISA
913 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
914 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
915 v.cr_result(1) := '1'; -- result bit 1 must be set here; assigning '0' after the clear above left it permanently zero
924 v.result_exp := r.b.exponent;
925 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
926 (r.b.class = NAN and r.b.mantissa(53) = '0') then
928 v.fpscr(FPSCR_VXSNAN) := '1';
929 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
930 v.fpscr(FPSCR_VXVC) := '1';
933 v.cr_result := "0001"; -- unordered
934 elsif r.a.class = NAN or r.b.class = NAN then
935 if r.insn(6) = '1' then
937 v.fpscr(FPSCR_VXVC) := '1';
940 v.cr_result := "0001"; -- unordered
941 elsif r.a.class = ZERO and r.b.class = ZERO then
942 v.cr_result := "0010"; -- equal
943 elsif r.a.negative /= r.b.negative then
944 v.cr_result := r.a.negative & r.b.negative & "00";
945 elsif r.a.class = ZERO then
946 -- A and B have the same sign from here down
947 v.cr_result := not r.b.negative & r.b.negative & "00";
948 elsif r.a.class = INFINITY then
949 if r.b.class = INFINITY then
950 v.cr_result := "0010";
952 v.cr_result := r.a.negative & not r.a.negative & "00";
954 elsif r.b.class = ZERO then
955 -- A is finite from here down
956 v.cr_result := r.a.negative & not r.a.negative & "00";
957 elsif r.b.class = INFINITY then
958 v.cr_result := not r.b.negative & r.b.negative & "00";
959 elsif r.exp_cmp = '1' then
960 -- A and B are both finite from here down
961 v.cr_result := r.a.negative & not r.a.negative & "00";
962 elsif r.a.exponent /= r.b.exponent then
963 -- A's exponent is smaller than B's
964 v.cr_result := not r.a.negative & r.a.negative & "00";
966 -- Prepare to subtract mantissas, put B in R
967 v.cr_result := "0000";
972 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
976 j := to_integer(unsigned(insn_bt(r.insn)));
977 for i in 0 to 31 loop
979 v.fpscr(31 - i) := r.insn(6);
987 j := to_integer(unsigned(insn_bf(r.insn)));
988 if r.insn(16) = '0' then
992 v.fpscr(k + 3 downto k) := insn_u(r.insn);
1001 opsel_r <= RES_MISC;
1002 misc_sel <= "01" & r.insn(8) & '0';
1003 v.int_result := '1';
1004 v.writing_back := '1';
1005 v.instr_done := '1';
1009 v.int_result := '1';
1010 v.writing_back := '1';
1011 opsel_r <= RES_MISC;
1012 case r.insn(20 downto 16) is
1017 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1018 when "10100" | "10101" =>
1019 -- mffscdrn[i] (but we don't implement DRN)
1020 fpscr_mask := x"000000FF";
1023 fpscr_mask := x"000000FF";
1024 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1025 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1028 fpscr_mask := x"000000FF";
1029 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1032 fpscr_mask := x"0007F0FF";
1036 v.instr_done := '1';
1040 if r.insn(25) = '1' then
1042 elsif r.insn(16) = '1' then
1045 flm := r.insn(24 downto 17);
1047 for i in 0 to 7 loop
1049 if flm(i) = '1' then
1050 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1053 v.instr_done := '1';
1057 -- r.opsel_a = AIN_B
1058 v.result_class := r.b.class;
1059 v.result_exp := r.b.exponent;
1060 v.quieten_nan := '0';
1061 if r.insn(9) = '1' then
1062 v.result_sign := '0'; -- fabs
1063 elsif r.insn(8) = '1' then
1064 v.result_sign := '1'; -- fnabs
1065 elsif r.insn(7) = '1' then
1066 v.result_sign := r.b.negative; -- fmr
1067 elsif r.insn(6) = '1' then
1068 v.result_sign := not r.b.negative; -- fneg
1070 v.result_sign := r.a.negative; -- fcpsgn
1072 v.writing_back := '1';
1073 v.instr_done := '1';
1076 when DO_FRI => -- fri[nzpm]
1077 -- r.opsel_a = AIN_B
1078 v.result_class := r.b.class;
1079 v.result_sign := r.b.negative;
1080 v.result_exp := r.b.exponent;
1081 v.fpscr(FPSCR_FR) := '0';
1082 v.fpscr(FPSCR_FI) := '0';
1083 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1085 v.fpscr(FPSCR_VXSNAN) := '1';
1088 if r.b.class = FINITE then
1089 if r.b.exponent >= to_signed(52, EXP_BITS) then
1090 -- integer already, no rounding required
1093 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1095 v.round_mode := '1' & r.insn(7 downto 6);
1102 -- r.opsel_a = AIN_B, r.shift = 0
1103 v.result_class := r.b.class;
1104 v.result_sign := r.b.negative;
1105 v.result_exp := r.b.exponent;
1106 v.fpscr(FPSCR_FR) := '0';
1107 v.fpscr(FPSCR_FI) := '0';
1108 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1110 v.fpscr(FPSCR_VXSNAN) := '1';
1114 if r.b.class = FINITE then
1115 if r.b.exponent < to_signed(-126, EXP_BITS) then
1116 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1117 v.state := ROUND_UFLOW;
1118 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1119 v.state := ROUND_OFLOW;
1121 v.state := ROUNDING;
1128 -- instr bit 9: 1=dword 0=word
1129 -- instr bit 8: 1=unsigned 0=signed
1130 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1131 -- r.opsel_a = AIN_B
1132 v.result_class := r.b.class;
1133 v.result_sign := r.b.negative;
1134 v.result_exp := r.b.exponent;
1135 v.fpscr(FPSCR_FR) := '0';
1136 v.fpscr(FPSCR_FI) := '0';
1137 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1139 v.fpscr(FPSCR_VXSNAN) := '1';
1143 v.int_result := '1';
1148 if r.b.exponent >= to_signed(64, EXP_BITS) or
1149 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1150 v.state := INT_OFLOW;
1151 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1152 -- integer already, no rounding required,
1153 -- shift into final position
1154 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1155 if r.insn(8) = '1' and r.b.negative = '1' then
1156 v.state := INT_OFLOW;
1158 v.state := INT_ISHIFT;
1161 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1162 v.state := INT_SHIFT;
1164 when INFINITY | NAN =>
1165 v.state := INT_OFLOW;
1169 -- r.opsel_a = AIN_B
1170 v.result_sign := '0';
1171 if r.insn(8) = '0' and r.b.negative = '1' then
1172 -- fcfid[s] with negative operand, set R = -B
1175 v.result_sign := '1';
1177 v.result_class := r.b.class;
1178 v.result_exp := to_signed(54, EXP_BITS);
1179 v.fpscr(FPSCR_FR) := '0';
1180 v.fpscr(FPSCR_FI) := '0';
1181 if r.b.class = ZERO then
1188 -- fadd[s] and fsub[s]
1189 -- r.opsel_a = AIN_A
1190 v.result_sign := r.a.negative;
1191 v.result_class := r.a.class;
1192 v.result_exp := r.a.exponent;
1193 v.fpscr(FPSCR_FR) := '0';
1194 v.fpscr(FPSCR_FI) := '0';
1197 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1198 if r.a.class = FINITE and r.b.class = FINITE then
1199 v.is_subtract := not is_add;
1200 v.add_bsmall := r.exp_cmp;
1202 if r.exp_cmp = '0' then
1203 v.shift := r.a.exponent - r.b.exponent;
1204 v.result_sign := r.b.negative xnor r.insn(1);
1205 if r.a.exponent = r.b.exponent then
1209 v.state := ADD_SHIFT;
1215 if r.a.class = NAN or r.b.class = NAN then
1216 v.state := NAN_RESULT;
1217 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1218 -- invalid operation, construct QNaN
1219 v.fpscr(FPSCR_VXISI) := '1';
1222 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1223 -- return -0 for rounding to -infinity
1224 v.result_sign := r.round_mode(1) and r.round_mode(0);
1226 elsif r.a.class = INFINITY or r.b.class = ZERO then
1229 v.state := EXC_RESULT;
1233 v.negate := not r.insn(1);
1234 v.state := EXC_RESULT;
1240 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1241 v.result_sign := r.a.negative xor r.c.negative;
1242 v.result_class := r.a.class;
1243 v.fpscr(FPSCR_FR) := '0';
1244 v.fpscr(FPSCR_FI) := '0';
1247 if r.a.class = FINITE and r.c.class = FINITE then
1248 v.result_exp := r.a.exponent + r.c.exponent;
1249 -- Renormalize denorm operands
1250 if r.a.mantissa(54) = '0' then
1251 v.state := RENORM_A;
1252 elsif r.c.mantissa(54) = '0' then
1253 v.state := RENORM_C;
1255 f_to_multiply.valid <= '1';
1259 if r.a.class = NAN or r.c.class = NAN then
1260 v.state := NAN_RESULT;
1261 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1262 (r.a.class = ZERO and r.c.class = INFINITY) then
1263 -- invalid operation, construct QNaN
1264 v.fpscr(FPSCR_VXIMZ) := '1';
1266 elsif r.a.class = ZERO or r.a.class = INFINITY then
1270 -- r.c.class is ZERO or INFINITY
1272 v.negate := r.a.negative;
1273 v.state := EXC_RESULT;
1278 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1279 v.result_class := r.a.class;
1280 v.fpscr(FPSCR_FR) := '0';
1281 v.fpscr(FPSCR_FI) := '0';
1284 v.result_sign := r.a.negative xor r.b.negative;
1285 v.result_exp := r.a.exponent - r.b.exponent;
1287 if r.a.class = FINITE and r.b.class = FINITE then
1288 -- Renormalize denorm operands
1289 if r.a.mantissa(54) = '0' then
1290 v.state := RENORM_A;
1291 elsif r.b.mantissa(54) = '0' then
1292 v.state := RENORM_B;
1298 if r.a.class = NAN or r.b.class = NAN then
1299 v.state := NAN_RESULT;
1300 elsif r.b.class = INFINITY then
1301 if r.a.class = INFINITY then
1302 v.fpscr(FPSCR_VXIDI) := '1';
1305 v.result_class := ZERO;
1308 elsif r.b.class = ZERO then
1309 if r.a.class = ZERO then
1310 v.fpscr(FPSCR_VXZDZ) := '1';
1313 if r.a.class = FINITE then
1316 v.result_class := INFINITY;
1319 else -- r.b.class = FINITE, result_class = r.a.class
1325 v.fpscr(FPSCR_FR) := '0';
1326 v.fpscr(FPSCR_FI) := '0';
1327 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1332 v.quieten_nan := '0';
1333 v.state := EXC_RESULT;
1336 -- r.opsel_a = AIN_B
1337 v.result_class := r.b.class;
1338 v.result_sign := r.b.negative;
1339 v.fpscr(FPSCR_FR) := '0';
1340 v.fpscr(FPSCR_FI) := '0';
1344 v.result_exp := r.b.exponent;
1345 if r.b.negative = '1' then
1346 v.fpscr(FPSCR_VXSQRT) := '1';
1348 elsif r.b.mantissa(54) = '0' then
1349 v.state := RENORM_B;
1350 elsif r.b.exponent(0) = '0' then
1353 v.shift := to_signed(1, EXP_BITS);
1354 v.state := RENORM_B2;
1357 v.state := NAN_RESULT;
1362 if r.b.negative = '1' then
1363 v.fpscr(FPSCR_VXSQRT) := '1';
1371 -- r.opsel_a = AIN_B
1372 v.result_class := r.b.class;
1373 v.result_sign := r.b.negative;
1374 v.fpscr(FPSCR_FR) := '0';
1375 v.fpscr(FPSCR_FI) := '0';
1379 v.result_exp := - r.b.exponent;
1380 if r.b.mantissa(54) = '0' then
1381 v.state := RENORM_B;
1386 v.state := NAN_RESULT;
1388 v.result_class := ZERO;
1391 v.result_class := INFINITY;
1397 -- r.opsel_a = AIN_B
1398 v.result_class := r.b.class;
1399 v.result_sign := r.b.negative;
1400 v.fpscr(FPSCR_FR) := '0';
1401 v.fpscr(FPSCR_FI) := '0';
1403 v.shift := to_signed(1, EXP_BITS);
1406 v.result_exp := r.b.exponent;
1407 if r.b.negative = '1' then
1408 v.fpscr(FPSCR_VXSQRT) := '1';
1410 elsif r.b.mantissa(54) = '0' then
1411 v.state := RENORM_B;
1412 elsif r.b.exponent(0) = '0' then
1415 v.state := RENORM_B2;
1418 v.state := NAN_RESULT;
1420 if r.b.negative = '1' then
1421 v.fpscr(FPSCR_VXSQRT) := '1';
1424 v.result_class := ZERO;
1428 v.result_class := INFINITY;
1434 -- fmadd, fmsub, fnmadd, fnmsub
1435 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1437 v.result_sign := r.a.negative;
1438 v.result_class := r.a.class;
1439 v.result_exp := r.a.exponent;
1440 v.fpscr(FPSCR_FR) := '0';
1441 v.fpscr(FPSCR_FI) := '0';
1445 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1446 if r.a.class = FINITE and r.c.class = FINITE and
1447 (r.b.class = FINITE or r.b.class = ZERO) then
1448 v.is_subtract := not is_add;
1449 mulexp := r.a.exponent + r.c.exponent;
1450 v.result_exp := mulexp;
1451 -- Make sure A and C are normalized
1452 if r.a.mantissa(54) = '0' then
1453 v.state := RENORM_A;
1454 elsif r.c.mantissa(54) = '0' then
1455 v.state := RENORM_C;
1456 elsif r.b.class = ZERO then
1457 -- no addend, degenerates to multiply
1458 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1459 f_to_multiply.valid <= '1';
1460 v.is_multiply := '1';
1462 elsif r.madd_cmp = '0' then
1463 -- addend is bigger, do multiply first
1464 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1465 f_to_multiply.valid <= '1';
1468 -- product is bigger, shift B right and use it as the
1469 -- addend to the multiplier
1470 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1471 -- for subtract, multiplier does B - A * C
1472 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1473 v.result_exp := r.b.exponent;
1477 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1478 v.state := NAN_RESULT;
1479 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1480 (r.a.class = INFINITY and r.c.class = ZERO) then
1481 -- invalid operation, construct QNaN
1482 v.fpscr(FPSCR_VXIMZ) := '1';
1484 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1485 if r.b.class = INFINITY and is_add = '0' then
1486 -- invalid operation, construct QNaN
1487 v.fpscr(FPSCR_VXISI) := '1';
1490 -- result is infinity
1491 v.result_class := INFINITY;
1492 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1496 -- Here A is zero, C is zero, or B is infinity
1497 -- Result is +/-B in all of those cases
1499 if r.b.class /= ZERO or is_add = '1' then
1500 v.negate := not (r.insn(1) xor r.insn(2));
1502 -- have to be careful about rule for 0 - 0 result sign
1503 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1505 v.state := EXC_RESULT;
1511 v.state := RENORM_A2;
1512 if r.insn(4) = '1' then
1519 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1521 v.result_exp := new_exp;
1522 if r.insn(4) = '1' then
1523 if r.c.mantissa(54) = '1' then
1524 if r.insn(3) = '0' or r.b.class = ZERO then
1529 if new_exp + 1 >= r.b.exponent then
1533 v.state := DO_FMADD;
1536 v.state := RENORM_C;
1539 if r.b.mantissa(54) = '1' then
1543 v.state := RENORM_B;
1549 renorm_sqrt := r.is_sqrt;
1550 v.state := RENORM_B2;
1554 if r.is_sqrt = '0' then
1555 v.result_exp := r.result_exp + r.shift;
1557 v.result_exp := new_exp;
1564 v.state := RENORM_C2;
1568 v.result_exp := new_exp;
1569 if r.insn(3) = '0' or r.b.class = ZERO then
1574 if new_exp + 1 >= r.b.exponent then
1578 v.state := DO_FMADD;
1582 -- transferring B to R
1583 v.shift := r.b.exponent - r.a.exponent;
1584 v.result_exp := r.b.exponent;
1586 v.state := ADD_SHIFT;
1589 -- r.shift = - exponent difference, r.longmask = 0
1590 opsel_r <= RES_SHIFT;
1593 v.longmask := r.single_prec;
1594 if r.add_bsmall = '1' then
1602 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1604 opsel_binv <= r.is_subtract;
1605 carry_in <= r.is_subtract and not r.x;
1606 v.shift := to_signed(-1, EXP_BITS);
1610 -- check for overflow or negative result (can't get both)
1612 if r.r(63) = '1' then
1613 -- result is opposite sign to expected
1614 v.result_sign := not r.result_sign;
1618 elsif r.r(55) = '1' then
1619 -- sum overflowed, shift right
1620 opsel_r <= RES_SHIFT;
1622 if exp_huge = '1' then
1623 v.state := ROUND_OFLOW;
1625 v.state := ROUNDING;
1627 elsif r.r(54) = '1' then
1629 v.state := ROUNDING;
1630 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1631 -- r.x must be zero at this point
1632 v.result_class := ZERO;
1633 if r.is_subtract = '1' then
1634 -- set result sign depending on rounding mode
1635 v.result_sign := r.round_mode(1) and r.round_mode(0);
1640 v.state := NORMALIZE;
1644 -- r.opsel_a = AIN_A
1651 if r.r(63) = '1' then
1652 -- A is smaller in magnitude
1653 v.cr_result := not r.a.negative & r.a.negative & "00";
1654 elsif (r_hi_nz or r_lo_nz) = '0' then
1655 v.cr_result := "0010";
1657 v.cr_result := r.a.negative & not r.a.negative & "00";
1659 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1660 v.instr_done := '1';
1664 f_to_multiply.valid <= r.first;
1665 opsel_r <= RES_MULT;
1666 if multiply_to_f.valid = '1' then
1671 -- Addend is bigger here
1672 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1673 -- note v.shift is at most -2 here
1674 v.shift := r.result_exp - r.b.exponent;
1675 opsel_r <= RES_MULT;
1678 f_to_multiply.valid <= r.first;
1679 if multiply_to_f.valid = '1' then
1681 v.state := ADD_SHIFT;
1685 -- Product is potentially bigger here
1686 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1689 v.shift := r.shift - to_signed(64, EXP_BITS);
1693 -- r.shift = addend exp - product exp
1694 opsel_r <= RES_SHIFT;
1699 msel_add <= MULADD_RS;
1700 f_to_multiply.valid <= r.first;
1701 msel_inv <= r.is_subtract;
1702 opsel_r <= RES_MULT;
1705 if multiply_to_f.valid = '1' then
1710 -- negate R:S:X if negative
1711 if r.r(63) = '1' then
1712 v.result_sign := not r.result_sign;
1714 carry_in <= not (s_nz or r.x);
1718 v.shift := to_signed(56, EXP_BITS);
1722 -- r.shift = 56 (or 0, but only if r is now nonzero)
1723 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1725 -- must be a subtraction, and r.x must be zero
1726 v.result_class := ZERO;
1727 v.result_sign := r.round_mode(1) and r.round_mode(0);
1730 -- R is all zeroes but there are non-zero bits in S
1731 -- so shift them into R and set S to 0
1732 opsel_r <= RES_SHIFT;
1734 -- stay in state FMADD_6
1736 elsif r.r(56 downto 54) = "001" then
1740 v.state := NORMALIZE;
1744 -- r.opsel_a = AIN_B
1745 -- wait one cycle for inverse_table[B] lookup
1747 if r.insn(4) = '0' then
1748 if r.insn(3) = '0' then
1753 elsif r.insn(2) = '0' then
1760 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1762 msel_add <= MULADD_CONST;
1771 f_to_multiply.valid <= r.first;
1772 if multiply_to_f.valid = '1' then
1774 v.count := r.count + 1;
1779 -- compute Y = P = P * Y
1782 f_to_multiply.valid <= r.first;
1784 if multiply_to_f.valid = '1' then
1794 -- compute R = P = A * Y (quotient)
1798 f_to_multiply.valid <= r.first;
1800 if multiply_to_f.valid = '1' then
1801 opsel_r <= RES_MULT;
1807 -- compute P = A - B * R (remainder)
1810 msel_add <= MULADD_A;
1812 f_to_multiply.valid <= r.first;
1813 if multiply_to_f.valid = '1' then
1818 -- test if remainder is 0 or >= B
1819 if pcmpb_lt = '1' then
1820 -- quotient is correct, set X if remainder non-zero
1821 v.x := r.p(58) or px_nz;
1823 -- quotient needs to be incremented by 1
1825 v.x := not pcmpb_eq;
1830 opsel_r <= RES_MISC;
1832 v.shift := to_signed(1, EXP_BITS);
1833 v.state := NORMALIZE;
1836 v.cr_result(1) := exp_tiny or exp_huge;
1837 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1838 v.instr_done := '1';
1841 v.shift := r.a.exponent;
1842 v.doing_ftdiv := "10";
1846 opsel_r <= RES_MISC;
1848 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1849 v.result_exp := - sqrt_exp;
1850 v.shift := to_signed(1, EXP_BITS);
1851 v.state := NORMALIZE;
1854 -- put invsqr[B] in R and compute P = invsqr[B] * B
1855 -- also transfer B (in R) to A
1857 opsel_r <= RES_MISC;
1861 f_to_multiply.valid <= '1';
1862 v.shift := to_signed(-1, EXP_BITS);
1867 -- shift R right one place
1868 -- not expecting multiplier result yet
1870 opsel_r <= RES_SHIFT;
1875 -- put R into Y, wait for product from multiplier
1879 if multiply_to_f.valid = '1' then
1880 -- put result into R
1881 opsel_r <= RES_MULT;
1887 -- compute 1.5 - Y * P
1890 msel_add <= MULADD_CONST;
1892 f_to_multiply.valid <= r.first;
1894 if multiply_to_f.valid = '1' then
1899 -- compute Y = Y * P
1902 f_to_multiply.valid <= '1';
1907 -- pipeline in R = R * P
1910 f_to_multiply.valid <= r.first;
1912 if multiply_to_f.valid = '1' then
1918 -- first multiply is done, put result in Y
1921 -- wait for second multiply (should be here already)
1923 if multiply_to_f.valid = '1' then
1924 -- put result into R
1925 opsel_r <= RES_MULT;
1927 v.count := r.count + 1;
1937 -- compute P = A - R * R, which can be +ve or -ve
1938 -- we arranged for B to be put into A earlier
1941 msel_add <= MULADD_A;
1944 f_to_multiply.valid <= r.first;
1945 if multiply_to_f.valid = '1' then
1951 -- compute P = P * Y
1952 -- since Y is an estimate of 1/sqrt(B), this makes P an
1953 -- estimate of the adjustment needed to R. Since the error
1954 -- could be negative and we have an unsigned multiplier, the
1955 -- upper bits can be wrong, but it turns out the lowest 8 bits
1956 -- are correct and are all we need (given 3 iterations through
1957 -- SQRT_4 to SQRT_7).
1961 f_to_multiply.valid <= r.first;
1962 if multiply_to_f.valid = '1' then
1967 -- Add the bottom 8 bits of P, sign-extended,
1968 -- divided by 4, onto R.
1969 -- The division by 4 is because R is 10.54 format
1970 -- whereas P is 8.56 format.
1972 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1973 v.result_exp := sqrt_exp;
1974 v.shift := to_signed(1, EXP_BITS);
1979 -- compute P = A - R * R (remainder)
1980 -- also put 2 * R + 1 into B for comparison with P
1983 msel_add <= MULADD_A;
1985 f_to_multiply.valid <= r.first;
1988 if multiply_to_f.valid = '1' then
1993 -- test if remainder is 0 or >= B = 2*R + 1
1994 if pcmpb_lt = '1' then
1995 -- square root is correct, set X if remainder non-zero
1996 v.x := r.p(58) or px_nz;
1998 -- square root needs to be incremented by 1
2000 v.x := not pcmpb_eq;
2005 -- r.shift = b.exponent - 52
2006 opsel_r <= RES_SHIFT;
2008 v.state := INT_ROUND;
2009 v.shift := to_signed(-2, EXP_BITS);
2013 opsel_r <= RES_SHIFT;
2014 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2015 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2016 -- Check for negative values that don't round to 0 for fcti*u*
2017 if r.insn(8) = '1' and r.result_sign = '1' and
2018 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2019 v.state := INT_OFLOW;
2021 v.state := INT_FINAL;
2025 -- r.shift = b.exponent - 54;
2026 opsel_r <= RES_SHIFT;
2027 v.state := INT_FINAL;
2030 -- Negate if necessary, and increment for rounding if needed
2031 opsel_ainv <= r.result_sign;
2032 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2033 -- Check for possible overflows
2034 case r.insn(9 downto 8) is
2035 when "00" => -- fctiw[z]
2036 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2037 when "01" => -- fctiwu[z]
2038 need_check := r.r(31);
2039 when "10" => -- fctid[z]
2040 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2041 when others => -- fctidu[z]
2042 need_check := r.r(63);
2044 if need_check = '1' then
2045 v.state := INT_CHECK;
2047 if r.fpscr(FPSCR_FI) = '1' then
2048 v.fpscr(FPSCR_XX) := '1';
2054 if r.insn(9) = '0' then
2059 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2060 if (r.insn(8) = '0' and msb /= r.result_sign) or
2061 (r.insn(8) = '1' and msb /= '1') then
2062 opsel_r <= RES_MISC;
2063 v.fpscr(FPSCR_VXCVI) := '1';
2066 if r.fpscr(FPSCR_FI) = '1' then
2067 v.fpscr(FPSCR_XX) := '1';
2073 opsel_r <= RES_MISC;
2074 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2075 if r.b.class = NAN then
2078 v.fpscr(FPSCR_VXCVI) := '1';
2083 -- r.shift = b.exponent - 52
2084 opsel_r <= RES_SHIFT;
2086 v.state := ROUNDING;
2089 if r.is_multiply = '1' and px_nz = '1' then
2092 if r.r(63 downto 54) /= "0000000001" then
2094 v.state := NORMALIZE;
2097 if exp_tiny = '1' then
2098 v.shift := new_exp - min_exp;
2099 v.state := ROUND_UFLOW;
2100 elsif exp_huge = '1' then
2101 v.state := ROUND_OFLOW;
2103 v.state := ROUNDING;
2108 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2109 -- r.shift = clz(r.r) - 9
2110 opsel_r <= RES_SHIFT;
2112 if exp_tiny = '1' then
2113 v.shift := new_exp - min_exp;
2114 v.state := ROUND_UFLOW;
2115 elsif exp_huge = '1' then
2116 v.state := ROUND_OFLOW;
2118 v.state := ROUNDING;
2122 -- r.shift = - amount by which exponent underflows
2124 if r.fpscr(FPSCR_UE) = '0' then
2125 -- disabled underflow exception case
2126 -- have to denormalize before rounding
2127 opsel_r <= RES_SHIFT;
2129 v.state := ROUNDING;
2131 -- enabled underflow exception case
2132 -- if denormalized, have to normalize before rounding
2133 v.fpscr(FPSCR_UX) := '1';
2134 v.result_exp := r.result_exp + bias_exp;
2135 if r.r(54) = '0' then
2137 v.state := NORMALIZE;
2139 v.state := ROUNDING;
2144 v.fpscr(FPSCR_OX) := '1';
2145 if r.fpscr(FPSCR_OE) = '0' then
2146 -- disabled overflow exception
2147 -- result depends on rounding mode
2148 v.fpscr(FPSCR_XX) := '1';
2149 v.fpscr(FPSCR_FI) := '1';
2150 if r.round_mode(1 downto 0) = "00" or
2151 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2152 v.result_class := INFINITY;
2153 v.fpscr(FPSCR_FR) := '1';
2155 v.fpscr(FPSCR_FR) := '0';
2157 -- construct largest representable number
2158 v.result_exp := max_exp;
2159 opsel_r <= RES_MISC;
2160 misc_sel <= "001" & r.single_prec;
2163 -- enabled overflow exception
2164 v.result_exp := r.result_exp - bias_exp;
2165 v.state := ROUNDING;
2170 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2171 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2172 if round(1) = '1' then
2173 -- increment the LSB for the precision
2175 v.shift := to_signed(-1, EXP_BITS);
2176 v.state := ROUNDING_2;
2178 if r.r(54) = '0' then
2179 -- result after masking could be zero, or could be a
2180 -- denormalized result that needs to be renormalized
2182 v.state := ROUNDING_3;
2187 if round(0) = '1' then
2188 v.fpscr(FPSCR_XX) := '1';
2189 if r.tiny = '1' then
2190 v.fpscr(FPSCR_UX) := '1';
2195 -- Check for overflow during rounding
2198 if r.r(55) = '1' then
2199 opsel_r <= RES_SHIFT;
2200 if exp_huge = '1' then
2201 v.state := ROUND_OFLOW;
2205 elsif r.r(54) = '0' then
2206 -- Do CLZ so we can renormalize the result
2208 v.state := ROUNDING_3;
2214 -- r.shift = clz(r.r) - 9
2215 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2216 if mant_nz = '0' then
2217 v.result_class := ZERO;
2218 if r.is_subtract = '1' then
2219 -- set result sign depending on rounding mode
2220 v.result_sign := r.round_mode(1) and r.round_mode(0);
2224 -- Renormalize result after rounding
2225 opsel_r <= RES_SHIFT;
2226 v.denorm := exp_tiny;
2227 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2228 if new_exp < to_signed(-1022, EXP_BITS) then
2236 -- r.shift = result_exp - -1022
2237 opsel_r <= RES_SHIFT;
2241 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2242 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2243 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2245 v.fpscr(FPSCR_VXSNAN) := '1';
2248 if r.use_a = '1' and r.a.class = NAN then
2250 elsif r.use_b = '1' and r.b.class = NAN then
2252 elsif r.use_c = '1' and r.c.class = NAN then
2255 v.state := EXC_RESULT;
2258 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2261 v.result_sign := r.b.negative xor r.negate;
2262 v.result_exp := r.b.exponent;
2263 v.result_class := r.b.class;
2265 v.result_sign := r.c.negative xor r.negate;
2266 v.result_exp := r.c.exponent;
2267 v.result_class := r.c.class;
2269 v.result_sign := r.a.negative xor r.negate;
2270 v.result_exp := r.a.exponent;
2271 v.result_class := r.a.class;
2277 if zero_divide = '1' then
2278 v.fpscr(FPSCR_ZX) := '1';
2280 if qnan_result = '1' then
2282 v.result_class := NAN;
2283 v.result_sign := '0';
2285 opsel_r <= RES_MISC;
2288 if invalid = '1' then
2291 if arith_done = '1' then
2292 -- Enabled invalid exception doesn't write result or FPRF
2293 -- Neither does enabled zero-divide exception
2294 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2295 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2296 v.writing_back := '1';
2297 v.update_fprf := '1';
2299 v.instr_done := '1';
2304 -- Multiplier and divide/square root data path
2307 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2309 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2311 f_to_multiply.data1 <= r.y;
2313 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2317 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2319 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2321 f_to_multiply.data2 <= r.p;
2323 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2325 maddend := (others => '0');
2327 when MULADD_CONST =>
2328 -- addend is 2.0 or 1.5 in 16.112 format
2329 if r.is_sqrt = '0' then
2330 maddend(113) := '1'; -- 2.0
2332 maddend(112 downto 111) := "11"; -- 1.5
2335 -- addend is A in 16.112 format
2336 maddend(121 downto 58) := r.a.mantissa;
2338 -- addend is concatenation of R and S in 16.112 format
2339 maddend := "000000" & r.r & r.s & "00";
2342 if msel_inv = '1' then
2343 f_to_multiply.addend <= not maddend;
2345 f_to_multiply.addend <= maddend;
2347 f_to_multiply.not_result <= msel_inv;
2349 v.y := f_to_multiply.data2;
2351 if multiply_to_f.valid = '1' then
2352 if pshift = '0' then
2353 v.p := multiply_to_f.result(63 downto 0);
2355 v.p := multiply_to_f.result(119 downto 56);
2360 -- This has A and B input multiplexers, an adder, a shifter,
2361 -- count-leading-zeroes logic, and a result mux.
2362 if r.longmask = '1' then
2363 mshift := r.shift + to_signed(-29, EXP_BITS);
2367 if mshift < to_signed(-64, EXP_BITS) then
2368 mask := (others => '1');
2369 elsif mshift >= to_signed(0, EXP_BITS) then
2370 mask := (others => '0');
2372 mask := right_mask(unsigned(mshift(5 downto 0)));
2378 in_a0 := r.a.mantissa;
2380 in_a0 := r.b.mantissa;
2382 in_a0 := r.c.mantissa;
2384 if (or (mask and in_a0)) = '1' and set_x = '1' then
2387 if opsel_ainv = '1' then
2393 in_b0 := (others => '0');
2397 round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
2400 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2401 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2403 if opsel_binv = '1' then
2407 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2408 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2409 std_ulogic_vector(r.shift(6 downto 0)));
2411 shift_res := (others => '0');
2413 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2414 if opsel_mask = '1' then
2415 sum(1 downto 0) := "00";
2416 if r.single_prec = '1' then
2417 sum(30 downto 2) := (others => '0');
2424 result <= shift_res;
2426 result <= multiply_to_f.result(121 downto 58);
2430 misc := x"00000000" & (r.fpscr and fpscr_mask);
2432 -- generated QNaN mantissa
2433 misc := x"0020000000000000";
2435 -- mantissa of max representable DP number
2436 misc := x"007ffffffffffffc";
2438 -- mantissa of max representable SP number
2439 misc := x"007fffff80000000";
2442 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2445 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2447 misc := 10x"000" & inverse_est & 35x"000000000";
2449 -- max positive result for fctiw[z]
2450 misc := x"000000007fffffff";
2452 -- max negative result for fctiw[z]
2453 misc := x"ffffffff80000000";
2455 -- max positive result for fctiwu[z]
2456 misc := x"00000000ffffffff";
2458 -- max negative result for fctiwu[z]
2459 misc := x"0000000000000000";
2461 -- max positive result for fctid[z]
2462 misc := x"7fffffffffffffff";
2464 -- max negative result for fctid[z]
2465 misc := x"8000000000000000";
2467 -- max positive result for fctidu[z]
2468 misc := x"ffffffffffffffff";
2470 -- max negative result for fctidu[z]
2471 misc := x"0000000000000000";
2473 misc := x"0000000000000000";
2481 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2483 v.s := multiply_to_f.result(57 downto 2);
2485 v.s := shift_res(63 downto 8);
2486 if shift_res(7 downto 0) /= x"00" then
2490 v.s := (others => '0');
2495 v.a.exponent := new_exp;
2496 v.a.mantissa := shift_res;
2499 v.b.exponent := new_exp;
2500 v.b.mantissa := shift_res;
2503 v.c.exponent := new_exp;
2504 v.c.mantissa := shift_res;
2507 if opsel_r = RES_SHIFT then
2508 v.result_exp := new_exp;
2511 if renormalize = '1' then
2512 clz := count_left_zeroes(r.r);
2513 if renorm_sqrt = '1' then
2514 -- make denormalized value end up with even exponent
2517 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2520 if r.int_result = '1' then
2523 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2524 r.single_prec, r.quieten_nan);
2526 if r.update_fprf = '1' then
2527 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2528 r.r(54) and not r.denorm);
2531 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2532 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2533 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2534 v.fpscr(FPSCR_VE downto FPSCR_XE));
2535 if update_fx = '1' and
2536 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2537 v.fpscr(FPSCR_FX) := '1';
2540 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2543 if illegal = '1' then
2544 v.instr_done := '0';
2546 v.writing_back := '0';
2550 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2551 if v.state /= IDLE or v.do_intr = '1' then
2557 e_out.illegal <= illegal;
2560 end architecture behaviour;