FPU: Implement fdiv[s]
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Floating-point unit.
--
-- Ports:
--   clk, rst : clock and synchronous reset (rst is sampled on the rising
--              edge of clk).
--   e_in     : incoming instruction, operand registers and mode bits.
--   e_out    : busy / exception / interrupt status driven from the FPU
--              state registers.
--   w_out    : register-file and CR write-back request.
entity fpu is
    port (
        clk   : in  std_ulogic;
        rst   : in  std_ulogic;

        e_in  : in  Execute1toFPUType;
        e_out : out FPUToExecute1Type;

        w_out : out FPUToWritebackType
    );
end entity fpu;
25
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
28
29 constant EXP_BITS : natural := 13;
30
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
36 end record;
37
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG,
41 DO_FCFID, DO_FCTI,
42 DO_FRSP, DO_FRI,
43 DO_FADD, DO_FMUL, DO_FDIV,
44 FRI_1,
45 ADD_SHIFT, ADD_2, ADD_3,
46 MULT_1,
47 LOOKUP,
48 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
49 INT_SHIFT, INT_ROUND, INT_ISHIFT,
50 INT_FINAL, INT_CHECK, INT_OFLOW,
51 FINISH, NORMALIZE,
52 ROUND_UFLOW, ROUND_OFLOW,
53 ROUNDING, ROUNDING_2, ROUNDING_3,
54 DENORM,
55 RENORM_A, RENORM_A2,
56 RENORM_B, RENORM_B2,
57 RENORM_C, RENORM_C2);
58
59 type reg_type is record
60 state : state_t;
61 busy : std_ulogic;
62 instr_done : std_ulogic;
63 do_intr : std_ulogic;
64 op : insn_type_t;
65 insn : std_ulogic_vector(31 downto 0);
66 dest_fpr : gspr_index_t;
67 fe_mode : std_ulogic;
68 rc : std_ulogic;
69 is_cmp : std_ulogic;
70 single_prec : std_ulogic;
71 fpscr : std_ulogic_vector(31 downto 0);
72 a : fpu_reg_type;
73 b : fpu_reg_type;
74 c : fpu_reg_type;
75 r : std_ulogic_vector(63 downto 0); -- 10.54 format
76 x : std_ulogic;
77 p : std_ulogic_vector(63 downto 0); -- 8.56 format
78 y : std_ulogic_vector(63 downto 0); -- 8.56 format
79 result_sign : std_ulogic;
80 result_class : fp_number_class;
81 result_exp : signed(EXP_BITS-1 downto 0);
82 shift : signed(EXP_BITS-1 downto 0);
83 writing_back : std_ulogic;
84 int_result : std_ulogic;
85 cr_result : std_ulogic_vector(3 downto 0);
86 cr_mask : std_ulogic_vector(7 downto 0);
87 old_exc : std_ulogic_vector(4 downto 0);
88 update_fprf : std_ulogic;
89 quieten_nan : std_ulogic;
90 tiny : std_ulogic;
91 denorm : std_ulogic;
92 round_mode : std_ulogic_vector(2 downto 0);
93 is_subtract : std_ulogic;
94 exp_cmp : std_ulogic;
95 add_bsmall : std_ulogic;
96 is_multiply : std_ulogic;
97 first : std_ulogic;
98 count : unsigned(1 downto 0);
99 end record;
100
101 type lookup_table is array(0 to 255) of std_ulogic_vector(17 downto 0);
102
103 signal r, rin : reg_type;
104
105 signal fp_result : std_ulogic_vector(63 downto 0);
106 signal opsel_a : std_ulogic_vector(1 downto 0);
107 signal opsel_b : std_ulogic_vector(1 downto 0);
108 signal opsel_r : std_ulogic_vector(1 downto 0);
109 signal opsel_ainv : std_ulogic;
110 signal opsel_amask : std_ulogic;
111 signal opsel_binv : std_ulogic;
112 signal in_a : std_ulogic_vector(63 downto 0);
113 signal in_b : std_ulogic_vector(63 downto 0);
114 signal result : std_ulogic_vector(63 downto 0);
115 signal carry_in : std_ulogic;
116 signal lost_bits : std_ulogic;
117 signal r_hi_nz : std_ulogic;
118 signal r_lo_nz : std_ulogic;
119 signal misc_sel : std_ulogic_vector(3 downto 0);
120 signal f_to_multiply : MultiplyInputType;
121 signal multiply_to_f : MultiplyOutputType;
122 signal msel_1 : std_ulogic_vector(1 downto 0);
123 signal msel_2 : std_ulogic_vector(1 downto 0);
124 signal msel_add : std_ulogic_vector(1 downto 0);
125 signal msel_inv : std_ulogic;
126 signal inverse_est : std_ulogic_vector(18 downto 0);
127
128 -- opsel values
129 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
130 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
131 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
132 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
133
134 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
135 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
136 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
137
138 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
139 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
140 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
141 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
142
143 -- msel values
144 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
145 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
146 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
147 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
148
149 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
150 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
151 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
152 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
153
154 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
155 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
156 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
157
158 -- Inverse lookup table, indexed by the top 8 fraction bits
159 -- Output range is [0.5, 1) in 0.19 format, though the top
160 -- bit isn't stored since it is always 1.
161 -- Each output value is the inverse of the center of the input
162 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
163 -- entry 1 is 1 / (1 + 3/512), etc.
164 signal inverse_table : lookup_table := (
165 -- 1/x lookup table
166 -- Unit bit is assumed to be 1, so input range is [1, 2)
167 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
168 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
169 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
170 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
171 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
172 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
173 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
174 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
175 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
176 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
177 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
178 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
179 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
180 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
181 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
182 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
183 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
184 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
185 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
186 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
187 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
188 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
189 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
190 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
191 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
192 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
193 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
194 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
195 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
196 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
197 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
198 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100"
199 );
200
-- Combined left/right shifter: 120-bit input, 64-bit output.
--
-- Conceptually shifts inp left by "shift" places and returns the upper
-- 64 bits of the result.  "shift" is a 7-bit two's-complement count in
-- the range -64..63; negative counts shift right.  Vacated positions are
-- filled with zeros in both directions.
--
-- The shift is done as three cascaded multiplexer stages:
--   stage 1 : shift(6 downto 5) -> -64, -32, 0 or +32 places
--   stage 2 : shift(4 downto 3) -> 0, 8, 16 or 24 further places left
--   stage 3 : shift(2 downto 0) -> 0..7 further places left
-- Any selector value that matches none of the explicit codes (including
-- metavalues in simulation) takes the last alternative of its stage.
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    variable coarse : std_ulogic_vector(94 downto 0);
    variable medium : std_ulogic_vector(70 downto 0);
    variable fine   : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: pick the 95-bit window for the coarse shift amount.
    if shift(6 downto 5) = "00" then
        -- shift in 0..31
        coarse := inp(119 downto 25);
    elsif shift(6 downto 5) = "01" then
        -- shift in 32..63: window runs off the bottom, pad with zeros
        coarse := inp(87 downto 0) & "0000000";
    elsif shift(6 downto 5) = "10" then
        -- shift in -64..-33
        coarse := x"0000000000000000" & inp(119 downto 89);
    else
        -- shift in -32..-1
        coarse := x"00000000" & inp(119 downto 57);
    end if;

    -- Stage 2: move the window down by 8 bits per step; the default
    -- (code "11" and anything unmatched) is the lowest window.
    medium := coarse(70 downto 0);
    for step in 0 to 2 loop
        if shift(4 downto 3) = std_ulogic_vector(to_unsigned(step, 2)) then
            medium := coarse(94 - 8 * step downto 24 - 8 * step);
        end if;
    end loop;

    -- Stage 3: move the window down by one bit per step; the default
    -- (code "111" and anything unmatched) is the lowest window.
    fine := medium(63 downto 0);
    for step in 0 to 6 loop
        if shift(2 downto 0) = std_ulogic_vector(to_unsigned(step, 3)) then
            fine := medium(70 - step downto 7 - step);
        end if;
    end loop;

    return fine;
end;
253
-- Build a 64-bit mask with 0-bits on the left and 1-bits on the right
-- that selects the bits which will be lost by a right shift.
--
-- "shift" is the bottom 6 bits of a negative shift count (i.e. of a
-- right shift).  Bit position b of the result is set when
-- 63 - b >= shift, so shift = 0 yields an all-ones mask and shift = 63
-- yields a mask with only bit 0 set.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0);
begin
    for bitpos in mask'range loop
        if shift <= 63 - bitpos then
            mask(bitpos) := '1';
        else
            mask(bitpos) := '0';
        end if;
    end loop;
    return mask;
end;
269
-- Break a 64-bit register value into sign, exponent, mantissa and class.
--
-- is_int = '0': fpr is treated as a double-precision number.
--   * exponent is the unbiased exponent (field - 1023); a zero exponent
--     field is decoded as -1022.
--   * mantissa holds the unit bit (1 when the exponent field is nonzero)
--     in bit 54, the 52 fraction bits in bits 53..2 and zeros elsewhere.
--   * class is derived from "exponent all ones", "exponent nonzero" and
--     "fraction nonzero".
-- any other is_int: fpr is treated as an integer.
--   * mantissa is fpr unchanged, exponent is 0, and class is FINITE when
--     any bit of fpr is set, ZERO otherwise.
-- In both cases "negative" is bit 63 of fpr.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;    -- some exponent-field bit is 1
    variable exp_all  : std_ulogic;    -- every exponent-field bit is 1
    variable frac_any : std_ulogic;    -- some fraction bit is 1
    variable key      : std_ulogic_vector(2 downto 0);
begin
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));
    dec.negative := fpr(63);

    -- Integer operand: pass the bits through untouched.
    if is_int /= '0' then
        dec.mantissa := fpr;
        dec.exponent := (others => '0');
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
        return dec;
    end if;

    -- Floating-point operand.
    if exp_any = '0' then
        -- zero exponent field (zero or denormal)
        dec.exponent := to_signed(-1022, EXP_BITS);
    else
        dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
    end if;
    dec.mantissa := "000000000" & exp_any & fpr(51 downto 0) & "00";

    key := exp_all & exp_any & frac_any;
    case key is
        when "000" =>
            dec.class := ZERO;
        when "001" | "010" | "011" =>
            -- "001" is a denormalized number
            dec.class := FINITE;
        when "110" =>
            dec.class := INFINITY;
        when others =>
            dec.class := NAN;
    end case;
    return dec;
end;
309
-- Assemble a 64-bit double-precision image from its components.
--
--   sign        : goes to bit 63 for every class.
--   class       : ZERO leaves exponent and fraction at zero; INFINITY and
--                 NAN get an all-ones exponent field; FINITE gets
--                 exp + 1023 only when the unit bit mantissa(54) is '1',
--                 otherwise the exponent field stays zero.
--   mantissa    : fraction is taken from bits 53..2 (FINITE and NAN only).
--   single_prec : unless this is '0', the low 29 fraction bits are left
--                 as zero.
--   quieten_nan : for NAN results, ORed into the top fraction bit (51).
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    constant EXP_ALL_ONES : std_ulogic_vector(10 downto 0) := (others => '1');
    variable packed : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    packed(63) := sign;

    -- Exponent field.
    if class = INFINITY or class = NAN then
        packed(62 downto 52) := EXP_ALL_ONES;
    elsif class = FINITE and mantissa(54) = '1' then
        -- normalized number
        packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
    end if;

    -- Fraction field.
    if class = FINITE or class = NAN then
        packed(51 downto 29) := mantissa(53 downto 31);
        if class = NAN then
            packed(51) := quieten_nan or mantissa(53);
        end if;
        if single_prec = '0' then
            packed(28 downto 0) := mantissa(30 downto 2);
        end if;
    end if;

    return packed;
end;
341
-- Decide whether rounding must increment the mantissa.
--
-- Returns a 2-bit vector (1 downto 0): bit 1 = increment, bit 0 = inexact.
--
-- The guard and round bits are mantissa(1 downto 0) when single_prec is
-- '0' and mantissa(30 downto 29) otherwise; x is the sticky bit.  When
-- single_prec = 1, x must already include the bottom 29 bits of the
-- mantissa (usually arranged by setting set_x = 1 earlier).
--
-- rn(1 downto 0) selects the mode:
--   "00"   round to nearest; an exact tie rounds to even when rn(2) is
--          '0', and otherwise follows the guard bit
--   "01"   round towards zero (never increments)
--   others round towards +/- infinity: increments an inexact value when
--          rn(0) equals the sign
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable grs       : std_ulogic_vector(2 downto 0);  -- guard, round, sticky
    variable low_bit   : std_ulogic;                     -- lsb of the kept mantissa
    variable inexact   : std_ulogic;
    variable increment : std_ulogic;
    variable ret       : std_ulogic_vector(1 downto 0);
begin
    if single_prec = '0' then
        low_bit := mantissa(2);
        grs := mantissa(1 downto 0) & x;
    else
        low_bit := mantissa(31);
        grs := mantissa(30 downto 29) & x;
    end if;

    inexact := or (grs);
    increment := '0';

    if rn(1 downto 0) = "00" then
        -- round to nearest
        if grs = "100" and rn(2) = '0' then
            increment := low_bit;       -- tie, round to even
        else
            increment := grs(2);
        end if;
    elsif rn(1 downto 0) = "01" then
        -- round towards zero
        null;
    elsif rn(0) = sign then
        -- round towards +/- inf, in the direction of greater magnitude
        increment := inexact;
    end if;

    ret(1) := increment;
    ret(0) := inexact;
    return ret;
end;
379
-- Compute the 5-bit result-flag field to write into the FPSCR.
--
-- The value is built left to right (index range 0 to 4):
--   ZERO     : sign & "0010"
--   FINITE   : (not unitbit) & sign & (not sign) & "00"
--   INFINITY : '0' & sign & (not sign) & "01"
--   NAN      : "10001"
-- NOTE(review): presumably these map onto the FPSCR result-class and
-- condition bits in order - confirm against the FPSCR layout used by
-- the caller.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
    variable flags : std_ulogic_vector(0 to 4);
begin
    case class is
        when NAN =>
            flags := "10001";
        when INFINITY =>
            flags := '0' & sign & (not sign) & "01";
        when FINITE =>
            flags := (not unitbit) & sign & (not sign) & "00";
        when ZERO =>
            flags := sign & "0010";
    end case;
    return flags;
end;
395
396 begin
397 fpu_multiply_0: entity work.multiply
398 port map (
399 clk => clk,
400 m_in => f_to_multiply,
401 m_out => multiply_to_f
402 );
403
-- State register for the FPU.
--
-- On every rising clock edge r takes the next-state value rin, except
-- while rst is '1', when only the control fields below are forced to
-- their idle values (the remaining fields of r are not assigned by the
-- reset branch).
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' then
            r.state        <= IDLE;
            r.busy         <= '0';
            r.instr_done   <= '0';
            r.writing_back <= '0';
            r.do_intr      <= '0';
            r.fpscr        <= (others => '0');
        else
            -- A new instruction may only be presented while the state
            -- machine is idle.
            assert r.state = IDLE or e_in.valid /= '1' severity failure;
            r <= rin;
        end if;
    end if;
end process;
420
-- Synchronous read of the reciprocal lookup table.
-- The table is indexed by bits 53..46 of the B operand mantissa (the
-- eight bits directly below the unit bit); the entry is registered into
-- inverse_est with a leading '1' prepended.
lut_access: process(clk)
    variable index : integer range 0 to 255;
begin
    if rising_edge(clk) then
        index := to_integer(unsigned(r.b.mantissa(53 downto 46)));
        inverse_est <= '1' & inverse_table(index);
    end if;
end process;
428
-- Status outputs, driven straight from the state register.
e_out.busy      <= r.busy;
e_out.interrupt <= r.do_intr;
e_out.exception <= r.fpscr(FPSCR_FEX);

-- Register write-back: valid when an instruction completes without
-- raising an interrupt.
w_out.valid        <= r.instr_done and not r.do_intr;
w_out.write_enable <= r.writing_back;
w_out.write_reg    <= r.dest_fpr;
w_out.write_data   <= fp_result;

-- CR write-back: enabled on completion of an instruction that has rc or
-- is_cmp set.  The 4-bit result is replicated into all eight CR fields;
-- write_cr_mask carries r.cr_mask alongside it.
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_data   <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result & r.cr_result & r.cr_result;
441
442 fpu_1: process(all)
443 variable v : reg_type;
444 variable adec : fpu_reg_type;
445 variable bdec : fpu_reg_type;
446 variable cdec : fpu_reg_type;
447 variable fpscr_mask : std_ulogic_vector(31 downto 0);
448 variable illegal : std_ulogic;
449 variable j, k : integer;
450 variable flm : std_ulogic_vector(7 downto 0);
451 variable int_input : std_ulogic;
452 variable mask : std_ulogic_vector(63 downto 0);
453 variable in_a0 : std_ulogic_vector(63 downto 0);
454 variable in_b0 : std_ulogic_vector(63 downto 0);
455 variable misc : std_ulogic_vector(63 downto 0);
456 variable shift_res : std_ulogic_vector(63 downto 0);
457 variable round : std_ulogic_vector(1 downto 0);
458 variable update_fx : std_ulogic;
459 variable arith_done : std_ulogic;
460 variable invalid : std_ulogic;
461 variable zero_divide : std_ulogic;
462 variable mant_nz : std_ulogic;
463 variable min_exp : signed(EXP_BITS-1 downto 0);
464 variable max_exp : signed(EXP_BITS-1 downto 0);
465 variable bias_exp : signed(EXP_BITS-1 downto 0);
466 variable new_exp : signed(EXP_BITS-1 downto 0);
467 variable exp_tiny : std_ulogic;
468 variable exp_huge : std_ulogic;
469 variable renormalize : std_ulogic;
470 variable clz : std_ulogic_vector(5 downto 0);
471 variable set_x : std_ulogic;
472 variable mshift : signed(EXP_BITS-1 downto 0);
473 variable need_check : std_ulogic;
474 variable msb : std_ulogic;
475 variable is_add : std_ulogic;
476 variable qnan_result : std_ulogic;
477 variable longmask : std_ulogic;
478 variable set_a : std_ulogic;
479 variable set_b : std_ulogic;
480 variable set_c : std_ulogic;
481 variable px_nz : std_ulogic;
482 variable maddend : std_ulogic_vector(127 downto 0);
483 variable set_y : std_ulogic;
484 variable pcmpb_eq : std_ulogic;
485 variable pcmpb_lt : std_ulogic;
486 variable pshift : std_ulogic;
487 begin
488 v := r;
489 illegal := '0';
490 v.busy := '0';
491 int_input := '0';
492
493 -- capture incoming instruction
494 if e_in.valid = '1' then
495 v.insn := e_in.insn;
496 v.op := e_in.op;
497 v.fe_mode := or (e_in.fe_mode);
498 v.dest_fpr := e_in.frt;
499 v.single_prec := e_in.single;
500 v.int_result := '0';
501 v.rc := e_in.rc;
502 v.is_cmp := e_in.out_cr;
503 if e_in.out_cr = '0' then
504 v.cr_mask := num_to_fxm(1);
505 else
506 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
507 end if;
508 int_input := '0';
509 if e_in.op = OP_FPOP_I then
510 int_input := '1';
511 end if;
512 v.quieten_nan := '1';
513 v.tiny := '0';
514 v.denorm := '0';
515 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
516 v.is_subtract := '0';
517 v.is_multiply := '0';
518 v.add_bsmall := '0';
519 adec := decode_dp(e_in.fra, int_input);
520 bdec := decode_dp(e_in.frb, int_input);
521 cdec := decode_dp(e_in.frc, int_input);
522 v.a := adec;
523 v.b := bdec;
524 v.c := cdec;
525
526 v.exp_cmp := '0';
527 if adec.exponent > bdec.exponent then
528 v.exp_cmp := '1';
529 end if;
530 end if;
531
532 r_hi_nz <= or (r.r(55 downto 31));
533 r_lo_nz <= or (r.r(30 downto 2));
534
535 if r.single_prec = '0' then
536 max_exp := to_signed(1023, EXP_BITS);
537 min_exp := to_signed(-1022, EXP_BITS);
538 bias_exp := to_signed(1536, EXP_BITS);
539 else
540 max_exp := to_signed(127, EXP_BITS);
541 min_exp := to_signed(-126, EXP_BITS);
542 bias_exp := to_signed(192, EXP_BITS);
543 end if;
544 new_exp := r.result_exp - r.shift;
545 exp_tiny := '0';
546 exp_huge := '0';
547 if new_exp < min_exp then
548 exp_tiny := '1';
549 end if;
550 if new_exp > max_exp then
551 exp_huge := '1';
552 end if;
553
554 -- Compare P with zero and with B
555 px_nz := or (r.p(57 downto 4));
556 pcmpb_eq := '0';
557 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
558 pcmpb_eq := '1';
559 end if;
560 pcmpb_lt := '0';
561 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
562 pcmpb_lt := '1';
563 end if;
564
565 v.writing_back := '0';
566 v.instr_done := '0';
567 v.update_fprf := '0';
568 v.shift := to_signed(0, EXP_BITS);
569 v.first := '0';
570 opsel_a <= AIN_R;
571 opsel_ainv <= '0';
572 opsel_amask <= '0';
573 opsel_b <= BIN_ZERO;
574 opsel_binv <= '0';
575 opsel_r <= RES_SUM;
576 carry_in <= '0';
577 misc_sel <= "0000";
578 fpscr_mask := (others => '1');
579 update_fx := '0';
580 arith_done := '0';
581 invalid := '0';
582 zero_divide := '0';
583 renormalize := '0';
584 set_x := '0';
585 qnan_result := '0';
586 longmask := r.single_prec;
587 set_a := '0';
588 set_b := '0';
589 set_c := '0';
590 f_to_multiply.is_32bit <= '0';
591 f_to_multiply.valid <= '0';
592 msel_1 <= MUL1_A;
593 msel_2 <= MUL2_C;
594 msel_add <= MULADD_ZERO;
595 msel_inv <= '0';
596 set_y := '0';
597 pshift := '0';
598 case r.state is
599 when IDLE =>
600 if e_in.valid = '1' then
601 case e_in.insn(5 downto 1) is
602 when "00000" =>
603 v.state := DO_MCRFS;
604 when "00110" =>
605 if e_in.insn(10) = '0' then
606 if e_in.insn(8) = '0' then
607 v.state := DO_MTFSB;
608 else
609 v.state := DO_MTFSFI;
610 end if;
611 else
612 v.state := DO_FMRG;
613 end if;
614 when "00111" =>
615 if e_in.insn(8) = '0' then
616 v.state := DO_MFFS;
617 else
618 v.state := DO_MTFSF;
619 end if;
620 when "01000" =>
621 if e_in.insn(9 downto 8) /= "11" then
622 v.state := DO_FMR;
623 else
624 v.state := DO_FRI;
625 end if;
626 when "01100" =>
627 v.state := DO_FRSP;
628 when "01110" =>
629 if int_input = '1' then
630 -- fcfid[u][s]
631 v.state := DO_FCFID;
632 else
633 v.state := DO_FCTI;
634 end if;
635 when "01111" =>
636 v.round_mode := "001";
637 v.state := DO_FCTI;
638 when "10010" =>
639 v.state := DO_FDIV;
640 when "10100" | "10101" =>
641 v.state := DO_FADD;
642 when "11001" =>
643 v.is_multiply := '1';
644 v.state := DO_FMUL;
645 when others =>
646 illegal := '1';
647 end case;
648 end if;
649 v.x := '0';
650 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
651
652 when DO_MCRFS =>
653 j := to_integer(unsigned(insn_bfa(r.insn)));
654 for i in 0 to 7 loop
655 if i = j then
656 k := (7 - i) * 4;
657 v.cr_result := r.fpscr(k + 3 downto k);
658 fpscr_mask(k + 3 downto k) := "0000";
659 end if;
660 end loop;
661 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
662 v.instr_done := '1';
663 v.state := IDLE;
664
665 when DO_MTFSB =>
666 -- mtfsb{0,1}
667 j := to_integer(unsigned(insn_bt(r.insn)));
668 for i in 0 to 31 loop
669 if i = j then
670 v.fpscr(31 - i) := r.insn(6);
671 end if;
672 end loop;
673 v.instr_done := '1';
674 v.state := IDLE;
675
676 when DO_MTFSFI =>
677 -- mtfsfi
678 j := to_integer(unsigned(insn_bf(r.insn)));
679 if r.insn(16) = '0' then
680 for i in 0 to 7 loop
681 if i = j then
682 k := (7 - i) * 4;
683 v.fpscr(k + 3 downto k) := insn_u(r.insn);
684 end if;
685 end loop;
686 end if;
687 v.instr_done := '1';
688 v.state := IDLE;
689
690 when DO_FMRG =>
691 -- fmrgew, fmrgow
692 opsel_r <= RES_MISC;
693 misc_sel <= "01" & r.insn(8) & '0';
694 v.int_result := '1';
695 v.writing_back := '1';
696 v.instr_done := '1';
697 v.state := IDLE;
698
699 when DO_MFFS =>
700 v.int_result := '1';
701 v.writing_back := '1';
702 opsel_r <= RES_MISC;
703 case r.insn(20 downto 16) is
704 when "00000" =>
705 -- mffs
706 when "00001" =>
707 -- mffsce
708 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
709 when "10100" | "10101" =>
710 -- mffscdrn[i] (but we don't implement DRN)
711 fpscr_mask := x"000000FF";
712 when "10110" =>
713 -- mffscrn
714 fpscr_mask := x"000000FF";
715 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
716 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
717 when "10111" =>
718 -- mffscrni
719 fpscr_mask := x"000000FF";
720 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
721 when "11000" =>
722 -- mffsl
723 fpscr_mask := x"0007F0FF";
724 when others =>
725 illegal := '1';
726 end case;
727 v.instr_done := '1';
728 v.state := IDLE;
729
730 when DO_MTFSF =>
731 if r.insn(25) = '1' then
732 flm := x"FF";
733 elsif r.insn(16) = '1' then
734 flm := x"00";
735 else
736 flm := r.insn(24 downto 17);
737 end if;
738 for i in 0 to 7 loop
739 k := i * 4;
740 if flm(i) = '1' then
741 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
742 end if;
743 end loop;
744 v.instr_done := '1';
745 v.state := IDLE;
746
747 when DO_FMR =>
748 opsel_a <= AIN_B;
749 v.result_class := r.b.class;
750 v.result_exp := r.b.exponent;
751 v.quieten_nan := '0';
752 if r.insn(9) = '1' then
753 v.result_sign := '0'; -- fabs
754 elsif r.insn(8) = '1' then
755 v.result_sign := '1'; -- fnabs
756 elsif r.insn(7) = '1' then
757 v.result_sign := r.b.negative; -- fmr
758 elsif r.insn(6) = '1' then
759 v.result_sign := not r.b.negative; -- fneg
760 else
761 v.result_sign := r.a.negative; -- fcpsgn
762 end if;
763 v.writing_back := '1';
764 v.instr_done := '1';
765 v.state := IDLE;
766
767 when DO_FRI => -- fri[nzpm]
768 opsel_a <= AIN_B;
769 v.result_class := r.b.class;
770 v.result_sign := r.b.negative;
771 v.result_exp := r.b.exponent;
772 v.fpscr(FPSCR_FR) := '0';
773 v.fpscr(FPSCR_FI) := '0';
774 if r.b.class = NAN and r.b.mantissa(53) = '0' then
775 -- Signalling NAN
776 v.fpscr(FPSCR_VXSNAN) := '1';
777 invalid := '1';
778 end if;
779 if r.b.class = FINITE then
780 if r.b.exponent >= to_signed(52, EXP_BITS) then
781 -- integer already, no rounding required
782 arith_done := '1';
783 else
784 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
785 v.state := FRI_1;
786 v.round_mode := '1' & r.insn(7 downto 6);
787 end if;
788 else
789 arith_done := '1';
790 end if;
791
792 when DO_FRSP =>
793 opsel_a <= AIN_B;
794 v.result_class := r.b.class;
795 v.result_sign := r.b.negative;
796 v.result_exp := r.b.exponent;
797 v.fpscr(FPSCR_FR) := '0';
798 v.fpscr(FPSCR_FI) := '0';
799 if r.b.class = NAN and r.b.mantissa(53) = '0' then
800 -- Signalling NAN
801 v.fpscr(FPSCR_VXSNAN) := '1';
802 invalid := '1';
803 end if;
804 set_x := '1';
805 if r.b.class = FINITE then
806 if r.b.exponent < to_signed(-126, EXP_BITS) then
807 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
808 v.state := ROUND_UFLOW;
809 elsif r.b.exponent > to_signed(127, EXP_BITS) then
810 v.state := ROUND_OFLOW;
811 else
812 v.shift := to_signed(-2, EXP_BITS);
813 v.state := ROUNDING;
814 end if;
815 else
816 arith_done := '1';
817 end if;
818
819 when DO_FCTI =>
820 -- instr bit 9: 1=dword 0=word
821 -- instr bit 8: 1=unsigned 0=signed
822 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
823 opsel_a <= AIN_B;
824 v.result_class := r.b.class;
825 v.result_sign := r.b.negative;
826 v.result_exp := r.b.exponent;
827 v.fpscr(FPSCR_FR) := '0';
828 v.fpscr(FPSCR_FI) := '0';
829 if r.b.class = NAN and r.b.mantissa(53) = '0' then
830 -- Signalling NAN
831 v.fpscr(FPSCR_VXSNAN) := '1';
832 invalid := '1';
833 end if;
834
835 v.int_result := '1';
836 case r.b.class is
837 when ZERO =>
838 arith_done := '1';
839 when FINITE =>
840 if r.b.exponent >= to_signed(64, EXP_BITS) or
841 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
842 v.state := INT_OFLOW;
843 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
844 -- integer already, no rounding required,
845 -- shift into final position
846 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
847 if r.insn(8) = '1' and r.b.negative = '1' then
848 v.state := INT_OFLOW;
849 else
850 v.state := INT_ISHIFT;
851 end if;
852 else
853 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
854 v.state := INT_SHIFT;
855 end if;
856 when INFINITY | NAN =>
857 v.state := INT_OFLOW;
858 end case;
859
860 when DO_FCFID =>
861 v.result_sign := '0';
862 opsel_a <= AIN_B;
863 if r.insn(8) = '0' and r.b.negative = '1' then
864 -- fcfid[s] with negative operand, set R = -B
865 opsel_ainv <= '1';
866 carry_in <= '1';
867 v.result_sign := '1';
868 end if;
869 v.result_class := r.b.class;
870 v.result_exp := to_signed(54, EXP_BITS);
871 v.fpscr(FPSCR_FR) := '0';
872 v.fpscr(FPSCR_FI) := '0';
873 if r.b.class = ZERO then
874 arith_done := '1';
875 else
876 v.state := FINISH;
877 end if;
878
879 when DO_FADD =>
880 -- fadd[s] and fsub[s]
881 opsel_a <= AIN_A;
882 v.result_sign := r.a.negative;
883 v.result_class := r.a.class;
884 v.result_exp := r.a.exponent;
885 v.fpscr(FPSCR_FR) := '0';
886 v.fpscr(FPSCR_FI) := '0';
887 is_add := r.a.negative xor r.b.negative xor r.insn(1);
888 if r.a.class = FINITE and r.b.class = FINITE then
889 v.is_subtract := not is_add;
890 v.add_bsmall := r.exp_cmp;
891 if r.exp_cmp = '0' then
892 v.shift := r.a.exponent - r.b.exponent;
893 v.result_sign := r.b.negative xnor r.insn(1);
894 if r.a.exponent = r.b.exponent then
895 v.state := ADD_2;
896 else
897 v.state := ADD_SHIFT;
898 end if;
899 else
900 opsel_a <= AIN_B;
901 v.shift := r.b.exponent - r.a.exponent;
902 v.result_exp := r.b.exponent;
903 v.state := ADD_SHIFT;
904 end if;
905 else
906 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
907 (r.b.class = NAN and r.b.mantissa(53) = '0') then
908 -- Signalling NAN
909 v.fpscr(FPSCR_VXSNAN) := '1';
910 invalid := '1';
911 end if;
912 if r.a.class = NAN then
913 -- nothing to do, result is A
914 elsif r.b.class = NAN then
915 v.result_class := NAN;
916 v.result_sign := r.b.negative;
917 opsel_a <= AIN_B;
918 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
919 -- invalid operation, construct QNaN
920 v.fpscr(FPSCR_VXISI) := '1';
921 qnan_result := '1';
922 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
923 -- return -0 for rounding to -infinity
924 v.result_sign := r.round_mode(1) and r.round_mode(0);
925 elsif r.a.class = INFINITY or r.b.class = ZERO then
926 -- nothing to do, result is A
927 else
928 -- result is +/- B
929 v.result_sign := r.b.negative xnor r.insn(1);
930 v.result_class := r.b.class;
931 v.result_exp := r.b.exponent;
932 opsel_a <= AIN_B;
933 end if;
934 arith_done := '1';
935 end if;
936
937 when DO_FMUL =>
938 -- fmul[s]
939 opsel_a <= AIN_A;
940 v.result_sign := r.a.negative;
941 v.result_class := r.a.class;
942 v.result_exp := r.a.exponent;
943 v.fpscr(FPSCR_FR) := '0';
944 v.fpscr(FPSCR_FI) := '0';
945 if r.a.class = FINITE and r.c.class = FINITE then
946 v.result_sign := r.a.negative xor r.c.negative;
947 v.result_exp := r.a.exponent + r.c.exponent;
948 -- Renormalize denorm operands
949 if r.a.mantissa(54) = '0' then
950 v.state := RENORM_A;
951 elsif r.c.mantissa(54) = '0' then
952 opsel_a <= AIN_C;
953 v.state := RENORM_C;
954 else
955 f_to_multiply.valid <= '1';
956 v.state := MULT_1;
957 end if;
958 else
959 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
960 (r.c.class = NAN and r.c.mantissa(53) = '0') then
961 -- Signalling NAN
962 v.fpscr(FPSCR_VXSNAN) := '1';
963 invalid := '1';
964 end if;
965 if r.a.class = NAN then
966 -- result is A
967 elsif r.c.class = NAN then
968 v.result_class := NAN;
969 v.result_sign := r.c.negative;
970 opsel_a <= AIN_C;
971 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
972 (r.a.class = ZERO and r.c.class = INFINITY) then
973 -- invalid operation, construct QNaN
974 v.fpscr(FPSCR_VXIMZ) := '1';
975 qnan_result := '1';
976 elsif r.a.class = ZERO or r.a.class = INFINITY then
977 -- result is +/- A
978 v.result_sign := r.a.negative xor r.c.negative;
979 else
980 -- r.c.class is ZERO or INFINITY
981 v.result_class := r.c.class;
982 v.result_sign := r.a.negative xor r.c.negative;
983 end if;
984 arith_done := '1';
985 end if;
986
987 when DO_FDIV =>
-- Floating-point divide of operand A (dividend) by operand B (divisor).
-- Finite/finite goes to the iterative DIV_* states (after renormalizing
-- any denormalized operand); all other class combinations are resolved
-- here and finish immediately.
988 opsel_a <= AIN_A;
989 v.result_sign := r.a.negative;
990 v.result_class := r.a.class;
991 v.result_exp := r.a.exponent;
992 v.fpscr(FPSCR_FR) := '0';
993 v.fpscr(FPSCR_FI) := '0';
-- the next two assignments override the sign/exponent defaults above:
-- quotient sign is the XOR of the signs, exponent is the difference
994 v.result_sign := r.a.negative xor r.b.negative;
995 v.result_exp := r.a.exponent - r.b.exponent;
-- reset the iteration counter used by DIV_2 / DIV_3
996 v.count := "00";
997 if r.a.class = FINITE and r.b.class = FINITE then
998 -- Renormalize denorm operands
999 if r.a.mantissa(54) = '0' then
1000 v.state := RENORM_A;
1001 elsif r.b.mantissa(54) = '0' then
1002 opsel_a <= AIN_B;
1003 v.state := RENORM_B;
1004 else
-- both operands normalized: start the division sequence
1005 v.first := '1';
1006 v.state := DIV_2;
1007 end if;
1008 else
1009 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1010 (r.b.class = NAN and r.b.mantissa(53) = '0') then
1011 -- Signalling NAN
1012 v.fpscr(FPSCR_VXSNAN) := '1';
1013 invalid := '1';
1014 end if;
1015 if r.a.class = NAN then
1016 -- result is A
-- (restore A's own sign, undoing the XOR computed above)
1017 v.result_sign := r.a.negative;
1018 elsif r.b.class = NAN then
-- result is B: route B's mantissa through the A input
1019 v.result_class := NAN;
1020 v.result_sign := r.b.negative;
1021 opsel_a <= AIN_B;
1022 elsif r.b.class = INFINITY then
1023 if r.a.class = INFINITY then
-- infinity / infinity: invalid operation, result is a generated QNaN
1024 v.fpscr(FPSCR_VXIDI) := '1';
1025 qnan_result := '1';
1026 else
-- zero or finite divided by infinity gives a zero
1027 v.result_class := ZERO;
1028 end if;
1029 elsif r.b.class = ZERO then
1030 if r.a.class = ZERO then
-- zero / zero: invalid operation, result is a generated QNaN
1031 v.fpscr(FPSCR_VXZDZ) := '1';
1032 qnan_result := '1';
1033 else
-- only finite / zero raises zero-divide; infinity / zero just
-- yields infinity
1034 if r.a.class = FINITE then
1035 zero_divide := '1';
1036 end if;
1037 v.result_class := INFINITY;
1038 end if;
1039 -- else r.b.class = FINITE, result_class = r.a.class
1040 end if;
1041 arith_done := '1';
1042 end if;
1043
1044 when RENORM_A =>
-- First step of renormalizing operand A: request a count-leading-zeroes
-- on R, which loads v.shift with (clz - 9) in the renormalize logic
-- after the case statement.
-- NOTE(review): presumably R holds A's mantissa at this point because
-- the previous state selected AIN_A -- confirm against the opsel defaults.
1045 renormalize := '1';
1046 v.state := RENORM_A2;
1047
1048 when RENORM_A2 =>
-- Second step of renormalizing operand A: set_a writes new_exp and the
-- shifter output back into A, and result_exp takes new_exp.
-- r.insn(4) then selects which flow continues: '1' leads to the multiply
-- states using operand C, '0' to the divide states using operand B.  In
-- either case the second operand is renormalized first if its mantissa
-- bit 54 is clear.
1049 set_a := '1';
1050 v.result_exp := new_exp;
1051 if r.insn(4) = '1' then
1052 opsel_a <= AIN_C;
1053 if r.c.mantissa(54) = '1' then
-- C is already normalized: start the multiply
1054 v.first := '1';
1055 v.state := MULT_1;
1056 else
1057 v.state := RENORM_C;
1058 end if;
1059 else
1060 opsel_a <= AIN_B;
1061 if r.b.mantissa(54) = '1' then
-- B is already normalized: start the divide iteration
1062 v.first := '1';
1063 v.state := DIV_2;
1064 else
1065 v.state := RENORM_B;
1066 end if;
1067 end if;
1068
1069 when RENORM_B =>
-- First step of renormalizing operand B: request a count-leading-zeroes
-- on R so that v.shift receives the normalizing shift amount (clz - 9).
1070 renormalize := '1';
1071 v.state := RENORM_B2;
1072
1073 when RENORM_B2 =>
-- Second step of renormalizing operand B: set_b writes new_exp and the
-- shifter output back into B, then the divide continues via LOOKUP.
-- NOTE(review): result_exp is increased by the shift amount, presumably
-- because it was computed as A.exponent - B.exponent before B was
-- renormalized -- confirm against the definition of new_exp.
1074 set_b := '1';
1075 v.result_exp := r.result_exp + r.shift;
1076 v.state := LOOKUP;
1077
1078 when RENORM_C =>
-- First step of renormalizing operand C: request a count-leading-zeroes
-- on R so that v.shift receives the normalizing shift amount (clz - 9).
1079 renormalize := '1';
1080 v.state := RENORM_C2;
1081
1082 when RENORM_C2 =>
-- Second step of renormalizing operand C: set_c writes new_exp and the
-- shifter output back into C, result_exp takes new_exp, and the multiply
-- is started (v.first makes MULT_1 issue the multiplier request).
1083 set_c := '1';
1084 v.result_exp := new_exp;
1085 v.first := '1';
1086 v.state := MULT_1;
1087
1088 when ADD_SHIFT =>
-- Load R with the shifter output (R shifted by r.shift).  set_x sets the
-- X bit if any A-input bit selected by the shift mask is 1, and
-- longmask = '0' makes that mask use r.shift without the -29 adjustment.
1089 opsel_r <= RES_SHIFT;
1090 set_x := '1';
1091 longmask := '0';
1092 v.state := ADD_2;
1093
1094 when ADD_2 =>
-- Set up the add/subtract: the A input is operand A or B according to
-- r.add_bsmall, the B input is R, inverted when r.is_subtract is set.
-- For a subtract the carry-in is 1 only when X is clear.
-- NOTE(review): presumably the result mux is at its RES_SUM default here
-- so R receives the sum -- confirm against the opsel_r default.
1095 if r.add_bsmall = '1' then
1096 opsel_a <= AIN_A;
1097 else
1098 opsel_a <= AIN_B;
1099 end if;
1100 opsel_b <= BIN_R;
1101 opsel_binv <= r.is_subtract;
1102 carry_in <= r.is_subtract and not r.x;
-- pre-load the shift amount used if ADD_3 has to shift the sum
1103 v.shift := to_signed(-1, EXP_BITS);
1104 v.state := ADD_3;
1105
1106 when ADD_3 =>
-- Examine the sum in R and choose the next step: negate, shift down after
-- a carry out, round directly, finish with a zero result, or normalize.
1107 -- check for overflow or negative result (can't get both)
1108 if r.r(63) = '1' then
1109 -- result is opposite sign to expected
-- (flip the sign and form the two's complement: invert the A input
-- and add a carry-in of 1)
1110 v.result_sign := not r.result_sign;
1111 opsel_ainv <= '1';
1112 carry_in <= '1';
1113 v.state := FINISH;
1114 elsif r.r(55) = '1' then
1115 -- sum overflowed, shift right
1116 opsel_r <= RES_SHIFT;
1117 set_x := '1';
1118 v.shift := to_signed(-2, EXP_BITS);
1119 if exp_huge = '1' then
1120 v.state := ROUND_OFLOW;
1121 else
1122 v.state := ROUNDING;
1123 end if;
1124 elsif r.r(54) = '1' then
-- already normalized: go straight to rounding
1125 set_x := '1';
1126 v.shift := to_signed(-2, EXP_BITS);
1127 v.state := ROUNDING;
1128 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1129 -- r.x must be zero at this point
-- exact zero result
1130 v.result_class := ZERO;
1131 if r.is_subtract = '1' then
1132 -- set result sign depending on rounding mode
-- (negative only when both round_mode bits are set)
1133 v.result_sign := r.round_mode(1) and r.round_mode(0);
1134 end if;
1135 arith_done := '1';
1136 else
-- non-zero but with leading zeroes: count them and normalize
1137 renormalize := '1';
1138 v.state := NORMALIZE;
1139 end if;
1140
1141 when MULT_1 =>
-- Wait for the multiplier.  The request is issued from here only when
-- r.first is set (the RENORM_* paths set v.first; DO_FMUL issues the
-- request itself).  R is loaded from the multiplier result while in this
-- state, and FINISH follows once the result is flagged valid.
1142 f_to_multiply.valid <= r.first;
1143 opsel_r <= RES_MULT;
1144 if multiply_to_f.valid = '1' then
1145 v.state := FINISH;
1146 end if;
1147
1148 when LOOKUP =>
-- Entered from RENORM_B2 after operand B has been rewritten; selects B on
-- the A input and then starts the divide iteration.
1149 opsel_a <= AIN_B;
1150 -- wait one cycle for inverse_table[B] lookup
1151 v.first := '1';
1152 v.state := DIV_2;
1153
1154 when DIV_2 =>
1155 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
-- One half of the reciprocal refinement loop (DIV_2 and DIV_3 alternate
-- until r.count reaches 3).  The multiplier computes B times either the
-- table estimate (first pass) or the current P, with the constant 2.0 as
-- addend; msel_inv inverts both the addend and the multiplier result.
1156 msel_1 <= MUL1_B;
1157 msel_add <= MULADD_CONST;
1158 msel_inv <= '1';
1159 if r.count = 0 then
1160 msel_2 <= MUL2_LUT;
1161 else
1162 msel_2 <= MUL2_P;
1163 end if;
-- on the first cycle of this state, capture the second multiplier
-- operand (table estimate or P) into Y
1164 set_y := r.first;
-- pshift selects result bits 119..56 when P is written
1165 pshift := '1';
1166 f_to_multiply.valid <= r.first;
1167 if multiply_to_f.valid = '1' then
1168 v.first := '1';
1169 v.count := r.count + 1;
1170 v.state := DIV_3;
1171 end if;
1172
1173 when DIV_3 =>
1174 -- compute Y = P = P * Y
-- Multiplies Y by P and stores the product in P (bits 119..56 of the
-- result, because pshift is set).
-- NOTE(review): set_y is not assigned in this state, so Y itself appears
-- to be refreshed from P only when the following DIV_2 / DIV_4 state
-- asserts set_y with msel_2 = MUL2_P -- confirm against set_y's default.
1175 msel_1 <= MUL1_Y;
1176 msel_2 <= MUL2_P;
1177 f_to_multiply.valid <= r.first;
1178 pshift := '1';
1179 if multiply_to_f.valid = '1' then
1180 v.first := '1';
-- r.count was incremented by DIV_2; leave the loop once it reaches 3
1181 if r.count = 3 then
1182 v.state := DIV_4;
1183 else
1184 v.state := DIV_2;
1185 end if;
1186 end if;
1187
1188 when DIV_4 =>
1189 -- compute R = P = A * Y (quotient)
-- Multiplies A by P (the refined reciprocal).  On the first cycle Y also
-- captures P; when the product is valid it is loaded into R through
-- RES_MULT and into P through the pshift path.
1190 msel_1 <= MUL1_A;
1191 msel_2 <= MUL2_P;
1192 set_y := r.first;
1193 f_to_multiply.valid <= r.first;
1194 pshift := '1';
1195 if multiply_to_f.valid = '1' then
1196 opsel_r <= RES_MULT;
1197 v.first := '1';
1198 v.state := DIV_5;
1199 end if;
1200
1201 when DIV_5 =>
1202 -- compute P = A - B * R (remainder)
-- Multiplies B by the quotient in R with A as the addend; msel_inv
-- inverts both the addend and the multiplier result.
-- NOTE(review): pshift is not assigned here, so P presumably takes result
-- bits 63..0 -- confirm against pshift's default.
1203 msel_1 <= MUL1_B;
1204 msel_2 <= MUL2_R;
1205 msel_add <= MULADD_A;
1206 msel_inv <= '1';
1207 f_to_multiply.valid <= r.first;
1208 if multiply_to_f.valid = '1' then
1209 v.state := DIV_6;
1210 end if;
1211
1212 when DIV_6 =>
1213 -- test if remainder is 0 or >= B
-- Final quotient correction using the remainder in P.  pcmpb_lt and
-- pcmpb_eq are comparison results of P against B that are computed
-- outside this state.
1214 if pcmpb_lt = '1' then
1215 -- quotient is correct, set X if remainder non-zero
1216 v.x := r.p(58) or px_nz;
1217 else
1218 -- quotient needs to be incremented by 1
-- (X is then set unless the remainder equals B exactly)
1219 carry_in <= '1';
1220 v.x := not pcmpb_eq;
1221 end if;
1222 v.state := FINISH;
1223
1224 when INT_SHIFT =>
-- Float-to-integer conversion: load R with the shifter output, set X from
-- the masked A-input bits, and pre-load the shift amount for INT_ROUND.
1225 opsel_r <= RES_SHIFT;
1226 set_x := '1';
1227 v.state := INT_ROUND;
1228 v.shift := to_signed(-2, EXP_BITS);
1229
1230 when INT_ROUND =>
-- Shift R again and compute the rounding decision for the integer
-- result.  fp_rounding is called with '0' in the single-precision
-- argument position; its two-bit result is stored in FPSCR[FR] and
-- FPSCR[FI].
1231 opsel_r <= RES_SHIFT;
1232 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
1233 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
1234 -- Check for negative values that don't round to 0 for fcti*u*
-- (r.insn(8) = '1' selects the unsigned variants)
1235 if r.insn(8) = '1' and r.result_sign = '1' and
1236 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
1237 v.state := INT_OFLOW;
1238 else
1239 v.state := INT_FINAL;
1240 end if;
1241
1242 when INT_ISHIFT =>
-- Load R with the shifter output and go directly to INT_FINAL, bypassing
-- the INT_ROUND step.
1243 opsel_r <= RES_SHIFT;
1244 v.state := INT_FINAL;
1245
1246 when INT_FINAL =>
-- Produce the final integer value and decide whether an overflow check
-- is needed for the target format selected by r.insn(9 downto 8).
1247 -- Negate if necessary, and increment for rounding if needed
-- (inverting plus carry-in 1 negates; FR adds the rounding increment,
-- hence the XOR when both apply)
1248 opsel_ainv <= r.result_sign;
1249 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
1250 -- Check for possible overflows
1251 case r.insn(9 downto 8) is
1252 when "00" => -- fctiw[z]
1253 need_check := r.r(31) or (r.r(30) and not r.result_sign);
1254 when "01" => -- fctiwu[z]
1255 need_check := r.r(31);
1256 when "10" => -- fctid[z]
1257 need_check := r.r(63) or (r.r(62) and not r.result_sign);
1258 when others => -- fctidu[z]
1259 need_check := r.r(63);
1260 end case;
1261 if need_check = '1' then
1262 v.state := INT_CHECK;
1263 else
-- no overflow possible: report inexact if FI was set, then finish
1264 if r.fpscr(FPSCR_FI) = '1' then
1265 v.fpscr(FPSCR_XX) := '1';
1266 end if;
1267 arith_done := '1';
1268 end if;
1269
1270 when INT_CHECK =>
-- Overflow check on the converted integer in R.  The MSB examined is bit
-- 31 when r.insn(9) = '0' and bit 63 otherwise.
1271 if r.insn(9) = '0' then
1272 msb := r.r(31);
1273 else
1274 msb := r.r(63);
1275 end if;
-- misc_sel picks one of the "1xxx" saturation constants in the result mux
1276 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
-- flagged as invalid when, with insn(8) = '0', the MSB differs from the
-- result sign, or, with insn(8) = '1', the MSB is not set
1277 if (r.insn(8) = '0' and msb /= r.result_sign) or
1278 (r.insn(8) = '1' and msb /= '1') then
1279 opsel_r <= RES_MISC;
1280 v.fpscr(FPSCR_VXCVI) := '1';
1281 invalid := '1';
1282 else
1283 if r.fpscr(FPSCR_FI) = '1' then
1284 v.fpscr(FPSCR_XX) := '1';
1285 end if;
1286 end if;
1287 arith_done := '1';
1288
1289 when INT_OFLOW =>
-- Conversion overflowed (or was otherwise routed here): replace the
-- result with the saturation constant selected by misc_sel, set VXCVI,
-- and finish as an invalid operation.
1290 opsel_r <= RES_MISC;
1291 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
-- a NaN operand B forces the low select bit to 1, i.e. the constant used
-- for negative results
1292 if r.b.class = NAN then
1293 misc_sel(0) <= '1';
1294 end if;
1295 v.fpscr(FPSCR_VXCVI) := '1';
1296 invalid := '1';
1297 arith_done := '1';
1298
1299 when FRI_1 =>
-- Load R with the shifter output, set X from the masked A-input bits,
-- and continue with ROUNDING using a shift amount of -2.
1300 opsel_r <= RES_SHIFT;
1301 set_x := '1';
1302 v.shift := to_signed(-2, EXP_BITS);
1303 v.state := ROUNDING;
1304
1305 when FINISH =>
-- Common tail for arithmetic results in R: fold extra product bits into
-- X, then either normalize or proceed to the appropriate rounding state.
-- for multiplies, non-zero low product bits (px_nz) count as sticky
1306 if r.is_multiply = '1' and px_nz = '1' then
1307 v.x := '1';
1308 end if;
-- normalized means bits 63..55 are zero and bit 54 is one
1309 if r.r(63 downto 54) /= "0000000001" then
1310 renormalize := '1';
1311 v.state := NORMALIZE;
1312 else
1313 set_x := '1';
1314 if exp_tiny = '1' then
-- exponent below the minimum: shift amount is the shortfall
1315 v.shift := new_exp - min_exp;
1316 v.state := ROUND_UFLOW;
1317 elsif exp_huge = '1' then
1318 v.state := ROUND_OFLOW;
1319 else
1320 v.shift := to_signed(-2, EXP_BITS);
1321 v.state := ROUNDING;
1322 end if;
1323 end if;
1324
1325 when NORMALIZE =>
1326 -- Shift so we have 9 leading zeroes (we know R is non-zero)
-- The shift amount was computed by the renormalize logic in the previous
-- state.  After the shift the exponent range is checked exactly as in
-- FINISH to select the underflow, overflow or normal rounding path.
1327 opsel_r <= RES_SHIFT;
1328 set_x := '1';
1329 if exp_tiny = '1' then
1330 v.shift := new_exp - min_exp;
1331 v.state := ROUND_UFLOW;
1332 elsif exp_huge = '1' then
1333 v.state := ROUND_OFLOW;
1334 else
1335 v.shift := to_signed(-2, EXP_BITS);
1336 v.state := ROUNDING;
1337 end if;
1338
1339 when ROUND_UFLOW =>
-- Result exponent is below the minimum.  Records the tiny condition and
-- prepares R for rounding according to whether the underflow exception
-- is enabled (FPSCR[UE]).
1340 v.tiny := '1';
1341 if r.fpscr(FPSCR_UE) = '0' then
1342 -- disabled underflow exception case
1343 -- have to denormalize before rounding
-- (the shift amount was loaded by the state that detected exp_tiny)
1344 opsel_r <= RES_SHIFT;
1345 set_x := '1';
1346 v.shift := to_signed(-2, EXP_BITS);
1347 v.state := ROUNDING;
1348 else
1349 -- enabled underflow exception case
1350 -- if denormalized, have to normalize before rounding
-- UX is set unconditionally and the exponent is raised by bias_exp
1351 v.fpscr(FPSCR_UX) := '1';
1352 v.result_exp := r.result_exp + bias_exp;
1353 if r.r(54) = '0' then
1354 renormalize := '1';
1355 v.state := NORMALIZE;
1356 else
1357 v.shift := to_signed(-2, EXP_BITS);
1358 v.state := ROUNDING;
1359 end if;
1360 end if;
1361
1362 when ROUND_OFLOW =>
-- Result exponent is above the maximum.  OX is always set; the result
-- then depends on whether the overflow exception is enabled (FPSCR[OE]).
1363 v.fpscr(FPSCR_OX) := '1';
1364 if r.fpscr(FPSCR_OE) = '0' then
1365 -- disabled overflow exception
1366 -- result depends on rounding mode
1367 v.fpscr(FPSCR_XX) := '1';
1368 v.fpscr(FPSCR_FI) := '1';
-- infinity is produced when round_mode(1 downto 0) is "00", or when
-- round_mode(1) is set and round_mode(0) equals the result sign;
-- otherwise the class is left unchanged and the largest representable
-- number constructed below is the result
1369 if r.round_mode(1 downto 0) = "00" or
1370 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
1371 v.result_class := INFINITY;
1372 v.fpscr(FPSCR_FR) := '1';
1373 else
1374 v.fpscr(FPSCR_FR) := '0';
1375 end if;
1376 -- construct largest representable number
-- (misc_sel "0010" / "0011" select the DP / SP maximum mantissa)
1377 v.result_exp := max_exp;
1378 opsel_r <= RES_MISC;
1379 misc_sel <= "001" & r.single_prec;
1380 arith_done := '1';
1381 else
1382 -- enabled overflow exception
-- the exponent is lowered by bias_exp and the value is rounded normally
1383 v.result_exp := r.result_exp - bias_exp;
1384 v.shift := to_signed(-2, EXP_BITS);
1385 v.state := ROUNDING;
1386 end if;
1387
1388 when ROUNDING =>
-- Round R to the target precision.  opsel_amask clears the A-input bits
-- covered by the shift mask; fp_rounding returns two bits that are
-- stored in FPSCR[FR] and FPSCR[FI].
1389 opsel_amask <= '1';
1390 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
1391 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
1392 if round(1) = '1' then
1393 -- set mask to increment the LSB for the precision
-- (adding the mask plus a carry-in of 1 to the masked value)
1394 opsel_b <= BIN_MASK;
1395 carry_in <= '1';
1396 v.shift := to_signed(-1, EXP_BITS);
1397 v.state := ROUNDING_2;
1398 else
1399 if r.r(54) = '0' then
1400 -- result after masking could be zero, or could be a
1401 -- denormalized result that needs to be renormalized
1402 renormalize := '1';
1403 v.state := ROUNDING_3;
1404 else
1405 arith_done := '1';
1406 end if;
1407 end if;
-- round(0) marks the result as inexact; an inexact tiny result also
-- sets UX
1408 if round(0) = '1' then
1409 v.fpscr(FPSCR_XX) := '1';
1410 if r.tiny = '1' then
1411 v.fpscr(FPSCR_UX) := '1';
1412 end if;
1413 end if;
1414
1415 when ROUNDING_2 =>
-- Entered after the rounding increment has been added to R.
1416 -- Check for overflow during rounding
1417 v.x := '0';
1418 if r.r(55) = '1' then
-- the increment carried into bit 55: load R from the shifter (r.shift
-- was set to -1 by ROUNDING) and re-check the exponent range
1419 opsel_r <= RES_SHIFT;
1420 if exp_huge = '1' then
1421 v.state := ROUND_OFLOW;
1422 else
1423 arith_done := '1';
1424 end if;
1425 elsif r.r(54) = '0' then
1426 -- Do CLZ so we can renormalize the result
1427 renormalize := '1';
1428 v.state := ROUNDING_3;
1429 else
1430 arith_done := '1';
1431 end if;
1432
1433 when ROUNDING_3 =>
-- Handle a rounded result whose bit 54 is clear: it is either zero or a
-- denormalized value.  For single precision only r_hi_nz is considered
-- when testing the mantissa for non-zero.
1434 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
1435 if mant_nz = '0' then
1436 v.result_class := ZERO;
1437 if r.is_subtract = '1' then
1438 -- set result sign depending on rounding mode
1439 v.result_sign := r.round_mode(1) and r.round_mode(0);
1440 end if;
1441 arith_done := '1';
1442 else
1443 -- Renormalize result after rounding
1444 opsel_r <= RES_SHIFT;
1445 v.denorm := exp_tiny;
-- if normalizing would take the exponent below -1022, a further shift
-- by the shortfall is done in DENORM
1446 v.shift := new_exp - to_signed(-1022, EXP_BITS);
1447 if new_exp < to_signed(-1022, EXP_BITS) then
1448 v.state := DENORM;
1449 else
1450 arith_done := '1';
1451 end if;
1452 end if;
1453
1454 when DENORM =>
-- Apply the shift amount computed in ROUNDING_3 to R and finish.
1455 opsel_r <= RES_SHIFT;
1456 arith_done := '1';
1457
1458 end case;
1459
-- Common post-processing of the flags raised by the state machine above:
-- zero-divide status, generated-QNaN results, and instruction completion.
1460 if zero_divide = '1' then
1461 v.fpscr(FPSCR_ZX) := '1';
1462 end if;
1463 if qnan_result = '1' then
-- a generated QNaN counts as an invalid operation; the result is a
-- positive NaN whose mantissa is the misc mux constant "0001"
1464 invalid := '1';
1465 v.result_class := NAN;
1466 v.result_sign := '0';
1467 misc_sel <= "0001";
1468 opsel_r <= RES_MISC;
1469 end if;
1470 if arith_done = '1' then
1471 -- Enabled invalid exception doesn't write result or FPRF
1472 -- Neither does enabled zero-divide exception
1473 if (invalid and r.fpscr(FPSCR_VE)) = '0' and
1474 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
1475 v.writing_back := '1';
1476 v.update_fprf := '1';
1477 end if;
-- the instruction completes regardless of whether a result is written
1478 v.instr_done := '1';
1479 v.state := IDLE;
1480 update_fx := '1';
1481 end if;
1482
1483 -- Multiplier and divide/square root data path
-- Drives the two multiplier operands and the addend from the msel_*
-- selectors chosen by the state machine, and captures Y and P.
-- Operand 1: A, B or R mantissa shifted left by two bits, or Y as-is.
1484 case msel_1 is
1485 when MUL1_A =>
1486 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
1487 when MUL1_B =>
1488 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
1489 when MUL1_Y =>
1490 f_to_multiply.data1 <= r.y;
1491 when others =>
1492 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
1493 end case;
-- Operand 2: C or R mantissa shifted left by two bits, the inverse-table
-- estimate placed below eight zero bits, or P as-is.
1494 case msel_2 is
1495 when MUL2_C =>
1496 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
1497 when MUL2_LUT =>
1498 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
1499 when MUL2_P =>
1500 f_to_multiply.data2 <= r.p;
1501 when others =>
1502 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
1503 end case;
-- Addend: zero unless msel_add selects the constant 2.0 or operand A.
1504 maddend := (others => '0');
1505 case msel_add is
1506 when MULADD_CONST =>
1507 -- addend is 2.0 in 16.112 format
1508 maddend(113) := '1'; -- 2.0
1509 when MULADD_A =>
1510 -- addend is A in 16.112 format
1511 maddend(121 downto 58) := r.a.mantissa;
1512 when others =>
1513 end case;
-- msel_inv inverts the addend and also asks the multiplier to invert its
-- result (not_result)
1514 if msel_inv = '1' then
1515 f_to_multiply.addend <= not maddend;
1516 else
1517 f_to_multiply.addend <= maddend;
1518 end if;
1519 f_to_multiply.not_result <= msel_inv;
-- Y captures whatever is currently driven as multiplier operand 2
1520 if set_y = '1' then
1521 v.y := f_to_multiply.data2;
1522 end if;
-- P captures the multiplier result whenever it is valid: bits 63..0 when
-- pshift is '0', bits 119..56 otherwise
1523 if multiply_to_f.valid = '1' then
1524 if pshift = '0' then
1525 v.p := multiply_to_f.result(63 downto 0);
1526 else
1527 v.p := multiply_to_f.result(119 downto 56);
1528 end if;
1529 end if;
1530
1531 -- Data path.
1532 -- This has A and B input multiplexers, an adder, a shifter,
1533 -- count-leading-zeroes logic, and a result mux.
-- Mask generation: the mask is derived from r.shift, offset by -29 when
-- longmask is set.  It is all ones below -64, all zeroes for non-negative
-- values, and right_mask of the low six bits otherwise.
1534 if longmask = '1' then
1535 mshift := r.shift + to_signed(-29, EXP_BITS);
1536 else
1537 mshift := r.shift;
1538 end if;
1539 if mshift < to_signed(-64, EXP_BITS) then
1540 mask := (others => '1');
1541 elsif mshift >= to_signed(0, EXP_BITS) then
1542 mask := (others => '0');
1543 else
1544 mask := right_mask(unsigned(mshift(5 downto 0)));
1545 end if;
-- A input mux: R or one of the operand mantissas
1546 case opsel_a is
1547 when AIN_R =>
1548 in_a0 := r.r;
1549 when AIN_A =>
1550 in_a0 := r.a.mantissa;
1551 when AIN_B =>
1552 in_a0 := r.b.mantissa;
1553 when others =>
1554 in_a0 := r.c.mantissa;
1555 end case;
-- sticky bit: with set_x, X is set if any masked A-input bit is 1
-- (evaluated before the optional inversion and masking below)
1556 if (or (mask and in_a0)) = '1' and set_x = '1' then
1557 v.x := '1';
1558 end if;
1559 if opsel_ainv = '1' then
1560 in_a0 := not in_a0;
1561 end if;
1562 if opsel_amask = '1' then
1563 in_a0 := in_a0 and not mask;
1564 end if;
1565 in_a <= in_a0;
-- B input mux: zero, R, or the mask, optionally inverted
1566 case opsel_b is
1567 when BIN_ZERO =>
1568 in_b0 := (others => '0');
1569 when BIN_R =>
1570 in_b0 := r.r;
1571 when BIN_MASK =>
1572 in_b0 := mask;
1573 when others =>
1574 in_b0 := (others => '0');
1575 end case;
1576 if opsel_binv = '1' then
1577 in_b0 := not in_b0;
1578 end if;
1579 in_b <= in_b0;
-- Shifter: operates on R extended with 56 zero bits, using the low seven
-- bits of r.shift; shift amounts outside -64..63 give an all-zero result
1580 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
1581 shift_res := shifter_64(r.r & x"00000000000000",
1582 std_ulogic_vector(r.shift(6 downto 0)));
1583 else
1584 shift_res := (others => '0');
1585 end if;
-- Result mux: adder output, shifter output, multiplier result bits
-- 121..58, or one of the constants / special values selected by misc_sel
1586 case opsel_r is
1587 when RES_SUM =>
1588 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
1589 when RES_SHIFT =>
1590 result <= shift_res;
1591 when RES_MULT =>
1592 result <= multiply_to_f.result(121 downto 58);
1593 when others =>
1594 case misc_sel is
1595 when "0000" =>
-- FPSCR contents (masked), zero-extended to 64 bits
1596 misc := x"00000000" & (r.fpscr and fpscr_mask);
1597 when "0001" =>
1598 -- generated QNaN mantissa
1599 misc := x"0020000000000000";
1600 when "0010" =>
1601 -- mantissa of max representable DP number
1602 misc := x"007ffffffffffffc";
1603 when "0011" =>
1604 -- mantissa of max representable SP number
1605 misc := x"007fffff80000000";
1606 when "0100" =>
1607 -- fmrgow result
1608 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
1609 when "0110" =>
1610 -- fmrgew result
1611 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
1612 when "1000" =>
1613 -- max positive result for fctiw[z]
1614 misc := x"000000007fffffff";
1615 when "1001" =>
1616 -- max negative result for fctiw[z]
1617 misc := x"ffffffff80000000";
1618 when "1010" =>
1619 -- max positive result for fctiwu[z]
1620 misc := x"00000000ffffffff";
1621 when "1011" =>
1622 -- max negative result for fctiwu[z]
1623 misc := x"0000000000000000";
1624 when "1100" =>
1625 -- max positive result for fctid[z]
1626 misc := x"7fffffffffffffff";
1627 when "1101" =>
1628 -- max negative result for fctid[z]
1629 misc := x"8000000000000000";
1630 when "1110" =>
1631 -- max positive result for fctidu[z]
1632 misc := x"ffffffffffffffff";
1633 when "1111" =>
1634 -- max negative result for fctidu[z]
1635 misc := x"0000000000000000";
1636 when others =>
1637 misc := x"0000000000000000";
1638 end case;
1639 result <= misc;
1640 end case;
-- R is reloaded from the result mux on every evaluation of the process
1641 v.r := result;
1642
-- Operand write-back used by the RENORM_*2 states: the selected operand
-- register takes new_exp as its exponent and the shifter output as its
-- mantissa.
1643 if set_a = '1' then
1644 v.a.exponent := new_exp;
1645 v.a.mantissa := shift_res;
1646 end if;
1647 if set_b = '1' then
1648 v.b.exponent := new_exp;
1649 v.b.mantissa := shift_res;
1650 end if;
1651 if set_c = '1' then
1652 v.c.exponent := new_exp;
1653 v.c.mantissa := shift_res;
1654 end if;
1655
-- Whenever R is loaded from the shifter, the result exponent follows it
-- by taking new_exp.  This comes after the state machine, so it overrides
-- any v.result_exp assignment made by a state that selected RES_SHIFT.
1656 if opsel_r = RES_SHIFT then
1657 v.result_exp := new_exp;
1658 end if;
1659
-- Renormalize request: count the leading zeroes of R and set the shift
-- amount to (clz - 9), the shift that leaves nine leading zeroes.  Being
-- after the state machine, this overrides any v.shift assigned there.
1660 if renormalize = '1' then
1661 clz := count_left_zeroes(r.r);
1662 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
1663 end if;
1664
-- Form the value written back: R unchanged for integer results, otherwise
-- R packed with the result sign, class and exponent by pack_dp.
1665 if r.int_result = '1' then
1666 fp_result <= r.r;
1667 else
1668 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
1669 r.single_prec, r.quieten_nan);
1670 end if;
-- Update the FPSCR field C..FU from the registered result when requested;
-- the last argument is R bit 54 qualified by "not denorm".
1671 if r.update_fprf = '1' then
1672 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
1673 r.r(54) and not r.denorm);
1674 end if;
1675
-- Recompute the FPSCR summary bits from the updated value:
-- VX is the OR of the individual invalid-operation bits in the two
-- ranges below; FEX is the OR of each exception bit ANDed with its
-- corresponding enable bit.
1676 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
1677 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
1678 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
1679 v.fpscr(FPSCR_VE downto FPSCR_XE));
-- FX is set when update_fx is asserted and some exception bit is now set
-- that was not set in r.old_exc
1680 if update_fx = '1' and
1681 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
1682 v.fpscr(FPSCR_FX) := '1';
1683 end if;
-- with rc set, cr_result takes FPSCR bits FX down to OX
1684 if r.rc = '1' then
1685 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
1686 end if;
1687
-- Final control outputs.  An illegal instruction cancels everything and
-- returns to IDLE; otherwise an interrupt is requested when the
-- instruction completes with FEX set and fe_mode enabled, and busy is
-- asserted while a state other than IDLE is pending or an interrupt is
-- being raised.
1688 if illegal = '1' then
1689 v.instr_done := '0';
1690 v.do_intr := '0';
1691 v.writing_back := '0';
1692 v.busy := '0';
1693 v.state := IDLE;
1694 else
1695 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
1696 if v.state /= IDLE or v.do_intr = '1' then
1697 v.busy := '1';
1698 end if;
1699 end if;
1700
-- Drive the next-state record and the illegal-instruction output
1701 rin <= v;
1702 e_out.illegal <= illegal;
1703 end process;
1704
1705 end architecture behaviour;