FPU: Implement fadd[s] and fsub[s] and add tests for them
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Port interface of the floating-point unit.
14 entity fpu is
15 port (
16 clk : in std_ulogic;  -- clock; the state register updates on its rising edge
17 rst : in std_ulogic;  -- reset, sampled synchronously on the rising edge of clk
18
19 e_in : in Execute1toFPUType;  -- incoming request: valid, insn, op, fra, frb, frt, rc, ...
20 e_out : out FPUToExecute1Type;  -- busy / exception / interrupt status
21
22 w_out : out FPUToWritebackType  -- result register write and CR field write
23 );
24 end entity fpu;
25
26 architecture behaviour of fpu is
-- Classification attached to every decoded operand and to the result.
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
28
-- Width of the signed exponent fields used inside the FPU.
29 constant EXP_BITS : natural := 13;
30
-- Unpacked form of an operand, as returned by decode_dp.
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;  -- bit 63 of the source register
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format; for FP inputs the unit bit is bit 54 and the fraction occupies bits 53..2
36 end record;
37
-- States of the FPU state machine (held in r.state).
-- IDLE waits for e_in.valid and dispatches to one of the DO_* states
-- based on instruction bits; the remaining states are later steps.
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,  -- FPSCR access instructions
40 DO_FMR, DO_FMRG,  -- register moves / merges
41 DO_FCFID, DO_FCTI,  -- integer <-> floating-point conversions
42 DO_FRSP, DO_FRI,  -- round to single / round to integer
43 DO_FADD,  -- fadd[s] and fsub[s]
44 FRI_1,
45 ADD_SHIFT, ADD_2, ADD_3,  -- steps following DO_FADD
46 INT_SHIFT, INT_ROUND, INT_ISHIFT,  -- steps following DO_FCTI
47 INT_FINAL, INT_CHECK, INT_OFLOW,
48 FINISH, NORMALIZE,
49 ROUND_UFLOW, ROUND_OFLOW,
50 ROUNDING, ROUNDING_2, ROUNDING_3,
51 DENORM);
52
-- Registered state of the FPU.  Signal r holds the current value and
-- rin the value that is latched into r on the next rising clock edge.
53 type reg_type is record
54 state : state_t;  -- current state of the state machine
55 busy : std_ulogic;  -- drives e_out.busy
56 instr_done : std_ulogic;  -- instruction finished; drives w_out.valid (gated by do_intr)
57 do_intr : std_ulogic;  -- drives e_out.interrupt
58 op : insn_type_t;  -- captured from e_in.op
59 insn : std_ulogic_vector(31 downto 0);  -- captured from e_in.insn
60 dest_fpr : gspr_index_t;  -- captured from e_in.frt; drives w_out.write_reg
61 fe_mode : std_ulogic;  -- OR of the e_in.fe_mode bits
62 rc : std_ulogic;  -- captured from e_in.rc
63 is_cmp : std_ulogic;  -- captured from e_in.out_cr
64 single_prec : std_ulogic;  -- captured from e_in.single
65 fpscr : std_ulogic_vector(31 downto 0);
66 a : fpu_reg_type;  -- decoded e_in.fra
67 b : fpu_reg_type;  -- decoded e_in.frb
68 r : std_ulogic_vector(63 downto 0); -- 10.54 format
69 x : std_ulogic;  -- set when set_x is active and bits selected by the shift mask are non-zero
70 result_sign : std_ulogic;
71 result_class : fp_number_class;
72 result_exp : signed(EXP_BITS-1 downto 0);
73 shift : signed(EXP_BITS-1 downto 0);  -- shift count fed to the shifter and mask generator
74 writing_back : std_ulogic;  -- drives w_out.write_enable
75 int_result : std_ulogic;
76 cr_result : std_ulogic_vector(3 downto 0);  -- replicated into w_out.write_cr_data
77 cr_mask : std_ulogic_vector(7 downto 0);  -- drives w_out.write_cr_mask
78 old_exc : std_ulogic_vector(4 downto 0);  -- FPSCR bits VX..XX sampled while in IDLE
79 update_fprf : std_ulogic;
80 quieten_nan : std_ulogic;
81 tiny : std_ulogic;
82 denorm : std_ulogic;
83 round_mode : std_ulogic_vector(2 downto 0);  -- defaults to '0' & FPSCR RN when an instruction is captured
84 is_subtract : std_ulogic;
85 exp_cmp : std_ulogic;  -- '1' when A's exponent is greater than B's
86 add_bsmall : std_ulogic;
87 end record;
88
-- Current (r) and next (rin) register state.
89 signal r, rin : reg_type;
90
-- Datapath control and data signals.
91 signal fp_result : std_ulogic_vector(63 downto 0);  -- drives w_out.write_data
92 signal opsel_a : std_ulogic_vector(1 downto 0);  -- A-input select, one of AIN_*
93 signal opsel_b : std_ulogic_vector(1 downto 0);  -- B-input select, one of BIN_*
94 signal opsel_r : std_ulogic_vector(1 downto 0);  -- result select, one of RES_*
95 signal opsel_ainv : std_ulogic;  -- invert the A input
96 signal opsel_amask : std_ulogic;  -- clear the A-input bits covered by the shift mask
97 signal opsel_binv : std_ulogic;  -- invert the B input
98 signal in_a : std_ulogic_vector(63 downto 0);
99 signal in_b : std_ulogic_vector(63 downto 0);
100 signal result : std_ulogic_vector(63 downto 0);
101 signal carry_in : std_ulogic;  -- carry into the in_a + in_b adder
102 signal lost_bits : std_ulogic;
103 signal r_hi_nz : std_ulogic;  -- some bit of r.r(55 downto 31) is set
104 signal r_lo_nz : std_ulogic;  -- some bit of r.r(30 downto 2) is set
105 signal misc_sel : std_ulogic_vector(3 downto 0);  -- picks the value produced for RES_MISC
106
107 -- opsel values
108 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";  -- A input = r.r
109 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";  -- A input = r.a.mantissa
110 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";  -- A input = r.b.mantissa
111
112 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";  -- B input = 0
113 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";  -- B input = r.r
114 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";  -- B input = shift mask
115
116 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";  -- result = in_a + in_b + carry_in
117 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";  -- result = shifter output
118 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";  -- result = value chosen by misc_sel
119
-- Bidirectional shifter with a 120-bit input and a 64-bit output.
-- The value returned is the top 64 bits of inp after shifting it left
-- by 'shift' places.  'shift' is a 7-bit two's-complement count, so it
-- covers -64..63, and a negative count means a right shift.
-- The work is split over three cascaded multiplexers which handle
-- steps of 32, steps of 8 and single bit positions respectively.
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    constant pad64 : std_ulogic_vector(63 downto 0) := (others => '0');
    constant pad32 : std_ulogic_vector(31 downto 0) := (others => '0');
    constant pad7  : std_ulogic_vector(6 downto 0) := (others => '0');
    variable by32  : std_ulogic_vector(94 downto 0);
    variable by8   : std_ulogic_vector(70 downto 0);
    variable by1   : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: bits 6..5 choose a 95-bit window in steps of 32.
    case shift(6 downto 5) is
        when "10" =>
            -- counts -64 .. -33: zero-fill 64 places on the left
            by32 := pad64 & inp(119 downto 89);
        when "00" =>
            -- counts 0 .. 31
            by32 := inp(119 downto 25);
        when "01" =>
            -- counts 32 .. 63: zeros enter from the right
            by32 := inp(87 downto 0) & pad7;
        when others =>
            -- counts -32 .. -1: zero-fill 32 places on the left
            by32 := pad32 & inp(119 downto 57);
    end case;

    -- Stage 2: bits 4..3 narrow the window to 71 bits in steps of 8.
    case shift(4 downto 3) is
        when "10" =>
            by8 := by32(78 downto 8);
        when "01" =>
            by8 := by32(86 downto 16);
        when "00" =>
            by8 := by32(94 downto 24);
        when others =>
            by8 := by32(70 downto 0);
    end case;

    -- Stage 3: bits 2..0 pick the final 64 bits in steps of 1.
    case shift(2 downto 0) is
        when "110" =>
            by1 := by8(64 downto 1);
        when "101" =>
            by1 := by8(65 downto 2);
        when "100" =>
            by1 := by8(66 downto 3);
        when "011" =>
            by1 := by8(67 downto 4);
        when "010" =>
            by1 := by8(68 downto 5);
        when "001" =>
            by1 := by8(69 downto 6);
        when "000" =>
            by1 := by8(70 downto 7);
        when others =>
            by1 := by8(63 downto 0);
    end case;

    return by1;
end;
172
-- Build a 64-bit mask that is all 0 on the left and all 1 on the right,
-- covering exactly the bits that will be lost by a right shift.
-- 'shift' is the low 6 bits of the (negative) shift count, so a value
-- of 63 marks only bit 0 and a value of 0 marks all 64 bits.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable lost : std_ulogic_vector(63 downto 0);
begin
    for pos in lost'range loop
        -- bit 'pos' is marked when shift <= 63 - pos
        if shift <= (63 - pos) then
            lost(pos) := '1';
        else
            lost(pos) := '0';
        end if;
    end loop;
    return lost;
end;
188
-- Unpack a 64-bit register value into sign, exponent, mantissa and class.
-- With is_int = '0' the value is interpreted as a double-precision
-- number; any other is_int value makes it a raw 64-bit integer.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;   -- at least one exponent bit set
    variable exp_all  : std_ulogic;   -- all exponent bits set
    variable frac_any : std_ulogic;   -- at least one fraction bit set
    variable kind     : std_ulogic_vector(2 downto 0);
begin
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));
    dec.negative := fpr(63);

    if is_int /= '0' then
        -- Integer operand: keep the bits as they are, exponent is zero.
        dec.exponent := (others => '0');
        dec.mantissa := fpr;
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
    else
        -- Floating-point operand.  A zero exponent field (zero or
        -- denormal) is given the minimum exponent, and the unit bit
        -- placed at mantissa bit 54 is then 0.
        if exp_any = '0' then
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;
        dec.mantissa := "000000000" & exp_any & fpr(51 downto 0) & "00";
        kind := exp_all & exp_any & frac_any;
        case kind is
            when "000" =>
                dec.class := ZERO;
            when "110" =>
                dec.class := INFINITY;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                dec.class := FINITE;
            when others =>
                dec.class := NAN;
        end case;
    end if;
    return dec;
end;
228
-- Assemble a 64-bit double-precision image from sign, class, exponent
-- and mantissa.  When single_prec is not '0' the low 29 fraction bits
-- of the image are left as zero.  For a NaN, quieten_nan = '1' forces
-- the top fraction bit (bit 51) to 1.
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    variable image : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    image(63) := sign;
    case class is
        when ZERO =>
            null;
        when INFINITY =>
            image(62 downto 52) := "11111111111";
        when FINITE | NAN =>
            if class = NAN then
                image(62 downto 52) := "11111111111";
                image(51) := quieten_nan or mantissa(53);
            else
                -- Only a normalized value (unit bit set) gets a biased
                -- exponent; otherwise the exponent field stays zero.
                if mantissa(54) = '1' then
                    image(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
                end if;
                image(51) := mantissa(53);
            end if;
            -- Remaining fraction bits are handled alike for both classes.
            image(50 downto 29) := mantissa(52 downto 31);
            if single_prec = '0' then
                image(28 downto 0) := mantissa(30 downto 2);
            end if;
    end case;
    return image;
end;
260
-- Decide whether the mantissa must be incremented when rounding.
-- Returns a 2-bit vector: bit 1 = increment, bit 0 = inexact.
-- For single_prec = 1, x is expected to already include the bottom 29
-- mantissa bits (usually arranged by setting set_x = 1 earlier).
-- rn(1 downto 0) selects the mode: "00" nearest, "01" towards zero,
-- anything else a directed mode that rounds away from zero when
-- rn(0) equals the sign.  With rn(2) = '1' the "00" mode no longer
-- resolves ties to even and follows the top guard bit instead.
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable tail    : std_ulogic_vector(2 downto 0);   -- two bits below the LSB, then x
    variable low_bit : std_ulogic;                      -- LSB at the target precision
    variable verdict : std_ulogic_vector(1 downto 0);
begin
    if single_prec /= '0' then
        tail := mantissa(30 downto 29) & x;
        low_bit := mantissa(31);
    else
        tail := mantissa(1 downto 0) & x;
        low_bit := mantissa(2);
    end if;

    -- Inexact whenever anything below the LSB is non-zero.
    verdict(0) := or (tail);
    verdict(1) := '0';

    if rn(1 downto 0) = "00" then
        -- round to nearest
        if rn(2) = '0' and tail = "100" then
            verdict(1) := low_bit;      -- exact tie: round to even
        else
            verdict(1) := tail(2);
        end if;
    elsif rn(1 downto 0) = "01" then
        -- round towards zero: never increment
        null;
    elsif rn(0) = sign then
        -- round towards +/- infinity, in the direction of greater magnitude
        verdict(1) := verdict(0);
    end if;
    return verdict;
end;
298
-- Compute the 5-bit result flags to be written into the FPSCR for a
-- result with the given sign and class.  unitbit is the unit bit of
-- the mantissa; for a FINITE result the leading flag is its inverse.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
    variable positive : std_ulogic := not sign;
begin
    if class = NAN then
        return "10001";
    elsif class = ZERO then
        return sign & "0010";
    elsif class = INFINITY then
        return '0' & sign & positive & "01";
    else
        -- FINITE
        return (not unitbit) & sign & positive & "00";
    end if;
end;
314
315 begin
-- State register.  On each rising clock edge either apply the
-- synchronous reset to the control fields or latch the next-state
-- value rin into r.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            -- A new instruction may only be presented while the FPU is idle.
            assert r.state = IDLE or e_in.valid /= '1' severity failure;
            r <= rin;
        else
            r.writing_back <= '0';
            r.fpscr <= (others => '0');
            r.do_intr <= '0';
            r.instr_done <= '0';
            r.busy <= '0';
            r.state <= IDLE;
        end if;
    end if;
end process;
332
-- Status outputs, taken directly from the registered state.
333 e_out.busy <= r.busy;
334 e_out.exception <= r.fpscr(FPSCR_FEX);
335 e_out.interrupt <= r.do_intr;
336
-- Writeback outputs.  valid is suppressed when an interrupt is being
-- raised; a CR write is requested when the instruction is done and it
-- either has rc set or is flagged is_cmp.
337 w_out.valid <= r.instr_done and not r.do_intr;
338 w_out.write_enable <= r.writing_back;
339 w_out.write_reg <= r.dest_fpr;
340 w_out.write_data <= fp_result;
341 w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
342 w_out.write_cr_mask <= r.cr_mask;
-- The 4-bit CR result is replicated into all eight 4-bit fields.
-- NOTE(review): presumably write_cr_mask selects which field is actually written -- confirm.
343 w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
344 r.cr_result & r.cr_result & r.cr_result & r.cr_result;
345
346 fpu_1: process(all)
347 variable v : reg_type;
348 variable adec : fpu_reg_type;
349 variable bdec : fpu_reg_type;
350 variable fpscr_mask : std_ulogic_vector(31 downto 0);
351 variable illegal : std_ulogic;
352 variable j, k : integer;
353 variable flm : std_ulogic_vector(7 downto 0);
354 variable int_input : std_ulogic;
355 variable mask : std_ulogic_vector(63 downto 0);
356 variable in_a0 : std_ulogic_vector(63 downto 0);
357 variable in_b0 : std_ulogic_vector(63 downto 0);
358 variable misc : std_ulogic_vector(63 downto 0);
359 variable shift_res : std_ulogic_vector(63 downto 0);
360 variable round : std_ulogic_vector(1 downto 0);
361 variable update_fx : std_ulogic;
362 variable arith_done : std_ulogic;
363 variable invalid : std_ulogic;
364 variable mant_nz : std_ulogic;
365 variable min_exp : signed(EXP_BITS-1 downto 0);
366 variable max_exp : signed(EXP_BITS-1 downto 0);
367 variable bias_exp : signed(EXP_BITS-1 downto 0);
368 variable new_exp : signed(EXP_BITS-1 downto 0);
369 variable exp_tiny : std_ulogic;
370 variable exp_huge : std_ulogic;
371 variable renormalize : std_ulogic;
372 variable clz : std_ulogic_vector(5 downto 0);
373 variable set_x : std_ulogic;
374 variable mshift : signed(EXP_BITS-1 downto 0);
375 variable need_check : std_ulogic;
376 variable msb : std_ulogic;
377 variable is_add : std_ulogic;
378 variable qnan_result : std_ulogic;
379 variable longmask : std_ulogic;
380 begin
381 v := r;
382 illegal := '0';
383 v.busy := '0';
384 int_input := '0';
385
386 -- capture incoming instruction
387 if e_in.valid = '1' then
388 v.insn := e_in.insn;
389 v.op := e_in.op;
390 v.fe_mode := or (e_in.fe_mode);
391 v.dest_fpr := e_in.frt;
392 v.single_prec := e_in.single;
393 v.int_result := '0';
394 v.rc := e_in.rc;
395 v.is_cmp := e_in.out_cr;
396 if e_in.out_cr = '0' then
397 v.cr_mask := num_to_fxm(1);
398 else
399 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
400 end if;
401 int_input := '0';
402 if e_in.op = OP_FPOP_I then
403 int_input := '1';
404 end if;
405 v.quieten_nan := '1';
406 v.tiny := '0';
407 v.denorm := '0';
408 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
409 v.is_subtract := '0';
410 v.add_bsmall := '0';
411 adec := decode_dp(e_in.fra, int_input);
412 bdec := decode_dp(e_in.frb, int_input);
413 v.a := adec;
414 v.b := bdec;
415 v.exp_cmp := '0';
416 if adec.exponent > bdec.exponent then
417 v.exp_cmp := '1';
418 end if;
419 end if;
420
421 r_hi_nz <= or (r.r(55 downto 31));
422 r_lo_nz <= or (r.r(30 downto 2));
423
424 if r.single_prec = '0' then
425 max_exp := to_signed(1023, EXP_BITS);
426 min_exp := to_signed(-1022, EXP_BITS);
427 bias_exp := to_signed(1536, EXP_BITS);
428 else
429 max_exp := to_signed(127, EXP_BITS);
430 min_exp := to_signed(-126, EXP_BITS);
431 bias_exp := to_signed(192, EXP_BITS);
432 end if;
433 new_exp := r.result_exp - r.shift;
434 exp_tiny := '0';
435 exp_huge := '0';
436 if new_exp < min_exp then
437 exp_tiny := '1';
438 end if;
439 if new_exp > max_exp then
440 exp_huge := '1';
441 end if;
442
443 v.writing_back := '0';
444 v.instr_done := '0';
445 v.update_fprf := '0';
446 v.shift := to_signed(0, EXP_BITS);
447 opsel_a <= AIN_R;
448 opsel_ainv <= '0';
449 opsel_amask <= '0';
450 opsel_b <= BIN_ZERO;
451 opsel_binv <= '0';
452 opsel_r <= RES_SUM;
453 carry_in <= '0';
454 misc_sel <= "0000";
455 fpscr_mask := (others => '1');
456 update_fx := '0';
457 arith_done := '0';
458 invalid := '0';
459 renormalize := '0';
460 set_x := '0';
461 qnan_result := '0';
462 longmask := r.single_prec;
463
464 case r.state is
465 when IDLE =>
466 if e_in.valid = '1' then
467 case e_in.insn(5 downto 1) is
468 when "00000" =>
469 v.state := DO_MCRFS;
470 when "00110" =>
471 if e_in.insn(10) = '0' then
472 if e_in.insn(8) = '0' then
473 v.state := DO_MTFSB;
474 else
475 v.state := DO_MTFSFI;
476 end if;
477 else
478 v.state := DO_FMRG;
479 end if;
480 when "00111" =>
481 if e_in.insn(8) = '0' then
482 v.state := DO_MFFS;
483 else
484 v.state := DO_MTFSF;
485 end if;
486 when "01000" =>
487 if e_in.insn(9 downto 8) /= "11" then
488 v.state := DO_FMR;
489 else
490 v.state := DO_FRI;
491 end if;
492 when "01100" =>
493 v.state := DO_FRSP;
494 when "01110" =>
495 if int_input = '1' then
496 -- fcfid[u][s]
497 v.state := DO_FCFID;
498 else
499 v.state := DO_FCTI;
500 end if;
501 when "01111" =>
502 v.round_mode := "001";
503 v.state := DO_FCTI;
504 when "10100" | "10101" =>
505 v.state := DO_FADD;
506 when others =>
507 illegal := '1';
508 end case;
509 end if;
510 v.x := '0';
511 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
512
513 when DO_MCRFS =>
514 j := to_integer(unsigned(insn_bfa(r.insn)));
515 for i in 0 to 7 loop
516 if i = j then
517 k := (7 - i) * 4;
518 v.cr_result := r.fpscr(k + 3 downto k);
519 fpscr_mask(k + 3 downto k) := "0000";
520 end if;
521 end loop;
522 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
523 v.instr_done := '1';
524 v.state := IDLE;
525
526 when DO_MTFSB =>
527 -- mtfsb{0,1}
528 j := to_integer(unsigned(insn_bt(r.insn)));
529 for i in 0 to 31 loop
530 if i = j then
531 v.fpscr(31 - i) := r.insn(6);
532 end if;
533 end loop;
534 v.instr_done := '1';
535 v.state := IDLE;
536
537 when DO_MTFSFI =>
538 -- mtfsfi
539 j := to_integer(unsigned(insn_bf(r.insn)));
540 if r.insn(16) = '0' then
541 for i in 0 to 7 loop
542 if i = j then
543 k := (7 - i) * 4;
544 v.fpscr(k + 3 downto k) := insn_u(r.insn);
545 end if;
546 end loop;
547 end if;
548 v.instr_done := '1';
549 v.state := IDLE;
550
551 when DO_FMRG =>
552 -- fmrgew, fmrgow
553 opsel_r <= RES_MISC;
554 misc_sel <= "01" & r.insn(8) & '0';
555 v.int_result := '1';
556 v.writing_back := '1';
557 v.instr_done := '1';
558 v.state := IDLE;
559
560 when DO_MFFS =>
561 v.int_result := '1';
562 v.writing_back := '1';
563 opsel_r <= RES_MISC;
564 case r.insn(20 downto 16) is
565 when "00000" =>
566 -- mffs
567 when "00001" =>
568 -- mffsce
569 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
570 when "10100" | "10101" =>
571 -- mffscdrn[i] (but we don't implement DRN)
572 fpscr_mask := x"000000FF";
573 when "10110" =>
574 -- mffscrn
575 fpscr_mask := x"000000FF";
576 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
577 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
578 when "10111" =>
579 -- mffscrni
580 fpscr_mask := x"000000FF";
581 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
582 when "11000" =>
583 -- mffsl
584 fpscr_mask := x"0007F0FF";
585 when others =>
586 illegal := '1';
587 end case;
588 v.instr_done := '1';
589 v.state := IDLE;
590
591 when DO_MTFSF =>
592 if r.insn(25) = '1' then
593 flm := x"FF";
594 elsif r.insn(16) = '1' then
595 flm := x"00";
596 else
597 flm := r.insn(24 downto 17);
598 end if;
599 for i in 0 to 7 loop
600 k := i * 4;
601 if flm(i) = '1' then
602 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
603 end if;
604 end loop;
605 v.instr_done := '1';
606 v.state := IDLE;
607
608 when DO_FMR =>
609 opsel_a <= AIN_B;
610 v.result_class := r.b.class;
611 v.result_exp := r.b.exponent;
612 v.quieten_nan := '0';
613 if r.insn(9) = '1' then
614 v.result_sign := '0'; -- fabs
615 elsif r.insn(8) = '1' then
616 v.result_sign := '1'; -- fnabs
617 elsif r.insn(7) = '1' then
618 v.result_sign := r.b.negative; -- fmr
619 elsif r.insn(6) = '1' then
620 v.result_sign := not r.b.negative; -- fneg
621 else
622 v.result_sign := r.a.negative; -- fcpsgn
623 end if;
624 v.writing_back := '1';
625 v.instr_done := '1';
626 v.state := IDLE;
627
628 when DO_FRI => -- fri[nzpm]
629 opsel_a <= AIN_B;
630 v.result_class := r.b.class;
631 v.result_sign := r.b.negative;
632 v.result_exp := r.b.exponent;
633 v.fpscr(FPSCR_FR) := '0';
634 v.fpscr(FPSCR_FI) := '0';
635 if r.b.class = NAN and r.b.mantissa(53) = '0' then
636 -- Signalling NAN
637 v.fpscr(FPSCR_VXSNAN) := '1';
638 invalid := '1';
639 end if;
640 if r.b.class = FINITE then
641 if r.b.exponent >= to_signed(52, EXP_BITS) then
642 -- integer already, no rounding required
643 arith_done := '1';
644 else
645 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
646 v.state := FRI_1;
647 v.round_mode := '1' & r.insn(7 downto 6);
648 end if;
649 else
650 arith_done := '1';
651 end if;
652
653 when DO_FRSP =>
654 opsel_a <= AIN_B;
655 v.result_class := r.b.class;
656 v.result_sign := r.b.negative;
657 v.result_exp := r.b.exponent;
658 v.fpscr(FPSCR_FR) := '0';
659 v.fpscr(FPSCR_FI) := '0';
660 if r.b.class = NAN and r.b.mantissa(53) = '0' then
661 -- Signalling NAN
662 v.fpscr(FPSCR_VXSNAN) := '1';
663 invalid := '1';
664 end if;
665 set_x := '1';
666 if r.b.class = FINITE then
667 if r.b.exponent < to_signed(-126, EXP_BITS) then
668 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
669 v.state := ROUND_UFLOW;
670 elsif r.b.exponent > to_signed(127, EXP_BITS) then
671 v.state := ROUND_OFLOW;
672 else
673 v.shift := to_signed(-2, EXP_BITS);
674 v.state := ROUNDING;
675 end if;
676 else
677 arith_done := '1';
678 end if;
679
680 when DO_FCTI =>
681 -- instr bit 9: 1=dword 0=word
682 -- instr bit 8: 1=unsigned 0=signed
683 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
684 opsel_a <= AIN_B;
685 v.result_class := r.b.class;
686 v.result_sign := r.b.negative;
687 v.result_exp := r.b.exponent;
688 v.fpscr(FPSCR_FR) := '0';
689 v.fpscr(FPSCR_FI) := '0';
690 if r.b.class = NAN and r.b.mantissa(53) = '0' then
691 -- Signalling NAN
692 v.fpscr(FPSCR_VXSNAN) := '1';
693 invalid := '1';
694 end if;
695
696 v.int_result := '1';
697 case r.b.class is
698 when ZERO =>
699 arith_done := '1';
700 when FINITE =>
701 if r.b.exponent >= to_signed(64, EXP_BITS) or
702 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
703 v.state := INT_OFLOW;
704 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
705 -- integer already, no rounding required,
706 -- shift into final position
707 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
708 if r.insn(8) = '1' and r.b.negative = '1' then
709 v.state := INT_OFLOW;
710 else
711 v.state := INT_ISHIFT;
712 end if;
713 else
714 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
715 v.state := INT_SHIFT;
716 end if;
717 when INFINITY | NAN =>
718 v.state := INT_OFLOW;
719 end case;
720
721 when DO_FCFID =>
722 v.result_sign := '0';
723 opsel_a <= AIN_B;
724 if r.insn(8) = '0' and r.b.negative = '1' then
725 -- fcfid[s] with negative operand, set R = -B
726 opsel_ainv <= '1';
727 carry_in <= '1';
728 v.result_sign := '1';
729 end if;
730 v.result_class := r.b.class;
731 v.result_exp := to_signed(54, EXP_BITS);
732 v.fpscr(FPSCR_FR) := '0';
733 v.fpscr(FPSCR_FI) := '0';
734 if r.b.class = ZERO then
735 arith_done := '1';
736 else
737 v.state := FINISH;
738 end if;
739
740 when DO_FADD =>
741 -- fadd[s] and fsub[s]
742 opsel_a <= AIN_A;
743 v.result_sign := r.a.negative;
744 v.result_class := r.a.class;
745 v.result_exp := r.a.exponent;
746 v.fpscr(FPSCR_FR) := '0';
747 v.fpscr(FPSCR_FI) := '0';
748 is_add := r.a.negative xor r.b.negative xor r.insn(1);
749 if r.a.class = FINITE and r.b.class = FINITE then
750 v.is_subtract := not is_add;
751 v.add_bsmall := r.exp_cmp;
752 if r.exp_cmp = '0' then
753 v.shift := r.a.exponent - r.b.exponent;
754 v.result_sign := r.b.negative xnor r.insn(1);
755 if r.a.exponent = r.b.exponent then
756 v.state := ADD_2;
757 else
758 v.state := ADD_SHIFT;
759 end if;
760 else
761 opsel_a <= AIN_B;
762 v.shift := r.b.exponent - r.a.exponent;
763 v.result_exp := r.b.exponent;
764 v.state := ADD_SHIFT;
765 end if;
766 else
767 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
768 (r.b.class = NAN and r.b.mantissa(53) = '0') then
769 -- Signalling NAN
770 v.fpscr(FPSCR_VXSNAN) := '1';
771 invalid := '1';
772 end if;
773 if r.a.class = NAN then
774 -- nothing to do, result is A
775 elsif r.b.class = NAN then
776 v.result_class := NAN;
777 v.result_sign := r.b.negative;
778 opsel_a <= AIN_B;
779 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
780 -- invalid operation, construct QNaN
781 v.fpscr(FPSCR_VXISI) := '1';
782 qnan_result := '1';
783 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
784 -- return -0 for rounding to -infinity
785 v.result_sign := r.round_mode(1) and r.round_mode(0);
786 elsif r.a.class = INFINITY or r.b.class = ZERO then
787 -- nothing to do, result is A
788 else
789 -- result is +/- B
790 v.result_sign := r.b.negative xnor r.insn(1);
791 v.result_class := r.b.class;
792 v.result_exp := r.b.exponent;
793 opsel_a <= AIN_B;
794 end if;
795 arith_done := '1';
796 end if;
797
798 when ADD_SHIFT =>
799 opsel_r <= RES_SHIFT;
800 set_x := '1';
801 longmask := '0';
802 v.state := ADD_2;
803
804 when ADD_2 =>
805 if r.add_bsmall = '1' then
806 opsel_a <= AIN_A;
807 else
808 opsel_a <= AIN_B;
809 end if;
810 opsel_b <= BIN_R;
811 opsel_binv <= r.is_subtract;
812 carry_in <= r.is_subtract and not r.x;
813 v.shift := to_signed(-1, EXP_BITS);
814 v.state := ADD_3;
815
816 when ADD_3 =>
817 -- check for overflow or negative result (can't get both)
818 if r.r(63) = '1' then
819 -- result is opposite sign to expected
820 v.result_sign := not r.result_sign;
821 opsel_ainv <= '1';
822 carry_in <= '1';
823 v.state := FINISH;
824 elsif r.r(55) = '1' then
825 -- sum overflowed, shift right
826 opsel_r <= RES_SHIFT;
827 set_x := '1';
828 v.shift := to_signed(-2, EXP_BITS);
829 if exp_huge = '1' then
830 v.state := ROUND_OFLOW;
831 else
832 v.state := ROUNDING;
833 end if;
834 elsif r.r(54) = '1' then
835 set_x := '1';
836 v.shift := to_signed(-2, EXP_BITS);
837 v.state := ROUNDING;
838 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
839 -- r.x must be zero at this point
840 v.result_class := ZERO;
841 if r.is_subtract = '1' then
842 -- set result sign depending on rounding mode
843 v.result_sign := r.round_mode(1) and r.round_mode(0);
844 end if;
845 arith_done := '1';
846 else
847 renormalize := '1';
848 v.state := NORMALIZE;
849 end if;
850
851 when INT_SHIFT =>
852 opsel_r <= RES_SHIFT;
853 set_x := '1';
854 v.state := INT_ROUND;
855 v.shift := to_signed(-2, EXP_BITS);
856
857 when INT_ROUND =>
858 opsel_r <= RES_SHIFT;
859 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
860 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
861 -- Check for negative values that don't round to 0 for fcti*u*
862 if r.insn(8) = '1' and r.result_sign = '1' and
863 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
864 v.state := INT_OFLOW;
865 else
866 v.state := INT_FINAL;
867 end if;
868
869 when INT_ISHIFT =>
870 opsel_r <= RES_SHIFT;
871 v.state := INT_FINAL;
872
873 when INT_FINAL =>
874 -- Negate if necessary, and increment for rounding if needed
875 opsel_ainv <= r.result_sign;
876 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
877 -- Check for possible overflows
878 case r.insn(9 downto 8) is
879 when "00" => -- fctiw[z]
880 need_check := r.r(31) or (r.r(30) and not r.result_sign);
881 when "01" => -- fctiwu[z]
882 need_check := r.r(31);
883 when "10" => -- fctid[z]
884 need_check := r.r(63) or (r.r(62) and not r.result_sign);
885 when others => -- fctidu[z]
886 need_check := r.r(63);
887 end case;
888 if need_check = '1' then
889 v.state := INT_CHECK;
890 else
891 if r.fpscr(FPSCR_FI) = '1' then
892 v.fpscr(FPSCR_XX) := '1';
893 end if;
894 arith_done := '1';
895 end if;
896
897 when INT_CHECK =>
898 if r.insn(9) = '0' then
899 msb := r.r(31);
900 else
901 msb := r.r(63);
902 end if;
903 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
904 if (r.insn(8) = '0' and msb /= r.result_sign) or
905 (r.insn(8) = '1' and msb /= '1') then
906 opsel_r <= RES_MISC;
907 v.fpscr(FPSCR_VXCVI) := '1';
908 invalid := '1';
909 else
910 if r.fpscr(FPSCR_FI) = '1' then
911 v.fpscr(FPSCR_XX) := '1';
912 end if;
913 end if;
914 arith_done := '1';
915
916 when INT_OFLOW =>
917 opsel_r <= RES_MISC;
918 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
919 if r.b.class = NAN then
920 misc_sel(0) <= '1';
921 end if;
922 v.fpscr(FPSCR_VXCVI) := '1';
923 invalid := '1';
924 arith_done := '1';
925
926 when FRI_1 =>
927 opsel_r <= RES_SHIFT;
928 set_x := '1';
929 v.shift := to_signed(-2, EXP_BITS);
930 v.state := ROUNDING;
931
932 when FINISH =>
933 if r.r(63 downto 54) /= "0000000001" then
934 renormalize := '1';
935 v.state := NORMALIZE;
936 else
937 set_x := '1';
938 if exp_tiny = '1' then
939 v.shift := new_exp - min_exp;
940 v.state := ROUND_UFLOW;
941 elsif exp_huge = '1' then
942 v.state := ROUND_OFLOW;
943 else
944 v.shift := to_signed(-2, EXP_BITS);
945 v.state := ROUNDING;
946 end if;
947 end if;
948
949 when NORMALIZE =>
950 -- Shift so we have 9 leading zeroes (we know R is non-zero)
951 opsel_r <= RES_SHIFT;
952 set_x := '1';
953 if exp_tiny = '1' then
954 v.shift := new_exp - min_exp;
955 v.state := ROUND_UFLOW;
956 elsif exp_huge = '1' then
957 v.state := ROUND_OFLOW;
958 else
959 v.shift := to_signed(-2, EXP_BITS);
960 v.state := ROUNDING;
961 end if;
962
963 when ROUND_UFLOW =>
964 v.tiny := '1';
965 if r.fpscr(FPSCR_UE) = '0' then
966 -- disabled underflow exception case
967 -- have to denormalize before rounding
968 opsel_r <= RES_SHIFT;
969 set_x := '1';
970 v.shift := to_signed(-2, EXP_BITS);
971 v.state := ROUNDING;
972 else
973 -- enabled underflow exception case
974 -- if denormalized, have to normalize before rounding
975 v.fpscr(FPSCR_UX) := '1';
976 v.result_exp := r.result_exp + bias_exp;
977 if r.r(54) = '0' then
978 renormalize := '1';
979 v.state := NORMALIZE;
980 else
981 v.shift := to_signed(-2, EXP_BITS);
982 v.state := ROUNDING;
983 end if;
984 end if;
985
986 when ROUND_OFLOW =>
987 v.fpscr(FPSCR_OX) := '1';
988 if r.fpscr(FPSCR_OE) = '0' then
989 -- disabled overflow exception
990 -- result depends on rounding mode
991 v.fpscr(FPSCR_XX) := '1';
992 v.fpscr(FPSCR_FI) := '1';
993 if r.round_mode(1 downto 0) = "00" or
994 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
995 v.result_class := INFINITY;
996 v.fpscr(FPSCR_FR) := '1';
997 else
998 v.fpscr(FPSCR_FR) := '0';
999 end if;
1000 -- construct largest representable number
1001 v.result_exp := max_exp;
1002 opsel_r <= RES_MISC;
1003 misc_sel <= "001" & r.single_prec;
1004 arith_done := '1';
1005 else
1006 -- enabled overflow exception
1007 v.result_exp := r.result_exp - bias_exp;
1008 v.shift := to_signed(-2, EXP_BITS);
1009 v.state := ROUNDING;
1010 end if;
1011
1012 when ROUNDING =>
1013 opsel_amask <= '1';
1014 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
1015 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
1016 if round(1) = '1' then
1017 -- set mask to increment the LSB for the precision
1018 opsel_b <= BIN_MASK;
1019 carry_in <= '1';
1020 v.shift := to_signed(-1, EXP_BITS);
1021 v.state := ROUNDING_2;
1022 else
1023 if r.r(54) = '0' then
1024 -- result after masking could be zero, or could be a
1025 -- denormalized result that needs to be renormalized
1026 renormalize := '1';
1027 v.state := ROUNDING_3;
1028 else
1029 arith_done := '1';
1030 end if;
1031 end if;
1032 if round(0) = '1' then
1033 v.fpscr(FPSCR_XX) := '1';
1034 if r.tiny = '1' then
1035 v.fpscr(FPSCR_UX) := '1';
1036 end if;
1037 end if;
1038
1039 when ROUNDING_2 =>
1040 -- Check for overflow during rounding
1041 v.x := '0';
1042 if r.r(55) = '1' then
1043 opsel_r <= RES_SHIFT;
1044 if exp_huge = '1' then
1045 v.state := ROUND_OFLOW;
1046 else
1047 arith_done := '1';
1048 end if;
1049 elsif r.r(54) = '0' then
1050 -- Do CLZ so we can renormalize the result
1051 renormalize := '1';
1052 v.state := ROUNDING_3;
1053 else
1054 arith_done := '1';
1055 end if;
1056
1057 when ROUNDING_3 =>
1058 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
1059 if mant_nz = '0' then
1060 v.result_class := ZERO;
1061 if r.is_subtract = '1' then
1062 -- set result sign depending on rounding mode
1063 v.result_sign := r.round_mode(1) and r.round_mode(0);
1064 end if;
1065 arith_done := '1';
1066 else
1067 -- Renormalize result after rounding
1068 opsel_r <= RES_SHIFT;
1069 v.denorm := exp_tiny;
1070 v.shift := new_exp - to_signed(-1022, EXP_BITS);
1071 if new_exp < to_signed(-1022, EXP_BITS) then
1072 v.state := DENORM;
1073 else
1074 arith_done := '1';
1075 end if;
1076 end if;
1077
1078 when DENORM =>
1079 opsel_r <= RES_SHIFT;
1080 arith_done := '1';
1081
1082 end case;
1083
1084 if qnan_result = '1' then
1085 invalid := '1';
1086 v.result_class := NAN;
1087 v.result_sign := '0';
1088 misc_sel <= "0001";
1089 opsel_r <= RES_MISC;
1090 end if;
1091 if arith_done = '1' then
1092 -- Enabled invalid exception doesn't write result or FPRF
1093 if (invalid and r.fpscr(FPSCR_VE)) = '0' then
1094 v.writing_back := '1';
1095 v.update_fprf := '1';
1096 end if;
1097 v.instr_done := '1';
1098 v.state := IDLE;
1099 update_fx := '1';
1100 end if;
1101
1102 -- Data path.
1103 -- This has A and B input multiplexers, an adder, a shifter,
1104 -- count-leading-zeroes logic, and a result mux.
1105 if longmask = '1' then
1106 mshift := r.shift + to_signed(-29, EXP_BITS);
1107 else
1108 mshift := r.shift;
1109 end if;
1110 if mshift < to_signed(-64, EXP_BITS) then
1111 mask := (others => '1');
1112 elsif mshift >= to_signed(0, EXP_BITS) then
1113 mask := (others => '0');
1114 else
1115 mask := right_mask(unsigned(mshift(5 downto 0)));
1116 end if;
1117 case opsel_a is
1118 when AIN_R =>
1119 in_a0 := r.r;
1120 when AIN_A =>
1121 in_a0 := r.a.mantissa;
1122 when others =>
1123 in_a0 := r.b.mantissa;
1124 end case;
1125 if (or (mask and in_a0)) = '1' and set_x = '1' then
1126 v.x := '1';
1127 end if;
1128 if opsel_ainv = '1' then
1129 in_a0 := not in_a0;
1130 end if;
1131 if opsel_amask = '1' then
1132 in_a0 := in_a0 and not mask;
1133 end if;
1134 in_a <= in_a0;
1135 case opsel_b is
1136 when BIN_ZERO =>
1137 in_b0 := (others => '0');
1138 when BIN_R =>
1139 in_b0 := r.r;
1140 when BIN_MASK =>
1141 in_b0 := mask;
1142 when others =>
1143 in_b0 := (others => '0');
1144 end case;
1145 if opsel_binv = '1' then
1146 in_b0 := not in_b0;
1147 end if;
1148 in_b <= in_b0;
1149 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
1150 shift_res := shifter_64(r.r & x"00000000000000",
1151 std_ulogic_vector(r.shift(6 downto 0)));
1152 else
1153 shift_res := (others => '0');
1154 end if;
1155 case opsel_r is
1156 when RES_SUM =>
1157 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
1158 when RES_SHIFT =>
1159 result <= shift_res;
1160 when others =>
1161 case misc_sel is
1162 when "0000" =>
1163 misc := x"00000000" & (r.fpscr and fpscr_mask);
1164 when "0001" =>
1165 -- generated QNaN mantissa
1166 misc := x"0020000000000000";
1167 when "0010" =>
1168 -- mantissa of max representable DP number
1169 misc := x"007ffffffffffffc";
1170 when "0011" =>
1171 -- mantissa of max representable SP number
1172 misc := x"007fffff80000000";
1173 when "0100" =>
1174 -- fmrgow result
1175 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
1176 when "0110" =>
1177 -- fmrgew result
1178 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
1179 when "1000" =>
1180 -- max positive result for fctiw[z]
1181 misc := x"000000007fffffff";
1182 when "1001" =>
1183 -- max negative result for fctiw[z]
1184 misc := x"ffffffff80000000";
1185 when "1010" =>
1186 -- max positive result for fctiwu[z]
1187 misc := x"00000000ffffffff";
1188 when "1011" =>
1189 -- max negative result for fctiwu[z]
1190 misc := x"0000000000000000";
1191 when "1100" =>
1192 -- max positive result for fctid[z]
1193 misc := x"7fffffffffffffff";
1194 when "1101" =>
1195 -- max negative result for fctid[z]
1196 misc := x"8000000000000000";
1197 when "1110" =>
1198 -- max positive result for fctidu[z]
1199 misc := x"ffffffffffffffff";
1200 when "1111" =>
1200 -- lowest result for fctidu[z] (unsigned range, so zero)
1202 misc := x"0000000000000000";
1203 when others =>
1204 misc := x"0000000000000000";
1205 end case;
1206 result <= misc;
1207 end case;
1208 v.r := result;
1209
1210 if opsel_r = RES_SHIFT then
1211 v.result_exp := new_exp;
1212 end if;
1213
1214 if renormalize = '1' then
1215 clz := count_left_zeroes(r.r);
1216 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
1217 end if;
1218
1219 if r.int_result = '1' then
1220 fp_result <= r.r;
1221 else
1222 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
1223 r.single_prec, r.quieten_nan);
1224 end if;
1225 if r.update_fprf = '1' then
1226 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
1227 r.r(54) and not r.denorm);
1228 end if;
1229
1230 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
1231 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
1232 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
1233 v.fpscr(FPSCR_VE downto FPSCR_XE));
1234 if update_fx = '1' and
1235 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
1236 v.fpscr(FPSCR_FX) := '1';
1237 end if;
1238 if r.rc = '1' then
1239 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
1240 end if;
1241
1242 if illegal = '1' then
1243 v.instr_done := '0';
1244 v.do_intr := '0';
1245 v.writing_back := '0';
1246 v.busy := '0';
1247 v.state := IDLE;
1248 else
1249 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
1250 if v.state /= IDLE or v.do_intr = '1' then
1251 v.busy := '1';
1252 end if;
1253 end if;
1254
1255 rin <= v;
1256 e_out.illegal <= illegal;
1257 end process;
1258
1259 end architecture behaviour;