fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is
         -- Floating-point unit interface.  The port record types are declared
         -- outside this file (presumably in work.common -- confirm); the
         -- comments below list only the fields this architecture touches.
  15     port (
             -- All state is registered on the rising edge of clk.
  16         clk : in std_ulogic;
             -- Synchronous reset: it is sampled inside the rising-edge process.
  17         rst : in std_ulogic;
  18
             -- Instruction request.  Fields read by the architecture: valid,
             -- insn, op, fe_mode, frt, single, rc, out_cr, fra and frb.
  19         e_in  : in  Execute1toFPUType;
             -- Status back to the requester: busy, exception and interrupt.
  20         e_out : out FPUToExecute1Type;
  21
             -- Result port: valid, write_enable / write_reg / write_data for the
             -- register result and write_cr_enable / write_cr_mask /
             -- write_cr_data for the CR result.
  22         w_out : out FPUToWritebackType
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);  -- class of an operand/result; decode_dp also maps denormals to FINITE
  28
         -- Width of the internal signed exponent; wider than the 11-bit
         -- exponent field of the packed double-precision format.
  29     constant EXP_BITS : natural := 13;
  30
         -- Unpacked form of a register operand, as produced by decode_dp.
  31     type fpu_reg_type is record
  32         class    : fp_number_class;
  33         negative : std_ulogic;
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased
             -- For floating-point inputs decode_dp places the implicit bit at
             -- bit 54 and the 52 fraction bits at 53 downto 2; for integer
             -- inputs the field holds the raw 64-bit value.
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format
  36     end record;
  37
  38     type state_t is (IDLE,     -- waits for e_in.valid and picks the first state from insn(5 downto 1)
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,  -- FPSCR read/modify instructions
  40                      DO_FMR,   -- sign-manipulating moves: fmr, fabs, fnabs, fneg, fcpsgn
  41                      DO_FCFID, DO_FCTI,  -- entry states for integer -> FP and FP -> integer conversion
  42                      DO_FRSP,  -- entry state that checks B against the single-precision exponent range
  43                      INT_SHIFT, INT_ROUND, INT_ISHIFT,  -- FP -> integer: shift and round steps
  44                      INT_FINAL, INT_CHECK, INT_OFLOW,   -- FP -> integer: negate/increment, range check, overflow result
  45                      FINISH, NORMALIZE,  -- check for / restore the normalized position of R
  46                      ROUND_UFLOW, ROUND_OFLOW,  -- entered on exp_tiny / exp_huge; handling is past the end of this listing
  47                      ROUNDING, ROUNDING_2, ROUNDING_3,  -- handling is past the end of this listing
  48                      DENORM);  -- handling is past the end of this listing
  49
  50     type reg_type is record
             -- Complete registered state of the FPU (signal r); fpu_1 starts each
             -- cycle from a copy of it.
  51         state        : state_t;
  52         busy         : std_ulogic;                      -- drives e_out.busy
  53         instr_done   : std_ulogic;                      -- gates w_out.valid and w_out.write_cr_enable
  54         do_intr      : std_ulogic;                      -- drives e_out.interrupt and suppresses w_out.valid
  55         op           : insn_type_t;                     -- e_in.op of the captured instruction
  56         insn         : std_ulogic_vector(31 downto 0);  -- captured instruction word
  57         dest_fpr     : gspr_index_t;                    -- e_in.frt; drives w_out.write_reg
  58         fe_mode      : std_ulogic;                      -- OR of the e_in.fe_mode bits
  59         rc           : std_ulogic;                      -- e_in.rc; enables the CR write
  60         is_cmp       : std_ulogic;                      -- e_in.out_cr; also enables the CR write
  61         single_prec  : std_ulogic;                      -- e_in.single; selects the single-precision exponent limits
  62         fpscr        : std_ulogic_vector(31 downto 0);  -- FPSCR image, cleared by reset
  63         a            : fpu_reg_type;                    -- decoded e_in.fra
  64         b            : fpu_reg_type;                    -- decoded e_in.frb
  65         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  66         x            : std_ulogic;                      -- cleared in IDLE; passed to fp_rounding as its x argument
  67         result_sign  : std_ulogic;
  68         result_class : fp_number_class;
  69         result_exp   : signed(EXP_BITS-1 downto 0);
  70         shift        : signed(EXP_BITS-1 downto 0);     -- defaults to 0 each cycle; new_exp = result_exp - shift
  71         writing_back : std_ulogic;                      -- drives w_out.write_enable
  72         int_result   : std_ulogic;                      -- cleared on capture; set by DO_MFFS and DO_FCTI
  73         cr_result    : std_ulogic_vector(3 downto 0);   -- replicated into all eight fields of w_out.write_cr_data
  74         cr_mask      : std_ulogic_vector(7 downto 0);   -- num_to_fxm(1), or num_to_fxm(BF) when e_in.out_cr = 1
  75         old_exc      : std_ulogic_vector(4 downto 0);   -- fpscr(FPSCR_VX downto FPSCR_XX) sampled in IDLE
  76         update_fprf  : std_ulogic;                      -- defaults to 0 each cycle; set/used past the end of this listing
  77         quieten_nan  : std_ulogic;                      -- set to 1 on capture, cleared by DO_FMR
  78         tiny         : std_ulogic;                      -- cleared on capture; used past the end of this listing
  79         denorm       : std_ulogic;                      -- cleared on capture; used past the end of this listing
  80         round_mode   : std_ulogic_vector(2 downto 0);   -- '0' & FPSCR RN on capture; "001" for insn(5 downto 1) = "01111"
  81     end record;
  82
  83     signal r, rin : reg_type;  -- r is the registered state; fpu_0 loads rin into it on each clock
  84
         -- Datapath control and status signals.  fpu_1 gives the selects their
         -- defaults at the start of every evaluation (opsel_a = AIN_R,
         -- opsel_b = BIN_ZERO, opsel_r = RES_SUM, the rest 0).  The logic that
         -- consumes them is past the end of this listing.
  85     signal fp_result     : std_ulogic_vector(63 downto 0);  -- drives w_out.write_data
  86     signal opsel_a       : std_ulogic_vector(1 downto 0);
  87     signal opsel_b       : std_ulogic_vector(1 downto 0);
  88     signal opsel_r       : std_ulogic_vector(1 downto 0);
  89     signal opsel_ainv    : std_ulogic;
  90     signal opsel_amask   : std_ulogic;
  91     signal in_a          : std_ulogic_vector(63 downto 0);
  92     signal in_b          : std_ulogic_vector(63 downto 0);
  93     signal result        : std_ulogic_vector(63 downto 0);
  94     signal carry_in      : std_ulogic;
  95     signal lost_bits     : std_ulogic;
  96     signal r_hi_nz       : std_ulogic;                      -- OR of r.r(55 downto 31)
  97     signal r_lo_nz       : std_ulogic;                      -- OR of r.r(30 downto 2)
  98     signal misc_sel      : std_ulogic_vector(3 downto 0);
  99
 100     -- opsel values
         -- AIN_* are assigned to opsel_a, BIN_* to opsel_b, RES_* to opsel_r.
 101     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";
 102     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 103     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 104
 105     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
 106     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 107     constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
 108
 109     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";
 110     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 111     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 112
 113     -- Left and right shifter with 120 bit input and 64 bit output.
 114     -- Shifts inp left by shift bits and returns the upper 64 bits of
 115     -- the result.  The shift parameter is interpreted as a signed
 116     -- number in the range -64..63, with negative values indicating
 117     -- right shifts.
         --
         -- The shift is performed in three multiplexer stages of decreasing
         -- granularity (32, 8 and 1 bit positions).  Positions vacated by the
         -- shift are filled with zeros, so right shifts are logical.
 118     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 119                         shift: std_ulogic_vector(6 downto 0))
 120         return std_ulogic_vector is
 121         variable s1 : std_ulogic_vector(94 downto 0);   -- output of the 32-position stage
 122         variable s2 : std_ulogic_vector(70 downto 0);   -- output of the 8-position stage
 123         variable result : std_ulogic_vector(63 downto 0);
 124     begin
             -- Stage 1: shift(6 downto 5) selects a base offset of 0, +32, -64
             -- or -32 positions and keeps a 95-bit window of the input.
 125         case shift(6 downto 5) is
 126             when "00" =>
 127                 s1 := inp(119 downto 25);
 128             when "01" =>
 129                 s1 := inp(87 downto 0) & "0000000";
 130             when "10" =>
 131                 s1 := x"0000000000000000" & inp(119 downto 89);
 132             when others =>
 133                 s1 := x"00000000" & inp(119 downto 57);
 134         end case;
             -- Stage 2: shift(4 downto 3) moves left by a further 0, 8, 16 or 24
             -- positions and narrows the window to 71 bits.
 135         case shift(4 downto 3) is
 136             when "00" =>
 137                 s2 := s1(94 downto 24);
 138             when "01" =>
 139                 s2 := s1(86 downto 16);
 140             when "10" =>
 141                 s2 := s1(78 downto 8);
 142             when others =>
 143                 s2 := s1(70 downto 0);
 144         end case;
             -- Stage 3: shift(2 downto 0) moves left by a further 0..7 positions
             -- and selects the final 64 bits.
 145         case shift(2 downto 0) is
 146             when "000" =>
 147                 result := s2(70 downto 7);
 148             when "001" =>
 149                 result := s2(69 downto 6);
 150             when "010" =>
 151                 result := s2(68 downto 5);
 152             when "011" =>
 153                 result := s2(67 downto 4);
 154             when "100" =>
 155                 result := s2(66 downto 3);
 156             when "101" =>
 157                 result := s2(65 downto 2);
 158             when "110" =>
 159                 result := s2(64 downto 1);
 160             when others =>
 161                 result := s2(63 downto 0);
 162         end case;
 163         return result;
 164     end;
 165
 166     -- Build the mask of bits that a right shift discards: 0-bits on the
 167     -- left, 1-bits on the right.  The shift argument is the bottom 6 bits
 168     -- of a negative shift count, so 0 stands for a right shift by 64 and
 169     -- 63 for a right shift by 1.
 170     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 171         variable lost : std_ulogic_vector(63 downto 0) := (others => '0');
 172     begin
 173         -- Bit b of the mask is set once its distance from bit 63 reaches shift.
 174         for b in 63 downto 0 loop
 175             if (63 - b) >= shift then
 176                 lost(b) := '1';
 177             end if;
 178         end loop;
 179         return lost;
 180     end;
 181
 182     -- Split a DP floating-point number into components and work out its class.
 183     -- If is_int = 1, the input is instead treated as a 64-bit integer: the
         -- raw bits go into the mantissa, the exponent is 0 and the class is
         -- ZERO or FINITE.  The negative field is bit 63 of the input in both
         -- cases.
 184     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 185         variable r       : fpu_reg_type;
 186         variable exp_nz  : std_ulogic;                   -- exponent field is non-zero
 187         variable exp_ao  : std_ulogic;                   -- exponent field is all ones
 188         variable frac_nz : std_ulogic;                   -- fraction field is non-zero
 189         variable cls     : std_ulogic_vector(2 downto 0);
 190     begin
 191         r.negative := fpr(63);
 192         exp_nz := or (fpr(62 downto 52));
 193         exp_ao := and (fpr(62 downto 52));
 194         frac_nz := or (fpr(51 downto 0));
 195         if is_int = '0' then
                 -- Remove the bias of 1023 from the exponent field.
 196             r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
                 -- A zero exponent field (zero or denormal) is given exponent -1022.
 197             if exp_nz = '0' then
 198                 r.exponent := to_signed(-1022, EXP_BITS);
 199             end if;
                 -- Bit 54 is the implicit leading bit (1 only when the exponent
                 -- field is non-zero), followed by the 52 fraction bits and two
                 -- zero bits at the bottom.
 200             r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";
 201             cls := exp_ao & exp_nz & frac_nz;
 202             case cls is
 203                 when "000"  => r.class := ZERO;
 204                 when "001"  => r.class := FINITE;    -- denormalized
 205                 when "010"  => r.class := FINITE;
 206                 when "011"  => r.class := FINITE;
 207                 when "110"  => r.class := INFINITY;
                     -- "111" is a NaN; "100"/"101" cannot occur for a valid input
                     -- because an all-ones exponent field is also non-zero.
 208                 when others => r.class := NAN;
 209             end case;
 210         else
 211             r.mantissa := fpr;
 212             r.exponent := (others => '0');
                 -- The integer is ZERO only when all 64 bits are zero.
 213             if (fpr(63) or exp_nz or frac_nz) = '1' then
 214                 r.class := FINITE;
 215             else
 216                 r.class := ZERO;
 217             end if;
 218         end if;
 219         return r;
 220     end;
 221
 222     -- Construct a DP floating-point result from components
         -- The mantissa is read with its unit bit at index 54 and fraction bits
         -- at 53 downto 2.  With single_prec = 1 the low 29 fraction bits of the
         -- result are left at zero.  Returns the packed 64-bit value.
 223     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 224                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 225         return std_ulogic_vector is
 226         variable result : std_ulogic_vector(63 downto 0);
 227     begin
 228         result := (others => '0');
 229         result(63) := sign;
 230         case class is
                 -- Zero: only the sign bit is set.
 231             when ZERO =>
 232             when FINITE =>
                     -- When the unit bit is clear the exponent field stays zero,
                     -- i.e. the value is packed as a denormal.
 233                 if mantissa(54) = '1' then
 234                     -- normalized number
 235                     result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
 236                 end if;
 237                 result(51 downto 29) := mantissa(53 downto 31);
 238                 if single_prec = '0' then
 239                     result(28 downto 0) := mantissa(30 downto 2);
 240                 end if;
 241             when INFINITY =>
                     -- All-ones exponent with a zero fraction.
 242                 result(62 downto 52) := "11111111111";
 243             when NAN =>
 244                 result(62 downto 52) := "11111111111";
                     -- The top fraction bit is forced to 1 when quieten_nan is set;
                     -- the remaining fraction bits are copied through.
 245                 result(51) := quieten_nan or mantissa(53);
 246                 result(50 downto 29) := mantissa(52 downto 31);
 247                 if single_prec = '0' then
 248                     result(28 downto 0) := mantissa(30 downto 2);
 249                 end if;
 250         end case;
 251         return result;
 252     end;
 253
 254     -- Work out whether rounding must increment the mantissa.
 255     -- The 2-bit return value is rounding_inc & inexact.
 256     -- When single_prec = 1, x is assumed to already include the bottom 29
 257     -- bits of the mantissa (usually arranged by setting set_x = 1 earlier).
 258     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 259                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 260                          sign: std_ulogic)
 261         return std_ulogic_vector is
 262         variable low3     : std_ulogic_vector(2 downto 0);  -- two bits below the kept LSB, then x
 263         variable verdict  : std_ulogic_vector(1 downto 0);  -- (1) = increment, (0) = inexact
 264         variable lsb_kept : std_ulogic;                     -- lowest mantissa bit that is kept
 265     begin
 266         if single_prec /= '0' then
 267             low3 := mantissa(30 downto 29) & x;
 268             lsb_kept := mantissa(31);
 269         else
 270             low3 := mantissa(1 downto 0) & x;
 271             lsb_kept := mantissa(2);
 272         end if;
 273         -- Inexact whenever any discarded bit is set; no increment by default.
 274         verdict(0) := or (low3);
 275         verdict(1) := '0';
 276         if rn(1 downto 0) = "00" then
 277             -- Round to nearest: an exact tie goes to even unless rn(2) is set.
 278             if low3 = "100" and rn(2) = '0' then
 279                 verdict(1) := lsb_kept;
 280             else
 281                 verdict(1) := low3(2);
 282             end if;
 283         elsif rn(1 downto 0) = "01" then
 284             null;  -- round towards zero: never increment
 285         elsif rn(0) = sign then
 286             -- Round towards +/- infinity, in the direction of greater magnitude.
 287             verdict(1) := verdict(0);
 288         end if;
 289         return verdict;
 290     end;
 291
 292     -- Compute the 5-bit flags value to write into the FPSCR for a result
 293     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 294         return std_ulogic_vector is
 295         variable flags : std_ulogic_vector(0 to 4) := "10001";  -- value returned for NAN
 296     begin
 297         -- unitbit only matters for FINITE, where the leading flag is its inverse.
 298         if class = ZERO then
 299             flags := sign & "0010";
 300         elsif class = FINITE then
 301             flags := (not unitbit) & sign & (not sign) & "00";
 302         elsif class = INFINITY then
 303             flags := '0' & sign & (not sign) & "01";
 304         end if;
 305         return flags;
 306     end;
 307
 308 begin
 309     fpu_0: process(clk)
             -- State register: loads rin into r on every rising clock edge.
             -- Reset is synchronous and only initializes the fields listed
             -- below; all other fields of r keep whatever value they had.
 310     begin
 311         if rising_edge(clk) then
 312             if rst = '1' then
 313                 r.state <= IDLE;
 314                 r.busy <= '0';
 315                 r.instr_done <= '0';
 316                 r.do_intr <= '0';
 317                 r.fpscr <= (others => '0');
 318                 r.writing_back <= '0';
 319             else
                     -- Sanity check: a new instruction must only be presented
                     -- while the state machine is in IDLE.
 320                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
 321                 r <= rin;
 322             end if;
 323         end if;
 324     end process;
 325
 326     e_out.busy <= r.busy;
         -- The exception output reflects the FEX bit of the FPSCR image.
 327     e_out.exception <= r.fpscr(FPSCR_FEX);
 328     e_out.interrupt <= r.do_intr;
 329
         -- Completion is not reported as valid when an interrupt is being raised.
 330     w_out.valid <= r.instr_done and not r.do_intr;
 331     w_out.write_enable <= r.writing_back;
 332     w_out.write_reg <= r.dest_fpr;
 333     w_out.write_data <= fp_result;
         -- CR is written on completion of record-form (rc) and compare
         -- (is_cmp) instructions.  The 4-bit result is copied into all eight
         -- CR fields; presumably write_cr_mask selects which field is actually
         -- updated -- confirm against the writeback stage.
 334     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
 335     w_out.write_cr_mask <= r.cr_mask;
 336     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
 337                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;
 338
 339     fpu_1: process(all)
 340         variable v           : reg_type;
 341         variable adec        : fpu_reg_type;
 342         variable bdec        : fpu_reg_type;
 343         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 344         variable illegal     : std_ulogic;
 345         variable j, k        : integer;
 346         variable flm         : std_ulogic_vector(7 downto 0);
 347         variable int_input   : std_ulogic;
 348         variable mask        : std_ulogic_vector(63 downto 0);
 349         variable in_a0       : std_ulogic_vector(63 downto 0);
 350         variable in_b0       : std_ulogic_vector(63 downto 0);
 351         variable misc        : std_ulogic_vector(63 downto 0);
 352         variable shift_res   : std_ulogic_vector(63 downto 0);
 353         variable round       : std_ulogic_vector(1 downto 0);
 354         variable update_fx   : std_ulogic;
 355         variable arith_done  : std_ulogic;
 356         variable invalid     : std_ulogic;
 357         variable mant_nz     : std_ulogic;
 358         variable min_exp     : signed(EXP_BITS-1 downto 0);
 359         variable max_exp     : signed(EXP_BITS-1 downto 0);
 360         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 361         variable new_exp     : signed(EXP_BITS-1 downto 0);
 362         variable exp_tiny    : std_ulogic;
 363         variable exp_huge    : std_ulogic;
 364         variable renormalize : std_ulogic;
 365         variable clz         : std_ulogic_vector(5 downto 0);
 366         variable set_x       : std_ulogic;
 367         variable mshift      : signed(EXP_BITS-1 downto 0);
 368         variable need_check  : std_ulogic;
 369         variable msb         : std_ulogic;
 370     begin
 371         v := r;
 372         illegal := '0';
 373         v.busy := '0';
 374         int_input := '0';
 375
 376         -- capture incoming instruction
 377         if e_in.valid = '1' then
 378             v.insn := e_in.insn;
 379             v.op := e_in.op;
 380             v.fe_mode := or (e_in.fe_mode);
 381             v.dest_fpr := e_in.frt;
 382             v.single_prec := e_in.single;
 383             v.int_result := '0';
 384             v.rc := e_in.rc;
 385             v.is_cmp := e_in.out_cr;
 386             if e_in.out_cr = '0' then
 387                 v.cr_mask := num_to_fxm(1);
 388             else
 389                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 390             end if;
 391             int_input := '0';
 392             if e_in.op = OP_FPOP_I then
 393                 int_input := '1';
 394             end if;
 395             v.quieten_nan := '1';
 396             v.tiny := '0';
 397             v.denorm := '0';
 398             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 399             adec := decode_dp(e_in.fra, int_input);
 400             bdec := decode_dp(e_in.frb, int_input);
 401             v.a := adec;
 402             v.b := bdec;
 403         end if;
 404
 405         r_hi_nz <= or (r.r(55 downto 31));
 406         r_lo_nz <= or (r.r(30 downto 2));
 407
 408         if r.single_prec = '0' then
 409             max_exp := to_signed(1023, EXP_BITS);
 410             min_exp := to_signed(-1022, EXP_BITS);
 411             bias_exp := to_signed(1536, EXP_BITS);
 412         else
 413             max_exp := to_signed(127, EXP_BITS);
 414             min_exp := to_signed(-126, EXP_BITS);
 415             bias_exp := to_signed(192, EXP_BITS);
 416         end if;
 417         new_exp := r.result_exp - r.shift;
 418         exp_tiny := '0';
 419         exp_huge := '0';
 420         if new_exp < min_exp then
 421             exp_tiny := '1';
 422         end if;
 423         if new_exp > max_exp then
 424             exp_huge := '1';
 425         end if;
 426
 427         v.writing_back := '0';
 428         v.instr_done := '0';
 429         v.update_fprf := '0';
 430         v.shift := to_signed(0, EXP_BITS);
 431         opsel_a <= AIN_R;
 432         opsel_ainv <= '0';
 433         opsel_amask <= '0';
 434         opsel_b <= BIN_ZERO;
 435         opsel_r <= RES_SUM;
 436         carry_in <= '0';
 437         misc_sel <= "0000";
 438         fpscr_mask := (others => '1');
 439         update_fx := '0';
 440         arith_done := '0';
 441         invalid := '0';
 442         renormalize := '0';
 443         set_x := '0';
 444
 445         case r.state is
 446             when IDLE =>
 447                 if e_in.valid = '1' then
 448                     case e_in.insn(5 downto 1) is
 449                         when "00000" =>
 450                             v.state := DO_MCRFS;
 451                         when "00110" =>
 452                             if e_in.insn(8) = '0' then
 453                                 v.state := DO_MTFSB;
 454                             else
 455                                 v.state := DO_MTFSFI;
 456                             end if;
 457                         when "00111" =>
 458                             if e_in.insn(8) = '0' then
 459                                 v.state := DO_MFFS;
 460                             else
 461                                 v.state := DO_MTFSF;
 462                             end if;
 463                         when "01000" =>
 464                             v.state := DO_FMR;
 465                         when "01100" =>
 466                             v.state := DO_FRSP;
 467                         when "01110" =>
 468                             if int_input = '1' then
 469                                 -- fcfid[u][s]
 470                                 v.state := DO_FCFID;
 471                             else
 472                                 v.state := DO_FCTI;
 473                             end if;
 474                         when "01111" =>
 475                             v.round_mode := "001";
 476                             v.state := DO_FCTI;
 477                         when others =>
 478                             illegal := '1';
 479                     end case;
 480                 end if;
 481                 v.x := '0';
 482                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 483
 484             when DO_MCRFS =>
 485                 j := to_integer(unsigned(insn_bfa(r.insn)));
 486                 for i in 0 to 7 loop
 487                     if i = j then
 488                         k := (7 - i) * 4;
 489                         v.cr_result := r.fpscr(k + 3 downto k);
 490                         fpscr_mask(k + 3 downto k) := "0000";
 491                     end if;
 492                 end loop;
 493                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 494                 v.instr_done := '1';
 495                 v.state := IDLE;
 496
 497             when DO_MTFSB =>
 498                 -- mtfsb{0,1}
 499                 j := to_integer(unsigned(insn_bt(r.insn)));
 500                 for i in 0 to 31 loop
 501                     if i = j then
 502                         v.fpscr(31 - i) := r.insn(6);
 503                     end if;
 504                 end loop;
 505                 v.instr_done := '1';
 506                 v.state := IDLE;
 507
 508             when DO_MTFSFI =>
 509                 -- mtfsfi
 510                 j := to_integer(unsigned(insn_bf(r.insn)));
 511                 if r.insn(16) = '0' then
 512                     for i in 0 to 7 loop
 513                         if i = j then
 514                             k := (7 - i) * 4;
 515                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 516                         end if;
 517                     end loop;
 518                 end if;
 519                 v.instr_done := '1';
 520                 v.state := IDLE;
 521
 522             when DO_MFFS =>
 523                 v.int_result := '1';
 524                 v.writing_back := '1';
 525                 opsel_r <= RES_MISC;
 526                 case r.insn(20 downto 16) is
 527                     when "00000" =>
 528                         -- mffs
 529                     when "00001" =>
 530                         -- mffsce
 531                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
 532                     when "10100" | "10101" =>
 533                         -- mffscdrn[i] (but we don't implement DRN)
 534                         fpscr_mask := x"000000FF";
 535                     when "10110" =>
 536                         -- mffscrn
 537                         fpscr_mask := x"000000FF";
 538                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
 539                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
 540                     when "10111" =>
 541                         -- mffscrni
 542                         fpscr_mask := x"000000FF";
 543                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
 544                     when "11000" =>
 545                         -- mffsl
 546                         fpscr_mask := x"0007F0FF";
 547                     when others =>
 548                         illegal := '1';
 549                 end case;
 550                 v.instr_done := '1';
 551                 v.state := IDLE;
 552
 553             when DO_MTFSF =>
 554                 if r.insn(25) = '1' then
 555                     flm := x"FF";
 556                 elsif r.insn(16) = '1' then
 557                     flm := x"00";
 558                 else
 559                     flm := r.insn(24 downto 17);
 560                 end if;
 561                 for i in 0 to 7 loop
 562                     k := i * 4;
 563                     if flm(i) = '1' then
 564                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
 565                     end if;
 566                 end loop;
 567                 v.instr_done := '1';
 568                 v.state := IDLE;
 569
 570             when DO_FMR =>
 571                 opsel_a <= AIN_B;
 572                 v.result_class := r.b.class;
 573                 v.result_exp := r.b.exponent;
 574                 v.quieten_nan := '0';
 575                 if r.insn(9) = '1' then
 576                     v.result_sign := '0';              -- fabs
 577                 elsif r.insn(8) = '1' then
 578                     v.result_sign := '1';              -- fnabs
 579                 elsif r.insn(7) = '1' then
 580                     v.result_sign := r.b.negative;     -- fmr
 581                 elsif r.insn(6) = '1' then
 582                     v.result_sign := not r.b.negative; -- fneg
 583                 else
 584                     v.result_sign := r.a.negative;     -- fcpsgn
 585                 end if;
 586                 v.writing_back := '1';
 587                 v.instr_done := '1';
 588                 v.state := IDLE;
 589
 590             when DO_FRSP =>
 591                 opsel_a <= AIN_B;
 592                 v.result_class := r.b.class;
 593                 v.result_sign := r.b.negative;
 594                 v.result_exp := r.b.exponent;
 595                 v.fpscr(FPSCR_FR) := '0';
 596                 v.fpscr(FPSCR_FI) := '0';
 597                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
 598                     -- Signalling NAN
 599                     v.fpscr(FPSCR_VXSNAN) := '1';
 600                     invalid := '1';
 601                 end if;
 602                 set_x := '1';
 603                 if r.b.class = FINITE then
 604                     if r.b.exponent < to_signed(-126, EXP_BITS) then
 605                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
 606                         v.state := ROUND_UFLOW;
 607                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
 608                         v.state := ROUND_OFLOW;
 609                     else
 610                         v.shift := to_signed(-2, EXP_BITS);
 611                         v.state := ROUNDING;
 612                     end if;
 613                 else
 614                     arith_done := '1';
 615                 end if;
 616
 617             when DO_FCTI =>
 618                 -- instr bit 9: 1=dword 0=word
 619                 -- instr bit 8: 1=unsigned 0=signed
 620                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
 621                 opsel_a <= AIN_B;
 622                 v.result_class := r.b.class;
 623                 v.result_sign := r.b.negative;
 624                 v.result_exp := r.b.exponent;
 625                 v.fpscr(FPSCR_FR) := '0';
 626                 v.fpscr(FPSCR_FI) := '0';
 627                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
 628                     -- Signalling NAN
 629                     v.fpscr(FPSCR_VXSNAN) := '1';
 630                     invalid := '1';
 631                 end if;
 632
 633                 v.int_result := '1';
 634                 case r.b.class is
 635                     when ZERO =>
 636                         arith_done := '1';
 637                     when FINITE =>
 638                         if r.b.exponent >= to_signed(64, EXP_BITS) or
 639                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
 640                             v.state := INT_OFLOW;
 641                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
 642                             -- integer already, no rounding required,
 643                             -- shift into final position
 644                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
 645                             if r.insn(8) = '1' and r.b.negative = '1' then
 646                                 v.state := INT_OFLOW;
 647                             else
 648                                 v.state := INT_ISHIFT;
 649                             end if;
 650                         else
 651                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
 652                             v.state := INT_SHIFT;
 653                         end if;
 654                     when INFINITY | NAN =>
 655                         v.state := INT_OFLOW;
 656                 end case;
 657
 658             when DO_FCFID =>
 659                 v.result_sign := '0';
 660                 opsel_a <= AIN_B;
 661                 if r.insn(8) = '0' and r.b.negative = '1' then
 662                     -- fcfid[s] with negative operand, set R = -B
 663                     opsel_ainv <= '1';
 664                     carry_in <= '1';
 665                     v.result_sign := '1';
 666                 end if;
 667                 v.result_class := r.b.class;
 668                 v.result_exp := to_signed(54, EXP_BITS);
 669                 v.fpscr(FPSCR_FR) := '0';
 670                 v.fpscr(FPSCR_FI) := '0';
 671                 if r.b.class = ZERO then
 672                     arith_done := '1';
 673                 else
 674                     v.state := FINISH;
 675                 end if;
 676
 677             when INT_SHIFT =>
 678                 opsel_r <= RES_SHIFT;
 679                 set_x := '1';
 680                 v.state := INT_ROUND;
 681                 v.shift := to_signed(-2, EXP_BITS);
 682
 683             when INT_ROUND =>
 684                 opsel_r <= RES_SHIFT;
 685                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
 686                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
 687                 -- Check for negative values that don't round to 0 for fcti*u*
 688                 if r.insn(8) = '1' and r.result_sign = '1' and
 689                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
 690                     v.state := INT_OFLOW;
 691                 else
 692                     v.state := INT_FINAL;
 693                 end if;
 694
 695             when INT_ISHIFT =>
 696                 opsel_r <= RES_SHIFT;
 697                 v.state := INT_FINAL;
 698
 699             when INT_FINAL =>
 700                 -- Negate if necessary, and increment for rounding if needed
 701                 opsel_ainv <= r.result_sign;
 702                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
 703                 -- Check for possible overflows
 704                 case r.insn(9 downto 8) is
 705                     when "00" =>        -- fctiw[z]
 706                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
 707                     when "01" =>        -- fctiwu[z]
 708                         need_check := r.r(31);
 709                     when "10" =>        -- fctid[z]
 710                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
 711                     when others =>      -- fctidu[z]
 712                         need_check := r.r(63);
 713                 end case;
 714                 if need_check = '1' then
 715                     v.state := INT_CHECK;
 716                 else
 717                     if r.fpscr(FPSCR_FI) = '1' then
 718                         v.fpscr(FPSCR_XX) := '1';
 719                     end if;
 720                     arith_done := '1';
 721                 end if;
 722
 723             when INT_CHECK =>
                     -- Reached from INT_FINAL when need_check was set.  Examines the
                     -- top bit of the value now in R to decide whether the integer
                     -- conversion overflowed.
                     -- insn(9) selects the result width (0: bit 31 is the top bit,
                     -- 1: bit 63) and insn(8) the signedness (0: signed forms,
                     -- 1: unsigned forms), matching the decode in INT_FINAL.
 724                 if r.insn(9) = '0' then
 725                     msb := r.r(31);
 726                 else
 727                     msb := r.r(63);
 728                 end if;
                     -- Index into the RES_MISC table of saturated results; it only
                     -- matters when opsel_r is switched to RES_MISC below.
 729                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
                     -- Signed: overflow if the top bit disagrees with result_sign.
                     -- Unsigned: overflow if the top bit is not set.
 730                 if (r.insn(8) = '0' and msb /= r.result_sign) or
 731                     (r.insn(8) = '1' and msb /= '1') then
 732                     opsel_r <= RES_MISC;
 733                     v.fpscr(FPSCR_VXCVI) := '1';
 734                     invalid := '1';
 735                 else
                         -- No overflow: opsel_r keeps its default (assigned outside
                         -- this part of the file).  FI set also sets XX.
 736                     if r.fpscr(FPSCR_FI) = '1' then
 737                         v.fpscr(FPSCR_XX) := '1';
 738                     end if;
 739                 end if;
                     -- The instruction completes in this cycle either way.
 740                 arith_done := '1';
 741
 742             when INT_OFLOW =>
                     -- Unconditionally replaces the result with a saturated value
                     -- from the RES_MISC table, flags an invalid conversion
                     -- (VXCVI) and completes the instruction.
 743                 opsel_r <= RES_MISC;
                     -- Table index: '1' & insn(9 downto 8) & sign.
 744                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
 745                 if r.b.class = NAN then
                         -- A NaN operand always selects the "max negative" entry,
                         -- regardless of result_sign.
 746                     misc_sel(0) <= '1';
 747                 end if;
 748                 v.fpscr(FPSCR_VXCVI) := '1';
 749                 invalid := '1';
 750                 arith_done := '1';
 751
 752             when FINISH =>
                     -- R is in position when bit 54 is its most significant set bit
                     -- (bits 63:55 all zero).  Otherwise request a leading-zero
                     -- count (renormalize) and go through NORMALIZE first.
 753                 if r.r(63 downto 54) /= "0000000001" then
 754                     renormalize := '1';
 755                     v.state := NORMALIZE;
 756                 else
                         -- Let the data path set v.x if any A-input bits covered by
                         -- the current mask are non-zero.
 757                     set_x := '1';
                         -- exp_tiny, exp_huge, new_exp and min_exp are computed
                         -- outside this part of the file.
 758                     if exp_tiny = '1' then
                             -- Shift amount used by ROUND_UFLOW.
 759                         v.shift := new_exp - min_exp;
 760                         v.state := ROUND_UFLOW;
 761                     elsif exp_huge = '1' then
 762                         v.state := ROUND_OFLOW;
 763                     else
                             -- ROUNDING builds its mask from r.shift; the data path
                             -- adds a further -29 for single precision.
 764                         v.shift := to_signed(-2, EXP_BITS);
 765                         v.state := ROUNDING;
 766                     end if;
 767                 end if;
 768
 769             when NORMALIZE =>
 770                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
                     -- r.shift was loaded with (leading zero count - 9) by the
                     -- renormalize logic in the cycle that requested this state.
                     -- RES_SHIFT also updates result_exp from new_exp.
 771                 opsel_r <= RES_SHIFT;
                     -- Let the data path set v.x if any A-input bits covered by the
                     -- current mask are non-zero.
 772                 set_x := '1';
                     -- Same exponent-range dispatch as in FINISH.
 773                 if exp_tiny = '1' then
 774                     v.shift := new_exp - min_exp;
 775                     v.state := ROUND_UFLOW;
 776                 elsif exp_huge = '1' then
 777                     v.state := ROUND_OFLOW;
 778                 else
 779                     v.shift := to_signed(-2, EXP_BITS);
 780                     v.state := ROUNDING;
 781                 end if;
 782
 783             when ROUND_UFLOW =>
                     -- Exponent below the minimum for the target precision.
                     -- Remember that in r.tiny so ROUNDING can raise UX when the
                     -- result turns out to be inexact.
 784                 v.tiny := '1';
 785                 if r.fpscr(FPSCR_UE) = '0' then
 786                     -- disabled underflow exception case
 787                     -- have to denormalize before rounding
                         -- The shift amount (new_exp - min_exp) was placed in
                         -- r.shift by the state that branched here.
 788                     opsel_r <= RES_SHIFT;
 789                     set_x := '1';
 790                     v.shift := to_signed(-2, EXP_BITS);
 791                     v.state := ROUNDING;
 792                 else
 793                     -- enabled underflow exception case
 794                     -- if denormalized, have to normalize before rounding
                         -- UX is set immediately and the exponent is adjusted by
                         -- adding bias_exp (defined outside this part of the file).
 795                     v.fpscr(FPSCR_UX) := '1';
 796                     v.result_exp := r.result_exp + bias_exp;
 797                     if r.r(54) = '0' then
                             -- Bit 54 clear: R is not normalized yet.
 798                         renormalize := '1';
 799                         v.state := NORMALIZE;
 800                     else
 801                         v.shift := to_signed(-2, EXP_BITS);
 802                         v.state := ROUNDING;
 803                     end if;
 804                 end if;
 805
 806             when ROUND_OFLOW =>
                     -- Exponent too large for the target precision: OX is always set.
 807                 v.fpscr(FPSCR_OX) := '1';
 808                 if r.fpscr(FPSCR_OE) = '0' then
 809                     -- disabled overflow exception
 810                     -- result depends on rounding mode
 811                     v.fpscr(FPSCR_XX) := '1';
 812                     v.fpscr(FPSCR_FI) := '1';
                         -- Infinity is produced for mode "00", or when mode bit 1
                         -- is set and mode bit 0 equals the result sign.
                         -- NOTE(review): this matches the Power ISA FPSCR[RN]
                         -- encoding (00 nearest, 10 toward +inf, 11 toward -inf)
                         -- if round_mode(1 downto 0) mirrors RN; confirm where
                         -- round_mode is loaded.
 813                     if r.round_mode(1 downto 0) = "00" or
 814                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
 815                         v.result_class := INFINITY;
 816                         v.fpscr(FPSCR_FR) := '1';
 817                     else
 818                         v.fpscr(FPSCR_FR) := '0';
 819                     end if;
 820                     -- construct largest representable number
                         -- misc_sel "0010"/"0011" pick the DP/SP maximum mantissa
                         -- from the RES_MISC table in the data path.
 821                     v.result_exp := max_exp;
 822                     opsel_r <= RES_MISC;
 823                     misc_sel <= "001" & r.single_prec;
 824                     arith_done := '1';
 825                 else
 826                     -- enabled overflow exception
                         -- The exponent is adjusted by subtracting bias_exp and the
                         -- value is then rounded normally.
 827                     v.result_exp := r.result_exp - bias_exp;
 828                     v.shift := to_signed(-2, EXP_BITS);
 829                     v.state := ROUNDING;
 830                 end if;
 831
 832             when ROUNDING =>
                     -- Apply the mask to the A input (A and not mask), clearing the
                     -- bits selected by the mask that the data path derives from
                     -- r.shift.
 833                 opsel_amask <= '1';
                     -- fp_rounding (defined elsewhere) returns two bits which are
                     -- stored directly into FPSCR: round(1) -> FR, round(0) -> FI.
 834                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
 835                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
 836                 if round(1) = '1' then
 837                     -- set mask to increment the LSB for the precision
                         -- Sum is (A and not mask) + mask + 1.
                         -- NOTE(review): this is +1 at the lowest kept bit provided
                         -- the mask is a contiguous run of low-order ones; confirm
                         -- against right_mask.
 838                     opsel_b <= BIN_MASK;
 839                     carry_in <= '1';
                         -- Shift amount for the possible post-rounding shift in
                         -- ROUNDING_2.
 840                     v.shift := to_signed(-1, EXP_BITS);
 841                     v.state := ROUNDING_2;
 842                 else
 843                     if r.r(54) = '0' then
 844                         -- result after masking could be zero, or could be a
 845                         -- denormalized result that needs to be renormalized
 846                         renormalize := '1';
 847                         v.state := ROUNDING_3;
 848                     else
 849                         arith_done := '1';
 850                     end if;
 851                 end if;
                     -- round(0) set: record XX, and also UX if ROUND_UFLOW marked
                     -- the value as tiny.
 852                 if round(0) = '1' then
 853                     v.fpscr(FPSCR_XX) := '1';
 854                     if r.tiny = '1' then
 855                         v.fpscr(FPSCR_UX) := '1';
 856                     end if;
 857                 end if;
 858
 859             when ROUNDING_2 =>
 860                 -- Check for overflow during rounding
                     -- R now holds the incremented value from ROUNDING.
 861                 v.x := '0';
 862                 if r.r(55) = '1' then
                         -- The increment carried into bit 55: shift by r.shift (-1,
                         -- loaded in ROUNDING) and take the updated exponent.
                         -- NOTE(review): presumably a one-bit right shift; the
                         -- direction is defined by shifter_64, not visible here.
 863                     opsel_r <= RES_SHIFT;
 864                     if exp_huge = '1' then
 865                         v.state := ROUND_OFLOW;
 866                     else
 867                         arith_done := '1';
 868                     end if;
 869                 elsif r.r(54) = '0' then
 870                     -- Do CLZ so we can renormalize the result
 871                     renormalize := '1';
 872                     v.state := ROUNDING_3;
 873                 else
                         -- Bit 54 set and no carry into bit 55: finished.
 874                     arith_done := '1';
 875                 end if;
 876
 877             when ROUNDING_3 =>
                     -- Entered with bit 54 of R clear after rounding; r.shift holds
                     -- (leading zero count - 9) from the renormalize logic.
                     -- r_hi_nz and r_lo_nz are computed outside this part of the
                     -- file; the r_lo_nz term is ignored for single precision.
 878                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
 879                 if mant_nz = '0' then
                         -- Nothing left after rounding: the result is zero.
 880                     v.result_class := ZERO;
 881                     arith_done := '1';
 882                 else
 883                     -- Renormalize result after rounding
 884                     opsel_r <= RES_SHIFT;
 885                     v.denorm := exp_tiny;
                         -- -1022 is the minimum normal exponent of IEEE 754 double
                         -- precision.  If the renormalized exponent would fall
                         -- below it, DENORM shifts by the (negative) difference
                         -- loaded into v.shift here.
 886                     v.shift := new_exp - to_signed(-1022, EXP_BITS);
 887                     if new_exp < to_signed(-1022, EXP_BITS) then
 888                         v.state := DENORM;
 889                     else
 890                         arith_done := '1';
 891                     end if;
 892                 end if;
 893
 894             when DENORM =>
                     -- Final shift by the amount ROUNDING_3 placed in r.shift
                     -- (new_exp - (-1022)), then complete the instruction.
 895                 opsel_r <= RES_SHIFT;
 896                 arith_done := '1';
 897
 898         end case;
 899
 900         if arith_done = '1' then
                 -- Common completion path for every state that sets arith_done:
                 -- mark the instruction done, return to IDLE and request the
                 -- FPSCR FX update performed further down.
 901             -- Enabled invalid exception doesn't write result or FPRF
 902             if (invalid and r.fpscr(FPSCR_VE)) = '0' then
 903                 v.writing_back := '1';
 904                 v.update_fprf := '1';
 905             end if;
 906             v.instr_done := '1';
 907             v.state := IDLE;
 908             update_fx := '1';
 909         end if;
 910
 911         -- Data path.
 912         -- This has A and B input multiplexers, an adder, a shifter,
 913         -- count-leading-zeroes logic, and a result mux.
             --
             -- Mask generation: the mask is derived from r.shift, moved by a
             -- further 29 bit positions for single precision.  It is used for
             -- the set_x test, for masking the A input and as the BIN_MASK
             -- value on the B input.
 914         if r.single_prec = '1' then
 915             mshift := r.shift + to_signed(-29, EXP_BITS);
 916         else
 917             mshift := r.shift;
 918         end if;
             -- mshift below -64 selects every bit, mshift >= 0 selects none.
             -- NOTE(review): right_mask is defined elsewhere; presumably it sets
             -- the low (-mshift) bits.  mshift = -64 reaches it with an index
             -- of 0 -- confirm that it yields all ones for that input.
 919         if mshift < to_signed(-64, EXP_BITS) then
 920             mask := (others => '1');
 921         elsif mshift >= to_signed(0, EXP_BITS) then
 922             mask := (others => '0');
 923         else
 924             mask := right_mask(unsigned(mshift(5 downto 0)));
 925         end if;
 926         case opsel_a is
                 -- A-input multiplexer: R, or the mantissa of operand A or B.
 927             when AIN_R =>
 928                 in_a0 := r.r;
 929             when AIN_A =>
 930                 in_a0 := r.a.mantissa;
 931             when others =>
 932                 in_a0 := r.b.mantissa;
 933         end case;
             -- Sticky-bit collection: tested on the raw selected value, before
             -- the optional inversion and masking below.  v.x is only ever set
             -- here, never cleared.
 934         if (or (mask and in_a0)) = '1' and set_x = '1' then
 935             v.x := '1';
 936         end if;
             -- Optional bitwise inversion (used for negation together with carry_in).
 937         if opsel_ainv = '1' then
 938             in_a0 := not in_a0;
 939         end if;
             -- Optional clearing of the bits covered by the mask; applied after
             -- the inversion.
 940         if opsel_amask = '1' then
 941             in_a0 := in_a0 and not mask;
 942         end if;
 943         in_a <= in_a0;
 944         case opsel_b is
                 -- B-input multiplexer: zero, R, or the mask computed above.
                 -- Any other selector value also yields zero.
 945             when BIN_ZERO =>
 946                 in_b0 := (others => '0');
 947             when BIN_R =>
 948                 in_b0 := r.r;
 949             when BIN_MASK =>
 950                 in_b0 := mask;
 951             when others =>
 952                 in_b0 := (others => '0');
 953         end case;
 954         in_b <= in_b0;
 955         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
                 -- Shifter: R extended with 56 zero bits on the right is shifted
                 -- by the low 7 bits of r.shift.  shifter_64 is defined elsewhere.
 956             shift_res := shifter_64(r.r & x"00000000000000",
 957                                     std_ulogic_vector(r.shift(6 downto 0)));
 958         else
                 -- Shift amounts outside -64 .. 63 produce an all-zero result.
 959             shift_res := (others => '0');
 960         end if;
 961         case opsel_r is
                 -- Result multiplexer: adder output, shifter output, or a constant
                 -- chosen by misc_sel.
 962             when RES_SUM =>
                     -- in_a and in_b are signals assigned just above.
                     -- NOTE(review): reading them here relies on the process being
                     -- re-evaluated when they change; confirm the sensitivity
                     -- list (not visible in this part of the file).
 963                 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
 964             when RES_SHIFT =>
 965                 result <= shift_res;
 966             when others =>
                     -- misc_sel encodings used by the states above:
                     --   "001" & single_prec       : largest finite mantissa
                     --   '1' & insn(9:8) & sign    : saturated integer results
 967                 case misc_sel is
 968                     when "0000" =>
                             -- FPSCR masked with fpscr_mask, upper 32 bits zero
 969                         misc := x"00000000" & (r.fpscr and fpscr_mask);
 970                     when "0010" =>
 971                         -- mantissa of max representable DP number
 972                         misc := x"007ffffffffffffc";
 973                     when "0011" =>
 974                         -- mantissa of max representable SP number
 975                         misc := x"007fffff80000000";
 976                     when "1000" =>
 977                         -- max positive result for fctiw[z]
 978                         misc := x"000000007fffffff";
 979                     when "1001" =>
 980                         -- max negative result for fctiw[z]
 981                         misc := x"ffffffff80000000";
 982                     when "1010" =>
 983                         -- max positive result for fctiwu[z]
 984                         misc := x"00000000ffffffff";
 985                     when "1011" =>
 986                         -- max negative result for fctiwu[z]
 987                         misc := x"0000000000000000";
 988                     when "1100" =>
 989                         -- max positive result for fctid[z]
 990                         misc := x"7fffffffffffffff";
 991                     when "1101" =>
 992                         -- max negative result for fctid[z]
 993                         misc := x"8000000000000000";
 994                     when "1110" =>
 995                         -- max positive result for fctidu[z]
 996                         misc := x"ffffffffffffffff";
 997                     when "1111" =>
 998                         -- max negative result for fctidu[z]
 999                         misc := x"0000000000000000";
1000                     when others =>
1001                         misc := x"0000000000000000";
1002                 end case;
1003                 result <= misc;
1004         end case;
             -- Next value of R.  "result" is a signal, so like in_a/in_b its
             -- value here depends on the process being re-evaluated after the
             -- assignment above.
1005         v.r := result;
1006
1007         if opsel_r = RES_SHIFT then
                 -- Whenever the shifter output is selected the result exponent is
                 -- replaced by new_exp (computed outside this part of the file).
                 -- This overrides any v.result_exp assignment made by the state
                 -- machine earlier in the same cycle.
1008             v.result_exp := new_exp;
1009         end if;
1010
1011         if renormalize = '1' then
                 -- Count the leading zeroes of R and load v.shift with the amount
                 -- that leaves exactly 9 of them (most significant set bit at
                 -- bit 54).  This overrides any v.shift assignment made by the
                 -- state machine earlier in the same cycle.
1012             clz := count_left_zeroes(r.r);
1013             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
1014         end if;
1015
1016         if r.int_result = '1' then
                 -- Integer results are passed through from R unchanged.
1017             fp_result <= r.r;
1018         else
                 -- Floating-point results are assembled by pack_dp (defined
                 -- elsewhere) from sign, class, exponent and R.
1019             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
1020                                  r.single_prec, r.quieten_nan);
1021         end if;
1022         if r.update_fprf = '1' then
                 -- Update FPSCR bits C..FU from the registered result, i.e. one
                 -- cycle after update_fprf was requested.
                 -- NOTE(review): the third argument (bit 54 set and not denorm)
                 -- presumably tells result_flags that the value is normalized;
                 -- confirm against result_flags.
1023             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
1024                                                              r.r(54) and not r.denorm);
1025         end if;
1026
1027         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
1028                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
             -- The statement above recomputes VX as the OR of all individual
             -- invalid-operation bits (two ranges of the FPSCR).
             -- FEX: OR of each exception bit (VX..XX) ANDed with its enable
             -- bit (VE..XE).
1029         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
1030                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
             -- FX is set only when requested via update_fx and at least one of
             -- VX..XX is set now but was not set in r.old_exc.
1031         if update_fx = '1' and
1032             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
1033             v.fpscr(FPSCR_FX) := '1';
1034         end if;
             -- With rc set, the CR result is taken from FPSCR bits FX..OX.
1035         if r.rc = '1' then
1036             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
1037         end if;
1038
1039         if illegal = '1' then
                 -- Illegal instruction: cancel completion, interrupt and writeback,
                 -- and return to IDLE without being busy.
1040             v.instr_done := '0';
1041             v.do_intr := '0';
1042             v.writing_back := '0';
1043             v.busy := '0';
1044             v.state := IDLE;
1045         else
                 -- Raise an interrupt when the instruction completes with FEX set
                 -- and fe_mode is set.
1046             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
                 -- Busy while a multi-cycle operation is in progress or an
                 -- interrupt is being signalled.  busy is only set here, never
                 -- cleared.
1047             if v.state /= IDLE or v.do_intr = '1' then
1048                 v.busy := '1';
1049             end if;
1050         end if;
1051
1052         rin <= v;
             -- The statement above hands the computed next state (v) to rin.
             -- The illegal flag is sent to execute1 directly from this process.
1053         e_out.illegal <= illegal;
1054     end process;
1055
1056 end architecture behaviour;