fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is
  15     port (
  16         clk : in std_ulogic;                -- clock; all registered state updates on its rising edge
  17         rst : in std_ulogic;                -- synchronous reset, sampled on the rising clock edge
  18
  19         e_in  : in  Execute1toFPUType;      -- request: valid, insn, op, operands fra/frb/frc, frt, rc, ...
  20         e_out : out FPUToExecute1Type;      -- status back to the issuer: busy, exception, interrupt
  21
  22         w_out : out FPUToWritebackType      -- result: register write data/index and CR field update
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
         -- Classification of a decoded value.  decode_dp classes denormalized
         -- inputs as FINITE; there is no separate denormal class.
  28
  29     constant EXP_BITS : natural := 13;
         -- Width of the signed, unbiased exponents carried inside the FPU.
  30
  31     type fpu_reg_type is record
         -- Unpacked form of one operand, as produced by decode_dp.
  32         class    : fp_number_class;
  33         negative : std_ulogic;                          -- sign (bit 63 of the source register)
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format
  36     end record;
  37
  38     type state_t is (IDLE,
                          -- IDLE waits for e_in.valid and then selects one of the
                          -- DO_* entry states below from insn(5 downto 1) (plus a
                          -- few extra instruction bits).
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
  40                      DO_FMR, DO_FMRG,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV,
                          -- The remaining states are never entered directly from
                          -- IDLE.  Judging by their names they are follow-on steps
                          -- of the operation named in their prefix, and shared
                          -- integer-conversion, rounding and renormalization steps
                          -- (TODO confirm against the state machine body).
  44                      FRI_1,
  45                      ADD_SHIFT, ADD_2, ADD_3,
  46                      MULT_1,
  47                      LOOKUP,
  48                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  49                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  50                      INT_FINAL, INT_CHECK, INT_OFLOW,
  51                      FINISH, NORMALIZE,
  52                      ROUND_UFLOW, ROUND_OFLOW,
  53                      ROUNDING, ROUNDING_2, ROUNDING_3,
  54                      DENORM,
  55                      RENORM_A, RENORM_A2,
  56                      RENORM_B, RENORM_B2,
  57                      RENORM_C, RENORM_C2);
  58
  59     type reg_type is record
         -- All registered state of the FPU (signal r); rin carries its next value.
  60         state        : state_t;                         -- current state of the fpu_1 state machine
  61         busy         : std_ulogic;                      -- drives e_out.busy
  62         instr_done   : std_ulogic;                      -- instruction completes; gates w_out.valid
  63         do_intr      : std_ulogic;                      -- drives e_out.interrupt and suppresses w_out.valid
  64         op           : insn_type_t;                     -- e_in.op of the captured instruction
  65         insn         : std_ulogic_vector(31 downto 0);  -- captured instruction word
  66         dest_fpr     : gspr_index_t;                    -- destination register (e_in.frt)
  67         fe_mode      : std_ulogic;                      -- OR-reduction of e_in.fe_mode
  68         rc           : std_ulogic;                      -- e_in.rc; requests a CR write on completion
  69         is_cmp       : std_ulogic;                      -- e_in.out_cr; also requests a CR write
  70         single_prec  : std_ulogic;                      -- e_in.single; selects the single-precision exponent limits
  71         fpscr        : std_ulogic_vector(31 downto 0);  -- FPSCR contents; cleared by reset
  72         a            : fpu_reg_type;                    -- decoded e_in.fra
  73         b            : fpu_reg_type;                    -- decoded e_in.frb
  74         c            : fpu_reg_type;                    -- decoded e_in.frc
  75         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  76         x            : std_ulogic;                      -- cleared in IDLE; presumably the sticky bit below r (TODO confirm)
  77         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  78         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  79         result_sign  : std_ulogic;
  80         result_class : fp_number_class;
  81         result_exp   : signed(EXP_BITS-1 downto 0);     -- new_exp is computed as result_exp - shift
  82         shift        : signed(EXP_BITS-1 downto 0);     -- defaults to 0 at the start of every cycle
  83         writing_back : std_ulogic;                      -- drives w_out.write_enable
  84         int_result   : std_ulogic;                      -- cleared on capture; set by DO_FMRG and DO_MFFS
  85         cr_result    : std_ulogic_vector(3 downto 0);   -- CR field value, replicated into all 8 fields on output
  86         cr_mask      : std_ulogic_vector(7 downto 0);   -- drives w_out.write_cr_mask
  87         old_exc      : std_ulogic_vector(4 downto 0);   -- fpscr(FPSCR_VX downto FPSCR_XX) as sampled in IDLE
  88         update_fprf  : std_ulogic;                      -- defaults to '0' every cycle
  89         quieten_nan  : std_ulogic;                      -- set to '1' when an instruction is captured
  90         tiny         : std_ulogic;                      -- cleared when an instruction is captured
  91         denorm       : std_ulogic;                      -- cleared when an instruction is captured
  92         round_mode   : std_ulogic_vector(2 downto 0);   -- '0' & FPSCR RN bits, or "001" when insn(5 downto 1) = "01111"
  93         is_subtract  : std_ulogic;                      -- cleared when an instruction is captured
  94         exp_cmp      : std_ulogic;                      -- '1' when A's exponent is greater than B's
  95         add_bsmall   : std_ulogic;                      -- cleared when an instruction is captured
  96         is_multiply  : std_ulogic;                      -- set when dispatching to DO_FMUL
  97         first        : std_ulogic;                      -- defaults to '0' every cycle
  98         count        : unsigned(1 downto 0);
  99     end record;
 100
 101     type lookup_table is array(0 to 255) of std_ulogic_vector(17 downto 0);
         -- 256 entries of 18 bits: the storage type of inverse_table.
 102
 103     signal r, rin : reg_type;                               -- r is loaded from rin on every non-reset clock edge
 104
 105     signal fp_result     : std_ulogic_vector(63 downto 0);  -- drives w_out.write_data
 106     signal opsel_a       : std_ulogic_vector(1 downto 0);   -- AIN_* select; defaults to AIN_R each cycle
 107     signal opsel_b       : std_ulogic_vector(1 downto 0);   -- BIN_* select; defaults to BIN_ZERO each cycle
 108     signal opsel_r       : std_ulogic_vector(1 downto 0);   -- RES_* select; defaults to RES_SUM each cycle
 109     signal opsel_ainv    : std_ulogic;                      -- defaults to '0' each cycle
 110     signal opsel_amask   : std_ulogic;                      -- defaults to '0' each cycle
 111     signal opsel_binv    : std_ulogic;                      -- defaults to '0' each cycle
 112     signal in_a          : std_ulogic_vector(63 downto 0);
 113     signal in_b          : std_ulogic_vector(63 downto 0);
 114     signal result        : std_ulogic_vector(63 downto 0);
 115     signal carry_in      : std_ulogic;                      -- defaults to '0' each cycle
 116     signal lost_bits     : std_ulogic;
 117     signal r_hi_nz       : std_ulogic;                      -- OR of r.r(55 downto 31)
 118     signal r_lo_nz       : std_ulogic;                      -- OR of r.r(30 downto 2)
 119     signal misc_sel      : std_ulogic_vector(3 downto 0);   -- defaults to "0000" each cycle
 120     signal f_to_multiply : MultiplyInputType;               -- m_in of the fpu_multiply_0 instance
 121     signal multiply_to_f : MultiplyOutputType;              -- m_out of the fpu_multiply_0 instance
 122     signal msel_1        : std_ulogic_vector(1 downto 0);   -- MUL1_* select; defaults to MUL1_A each cycle
 123     signal msel_2        : std_ulogic_vector(1 downto 0);   -- MUL2_* select; defaults to MUL2_C each cycle
 124     signal msel_add      : std_ulogic_vector(1 downto 0);   -- MULADD_* select; defaults to MULADD_ZERO each cycle
 125     signal msel_inv      : std_ulogic;                      -- defaults to '0' each cycle
 126     signal inverse_est   : std_ulogic_vector(18 downto 0);  -- registered inverse_table entry with the leading '1' restored
 127
 128     -- opsel values
         -- Encodings for opsel_a (AIN_*), opsel_b (BIN_*) and opsel_r (RES_*).
         -- fpu_1 selects AIN_R, BIN_ZERO and RES_SUM by default every cycle.
 129     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";
 130     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 131     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 132     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 133
 134     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
 135     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 136     constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
 137
 138     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";
 139     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 140     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 141     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";     -- used together with misc_sel (e.g. DO_FMRG, DO_MFFS)
 142
 143     -- msel values
         -- Encodings for msel_1 (MUL1_*), msel_2 (MUL2_*) and msel_add (MULADD_*).
         -- fpu_1 selects MUL1_A, MUL2_C and MULADD_ZERO by default every cycle.
 144     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
 145     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 146     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 147     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 148
 149     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";
 150     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 151     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 152     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 153
 154     constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
 155     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 156     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 157
 158     -- Inverse lookup table, indexed by the top 8 fraction bits
 159     -- Output range is [0.5, 1) in 0.19 format, though the top
 160     -- bit isn't stored since it is always 1.
 161     -- Each output value is the inverse of the center of the input
 162     -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
 163     -- entry 1 is 1 / (1 + 3/512), etc.
         -- Only the low 18 bits of each 19-bit value are stored; the
         -- lut_access process puts the leading '1' back when it reads an
         -- entry into inverse_est.
 164     signal inverse_table : lookup_table := (
 165         -- 1/x lookup table
 166         -- Unit bit is assumed to be 1, so input range is [1, 2)
 167         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 168         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 169         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 170         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 171         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 172         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 173         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 174         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 175         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 176         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 177         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 178         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 179         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 180         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 181         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 182         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 183         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 184         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 185         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 186         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 187         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 188         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 189         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 190         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 191         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 192         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 193         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 194         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 195         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 196         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 197         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 198         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100"
 199         );
 200
 201     -- Left and right shifter with 120 bit input and 64 bit output.
 202     -- Shifts inp left by shift bits and returns the upper 64 bits of
 203     -- the result.  The shift parameter is interpreted as a signed
 204     -- number in the range -64..63, with negative values indicating
 205     -- right shifts.
         -- The shift is applied in three multiplexer stages: by a multiple
         -- of 32 (shift bits 6:5), then of 8 (bits 4:3), then by 0..7
         -- (bits 2:0).  Each stage keeps just enough extra low-order bits
         -- for the stages after it.  Vacated positions are filled with
         -- zeros.
 206     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 207                         shift: std_ulogic_vector(6 downto 0))
 208         return std_ulogic_vector is
 209         variable s1 : std_ulogic_vector(94 downto 0);
 210         variable s2 : std_ulogic_vector(70 downto 0);
 211         variable result : std_ulogic_vector(63 downto 0);
 212     begin
             -- stage 1: coarse shift, 95-bit window
 213         case shift(6 downto 5) is
 214             when "00" =>
                     -- left by 0
 215                 s1 := inp(119 downto 25);
 216             when "01" =>
                     -- left by 32; the low 7 bits have run off the input
 217                 s1 := inp(87 downto 0) & "0000000";
 218             when "10" =>
                     -- right by 64 (shift = -64 .. -33)
 219                 s1 := x"0000000000000000" & inp(119 downto 89);
 220             when others =>
                     -- right by 32 (shift = -32 .. -1)
 221                 s1 := x"00000000" & inp(119 downto 57);
 222         end case;
             -- stage 2: further left shift by 0, 8, 16 or 24, 71-bit window
 223         case shift(4 downto 3) is
 224             when "00" =>
 225                 s2 := s1(94 downto 24);
 226             when "01" =>
 227                 s2 := s1(86 downto 16);
 228             when "10" =>
 229                 s2 := s1(78 downto 8);
 230             when others =>
 231                 s2 := s1(70 downto 0);
 232         end case;
             -- stage 3: final left shift by 0..7, selecting the 64 result bits
 233         case shift(2 downto 0) is
 234             when "000" =>
 235                 result := s2(70 downto 7);
 236             when "001" =>
 237                 result := s2(69 downto 6);
 238             when "010" =>
 239                 result := s2(68 downto 5);
 240             when "011" =>
 241                 result := s2(67 downto 4);
 242             when "100" =>
 243                 result := s2(66 downto 3);
 244             when "101" =>
 245                 result := s2(65 downto 2);
 246             when "110" =>
 247                 result := s2(64 downto 1);
 248             when others =>
 249                 result := s2(63 downto 0);
 250         end case;
 251         return result;
 252     end;
 253
 254     -- Build the mask of bits that a right shift would discard: 1-bits at
 255     -- the least-significant end with 0-bits above them.  The shift
 256     -- parameter holds the low 6 bits of the negative (right) shift count,
 257     -- so bits 0 up to 63 - shift are set and shift = 0 gives all ones.
 258     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 259         variable mask : std_ulogic_vector(63 downto 0) := (others => '0');
 260     begin
 261         for bitpos in mask'range loop
 262             -- a bit is lost when its distance from the MSB is at least shift
 263             if (63 - bitpos) >= shift then
 264                 mask(bitpos) := '1';
 265             end if;
 266         end loop;
 267         return mask;
 268     end;
 269
 270     -- Unpack a 64-bit register value into sign, class, exponent and mantissa.
 271     -- When is_int = '1' the value is taken as a raw 64-bit integer instead.
 272     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 273         variable dec      : fpu_reg_type;
 274         variable exp_any  : std_ulogic;   -- some exponent bit is 1
 275         variable exp_all  : std_ulogic;   -- every exponent bit is 1
 276         variable frac_any : std_ulogic;   -- some fraction bit is 1
 277         variable key      : std_ulogic_vector(2 downto 0);
 278     begin
 279         exp_any  := or (fpr(62 downto 52));
 280         exp_all  := and (fpr(62 downto 52));
 281         frac_any := or (fpr(51 downto 0));
 282         dec.negative := fpr(63);
 283         if is_int /= '0' then
 284             dec.mantissa := fpr;        -- integer: all 64 bits are the value
 285             dec.exponent := (others => '0');
 286             dec.class := ZERO;
 287             if (fpr(63) or exp_any or frac_any) = '1' then
 288                 dec.class := FINITE;
 289             end if;
 290         else
 291             -- remove the 1023 bias; an all-zero exponent field is read as -1022
 292             if exp_any = '0' then
 293                 dec.exponent := to_signed(-1022, EXP_BITS);
 294             else
 295                 dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
 296             end if;
 297             -- implicit unit bit goes to bit 54, fraction to bits 53..2
 298             dec.mantissa := "000000000" & exp_any & fpr(51 downto 0) & "00";
 299             key := exp_all & exp_any & frac_any;
 300             case key is
 301                 when "000"                 => dec.class := ZERO;
 302                 when "001" | "010" | "011" => dec.class := FINITE;    -- "001" is denormalized
 303                 when "110"                 => dec.class := INFINITY;
 304                 when others                => dec.class := NAN;
 305             end case;
 306         end if;
 307         return dec;
 308     end;
 309
 310     -- Assemble a 64-bit DP register image from sign, class, exponent and
 311     -- mantissa.  mantissa must be a descending vector that covers bits
 312     -- 54 downto 2, with the unit bit at 54.  With single_prec = '1' the
 313     -- low 29 fraction bits of the result are left at zero.
 314     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 315                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 316         return std_ulogic_vector is
 317         variable packed : std_ulogic_vector(63 downto 0) := (others => '0');
 318     begin
 319         packed(63) := sign;
 320         -- exponent field
 321         if class = INFINITY or class = NAN then
 322             packed(62 downto 52) := "11111111111";
 323         elsif class = FINITE and mantissa(54) = '1' then
 324             -- normalized number; a denormal keeps the all-zero field
 325             packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
 326         end if;
 327         -- fraction field (ZERO and INFINITY leave it all zero)
 328         if class = FINITE or class = NAN then
 329             packed(51 downto 29) := mantissa(53 downto 31);
 330             if single_prec = '0' then
 331                 packed(28 downto 0) := mantissa(30 downto 2);
 332             end if;
 333             if class = NAN then
 334                 -- top fraction bit can additionally be forced to 1
 335                 packed(51) := quieten_nan or mantissa(53);
 336             end if;
 337         end if;
 338         return packed;
 339     end;
 340
 341
 342     -- Decide whether rounding increments the mantissa.
 343     -- Returns rounding_inc & inexact (bit 1 = increment, bit 0 = inexact).
 344     -- For single_prec = 1, x must already include the bottom 29 bits of the
 345     -- mantissa (usually arranged by setting set_x = 1 earlier).
 346     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 347                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 348                          sign: std_ulogic)
 349         return std_ulogic_vector is
 350         variable grs     : std_ulogic_vector(2 downto 0);   -- guard, round, sticky
 351         variable ulp     : std_ulogic;                      -- lowest kept mantissa bit
 352         variable outcome : std_ulogic_vector(1 downto 0) := "00";
 353     begin
 354         -- pick the rounding position for the selected precision
 355         if single_prec /= '0' then
 356             ulp := mantissa(31);
 357             grs := mantissa(30 downto 29) & x;
 358         else
 359             ulp := mantissa(2);
 360             grs := mantissa(1 downto 0) & x;
 361         end if;
 362         outcome(0) := or (grs);     -- inexact when any discarded bit is set
 363         case rn(1 downto 0) is
 364             when "01" => null;      -- round towards zero: never increment
 365             when "00" =>            -- round to nearest
 366                 outcome(1) := grs(2);
 367                 if grs = "100" and rn(2) = '0' then
 368                     -- exact tie: round to even
 369                     outcome(1) := ulp;
 370                 end if;
 371             when others =>          -- round towards +/- inf
 372                 -- increment only when that moves towards greater magnitude
 373                 if rn(0) = sign then
 374                     outcome(1) := outcome(0);
 375                 end if;
 376         end case;
 377         return outcome;
 378     end;
 379
 380     -- Work out the five result-flag bits to write into the FPSCR for a
 381     -- result of the given sign and class.
 382     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 383         return std_ulogic_vector is
 384     begin
 385         if class = NAN then
 386             return "10001";
 387         elsif class = ZERO then
 388             return sign & "0010";
 389         elsif class = INFINITY then
 390             return '0' & sign & (not sign) & "01";
 391         end if;
 392         -- FINITE: the leading flag is set when the unit bit is clear
 393         return (not unitbit) & sign & (not sign) & "00";
 394     end;
 395
 396 begin
 397     fpu_multiply_0: entity work.multiply
             -- Multiplier instance: operands are presented on f_to_multiply
             -- and the product is read back from multiply_to_f.
 398         port map (
 399             clk => clk,
 400             m_in => f_to_multiply,
 401             m_out => multiply_to_f
 402             );
 403
 404     fpu_0: process(clk)
             -- State register: on each rising clock edge either applies the
             -- synchronous reset or loads r from rin.
 405     begin
 406         if rising_edge(clk) then
 407             if rst = '1' then
                     -- Only these control fields and the FPSCR are reset; all
                     -- other fields of r hold their value through reset.
 408                 r.state <= IDLE;
 409                 r.busy <= '0';
 410                 r.instr_done <= '0';
 411                 r.do_intr <= '0';
 412                 r.fpscr <= (others => '0');
 413                 r.writing_back <= '0';
 414             else
                     -- a new instruction may only be presented while in IDLE
 415                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
 416                 r <= rin;
 417             end if;
 418         end if;
 419     end process;
 420
 421     -- synchronous reads from lookup table
         -- The table is indexed by bits 53..46 of operand B's mantissa, i.e.
         -- the 8 bits directly below the unit bit (bit 54).  The '1' that the
         -- table does not store is prepended, giving the 19-bit inverse_est
         -- one clock after the index is presented.
 422     lut_access: process(clk)
 423     begin
 424         if rising_edge(clk) then
 425             inverse_est <= '1' & inverse_table(to_integer(unsigned(r.b.mantissa(53 downto 46))));
 426         end if;
 427     end process;
 428
 429     e_out.busy <= r.busy;
         -- Status outputs come straight from the state register.
 430     e_out.exception <= r.fpscr(FPSCR_FEX);
 431     e_out.interrupt <= r.do_intr;
 432
         -- Writeback outputs.  A completing instruction is reported as valid
         -- only when it does not raise an interrupt.
 433     w_out.valid <= r.instr_done and not r.do_intr;
 434     w_out.write_enable <= r.writing_back;
 435     w_out.write_reg <= r.dest_fpr;
 436     w_out.write_data <= fp_result;
         -- NOTE(review): unlike valid, write_cr_enable is not gated by do_intr;
         -- presumably the consumer qualifies it with valid - confirm.
 437     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
 438     w_out.write_cr_mask <= r.cr_mask;
         -- the 4-bit CR result is copied into all eight fields; write_cr_mask
         -- carries the field selection
 439     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
 440                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;
 441
 442     fpu_1: process(all)
 443         variable v           : reg_type;
 444         variable adec        : fpu_reg_type;
 445         variable bdec        : fpu_reg_type;
 446         variable cdec        : fpu_reg_type;
 447         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 448         variable illegal     : std_ulogic;
 449         variable j, k        : integer;
 450         variable flm         : std_ulogic_vector(7 downto 0);
 451         variable int_input   : std_ulogic;
 452         variable mask        : std_ulogic_vector(63 downto 0);
 453         variable in_a0       : std_ulogic_vector(63 downto 0);
 454         variable in_b0       : std_ulogic_vector(63 downto 0);
 455         variable misc        : std_ulogic_vector(63 downto 0);
 456         variable shift_res   : std_ulogic_vector(63 downto 0);
 457         variable round       : std_ulogic_vector(1 downto 0);
 458         variable update_fx   : std_ulogic;
 459         variable arith_done  : std_ulogic;
 460         variable invalid     : std_ulogic;
 461         variable zero_divide : std_ulogic;
 462         variable mant_nz     : std_ulogic;
 463         variable min_exp     : signed(EXP_BITS-1 downto 0);
 464         variable max_exp     : signed(EXP_BITS-1 downto 0);
 465         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 466         variable new_exp     : signed(EXP_BITS-1 downto 0);
 467         variable exp_tiny    : std_ulogic;
 468         variable exp_huge    : std_ulogic;
 469         variable renormalize : std_ulogic;
 470         variable clz         : std_ulogic_vector(5 downto 0);
 471         variable set_x       : std_ulogic;
 472         variable mshift      : signed(EXP_BITS-1 downto 0);
 473         variable need_check  : std_ulogic;
 474         variable msb         : std_ulogic;
 475         variable is_add      : std_ulogic;
 476         variable qnan_result : std_ulogic;
 477         variable longmask    : std_ulogic;
 478         variable set_a       : std_ulogic;
 479         variable set_b       : std_ulogic;
 480         variable set_c       : std_ulogic;
 481         variable px_nz       : std_ulogic;
 482         variable maddend     : std_ulogic_vector(127 downto 0);
 483         variable set_y       : std_ulogic;
 484         variable pcmpb_eq    : std_ulogic;
 485         variable pcmpb_lt    : std_ulogic;
 486         variable pshift      : std_ulogic;
 487     begin
 488         v := r;
 489         illegal := '0';
 490         v.busy := '0';
 491         int_input := '0';
 492
 493         -- capture incoming instruction
 494         if e_in.valid = '1' then
 495             v.insn := e_in.insn;
 496             v.op := e_in.op;
 497             v.fe_mode := or (e_in.fe_mode);
 498             v.dest_fpr := e_in.frt;
 499             v.single_prec := e_in.single;
 500             v.int_result := '0';
 501             v.rc := e_in.rc;
 502             v.is_cmp := e_in.out_cr;
 503             if e_in.out_cr = '0' then
 504                 v.cr_mask := num_to_fxm(1);
 505             else
 506                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 507             end if;
 508             int_input := '0';
 509             if e_in.op = OP_FPOP_I then
 510                 int_input := '1';
 511             end if;
 512             v.quieten_nan := '1';
 513             v.tiny := '0';
 514             v.denorm := '0';
 515             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 516             v.is_subtract := '0';
 517             v.is_multiply := '0';
 518             v.add_bsmall := '0';
 519             adec := decode_dp(e_in.fra, int_input);
 520             bdec := decode_dp(e_in.frb, int_input);
 521             cdec := decode_dp(e_in.frc, int_input);
 522             v.a := adec;
 523             v.b := bdec;
 524             v.c := cdec;
 525
 526             v.exp_cmp := '0';
 527             if adec.exponent > bdec.exponent then
 528                 v.exp_cmp := '1';
 529             end if;
 530         end if;
 531
 532         r_hi_nz <= or (r.r(55 downto 31));
 533         r_lo_nz <= or (r.r(30 downto 2));
 534
 535         if r.single_prec = '0' then
 536             max_exp := to_signed(1023, EXP_BITS);
 537             min_exp := to_signed(-1022, EXP_BITS);
 538             bias_exp := to_signed(1536, EXP_BITS);
 539         else
 540             max_exp := to_signed(127, EXP_BITS);
 541             min_exp := to_signed(-126, EXP_BITS);
 542             bias_exp := to_signed(192, EXP_BITS);
 543         end if;
 544         new_exp := r.result_exp - r.shift;
 545         exp_tiny := '0';
 546         exp_huge := '0';
 547         if new_exp < min_exp then
 548             exp_tiny := '1';
 549         end if;
 550         if new_exp > max_exp then
 551             exp_huge := '1';
 552         end if;
 553
 554         -- Compare P with zero and with B
 555         px_nz := or (r.p(57 downto 4));
 556         pcmpb_eq := '0';
 557         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 558             pcmpb_eq := '1';
 559         end if;
 560         pcmpb_lt := '0';
 561         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 562             pcmpb_lt := '1';
 563         end if;
 564
 565         v.writing_back := '0';
 566         v.instr_done := '0';
 567         v.update_fprf := '0';
 568         v.shift := to_signed(0, EXP_BITS);
 569         v.first := '0';
 570         opsel_a <= AIN_R;
 571         opsel_ainv <= '0';
 572         opsel_amask <= '0';
 573         opsel_b <= BIN_ZERO;
 574         opsel_binv <= '0';
 575         opsel_r <= RES_SUM;
 576         carry_in <= '0';
 577         misc_sel <= "0000";
 578         fpscr_mask := (others => '1');
 579         update_fx := '0';
 580         arith_done := '0';
 581         invalid := '0';
 582         zero_divide := '0';
 583         renormalize := '0';
 584         set_x := '0';
 585         qnan_result := '0';
 586         longmask := r.single_prec;
 587         set_a := '0';
 588         set_b := '0';
 589         set_c := '0';
 590         f_to_multiply.is_32bit <= '0';
 591         f_to_multiply.valid <= '0';
 592         msel_1 <= MUL1_A;
 593         msel_2 <= MUL2_C;
 594         msel_add <= MULADD_ZERO;
 595         msel_inv <= '0';
 596         set_y := '0';
 597         pshift := '0';
 598         case r.state is
 599             when IDLE =>
 600                 if e_in.valid = '1' then
 601                     case e_in.insn(5 downto 1) is
 602                         when "00000" =>
 603                             v.state := DO_MCRFS;
 604                         when "00110" =>
 605                             if e_in.insn(10) = '0' then
 606                                 if e_in.insn(8) = '0' then
 607                                     v.state := DO_MTFSB;
 608                                 else
 609                                     v.state := DO_MTFSFI;
 610                                 end if;
 611                             else
 612                                 v.state := DO_FMRG;
 613                             end if;
 614                         when "00111" =>
 615                             if e_in.insn(8) = '0' then
 616                                 v.state := DO_MFFS;
 617                             else
 618                                 v.state := DO_MTFSF;
 619                             end if;
 620                         when "01000" =>
 621                             if e_in.insn(9 downto 8) /= "11" then
 622                                 v.state := DO_FMR;
 623                             else
 624                                 v.state := DO_FRI;
 625                             end if;
 626                         when "01100" =>
 627                             v.state := DO_FRSP;
 628                         when "01110" =>
 629                             if int_input = '1' then
 630                                 -- fcfid[u][s]
 631                                 v.state := DO_FCFID;
 632                             else
 633                                 v.state := DO_FCTI;
 634                             end if;
 635                         when "01111" =>
 636                             v.round_mode := "001";
 637                             v.state := DO_FCTI;
 638                         when "10010" =>
 639                             v.state := DO_FDIV;
 640                         when "10100" | "10101" =>
 641                             v.state := DO_FADD;
 642                         when "11001" =>
 643                             v.is_multiply := '1';
 644                             v.state := DO_FMUL;
 645                         when others =>
 646                             illegal := '1';
 647                     end case;
 648                 end if;
 649                 v.x := '0';
 650                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 651
 652             when DO_MCRFS =>
 653                 j := to_integer(unsigned(insn_bfa(r.insn)));
 654                 for i in 0 to 7 loop
 655                     if i = j then
 656                         k := (7 - i) * 4;
 657                         v.cr_result := r.fpscr(k + 3 downto k);
 658                         fpscr_mask(k + 3 downto k) := "0000";
 659                     end if;
 660                 end loop;
 661                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 662                 v.instr_done := '1';
 663                 v.state := IDLE;
 664
 665             when DO_MTFSB =>
 666                 -- mtfsb{0,1}
 667                 j := to_integer(unsigned(insn_bt(r.insn)));
 668                 for i in 0 to 31 loop
 669                     if i = j then
 670                         v.fpscr(31 - i) := r.insn(6);
 671                     end if;
 672                 end loop;
 673                 v.instr_done := '1';
 674                 v.state := IDLE;
 675
 676             when DO_MTFSFI =>
 677                 -- mtfsfi
 678                 j := to_integer(unsigned(insn_bf(r.insn)));
 679                 if r.insn(16) = '0' then
 680                     for i in 0 to 7 loop
 681                         if i = j then
 682                             k := (7 - i) * 4;
 683                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 684                         end if;
 685                     end loop;
 686                 end if;
 687                 v.instr_done := '1';
 688                 v.state := IDLE;
 689
 690             when DO_FMRG =>
 691                 -- fmrgew, fmrgow
 692                 opsel_r <= RES_MISC;
 693                 misc_sel <= "01" & r.insn(8) & '0';
 694                 v.int_result := '1';
 695                 v.writing_back := '1';
 696                 v.instr_done := '1';
 697                 v.state := IDLE;
 698
 699             when DO_MFFS =>
                     -- mffs family, decoded from instruction bits 20..16.  All variants
                     -- write back a result taken from the RES_MISC path; some also
                     -- narrow fpscr_mask and/or update FPSCR control bits.
                     -- NOTE(review): fpscr_mask is consumed outside this arm; it
                     -- presumably limits which FPSCR bits are returned -- confirm.
 700                 v.int_result := '1';
 701                 v.writing_back := '1';
 702                 opsel_r <= RES_MISC;
 703                 case r.insn(20 downto 16) is
 704                     when "00000" =>
 705                         -- mffs
 706                     when "00001" =>
 707                         -- mffsce
                             -- clears the five bits FPSCR_VE down to FPSCR_XE
 708                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
 709                     when "10100" | "10101" =>
 710                         -- mffscdrn[i] (but we don't implement DRN)
 711                         fpscr_mask := x"000000FF";
 712                     when "10110" =>
 713                         -- mffscrn
                             -- new RN comes from the same bit positions of operand B
 714                         fpscr_mask := x"000000FF";
 715                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
 716                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
 717                     when "10111" =>
 718                         -- mffscrni
                             -- new RN comes from instruction bits 12..11
 719                         fpscr_mask := x"000000FF";
 720                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
 721                     when "11000" =>
 722                         -- mffsl
 723                         fpscr_mask := x"0007F0FF";
 724                     when others =>
                             -- any other encoding is flagged as illegal
 725                         illegal := '1';
 726                 end case;
 727                 v.instr_done := '1';
 728                 v.state := IDLE;
 729
 730             when DO_MTFSF =>
                     -- mtfsf: copy selected 4-bit fields of operand B's mantissa into
                     -- the FPSCR.  flm is the per-field enable mask:
                     --   instruction bit 25 set           -> all eight fields
                     --   else instruction bit 16 set      -> no fields
                     --   otherwise                        -> instruction bits 24..17
 731                 if r.insn(25) = '1' then
 732                     flm := x"FF";
 733                 elsif r.insn(16) = '1' then
 734                     flm := x"00";
 735                 else
 736                     flm := r.insn(24 downto 17);
 737                 end if;
                     -- flm bit i enables nibble i (bits 4i+3 .. 4i) of the FPSCR.
 738                 for i in 0 to 7 loop
 739                     k := i * 4;
 740                     if flm(i) = '1' then
 741                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
 742                     end if;
 743                 end loop;
 744                 v.instr_done := '1';
 745                 v.state := IDLE;
 746
 747             when DO_FMR =>
                     -- Register-move group (fabs, fnabs, fmr, fneg, fcpsgn): the result
                     -- takes B's class and exponent, with only the sign chosen by the
                     -- instruction.  Bits 9, 8, 7, 6 are tested in priority order.
 748                 opsel_a <= AIN_B;
 749                 v.result_class := r.b.class;
 750                 v.result_exp := r.b.exponent;
                     -- moves must not alter a NaN operand
 751                 v.quieten_nan := '0';
 752                 if r.insn(9) = '1' then
 753                     v.result_sign := '0';              -- fabs
 754                 elsif r.insn(8) = '1' then
 755                     v.result_sign := '1';              -- fnabs
 756                 elsif r.insn(7) = '1' then
 757                     v.result_sign := r.b.negative;     -- fmr
 758                 elsif r.insn(6) = '1' then
 759                     v.result_sign := not r.b.negative; -- fneg
 760                 else
 761                     v.result_sign := r.a.negative;     -- fcpsgn
 762                 end if;
 763                 v.writing_back := '1';
 764                 v.instr_done := '1';
 765                 v.state := IDLE;
 766
 767             when DO_FRI =>    -- fri[nzpm]
                     -- Round B to an integral value, keeping floating-point format.
                     -- The result starts as a copy of B; FR and FI are cleared up front.
 768                 opsel_a <= AIN_B;
 769                 v.result_class := r.b.class;
 770                 v.result_sign := r.b.negative;
 771                 v.result_exp := r.b.exponent;
 772                 v.fpscr(FPSCR_FR) := '0';
 773                 v.fpscr(FPSCR_FI) := '0';
                     -- a NaN with mantissa bit 53 clear is treated as signalling
 774                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
 775                     -- Signalling NAN
 776                     v.fpscr(FPSCR_VXSNAN) := '1';
 777                     invalid := '1';
 778                 end if;
 779                 if r.b.class = FINITE then
 780                     if r.b.exponent >= to_signed(52, EXP_BITS) then
 781                         -- integer already, no rounding required
 782                         arith_done := '1';
 783                     else
                             -- shift amount is negative here (exponent - 52 < 0);
                             -- the rounding mode is taken from instruction bits 7..6
                             -- with a leading '1' (interpreted by fp_rounding, whose
                             -- body is outside this view).
 784                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
 785                         v.state := FRI_1;
 786                         v.round_mode := '1' & r.insn(7 downto 6);
 787                     end if;
 788                 else
                         -- zero, infinity and NaN need no rounding
 789                     arith_done := '1';
 790                 end if;
 791
 792             when DO_FRSP =>
                     -- frsp: the result starts as a copy of B and is then sent to
                     -- underflow handling, overflow handling or rounding depending
                     -- on its exponent.  FR and FI are cleared up front.
 793                 opsel_a <= AIN_B;
 794                 v.result_class := r.b.class;
 795                 v.result_sign := r.b.negative;
 796                 v.result_exp := r.b.exponent;
 797                 v.fpscr(FPSCR_FR) := '0';
 798                 v.fpscr(FPSCR_FI) := '0';
                     -- a NaN with mantissa bit 53 clear is treated as signalling
 799                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
 800                     -- Signalling NAN
 801                     v.fpscr(FPSCR_VXSNAN) := '1';
 802                     invalid := '1';
 803                 end if;
 804                 set_x := '1';
 805                 if r.b.class = FINITE then
 806                     if r.b.exponent < to_signed(-126, EXP_BITS) then
                             -- exponent below -126: shift is the (negative) distance
                             -- to -126
 807                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
 808                         v.state := ROUND_UFLOW;
 809                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
                             -- exponent above 127
 810                         v.state := ROUND_OFLOW;
 811                     else
 812                         v.shift := to_signed(-2, EXP_BITS);
 813                         v.state := ROUNDING;
 814                     end if;
 815                 else
                         -- zero, infinity and NaN need no rounding
 816                     arith_done := '1';
 817                 end if;
 818
 819             when DO_FCTI =>
                     -- Float-to-integer conversion group.  Decides, from B's class and
                     -- exponent, whether the conversion overflows, needs only a
                     -- positioning shift, or needs a shift followed by rounding.
 820                 -- instr bit 9: 1=dword 0=word
 821                 -- instr bit 8: 1=unsigned 0=signed
 822                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
 823                 opsel_a <= AIN_B;
 824                 v.result_class := r.b.class;
 825                 v.result_sign := r.b.negative;
 826                 v.result_exp := r.b.exponent;
 827                 v.fpscr(FPSCR_FR) := '0';
 828                 v.fpscr(FPSCR_FI) := '0';
                     -- a NaN with mantissa bit 53 clear is treated as signalling
 829                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
 830                     -- Signalling NAN
 831                     v.fpscr(FPSCR_VXSNAN) := '1';
 832                     invalid := '1';
 833                 end if;
 834
 835                 v.int_result := '1';
 836                 case r.b.class is
 837                     when ZERO =>
 838                         arith_done := '1';
 839                     when FINITE =>
                             -- exponent >= 64 (or >= 32 for the word forms) cannot fit
 840                         if r.b.exponent >= to_signed(64, EXP_BITS) or
 841                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
 842                             v.state := INT_OFLOW;
 843                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
 844                             -- integer already, no rounding required,
 845                             -- shift into final position
 846                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
                                 -- a negative value cannot be converted to unsigned
 847                             if r.insn(8) = '1' and r.b.negative = '1' then
 848                                 v.state := INT_OFLOW;
 849                             else
 850                                 v.state := INT_ISHIFT;
 851                             end if;
 852                         else
                                 -- fractional bits present: shift, then round
 853                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
 854                             v.state := INT_SHIFT;
 855                         end if;
 856                     when INFINITY | NAN =>
 857                         v.state := INT_OFLOW;
 858                 end case;
 859
 860             when DO_FCFID =>
                     -- Integer-to-float conversion group.  B is passed through the
                     -- adder (negated when it is a negative signed operand) and the
                     -- result exponent is preset to 54 before going to FINISH.
 861                 v.result_sign := '0';
 862                 opsel_a <= AIN_B;
                     -- instruction bit 8 = '0' selects the signed forms here
 863                 if r.insn(8) = '0' and r.b.negative = '1' then
 864                     -- fcfid[s] with negative operand, set R = -B
                         -- (invert plus carry-in = two's complement negation)
 865                     opsel_ainv <= '1';
 866                     carry_in <= '1';
 867                     v.result_sign := '1';
 868                 end if;
 869                 v.result_class := r.b.class;
 870                 v.result_exp := to_signed(54, EXP_BITS);
 871                 v.fpscr(FPSCR_FR) := '0';
 872                 v.fpscr(FPSCR_FI) := '0';
 873                 if r.b.class = ZERO then
 874                     arith_done := '1';
 875                 else
 876                     v.state := FINISH;
 877                 end if;
 878
 879             when DO_FADD =>
 880                 -- fadd[s] and fsub[s]
                     -- Defaults: the result is A.  For two finite operands the arm
                     -- sets up the alignment shift and picks the next state; for all
                     -- other class combinations it resolves the result here.
 881                 opsel_a <= AIN_A;
 882                 v.result_sign := r.a.negative;
 883                 v.result_class := r.a.class;
 884                 v.result_exp := r.a.exponent;
 885                 v.fpscr(FPSCR_FR) := '0';
 886                 v.fpscr(FPSCR_FI) := '0';
                     -- is_add = '1' when the magnitudes are effectively added:
                     -- operand signs XOR instruction bit 1.
 887                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
 888                 if r.a.class = FINITE and r.b.class = FINITE then
 889                     v.is_subtract := not is_add;
                         -- NOTE(review): r.exp_cmp is produced outside this view;
                         -- from its use here and in ADD_2, '1' appears to mean
                         -- that B has the smaller exponent -- confirm.
 890                     v.add_bsmall := r.exp_cmp;
 891                     if r.exp_cmp = '0' then
                             -- A stays selected; shift is A.exp - B.exp and the
                             -- result sign follows +/-B.
 892                         v.shift := r.a.exponent - r.b.exponent;
 893                         v.result_sign := r.b.negative xnor r.insn(1);
 894                         if r.a.exponent = r.b.exponent then
                                 -- equal exponents: no alignment shift needed
 895                             v.state := ADD_2;
 896                         else
 897                             v.state := ADD_SHIFT;
 898                         end if;
 899                     else
                             -- B is selected instead; shift is B.exp - A.exp and
                             -- result_exp starts from B's exponent.
 900                         opsel_a <= AIN_B;
 901                         v.shift := r.b.exponent - r.a.exponent;
 902                         v.result_exp := r.b.exponent;
 903                         v.state := ADD_SHIFT;
 904                     end if;
 905                 else
                         -- a NaN with mantissa bit 53 clear is treated as signalling
 906                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 907                         (r.b.class = NAN and r.b.mantissa(53) = '0') then
 908                         -- Signalling NAN
 909                         v.fpscr(FPSCR_VXSNAN) := '1';
 910                         invalid := '1';
 911                     end if;
 912                     if r.a.class = NAN then
 913                         -- nothing to do, result is A
 914                     elsif r.b.class = NAN then
                             -- result is the NaN in B
 915                         v.result_class := NAN;
 916                         v.result_sign := r.b.negative;
 917                         opsel_a <= AIN_B;
 918                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
 919                         -- invalid operation, construct QNaN
 920                         v.fpscr(FPSCR_VXISI) := '1';
 921                         qnan_result := '1';
 922                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
 923                         -- return -0 for rounding to -infinity
                             -- (i.e. when round_mode bits 1 and 0 are both set)
 924                         v.result_sign := r.round_mode(1) and r.round_mode(0);
 925                     elsif r.a.class = INFINITY or r.b.class = ZERO then
 926                         -- nothing to do, result is A
 927                     else
 928                         -- result is +/- B
 929                         v.result_sign := r.b.negative xnor r.insn(1);
 930                         v.result_class := r.b.class;
 931                         v.result_exp := r.b.exponent;
 932                         opsel_a <= AIN_B;
 933                     end if;
 934                     arith_done := '1';
 935                 end if;
 936
 937             when DO_FMUL =>
 938                 -- fmul[s]
                     -- Operands are A and C.  Defaults: the result is A.  For two
                     -- finite operands the sign and exponent of the product are
                     -- formed here and the multiply (or a renormalisation) is
                     -- started; all other class combinations are resolved here.
 939                 opsel_a <= AIN_A;
 940                 v.result_sign := r.a.negative;
 941                 v.result_class := r.a.class;
 942                 v.result_exp := r.a.exponent;
 943                 v.fpscr(FPSCR_FR) := '0';
 944                 v.fpscr(FPSCR_FI) := '0';
 945                 if r.a.class = FINITE and r.c.class = FINITE then
 946                     v.result_sign := r.a.negative xor r.c.negative;
 947                     v.result_exp := r.a.exponent + r.c.exponent;
 948                     -- Renormalize denorm operands
                         -- (mantissa bit 54 clear marks an operand needing it)
 949                     if r.a.mantissa(54) = '0' then
 950                         v.state := RENORM_A;
 951                     elsif r.c.mantissa(54) = '0' then
 952                         opsel_a <= AIN_C;
 953                         v.state := RENORM_C;
 954                     else
                             -- both operands normalised: start the multiplier now
 955                         f_to_multiply.valid <= '1';
 956                         v.state := MULT_1;
 957                     end if;
 958                 else
                         -- a NaN with mantissa bit 53 clear is treated as signalling
 959                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 960                         (r.c.class = NAN and r.c.mantissa(53) = '0') then
 961                         -- Signalling NAN
 962                         v.fpscr(FPSCR_VXSNAN) := '1';
 963                         invalid := '1';
 964                     end if;
 965                     if r.a.class = NAN then
 966                     -- result is A
 967                     elsif r.c.class = NAN then
                             -- result is the NaN in C
 968                         v.result_class := NAN;
 969                         v.result_sign := r.c.negative;
 970                         opsel_a <= AIN_C;
 971                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
 972                         (r.a.class = ZERO and r.c.class = INFINITY) then
 973                         -- invalid operation, construct QNaN
 974                         v.fpscr(FPSCR_VXIMZ) := '1';
 975                         qnan_result := '1';
 976                     elsif r.a.class = ZERO or r.a.class = INFINITY then
 977                         -- result is +/- A
 978                         v.result_sign := r.a.negative xor r.c.negative;
 979                     else
 980                         -- r.c.class is ZERO or INFINITY
 981                         v.result_class := r.c.class;
 982                         v.result_sign := r.a.negative xor r.c.negative;
 983                     end if;
 984                     arith_done := '1';
 985                 end if;
 986
 987             when DO_FDIV =>
                     -- Division A / B.  For two finite operands this arm starts
                     -- renormalisation or the divide iteration; all other class
                     -- combinations are resolved here.
 988                 opsel_a <= AIN_A;
                     -- NOTE(review): this result_sign default and the result_exp
                     -- default two lines below are both overwritten unconditionally
                     -- a few lines further down, so they have no effect.
 989                 v.result_sign := r.a.negative;
 990                 v.result_class := r.a.class;
 991                 v.result_exp := r.a.exponent;
 992                 v.fpscr(FPSCR_FR) := '0';
 993                 v.fpscr(FPSCR_FI) := '0';
                     -- quotient sign and exponent
 994                 v.result_sign := r.a.negative xor r.b.negative;
 995                 v.result_exp := r.a.exponent - r.b.exponent;
                     -- iteration counter used by DIV_2 / DIV_3
 996                 v.count := "00";
 997                 if r.a.class = FINITE and r.b.class = FINITE then
 998                     -- Renormalize denorm operands
                         -- (mantissa bit 54 clear marks an operand needing it)
 999                     if r.a.mantissa(54) = '0' then
1000                         v.state := RENORM_A;
1001                     elsif r.b.mantissa(54) = '0' then
1002                         opsel_a <= AIN_B;
1003                         v.state := RENORM_B;
1004                     else
1005                         v.first := '1';
1006                         v.state := DIV_2;
1007                     end if;
1008                 else
                         -- a NaN with mantissa bit 53 clear is treated as signalling
1009                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1010                         (r.b.class = NAN and r.b.mantissa(53) = '0') then
1011                         -- Signalling NAN
1012                         v.fpscr(FPSCR_VXSNAN) := '1';
1013                         invalid := '1';
1014                     end if;
1015                     if r.a.class = NAN then
1016                         -- result is A
                             -- (restore A's own sign instead of the quotient sign)
1017                         v.result_sign := r.a.negative;
1018                     elsif r.b.class = NAN then
                             -- result is the NaN in B
1019                         v.result_class := NAN;
1020                         v.result_sign := r.b.negative;
1021                         opsel_a <= AIN_B;
1022                     elsif r.b.class = INFINITY then
1023                         if r.a.class = INFINITY then
                                 -- infinity / infinity: invalid, QNaN result
1024                             v.fpscr(FPSCR_VXIDI) := '1';
1025                             qnan_result := '1';
1026                         else
                                 -- anything else / infinity gives zero
1027                             v.result_class := ZERO;
1028                         end if;
1029                     elsif r.b.class = ZERO then
1030                         if r.a.class = ZERO then
                                 -- zero / zero: invalid, QNaN result
1031                             v.fpscr(FPSCR_VXZDZ) := '1';
1032                             qnan_result := '1';
1033                         else
                                 -- finite / zero raises zero_divide;
                                 -- infinity / zero gives infinity without it
1034                             if r.a.class = FINITE then
1035                                 zero_divide := '1';
1036                             end if;
1037                             v.result_class := INFINITY;
1038                         end if;
1039                     -- else r.b.class = FINITE, result_class = r.a.class
1040                     end if;
1041                     arith_done := '1';
1042                 end if;
1043
1044             when RENORM_A =>
                     -- First of two cycles renormalising operand A: raise the
                     -- renormalize request (handled outside this view); RENORM_A2
                     -- stores the result back into A.
1045                 renormalize := '1';
1046                 v.state := RENORM_A2;
1047
1048             when RENORM_A2 =>
                     -- Second cycle of renormalising A: set_a requests that operand A
                     -- be updated (done outside this view) and result_exp takes the
                     -- adjusted exponent new_exp.  Then select the second operand:
                     -- C when instruction bit 4 is set (continues to MULT_1), else B
                     -- (continues to DIV_2).  If that operand has mantissa bit 54
                     -- clear it is renormalised first.
1049                 set_a := '1';
1050                 v.result_exp := new_exp;
1051                 if r.insn(4) = '1' then
1052                     opsel_a <= AIN_C;
1053                     if r.c.mantissa(54) = '1' then
1054                         v.first := '1';
1055                         v.state := MULT_1;
1056                     else
1057                         v.state := RENORM_C;
1058                     end if;
1059                 else
1060                     opsel_a <= AIN_B;
1061                     if r.b.mantissa(54) = '1' then
1062                         v.first := '1';
1063                         v.state := DIV_2;
1064                     else
1065                         v.state := RENORM_B;
1066                     end if;
1067                 end if;
1068
1069             when RENORM_B =>
                     -- First of two cycles renormalising operand B: raise the
                     -- renormalize request (handled outside this view); RENORM_B2
                     -- stores the result back into B.
1070                 renormalize := '1';
1071                 v.state := RENORM_B2;
1072
1073             when RENORM_B2 =>
                     -- Second cycle of renormalising B: set_b requests that operand B
                     -- be updated (done outside this view).  The shift amount is
                     -- added to result_exp here, whereas RENORM_A2/C2 take new_exp.
                     -- NOTE(review): presumably because B is the divisor, so its
                     -- exponent adjustment applies with the opposite sign -- confirm.
1074                 set_b := '1';
1075                 v.result_exp := r.result_exp + r.shift;
1076                 v.state := LOOKUP;
1077
1078             when RENORM_C =>
                     -- First of two cycles renormalising operand C: raise the
                     -- renormalize request (handled outside this view); RENORM_C2
                     -- stores the result back into C.
1079                 renormalize := '1';
1080                 v.state := RENORM_C2;
1081
1082             when RENORM_C2 =>
                     -- Second cycle of renormalising C: set_c requests that operand C
                     -- be updated (done outside this view), result_exp takes the
                     -- adjusted exponent, and the multiply is started via v.first.
1083                 set_c := '1';
1084                 v.result_exp := new_exp;
1085                 v.first := '1';
1086                 v.state := MULT_1;
1087
1088             when ADD_SHIFT =>
                     -- Alignment step for fadd/fsub: take the shifter output as the
                     -- result (using the shift count set up in DO_FADD) and request
                     -- an update of X via set_x.  longmask is forced to '0' for this
                     -- shift (its effect is implemented outside this view).
1089                 opsel_r <= RES_SHIFT;
1090                 set_x := '1';
1091                 longmask := '0';
1092                 v.state := ADD_2;
1093
1094             when ADD_2 =>
                     -- Add/subtract step: one adder input is the operand that was
                     -- NOT loaded in DO_FADD (A when add_bsmall = '1', else B), the
                     -- other is the current R.
1095                 if r.add_bsmall = '1' then
1096                     opsel_a <= AIN_A;
1097                 else
1098                     opsel_a <= AIN_B;
1099                 end if;
1100                 opsel_b <= BIN_R;
                     -- For a subtraction R is inverted; the +1 carry that completes
                     -- the negation is withheld when r.x is set.
1101                 opsel_binv <= r.is_subtract;
1102                 carry_in <= r.is_subtract and not r.x;
1103                 v.shift := to_signed(-1, EXP_BITS);
1104                 v.state := ADD_3;
1105
1106             when ADD_3 =>
                     -- Classify the sum held in R and route it: negate, shift right
                     -- after a carry out, round directly, return zero, or normalise.
1107                 -- check for overflow or negative result (can't get both)
1108                 if r.r(63) = '1' then
1109                     -- result is opposite sign to expected
                         -- (flip the sign and negate R: invert plus carry-in)
1110                     v.result_sign := not r.result_sign;
1111                     opsel_ainv <= '1';
1112                     carry_in <= '1';
1113                     v.state := FINISH;
1114                 elsif r.r(55) = '1' then
1115                     -- sum overflowed, shift right
                         -- (uses the shift count of -1 set in ADD_2)
1116                     opsel_r <= RES_SHIFT;
1117                     set_x := '1';
1118                     v.shift := to_signed(-2, EXP_BITS);
1119                     if exp_huge = '1' then
1120                         v.state := ROUND_OFLOW;
1121                     else
1122                         v.state := ROUNDING;
1123                     end if;
1124                 elsif r.r(54) = '1' then
                         -- bit 54 set and no carry out: go straight to rounding
1125                     set_x := '1';
1126                     v.shift := to_signed(-2, EXP_BITS);
1127                     v.state := ROUNDING;
1128                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1129                     -- r.x must be zero at this point
                         -- exact zero result
1130                     v.result_class := ZERO;
1131                     if r.is_subtract = '1' then
1132                         -- set result sign depending on rounding mode
                             -- (negative only when round_mode bits 1 and 0 are set)
1133                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1134                     end if;
1135                     arith_done := '1';
1136                 else
                         -- non-zero with bit 54 clear: needs normalising
1137                     renormalize := '1';
1138                     v.state := NORMALIZE;
1139                 end if;
1140
1141             when MULT_1 =>
                     -- Wait for the multiplier.  The request is (re)issued only while
                     -- r.first is set; the state holds until multiply_to_f.valid, with
                     -- the multiplier result path selected throughout.
1142                 f_to_multiply.valid <= r.first;
1143                 opsel_r <= RES_MULT;
1144                 if multiply_to_f.valid = '1' then
1145                     v.state := FINISH;
1146                 end if;
1147
1148             when LOOKUP =>
                     -- Entered from RENORM_B2, i.e. after B has been renormalised.
1149                 opsel_a <= AIN_B;
1150                 -- wait one cycle for inverse_table[B] lookup
1151                 v.first := '1';
1152                 v.state := DIV_2;
1153
1154             when DIV_2 =>
1155                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
                     -- First half of one refinement iteration; pairs with DIV_3.
1156                 msel_1 <= MUL1_B;
1157                 msel_add <= MULADD_CONST;
1158                 msel_inv <= '1';
                     -- first iteration multiplies by the table value, later ones by P
1159                 if r.count = 0 then
1160                     msel_2 <= MUL2_LUT;
1161                 else
1162                     msel_2 <= MUL2_P;
1163                 end if;
                     -- set_y and the multiplier request are asserted only while
                     -- r.first is set
1164                 set_y := r.first;
1165                 pshift := '1';
1166                 f_to_multiply.valid <= r.first;
                     -- when the product is ready, count this iteration and move on
1167                 if multiply_to_f.valid = '1' then
1168                     v.first := '1';
1169                     v.count := r.count + 1;
1170                     v.state := DIV_3;
1171                 end if;
1172
1173             when DIV_3 =>
1174                 -- compute Y = P = P * Y
                     -- Second half of one refinement iteration; the request is issued
                     -- only while r.first is set.
1175                 msel_1 <= MUL1_Y;
1176                 msel_2 <= MUL2_P;
1177                 f_to_multiply.valid <= r.first;
1178                 pshift := '1';
1179                 if multiply_to_f.valid = '1' then
1180                     v.first := '1';
                         -- after the third DIV_2/DIV_3 pass (count = 3) move on to
                         -- forming the quotient, otherwise iterate again
1181                     if r.count = 3 then
1182                         v.state := DIV_4;
1183                     else
1184                         v.state := DIV_2;
1185                     end if;
1186                 end if;
1187
1188             when DIV_4 =>
1189                 -- compute R = P = A * Y (quotient)
                     -- set_y and the multiplier request are asserted only while
                     -- r.first is set.
1190                 msel_1 <= MUL1_A;
1191                 msel_2 <= MUL2_P;
1192                 set_y := r.first;
1193                 f_to_multiply.valid <= r.first;
1194                 pshift := '1';
                     -- when the product is ready, select it as the result
1195                 if multiply_to_f.valid = '1' then
1196                     opsel_r <= RES_MULT;
1197                     v.first := '1';
1198                     v.state := DIV_5;
1199                 end if;
1200
1201             when DIV_5 =>
1202                 -- compute P = A - B * R (remainder)
                     -- The product is inverted (msel_inv) and A is the addend; the
                     -- request is issued only while r.first is set.
1203                 msel_1 <= MUL1_B;
1204                 msel_2 <= MUL2_R;
1205                 msel_add <= MULADD_A;
1206                 msel_inv <= '1';
1207                 f_to_multiply.valid <= r.first;
1208                 if multiply_to_f.valid = '1' then
1209                     v.state := DIV_6;
1210                 end if;
1211
1212             when DIV_6 =>
1213                 -- test if remainder is 0 or >= B
                     -- pcmpb_lt / pcmpb_eq are comparison flags computed outside this
                     -- view (presumably remainder P compared against B).
1214                 if pcmpb_lt = '1' then
1215                     -- quotient is correct, set X if remainder non-zero
1216                     v.x := r.p(58) or px_nz;
1217                 else
1218                     -- quotient needs to be incremented by 1
                         -- X is set unless the comparison reported equality
1219                     carry_in <= '1';
1220                     v.x := not pcmpb_eq;
1221                 end if;
1222                 v.state := FINISH;
1223
1224             when INT_SHIFT =>
                     -- Float-to-integer path with fractional bits: take the shifter
                     -- output (shift count set in DO_FCTI), request an X update, and
                     -- preload the shift count used during INT_ROUND.
1225                 opsel_r <= RES_SHIFT;
1226                 set_x := '1';
1227                 v.state := INT_ROUND;
1228                 v.shift := to_signed(-2, EXP_BITS);
1229
1230             when INT_ROUND =>
                     -- Shift again (by the count set in INT_SHIFT) and compute the
                     -- rounding decision on the pre-shift R and X.  fp_rounding is
                     -- called with '0' in its third argument, where the ROUNDING
                     -- state passes r.single_prec.  The 2-bit decision is stored in
                     -- FPSCR FR..FI and applied in INT_FINAL.
1231                 opsel_r <= RES_SHIFT;
1232                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
1233                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
1234                 -- Check for negative values that don't round to 0 for fcti*u*
                     -- (reads the FR value just assigned to v above)
1235                 if r.insn(8) = '1' and r.result_sign = '1' and
1236                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
1237                     v.state := INT_OFLOW;
1238                 else
1239                     v.state := INT_FINAL;
1240                 end if;
1241
1242             when INT_ISHIFT =>
                     -- Float-to-integer path for values that are already integers:
                     -- a single positioning shift (count set in DO_FCTI), no rounding.
1243                 opsel_r <= RES_SHIFT;
1244                 v.state := INT_FINAL;
1245
1246             when INT_FINAL =>
1247                 -- Negate if necessary, and increment for rounding if needed
                     -- Negation is invert-plus-one, so the carry is FR xor sign: the
                     -- rounding increment and the negation's +1 cancel each other
                     -- when both apply.
1248                 opsel_ainv <= r.result_sign;
1249                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
1250                 -- Check for possible overflows
                     -- need_check flags magnitudes close enough to the destination
                     -- range limit that the final value must be examined in INT_CHECK.
1251                 case r.insn(9 downto 8) is
1252                     when "00" =>        -- fctiw[z]
1253                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
1254                     when "01" =>        -- fctiwu[z]
1255                         need_check := r.r(31);
1256                     when "10" =>        -- fctid[z]
1257                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
1258                     when others =>      -- fctidu[z]
1259                         need_check := r.r(63);
1260                 end case;
1261                 if need_check = '1' then
1262                     v.state := INT_CHECK;
1263                 else
                         -- no overflow possible: FI set also raises XX
1264                     if r.fpscr(FPSCR_FI) = '1' then
1265                         v.fpscr(FPSCR_XX) := '1';
1266                     end if;
1267                     arith_done := '1';
1268                 end if;
1269
1270             when INT_CHECK =>
                     -- Overflow check on the final integer: msb is the top bit of the
                     -- destination width (bit 31 for word, bit 63 for doubleword).
1271                 if r.insn(9) = '0' then
1272                     msb := r.r(31);
1273                 else
1274                     msb := r.r(63);
1275                 end if;
1276                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
                     -- Signed forms overflow when msb disagrees with the sign; unsigned
                     -- forms only reach this state with msb set before the final add
                     -- (see INT_FINAL), so msb = '0' here is treated as overflow.
1277                 if (r.insn(8) = '0' and msb /= r.result_sign) or
1278                     (r.insn(8) = '1' and msb /= '1') then
                         -- replace the result via the RES_MISC path and flag VXCVI
1279                     opsel_r <= RES_MISC;
1280                     v.fpscr(FPSCR_VXCVI) := '1';
1281                     invalid := '1';
1282                 else
                         -- in range: FI set also raises XX
1283                     if r.fpscr(FPSCR_FI) = '1' then
1284                         v.fpscr(FPSCR_XX) := '1';
1285                     end if;
1286                 end if;
1287                 arith_done := '1';
1288
1289             when INT_OFLOW =>
                     -- Float-to-integer conversion that cannot be represented: the
                     -- result comes from the RES_MISC path, selected by operand size,
                     -- signedness and sign, and VXCVI / invalid are raised.
1290                 opsel_r <= RES_MISC;
1291                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
                     -- a NaN operand forces selector bit 0 to '1' (as for a negative
                     -- value) regardless of its sign
1292                 if r.b.class = NAN then
1293                     misc_sel(0) <= '1';
1294                 end if;
1295                 v.fpscr(FPSCR_VXCVI) := '1';
1296                 invalid := '1';
1297                 arith_done := '1';
1298
1299             when FRI_1 =>
                     -- fri* with fractional bits: take the shifter output (shift
                     -- count set in DO_FRI), request an X update, then go to ROUNDING
                     -- with the round mode chosen in DO_FRI.
1300                 opsel_r <= RES_SHIFT;
1301                 set_x := '1';
1302                 v.shift := to_signed(-2, EXP_BITS);
1303                 v.state := ROUNDING;
1304
1305             when FINISH =>
                     -- Common exit for arithmetic results: decide whether R needs
                     -- normalising, then route by exponent range to underflow
                     -- handling, overflow handling or rounding.
                     -- For multiplies, px_nz (computed outside this view) sets X.
1306                 if r.is_multiply = '1' and px_nz = '1' then
1307                     v.x := '1';
1308                 end if;
                     -- normalised means bits 63..55 are zero and bit 54 is one
1309                 if r.r(63 downto 54) /= "0000000001" then
1310                     renormalize := '1';
1311                     v.state := NORMALIZE;
1312                 else
1313                     set_x := '1';
1314                     if exp_tiny = '1' then
                             -- shift is the distance from new_exp to min_exp
1315                         v.shift := new_exp - min_exp;
1316                         v.state := ROUND_UFLOW;
1317                     elsif exp_huge = '1' then
1318                         v.state := ROUND_OFLOW;
1319                     else
1320                         v.shift := to_signed(-2, EXP_BITS);
1321                         v.state := ROUNDING;
1322                     end if;
1323                 end if;
1324
1325             when NORMALIZE =>
1326                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
                     -- Afterwards route by exponent range, exactly as FINISH does for
                     -- an already-normalised value.
1327                 opsel_r <= RES_SHIFT;
1328                 set_x := '1';
1329                 if exp_tiny = '1' then
                         -- shift is the distance from new_exp to min_exp
1330                     v.shift := new_exp - min_exp;
1331                     v.state := ROUND_UFLOW;
1332                 elsif exp_huge = '1' then
1333                     v.state := ROUND_OFLOW;
1334                 else
1335                     v.shift := to_signed(-2, EXP_BITS);
1336                     v.state := ROUNDING;
1337                 end if;
1338
1339             when ROUND_UFLOW =>
                     -- Result exponent is below the minimum.  v.tiny records this so
                     -- that ROUNDING can raise UX if the result is also inexact.
1340                 v.tiny := '1';
1341                 if r.fpscr(FPSCR_UE) = '0' then
1342                     -- disabled underflow exception case
1343                     -- have to denormalize before rounding
                         -- (shift count was set by the state that came here)
1344                     opsel_r <= RES_SHIFT;
1345                     set_x := '1';
1346                     v.shift := to_signed(-2, EXP_BITS);
1347                     v.state := ROUNDING;
1348                 else
1349                     -- enabled underflow exception case
1350                     -- if denormalized, have to normalize before rounding
                         -- UX is raised immediately and the exponent is adjusted by
                         -- adding bias_exp (defined outside this view).
1351                     v.fpscr(FPSCR_UX) := '1';
1352                     v.result_exp := r.result_exp + bias_exp;
1353                     if r.r(54) = '0' then
1354                         renormalize := '1';
1355                         v.state := NORMALIZE;
1356                     else
1357                         v.shift := to_signed(-2, EXP_BITS);
1358                         v.state := ROUNDING;
1359                     end if;
1360                 end if;
1361
1362             when ROUND_OFLOW =>
                     -- Result exponent is above the maximum: OX is always raised.
1363                 v.fpscr(FPSCR_OX) := '1';
1364                 if r.fpscr(FPSCR_OE) = '0' then
1365                     -- disabled overflow exception
1366                     -- result depends on rounding mode
1367                     v.fpscr(FPSCR_XX) := '1';
1368                     v.fpscr(FPSCR_FI) := '1';
                         -- Infinity when round_mode(1..0) is "00", or when bit 1 is
                         -- set and bit 0 equals the result sign; otherwise the class
                         -- is left unchanged and the constructed value below is used.
1369                     if r.round_mode(1 downto 0) = "00" or
1370                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
1371                         v.result_class := INFINITY;
1372                         v.fpscr(FPSCR_FR) := '1';
1373                     else
1374                         v.fpscr(FPSCR_FR) := '0';
1375                     end if;
1376                     -- construct largest representable number
                         -- (the misc selector depends on r.single_prec)
1377                     v.result_exp := max_exp;
1378                     opsel_r <= RES_MISC;
1379                     misc_sel <= "001" & r.single_prec;
1380                     arith_done := '1';
1381                 else
1382                     -- enabled overflow exception
                         -- the exponent is adjusted by subtracting bias_exp
                         -- (defined outside this view) and the value is rounded
1383                     v.result_exp := r.result_exp - bias_exp;
1384                     v.shift := to_signed(-2, EXP_BITS);
1385                     v.state := ROUNDING;
1386                 end if;
1387
1388             when ROUNDING =>
                     -- Apply the rounding decision from fp_rounding.  The 2-bit result
                     -- is stored in FPSCR FR..FI: round(1) requests an increment,
                     -- round(0) raises XX (and UX when the result was flagged tiny).
1389                 opsel_amask <= '1';
1390                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
1391                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
1392                 if round(1) = '1' then
1393                     -- set mask to increment the LSB for the precision
1394                     opsel_b <= BIN_MASK;
1395                     carry_in <= '1';
1396                     v.shift := to_signed(-1, EXP_BITS);
1397                     v.state := ROUNDING_2;
1398                 else
1399                     if r.r(54) = '0' then
1400                         -- result after masking could be zero, or could be a
1401                         -- denormalized result that needs to be renormalized
1402                         renormalize := '1';
1403                         v.state := ROUNDING_3;
1404                     else
1405                         arith_done := '1';
1406                     end if;
1407                 end if;
1408                 if round(0) = '1' then
1409                     v.fpscr(FPSCR_XX) := '1';
1410                     if r.tiny = '1' then
1411                         v.fpscr(FPSCR_UX) := '1';
1412                     end if;
1413                 end if;
1414
1415             when ROUNDING_2 =>
1416                 -- Check for overflow during rounding
                     -- Entered from ROUNDING after the increment, with r.shift = -1.
                     -- v.x is cleared here unconditionally.
1417                 v.x := '0';
1418                 if r.r(55) = '1' then
                         -- Bit 55 set: take the shifter output (shift count is
                         -- r.shift) as the new R.  If exp_huge is set, hand over to
                         -- ROUND_OFLOW, otherwise the operation is complete.
1419                     opsel_r <= RES_SHIFT;
1420                     if exp_huge = '1' then
1421                         v.state := ROUND_OFLOW;
1422                     else
1423                         arith_done := '1';
1424                     end if;
1425                 elsif r.r(54) = '0' then
1426                     -- Do CLZ so we can renormalize the result
1427                     renormalize := '1';
1428                     v.state := ROUNDING_3;
1429                 else
                         -- Bit 54 set and bit 55 clear: nothing more to do.
1430                     arith_done := '1';
1431                 end if;
1432
1433             when ROUNDING_3 =>
                     -- Entered when bit 54 of R was clear after rounding.  The
                     -- mantissa counts as non-zero if r_hi_nz is set, or if r_lo_nz
                     -- is set and the operation is not single precision.
1434                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
1435                 if mant_nz = '0' then
                         -- Result is zero.  The sign is only overridden when
                         -- r.is_subtract is set.
1436                     v.result_class := ZERO;
1437                     if r.is_subtract = '1' then
1438                         -- set result sign depending on rounding mode
                             -- (negative only when round_mode(1 downto 0) = "11")
1439                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1440                     end if;
1441                     arith_done := '1';
1442                 else
1443                     -- Renormalize result after rounding
                         -- R takes the shifter output (shift count is r.shift, which
                         -- the previous state loaded via renormalize).  v.shift is set
                         -- to new_exp + 1022, which is negative exactly when
                         -- new_exp < -1022; in that case DENORM performs one more
                         -- shift using that count.
1444                     opsel_r <= RES_SHIFT;
1445                     v.denorm := exp_tiny;
1446                     v.shift := new_exp - to_signed(-1022, EXP_BITS);
1447                     if new_exp < to_signed(-1022, EXP_BITS) then
1448                         v.state := DENORM;
1449                     else
1450                         arith_done := '1';
1451                     end if;
1452                 end if;
1453
1454             when DENORM =>
                     -- Final shift: R takes the shifter output (shift count is
                     -- r.shift) and the operation is marked complete.
1455                 opsel_r <= RES_SHIFT;
1456                 arith_done := '1';
1457
1458         end case;
1459
1460         if zero_divide = '1' then
                 -- Common handling after the state machine: record a zero-divide
                 -- condition in FPSCR[ZX].
1461             v.fpscr(FPSCR_ZX) := '1';
1462         end if;
1463         if qnan_result = '1' then
                 -- Produce a generated quiet NaN.  Because these assignments come
                 -- after the case statement, they override any result class, sign,
                 -- misc_sel and opsel_r chosen by the state above.  misc_sel "0001"
                 -- selects the generated-QNaN mantissa in the result mux below.
1464             invalid := '1';
1465             v.result_class := NAN;
1466             v.result_sign := '0';
1467             misc_sel <= "0001";
1468             opsel_r <= RES_MISC;
1469         end if;
1470         if arith_done = '1' then
1471             -- Enabled invalid exception doesn't write result or FPRF
1472             -- Neither does enabled zero-divide exception
1473             if (invalid and r.fpscr(FPSCR_VE)) = '0' and
1474                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
1475                 v.writing_back := '1';
1476                 v.update_fprf := '1';
1477             end if;
                 -- In every case the instruction completes, the state machine
                 -- returns to IDLE, and the FX update further down is enabled.
1478             v.instr_done := '1';
1479             v.state := IDLE;
1480             update_fx := '1';
1481         end if;
1482
1483         -- Multiplier and divide/square root data path
             -- First multiplier operand, selected by msel_1.  The A, B and R
             -- choices use bits 61..0 with two zero bits appended; r.y is passed
             -- through unchanged.
1484         case msel_1 is
1485             when MUL1_A =>
1486                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
1487             when MUL1_B =>
1488                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
1489             when MUL1_Y =>
1490                 f_to_multiply.data1 <= r.y;
1491             when others =>
1492                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
1493         end case;
             -- Second multiplier operand, selected by msel_2: C mantissa, the
             -- inverse estimate padded with zeroes on both sides, r.p, or R.
1494         case msel_2 is
1495             when MUL2_C =>
1496                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
1497             when MUL2_LUT =>
1498                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
1499             when MUL2_P =>
1500                 f_to_multiply.data2 <= r.p;
1501             when others =>
1502                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
1503         end case;
             -- Addend for the multiply-add; zero unless msel_add selects the
             -- constant or the A mantissa.
1504         maddend := (others => '0');
1505         case msel_add is
1506             when MULADD_CONST =>
1507                 -- addend is 2.0 in 16.112 format
1508                 maddend(113) := '1';                -- 2.0
1509             when MULADD_A =>
1510                 -- addend is A in 16.112 format
1511                 maddend(121 downto 58) := r.a.mantissa;
1512             when others =>
1513         end case;
             -- msel_inv sends the bitwise complement of the addend and also
             -- drives the multiplier's not_result input.
1514         if msel_inv = '1' then
1515             f_to_multiply.addend <= not maddend;
1516         else
1517             f_to_multiply.addend <= maddend;
1518         end if;
1519         f_to_multiply.not_result <= msel_inv;
             -- set_y captures the second multiplier operand into Y.
             -- NOTE(review): f_to_multiply.data2 is a signal assigned above in
             -- this same process, so the value read here is the one from before
             -- this evaluation; confirm the process is sensitive to f_to_multiply
             -- so that v.y settles on the new value.
1520         if set_y = '1' then
1521             v.y := f_to_multiply.data2;
1522         end if;
             -- When the multiplier reports a valid result, P takes either the
             -- low 64 bits of the product or bits 119..56 (when pshift is set).
1523         if multiply_to_f.valid = '1' then
1524             if pshift = '0' then
1525                 v.p := multiply_to_f.result(63 downto 0);
1526             else
1527                 v.p := multiply_to_f.result(119 downto 56);
1528             end if;
1529         end if;
1530
1531         -- Data path.
1532         -- This has A and B input multiplexers, an adder, a shifter,
1533         -- count-leading-zeroes logic, and a result mux.
             -- Mask generation: the mask shift count is r.shift, reduced by 29
             -- when longmask is set.
1534         if longmask = '1' then
1535             mshift := r.shift + to_signed(-29, EXP_BITS);
1536         else
1537             mshift := r.shift;
1538         end if;
             -- The mask is all ones for mshift < -64, all zeroes for mshift >= 0,
             -- and otherwise right_mask applied to the low 6 bits of mshift.
1539         if mshift < to_signed(-64, EXP_BITS) then
1540             mask := (others => '1');
1541         elsif mshift >= to_signed(0, EXP_BITS) then
1542             mask := (others => '0');
1543         else
1544             mask := right_mask(unsigned(mshift(5 downto 0)));
1545         end if;
             -- A operand mux: R, or the mantissa of A, B or C (default).
1546         case opsel_a is
1547             when AIN_R =>
1548                 in_a0 := r.r;
1549             when AIN_A =>
1550                 in_a0 := r.a.mantissa;
1551             when AIN_B =>
1552                 in_a0 := r.b.mantissa;
1553             when others =>
1554                 in_a0 := r.c.mantissa;
1555         end case;
             -- With set_x asserted, X is set if any bit of the selected operand
             -- lies under the mask.  This is tested before the optional inversion
             -- and masking below; X is only ever set here, never cleared.
1556         if (or (mask and in_a0)) = '1' and set_x = '1' then
1557             v.x := '1';
1558         end if;
             -- Optional inversion, then optional clearing of the masked bits.
1559         if opsel_ainv = '1' then
1560             in_a0 := not in_a0;
1561         end if;
1562         if opsel_amask = '1' then
1563             in_a0 := in_a0 and not mask;
1564         end if;
1565         in_a <= in_a0;
             -- B operand mux: zero, R, or the mask; optionally inverted.
1566         case opsel_b is
1567             when BIN_ZERO =>
1568                 in_b0 := (others => '0');
1569             when BIN_R =>
1570                 in_b0 := r.r;
1571             when BIN_MASK =>
1572                 in_b0 := mask;
1573             when others =>
1574                 in_b0 := (others => '0');
1575         end case;
1576         if opsel_binv = '1' then
1577             in_b0 := not in_b0;
1578         end if;
1579         in_b <= in_b0;
             -- Shifter: for r.shift in -64..63, shifter_64 is applied to R
             -- extended with 56 low-order zero bits, using the low 7 bits of
             -- r.shift as the count; outside that range the output is zero.
1580         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
1581             shift_res := shifter_64(r.r & x"00000000000000",
1582                                     std_ulogic_vector(r.shift(6 downto 0)));
1583         else
1584             shift_res := (others => '0');
1585         end if;
             -- Result mux: adder output, shifter output, bits 121..58 of the
             -- multiplier result, or a miscellaneous value chosen by misc_sel.
             -- NOTE(review): in_a, in_b, carry_in, opsel_r and misc_sel are
             -- signals assigned in this process; confirm the process is
             -- sensitive to them so the values read here are the settled ones.
1586         case opsel_r is
1587             when RES_SUM =>
1588                 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
1589             when RES_SHIFT =>
1590                 result <= shift_res;
1591             when RES_MULT =>
1592                 result <= multiply_to_f.result(121 downto 58);
1593             when others =>
1594                 case misc_sel is
1595                     when "0000" =>
                             -- FPSCR value after masking with fpscr_mask, with the
                             -- upper 32 bits zero
1596                         misc := x"00000000" & (r.fpscr and fpscr_mask);
1597                     when "0001" =>
1598                         -- generated QNaN mantissa
1599                         misc := x"0020000000000000";
1600                     when "0010" =>
1601                         -- mantissa of max representable DP number
1602                         misc := x"007ffffffffffffc";
1603                     when "0011" =>
1604                         -- mantissa of max representable SP number
1605                         misc := x"007fffff80000000";
1606                     when "0100" =>
1607                         -- fmrgow result
                             -- (low words of the A and B mantissa registers)
1608                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
1609                     when "0110" =>
1610                         -- fmrgew result
                             -- (high words of the A and B mantissa registers)
1611                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
1612                     when "1000" =>
1613                         -- max positive result for fctiw[z]
1614                         misc := x"000000007fffffff";
1615                     when "1001" =>
1616                         -- max negative result for fctiw[z]
1617                         misc := x"ffffffff80000000";
1618                     when "1010" =>
1619                         -- max positive result for fctiwu[z]
1620                         misc := x"00000000ffffffff";
1621                     when "1011" =>
1622                         -- max negative result for fctiwu[z]
1623                         misc := x"0000000000000000";
1624                     when "1100" =>
1625                         -- max positive result for fctid[z]
1626                         misc := x"7fffffffffffffff";
1627                     when "1101" =>
1628                         -- max negative result for fctid[z]
1629                         misc := x"8000000000000000";
1630                     when "1110" =>
1631                         -- max positive result for fctidu[z]
1632                         misc := x"ffffffffffffffff";
1633                     when "1111" =>
1634                         -- max negative result for fctidu[z]
1635                         misc := x"0000000000000000";
1636                     when others =>
                             -- remaining codes ("0101", "0111" and any non-binary
                             -- values) yield zero
1637                         misc := x"0000000000000000";
1638                 end case;
1639                 result <= misc;
1640         end case;
             -- R is loaded from the result signal on every pass.
1641         v.r := result;
1642
1643         if set_a = '1' then
                 -- set_a / set_b / set_c load the corresponding operand register
                 -- with new_exp as exponent and the shifter output as mantissa.
1644             v.a.exponent := new_exp;
1645             v.a.mantissa := shift_res;
1646         end if;
1647         if set_b = '1' then
1648             v.b.exponent := new_exp;
1649             v.b.mantissa := shift_res;
1650         end if;
1651         if set_c = '1' then
1652             v.c.exponent := new_exp;
1653             v.c.mantissa := shift_res;
1654         end if;
1655
             -- Whenever the shifter output is selected as the result, the result
             -- exponent follows new_exp.  Being placed after the case statement,
             -- this overrides any v.result_exp assignment made by the state above.
1656         if opsel_r = RES_SHIFT then
1657             v.result_exp := new_exp;
1658         end if;
1659
             -- renormalize loads the shift count with (leading zeroes of R) - 9,
             -- overriding any earlier v.shift assignment in this pass.  The count
             -- is zero when the leading one of R is at bit 54, i.e. R already has
             -- exactly 9 leading zeroes.
1660         if renormalize = '1' then
1661             clz := count_left_zeroes(r.r);
1662             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
1663         end if;
1664
1665         if r.int_result = '1' then
                 -- Integer results are output as the raw contents of R; otherwise
                 -- the sign, class, exponent and R are packed by pack_dp.
1666             fp_result <= r.r;
1667         else
1668             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
1669                                  r.single_prec, r.quieten_nan);
1670         end if;
             -- When r.update_fprf is set, FPSCR bits C down to FU are recomputed
             -- by result_flags from the registered sign and class, and from
             -- whether bit 54 of R is set with r.denorm clear.
1671         if r.update_fprf = '1' then
1672             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
1673                                                              r.r(54) and not r.denorm);
1674         end if;
1675
1676         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or  -- VX: OR of all VX* bits
1677                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
             -- FEX: set when any exception bit in VX..XX is set together with
             -- its corresponding enable bit in VE..XE.
1678         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
1679                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
             -- FX: set (never cleared here) when update_fx is asserted and at
             -- least one of the five bits VX..XX is set now while its
             -- counterpart in r.old_exc is clear.
1680         if update_fx = '1' and
1681             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
1682             v.fpscr(FPSCR_FX) := '1';
1683         end if;
             -- For record-form instructions (r.rc set) the CR result is a copy
             -- of FPSCR bits FX down to OX as computed above.
1684         if r.rc = '1' then
1685             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
1686         end if;
1687
1688         if illegal = '1' then
                 -- Illegal instruction: cancel completion, interrupt, writeback
                 -- and busy, and return the state machine to IDLE.
1689             v.instr_done := '0';
1690             v.do_intr := '0';
1691             v.writing_back := '0';
1692             v.busy := '0';
1693             v.state := IDLE;
1694         else
                 -- Request an interrupt when the instruction completes with
                 -- FPSCR[FEX] set and r.fe_mode set.  busy is asserted while the
                 -- next state is not IDLE or an interrupt is being requested; it
                 -- is only ever set here, not cleared.
1695             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
1696             if v.state /= IDLE or v.do_intr = '1' then
1697                 v.busy := '1';
1698             end if;
1699         end if;
1700
1701         rin <= v;
1702         e_out.illegal <= illegal;
1703     end process;
1704
1705 end architecture behaviour;