fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is
  15     port (
  16         clk : in std_ulogic;
  17         rst : in std_ulogic;
  18
  19         e_in  : in  Execute1toFPUType;
  20         e_out : out FPUToExecute1Type;
  21
  22         w_out : out FPUToWritebackType
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
  28
  29     constant EXP_BITS : natural := 13;
  30
  31     type fpu_reg_type is record
  32         class    : fp_number_class;
  33         negative : std_ulogic;
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format
  36     end record;
  37
  38     type state_t is (IDLE,
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
  40                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  44                      DO_FRE, DO_FRSQRTE,
  45                      DO_FSEL,
  46                      FRI_1,
  47                      ADD_SHIFT, ADD_2, ADD_3,
  48                      CMP_1, CMP_2,
  49                      MULT_1,
  50                      FMADD_1, FMADD_2, FMADD_3,
  51                      FMADD_4, FMADD_5, FMADD_6,
  52                      LOOKUP,
  53                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  54                      FRE_1,
  55                      RSQRT_1,
  56                      FTDIV_1,
  57                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  58                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  59                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  60                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  61                      INT_FINAL, INT_CHECK, INT_OFLOW,
  62                      FINISH, NORMALIZE,
  63                      ROUND_UFLOW, ROUND_OFLOW,
  64                      ROUNDING, ROUNDING_2, ROUNDING_3,
  65                      DENORM,
  66                      RENORM_A, RENORM_A2,
  67                      RENORM_B, RENORM_B2,
  68                      RENORM_C, RENORM_C2);
  69
  70     type reg_type is record
  71         state        : state_t;
  72         busy         : std_ulogic;
  73         instr_done   : std_ulogic;
  74         do_intr      : std_ulogic;
  75         op           : insn_type_t;
  76         insn         : std_ulogic_vector(31 downto 0);
  77         dest_fpr     : gspr_index_t;
  78         fe_mode      : std_ulogic;
  79         rc           : std_ulogic;
  80         is_cmp       : std_ulogic;
  81         single_prec  : std_ulogic;
  82         fpscr        : std_ulogic_vector(31 downto 0);
  83         a            : fpu_reg_type;
  84         b            : fpu_reg_type;
  85         c            : fpu_reg_type;
  86         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  87         s            : std_ulogic_vector(55 downto 0);  -- extended fraction
  88         x            : std_ulogic;
  89         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  90         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  91         result_sign  : std_ulogic;
  92         result_class : fp_number_class;
  93         result_exp   : signed(EXP_BITS-1 downto 0);
  94         shift        : signed(EXP_BITS-1 downto 0);
  95         writing_back : std_ulogic;
  96         int_result   : std_ulogic;
  97         cr_result    : std_ulogic_vector(3 downto 0);
  98         cr_mask      : std_ulogic_vector(7 downto 0);
  99         old_exc      : std_ulogic_vector(4 downto 0);
 100         update_fprf  : std_ulogic;
 101         quieten_nan  : std_ulogic;
 102         tiny         : std_ulogic;
 103         denorm       : std_ulogic;
 104         round_mode   : std_ulogic_vector(2 downto 0);
 105         is_subtract  : std_ulogic;
 106         exp_cmp      : std_ulogic;
 107         madd_cmp     : std_ulogic;
 108         add_bsmall   : std_ulogic;
 109         is_multiply  : std_ulogic;
 110         is_sqrt      : std_ulogic;
 111         first        : std_ulogic;
 112         count        : unsigned(1 downto 0);
 113         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 114     end record;
 115
 116     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
 117
 118     signal r, rin : reg_type;
 119
 120     signal fp_result     : std_ulogic_vector(63 downto 0);
 121     signal opsel_a       : std_ulogic_vector(1 downto 0);
 122     signal opsel_b       : std_ulogic_vector(1 downto 0);
 123     signal opsel_r       : std_ulogic_vector(1 downto 0);
 124     signal opsel_s       : std_ulogic_vector(1 downto 0);
 125     signal opsel_ainv    : std_ulogic;
 126     signal opsel_amask   : std_ulogic;
 127     signal opsel_binv    : std_ulogic;
 128     signal in_a          : std_ulogic_vector(63 downto 0);
 129     signal in_b          : std_ulogic_vector(63 downto 0);
 130     signal result        : std_ulogic_vector(63 downto 0);
 131     signal carry_in      : std_ulogic;
 132     signal lost_bits     : std_ulogic;
 133     signal r_hi_nz       : std_ulogic;
 134     signal r_lo_nz       : std_ulogic;
 135     signal s_nz          : std_ulogic;
 136     signal misc_sel      : std_ulogic_vector(3 downto 0);
 137     signal f_to_multiply : MultiplyInputType;
 138     signal multiply_to_f : MultiplyOutputType;
 139     signal msel_1        : std_ulogic_vector(1 downto 0);
 140     signal msel_2        : std_ulogic_vector(1 downto 0);
 141     signal msel_add      : std_ulogic_vector(1 downto 0);
 142     signal msel_inv      : std_ulogic;
 143     signal inverse_est   : std_ulogic_vector(18 downto 0);
 144
 145     -- opsel values
 146     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";
 147     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 148     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 149     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 150
 151     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
 152     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 153     constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
 154     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 155
 156     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";
 157     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 158     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 159     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 160
 161     constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 162     constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
 163     constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
 164     constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";
 165
 166     -- msel values
 167     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
 168     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 169     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 170     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 171
 172     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";
 173     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 174     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 175     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 176
 177     constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 178     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 179     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 180     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 181
 182     -- Inverse lookup table, indexed by the top 8 fraction bits
 183     -- The first 256 entries are the reciprocal (1/x) lookup table,
 184     -- and the remaining 768 entries are the reciprocal square root table.
 185     -- Output range is [0.5, 1) in 0.19 format, though the top
 186     -- bit isn't stored since it is always 1.
 187     -- Each output value is the inverse of the center of the input
 188     -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
 189     -- entry 1 is 1 / (1 + 3/512), etc.
 190     signal inverse_table : lookup_table := (
 191         -- 1/x lookup table
 192         -- Unit bit is assumed to be 1, so input range is [1, 2)
 193         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 194         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 195         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 196         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 197         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 198         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 199         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 200         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 201         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 202         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 203         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 204         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 205         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 206         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 207         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 208         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 209         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 210         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 211         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 212         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 213         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 214         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 215         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 216         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 217         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 218         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 219         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 220         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 221         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 222         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 223         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 224         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 225         -- 1/sqrt(x) lookup table
 226         -- Input is in the range [1, 4), i.e. two bits to the left of the
 227         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 228         -- 1.0 ... 1.9999
 229         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 230         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 231         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 232         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 233         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 234         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 235         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 236         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 237         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 238         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 239         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 240         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 241         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 242         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 243         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 244         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 245         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 246         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 247         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 248         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 249         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 250         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 251         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 252         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 253         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 254         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 255         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 256         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 257         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 258         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 259         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 260         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 261         -- 2.0 ... 2.9999
 262         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 263         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 264         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 265         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 266         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 267         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 268         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 269         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 270         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 271         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 272         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 273         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 274         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 275         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 276         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 277         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 278         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 279         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 280         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 281         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 282         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 283         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 284         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 285         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 286         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 287         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 288         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 289         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 290         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 291         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 292         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 293         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 294         -- 3.0 ... 3.9999
 295         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 296         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 297         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 298         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 299         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 300         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 301         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 302         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 303         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 304         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 305         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 306         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 307         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 308         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 309         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 310         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 311         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 312         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 313         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 314         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 315         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 316         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 317         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 318         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 319         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 320         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 321         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 322         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 323         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 324         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 325         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 326         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 327         );
 328
 329     -- Left and right shifter with 120 bit input and 64 bit output.
 330     -- Shifts inp left by shift bits and returns the upper 64 bits of
 331     -- the result.  The shift parameter is interpreted as a signed
 332     -- number in the range -64..63, with negative values indicating
 333     -- right shifts.
 334     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 335                         shift: std_ulogic_vector(6 downto 0))
 336         return std_ulogic_vector is
 337         variable s1 : std_ulogic_vector(94 downto 0);
 338         variable s2 : std_ulogic_vector(70 downto 0);
 339         variable result : std_ulogic_vector(63 downto 0);
 340     begin
 341         case shift(6 downto 5) is
 342             when "00" =>
 343                 s1 := inp(119 downto 25);
 344             when "01" =>
 345                 s1 := inp(87 downto 0) & "0000000";
 346             when "10" =>
 347                 s1 := x"0000000000000000" & inp(119 downto 89);
 348             when others =>
 349                 s1 := x"00000000" & inp(119 downto 57);
 350         end case;
 351         case shift(4 downto 3) is
 352             when "00" =>
 353                 s2 := s1(94 downto 24);
 354             when "01" =>
 355                 s2 := s1(86 downto 16);
 356             when "10" =>
 357                 s2 := s1(78 downto 8);
 358             when others =>
 359                 s2 := s1(70 downto 0);
 360         end case;
 361         case shift(2 downto 0) is
 362             when "000" =>
 363                 result := s2(70 downto 7);
 364             when "001" =>
 365                 result := s2(69 downto 6);
 366             when "010" =>
 367                 result := s2(68 downto 5);
 368             when "011" =>
 369                 result := s2(67 downto 4);
 370             when "100" =>
 371                 result := s2(66 downto 3);
 372             when "101" =>
 373                 result := s2(65 downto 2);
 374             when "110" =>
 375                 result := s2(64 downto 1);
 376             when others =>
 377                 result := s2(63 downto 0);
 378         end case;
 379         return result;
 380     end;
 381
 382     -- Generate a mask with 0-bits on the left and 1-bits on the right which
 383     -- selects the bits will be lost in doing a right shift.  The shift
 384     -- parameter is the bottom 6 bits of a negative shift count,
 385     -- indicating a right shift.
 386     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 387         variable result: std_ulogic_vector(63 downto 0);
 388     begin
 389         result := (others => '0');
 390         for i in 0 to 63 loop
 391             if i >= shift then
 392                 result(63 - i) := '1';
 393             end if;
 394         end loop;
 395         return result;
 396     end;
 397
 398     -- Split a DP floating-point number into components and work out its class.
 399     -- If is_int = 1, the input is considered an integer
 400     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 401         variable r       : fpu_reg_type;
 402         variable exp_nz  : std_ulogic;
 403         variable exp_ao  : std_ulogic;
 404         variable frac_nz : std_ulogic;
 405         variable cls     : std_ulogic_vector(2 downto 0);
 406     begin
 407         r.negative := fpr(63);
 408         exp_nz := or (fpr(62 downto 52));
 409         exp_ao := and (fpr(62 downto 52));
 410         frac_nz := or (fpr(51 downto 0));
 411         if is_int = '0' then
 412             r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
 413             if exp_nz = '0' then
 414                 r.exponent := to_signed(-1022, EXP_BITS);
 415             end if;
 416             r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";
 417             cls := exp_ao & exp_nz & frac_nz;
 418             case cls is
 419                 when "000"  => r.class := ZERO;
 420                 when "001"  => r.class := FINITE;    -- denormalized
 421                 when "010"  => r.class := FINITE;
 422                 when "011"  => r.class := FINITE;
 423                 when "110"  => r.class := INFINITY;
 424                 when others => r.class := NAN;
 425             end case;
 426         else
 427             r.mantissa := fpr;
 428             r.exponent := (others => '0');
 429             if (fpr(63) or exp_nz or frac_nz) = '1' then
 430                 r.class := FINITE;
 431             else
 432                 r.class := ZERO;
 433             end if;
 434         end if;
 435         return r;
 436     end;
 437
 438     -- Construct a DP floating-point result from components
 439     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 440                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 441         return std_ulogic_vector is
 442         variable result : std_ulogic_vector(63 downto 0);
 443     begin
 444         result := (others => '0');
 445         result(63) := sign;
 446         case class is
 447             when ZERO =>
 448             when FINITE =>
 449                 if mantissa(54) = '1' then
 450                     -- normalized number
 451                     result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
 452                 end if;
 453                 result(51 downto 29) := mantissa(53 downto 31);
 454                 if single_prec = '0' then
 455                     result(28 downto 0) := mantissa(30 downto 2);
 456                 end if;
 457             when INFINITY =>
 458                 result(62 downto 52) := "11111111111";
 459             when NAN =>
 460                 result(62 downto 52) := "11111111111";
 461                 result(51) := quieten_nan or mantissa(53);
 462                 result(50 downto 29) := mantissa(52 downto 31);
 463                 if single_prec = '0' then
 464                     result(28 downto 0) := mantissa(30 downto 2);
 465                 end if;
 466         end case;
 467         return result;
 468     end;
 469
 470     -- Determine whether to increment when rounding
 471     -- Returns rounding_inc & inexact
 472     -- Assumes x includes the bottom 29 bits of the mantissa already
 473     -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
 474     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 475                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 476                          sign: std_ulogic)
 477         return std_ulogic_vector is
 478         variable grx : std_ulogic_vector(2 downto 0);
 479         variable ret : std_ulogic_vector(1 downto 0);
 480         variable lsb : std_ulogic;
 481     begin
 482         if single_prec = '0' then
 483             grx := mantissa(1 downto 0) & x;
 484             lsb := mantissa(2);
 485         else
 486             grx := mantissa(30 downto 29) & x;
 487             lsb := mantissa(31);
 488         end if;
 489         ret(1) := '0';
 490         ret(0) := or (grx);
 491         case rn(1 downto 0) is
 492             when "00" =>        -- round to nearest
 493                 if grx = "100" and rn(2) = '0' then
 494                     ret(1) := lsb; -- tie, round to even
 495                 else
 496                     ret(1) := grx(2);
 497                 end if;
 498             when "01" =>        -- round towards zero
 499             when others =>      -- round towards +/- inf
 500                 if rn(0) = sign then
 501                     -- round towards greater magnitude
 502                     ret(1) := ret(0);
 503                 end if;
 504         end case;
 505         return ret;
 506     end;
 507
 508     -- Determine result flags to write into the FPSCR
 509     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 510         return std_ulogic_vector is
 511     begin
 512         case class is
 513             when ZERO =>
 514                 return sign & "0010";
 515             when FINITE =>
 516                 return (not unitbit) & sign & (not sign) & "00";
 517             when INFINITY =>
 518                 return '0' & sign & (not sign) & "01";
 519             when NAN =>
 520                 return "10001";
 521         end case;
 522     end;
 523
 524 begin
 525     fpu_multiply_0: entity work.multiply
 526         port map (
 527             clk => clk,
 528             m_in => f_to_multiply,
 529             m_out => multiply_to_f
 530             );
 531
 532     fpu_0: process(clk)  -- state register: loads r from rin on every rising clock edge
 533     begin
 534         if rising_edge(clk) then
 535             if rst = '1' then  -- synchronous reset; only the fields listed below are cleared
 536                 r.state <= IDLE;
 537                 r.busy <= '0';
 538                 r.instr_done <= '0';
 539                 r.do_intr <= '0';
 540                 r.fpscr <= (others => '0');
 541                 r.writing_back <= '0';
 542             else
 543                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;  -- a new instruction may only arrive while IDLE
 544                 r <= rin;  -- rin is the next-state value, computed outside this process
 545             end if;
 546         end if;
 547     end process;
 548
 549     -- synchronous reads from lookup table: inverse_est is updated on the rising clock edge
 550     lut_access: process(clk)
 551         variable addrhi : std_ulogic_vector(1 downto 0);  -- upper 2 bits of the table index
 552         variable addr   : std_ulogic_vector(9 downto 0);  -- full 10-bit table index
 553     begin
 554         if rising_edge(clk) then
 555             if r.is_sqrt = '1' then  -- is_sqrt is set when decoding to DO_FSQRT or DO_FRSQRTE
 556                 addrhi := r.b.mantissa(55 downto 54);
 557             else
 558                 addrhi := "00";  -- otherwise only table entries 0..255 are addressed
 559             end if;
 560             addr := addrhi & r.b.mantissa(53 downto 46);
 561             inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));  -- a constant '1' is prepended to the table entry
 562         end if;
 563     end process;
 564
 565     e_out.busy <= r.busy;  -- status outputs to execute1, taken directly from the state register
 566     e_out.exception <= r.fpscr(FPSCR_FEX);
 567     e_out.interrupt <= r.do_intr;
 568
 569     w_out.valid <= r.instr_done and not r.do_intr;  -- completion is not signalled when do_intr is set
 570     w_out.write_enable <= r.writing_back;
 571     w_out.write_reg <= r.dest_fpr;
 572     w_out.write_data <= fp_result;
 573     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);  -- rc and is_cmp are captured from e_in.rc / e_in.out_cr
 574     w_out.write_cr_mask <= r.cr_mask;
 575     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &  -- cr_result replicated 8 times;
 576                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;  -- presumably write_cr_mask selects the copy written -- confirm in writeback
 577
 578     fpu_1: process(all)
 579         variable v           : reg_type;
 580         variable adec        : fpu_reg_type;
 581         variable bdec        : fpu_reg_type;
 582         variable cdec        : fpu_reg_type;
 583         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 584         variable illegal     : std_ulogic;
 585         variable j, k        : integer;
 586         variable flm         : std_ulogic_vector(7 downto 0);
 587         variable int_input   : std_ulogic;
 588         variable mask        : std_ulogic_vector(63 downto 0);
 589         variable in_a0       : std_ulogic_vector(63 downto 0);
 590         variable in_b0       : std_ulogic_vector(63 downto 0);
 591         variable misc        : std_ulogic_vector(63 downto 0);
 592         variable shift_res   : std_ulogic_vector(63 downto 0);
 593         variable round       : std_ulogic_vector(1 downto 0);
 594         variable update_fx   : std_ulogic;
 595         variable arith_done  : std_ulogic;
 596         variable invalid     : std_ulogic;
 597         variable zero_divide : std_ulogic;
 598         variable mant_nz     : std_ulogic;
 599         variable min_exp     : signed(EXP_BITS-1 downto 0);
 600         variable max_exp     : signed(EXP_BITS-1 downto 0);
 601         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 602         variable new_exp     : signed(EXP_BITS-1 downto 0);
 603         variable exp_tiny    : std_ulogic;
 604         variable exp_huge    : std_ulogic;
 605         variable renormalize : std_ulogic;
 606         variable clz         : std_ulogic_vector(5 downto 0);
 607         variable set_x       : std_ulogic;
 608         variable mshift      : signed(EXP_BITS-1 downto 0);
 609         variable need_check  : std_ulogic;
 610         variable msb         : std_ulogic;
 611         variable is_add      : std_ulogic;
 612         variable longmask    : std_ulogic;
 613         variable set_a       : std_ulogic;
 614         variable set_b       : std_ulogic;
 615         variable set_c       : std_ulogic;
 616         variable set_y       : std_ulogic;
 617         variable set_s       : std_ulogic;
 618         variable qnan_result : std_ulogic;
 619         variable px_nz       : std_ulogic;
 620         variable pcmpb_eq    : std_ulogic;
 621         variable pcmpb_lt    : std_ulogic;
 622         variable pshift      : std_ulogic;
 623         variable renorm_sqrt : std_ulogic;
 624         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 625         variable shiftin     : std_ulogic;
 626         variable mulexp      : signed(EXP_BITS-1 downto 0);
 627         variable maddend     : std_ulogic_vector(127 downto 0);
 628     begin
 629         v := r;
 630         illegal := '0';
 631         v.busy := '0';
 632         int_input := '0';
 633
 634         -- capture incoming instruction
 635         if e_in.valid = '1' then
 636             v.insn := e_in.insn;
 637             v.op := e_in.op;
 638             v.fe_mode := or (e_in.fe_mode);
 639             v.dest_fpr := e_in.frt;
 640             v.single_prec := e_in.single;
 641             v.int_result := '0';
 642             v.rc := e_in.rc;
 643             v.is_cmp := e_in.out_cr;
 644             if e_in.out_cr = '0' then
 645                 v.cr_mask := num_to_fxm(1);
 646             else
 647                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 648             end if;
 649             int_input := '0';
 650             if e_in.op = OP_FPOP_I then
 651                 int_input := '1';
 652             end if;
 653             v.quieten_nan := '1';
 654             v.tiny := '0';
 655             v.denorm := '0';
 656             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 657             v.is_subtract := '0';
 658             v.is_multiply := '0';
 659             v.is_sqrt := '0';
 660             v.add_bsmall := '0';
 661             v.doing_ftdiv := "00";
 662
 663             adec := decode_dp(e_in.fra, int_input);
 664             bdec := decode_dp(e_in.frb, int_input);
 665             cdec := decode_dp(e_in.frc, int_input);
 666             v.a := adec;
 667             v.b := bdec;
 668             v.c := cdec;
 669
 670             v.exp_cmp := '0';
 671             if adec.exponent > bdec.exponent then
 672                 v.exp_cmp := '1';
 673             end if;
 674             v.madd_cmp := '0';
 675             if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
 676                 v.madd_cmp := '1';
 677             end if;
 678         end if;
 679
 680         r_hi_nz <= or (r.r(55 downto 31));
 681         r_lo_nz <= or (r.r(30 downto 2));
 682         s_nz <= or (r.s);
 683
 684         if r.single_prec = '0' then
 685             if r.doing_ftdiv(1) = '0' then
 686                 max_exp := to_signed(1023, EXP_BITS);
 687             else
 688                 max_exp := to_signed(1020, EXP_BITS);
 689             end if;
 690             if r.doing_ftdiv(0) = '0' then
 691                 min_exp := to_signed(-1022, EXP_BITS);
 692             else
 693                 min_exp := to_signed(-1021, EXP_BITS);
 694             end if;
 695             bias_exp := to_signed(1536, EXP_BITS);
 696         else
 697             max_exp := to_signed(127, EXP_BITS);
 698             min_exp := to_signed(-126, EXP_BITS);
 699             bias_exp := to_signed(192, EXP_BITS);
 700         end if;
 701         new_exp := r.result_exp - r.shift;
 702         exp_tiny := '0';
 703         exp_huge := '0';
 704         if new_exp < min_exp then
 705             exp_tiny := '1';
 706         end if;
 707         if new_exp > max_exp then
 708             exp_huge := '1';
 709         end if;
 710
 711         -- Compare P with zero and with B
 712         px_nz := or (r.p(57 downto 4));
 713         pcmpb_eq := '0';
 714         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 715             pcmpb_eq := '1';
 716         end if;
 717         pcmpb_lt := '0';
 718         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 719             pcmpb_lt := '1';
 720         end if;
 721
 722         v.writing_back := '0';
 723         v.instr_done := '0';
 724         v.update_fprf := '0';
 725         v.shift := to_signed(0, EXP_BITS);
 726         v.first := '0';
 727         opsel_a <= AIN_R;
 728         opsel_ainv <= '0';
 729         opsel_amask <= '0';
 730         opsel_b <= BIN_ZERO;
 731         opsel_binv <= '0';
 732         opsel_r <= RES_SUM;
 733         opsel_s <= S_ZERO;
 734         carry_in <= '0';
 735         misc_sel <= "0000";
 736         fpscr_mask := (others => '1');
 737         update_fx := '0';
 738         arith_done := '0';
 739         invalid := '0';
 740         zero_divide := '0';
 741         renormalize := '0';
 742         set_x := '0';
 743         qnan_result := '0';
 744         longmask := r.single_prec;
 745         set_a := '0';
 746         set_b := '0';
 747         set_c := '0';
 748         set_s := '0';
 749         f_to_multiply.is_32bit <= '0';
 750         f_to_multiply.valid <= '0';
 751         msel_1 <= MUL1_A;
 752         msel_2 <= MUL2_C;
 753         msel_add <= MULADD_ZERO;
 754         msel_inv <= '0';
 755         set_y := '0';
 756         pshift := '0';
 757         renorm_sqrt := '0';
 758         shiftin := '0';
 759         case r.state is
 760             when IDLE =>
 761                 if e_in.valid = '1' then
 762                     case e_in.insn(5 downto 1) is
 763                         when "00000" =>
 764                             if e_in.insn(8) = '1' then
 765                                 if e_in.insn(6) = '0' then
 766                                     v.state := DO_FTDIV;
 767                                 else
 768                                     v.state := DO_FTSQRT;
 769                                 end if;
 770                             elsif e_in.insn(7) = '1' then
 771                                 v.state := DO_MCRFS;
 772                             else
 773                                 v.state := DO_FCMP;
 774                             end if;
 775                         when "00110" =>
 776                             if e_in.insn(10) = '0' then
 777                                 if e_in.insn(8) = '0' then
 778                                     v.state := DO_MTFSB;
 779                                 else
 780                                     v.state := DO_MTFSFI;
 781                                 end if;
 782                             else
 783                                 v.state := DO_FMRG;
 784                             end if;
 785                         when "00111" =>
 786                             if e_in.insn(8) = '0' then
 787                                 v.state := DO_MFFS;
 788                             else
 789                                 v.state := DO_MTFSF;
 790                             end if;
 791                         when "01000" =>
 792                             if e_in.insn(9 downto 8) /= "11" then
 793                                 v.state := DO_FMR;
 794                             else
 795                                 v.state := DO_FRI;
 796                             end if;
 797                         when "01100" =>
 798                             v.state := DO_FRSP;
 799                         when "01110" =>
 800                             if int_input = '1' then
 801                                 -- fcfid[u][s]
 802                                 v.state := DO_FCFID;
 803                             else
 804                                 v.state := DO_FCTI;
 805                             end if;
 806                         when "01111" =>
 807                             v.round_mode := "001";
 808                             v.state := DO_FCTI;
 809                         when "10010" =>
 810                             v.state := DO_FDIV;
 811                         when "10100" | "10101" =>
 812                             v.state := DO_FADD;
 813                         when "10110" =>
 814                             v.is_sqrt := '1';
 815                             v.state := DO_FSQRT;
 816                         when "10111" =>
 817                             v.state := DO_FSEL;
 818                         when "11000" =>
 819                             v.state := DO_FRE;
 820                         when "11001" =>
 821                             v.is_multiply := '1';
 822                             v.state := DO_FMUL;
 823                         when "11010" =>
 824                             v.is_sqrt := '1';
 825                             v.state := DO_FRSQRTE;
 826                         when "11100" | "11101" | "11110" | "11111" =>
 827                             v.state := DO_FMADD;
 828                         when others =>
 829                             illegal := '1';
 830                     end case;
 831                 end if;
 832                 v.x := '0';
 833                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 834                 set_s := '1';
 835
 836             when DO_MCRFS =>
 837                 j := to_integer(unsigned(insn_bfa(r.insn)));
 838                 for i in 0 to 7 loop
 839                     if i = j then
 840                         k := (7 - i) * 4;
 841                         v.cr_result := r.fpscr(k + 3 downto k);
 842                         fpscr_mask(k + 3 downto k) := "0000";
 843                     end if;
 844                 end loop;
 845                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 846                 v.instr_done := '1';
 847                 v.state := IDLE;
 848
 849             when DO_FTDIV =>
 850                 v.instr_done := '1';
 851                 v.state := IDLE;
 852                 v.cr_result := "0000";
 853                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
 854                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 855                     v.cr_result(2) := '1';
 856                 end if;
 857                 if r.a.class = NAN or r.a.class = INFINITY or
 858                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
 859                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
 860                     v.cr_result(1) := '1';
 861                 else
 862                     v.doing_ftdiv := "11";
 863                     v.first := '1';
 864                     v.state := FTDIV_1;
 865                     v.instr_done := '0';
 866                 end if;
 867
 868             when DO_FTSQRT =>
 869                 v.instr_done := '1';
 870                 v.state := IDLE;
 871                 v.cr_result := "0000";
 872                 if r.b.class = ZERO or r.b.class = INFINITY or
 873                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 874                     v.cr_result(2) := '1';
 875                 end if;
 876                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
 877                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
 878                     v.cr_result(1) := '0';
 879                 end if;
 880
 881             when DO_FCMP =>
 882                 -- fcmp[uo]
 883                 v.instr_done := '1';
 884                 v.state := IDLE;
 885                 update_fx := '1';
 886                 opsel_a <= AIN_B;
 887                 opsel_r <= RES_SUM;
 888                 v.result_exp := r.b.exponent;
 889                 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 890                     (r.b.class = NAN and r.b.mantissa(53) = '0') then
 891                     -- Signalling NAN
 892                     v.fpscr(FPSCR_VXSNAN) := '1';
 893                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
 894                         v.fpscr(FPSCR_VXVC) := '1';
 895                     end if;
 896                     invalid := '1';
 897                     v.cr_result := "0001";          -- unordered
 898                 elsif r.a.class = NAN or r.b.class = NAN then
 899                     if r.insn(6) = '1' then
 900                         -- fcmpo
 901                         v.fpscr(FPSCR_VXVC) := '1';
 902                         invalid := '1';
 903                     end if;
 904                     v.cr_result := "0001";          -- unordered
 905                 elsif r.a.class = ZERO and r.b.class = ZERO then
 906                     v.cr_result := "0010";          -- equal
 907                 elsif r.a.negative /= r.b.negative then
 908                     v.cr_result := r.a.negative & r.b.negative & "00";
 909                 elsif r.a.class = ZERO then
 910                     -- A and B are the same sign from here down
 911                     v.cr_result := not r.b.negative & r.b.negative & "00";
 912                 elsif r.a.class = INFINITY then
 913                     if r.b.class = INFINITY then
 914                         v.cr_result := "0010";
 915                     else
 916                         v.cr_result := r.a.negative & not r.a.negative & "00";
 917                     end if;
 918                 elsif r.b.class = ZERO then
 919                     -- A is finite from here down
 920                     v.cr_result := r.a.negative & not r.a.negative & "00";
 921                 elsif r.b.class = INFINITY then
 922                     v.cr_result := not r.b.negative & r.b.negative & "00";
 923                 elsif r.exp_cmp = '1' then
 924                     -- A and B are both finite from here down
 925                     v.cr_result := r.a.negative & not r.a.negative & "00";
 926                 elsif r.a.exponent /= r.b.exponent then
 927                     -- A exponent is smaller than B
 928                     v.cr_result := not r.a.negative & r.a.negative & "00";
 929                 else
 930                     -- Prepare to subtract mantissas, put B in R
 931                     v.cr_result := "0000";
 932                     v.instr_done := '0';
 933                     v.state := CMP_1;
 934                 end if;
 935                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
 936
 937             when DO_MTFSB =>
 938                 -- mtfsb{0,1}
 939                 j := to_integer(unsigned(insn_bt(r.insn)));
 940                 for i in 0 to 31 loop
 941                     if i = j then
 942                         v.fpscr(31 - i) := r.insn(6);
 943                     end if;
 944                 end loop;
 945                 v.instr_done := '1';
 946                 v.state := IDLE;
 947
 948             when DO_MTFSFI =>
 949                 -- mtfsfi
 950                 j := to_integer(unsigned(insn_bf(r.insn)));
 951                 if r.insn(16) = '0' then
 952                     for i in 0 to 7 loop
 953                         if i = j then
 954                             k := (7 - i) * 4;
 955                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 956                         end if;
 957                     end loop;
 958                 end if;
 959                 v.instr_done := '1';
 960                 v.state := IDLE;
 961
 962             when DO_FMRG =>
 963                 -- fmrgew, fmrgow
 964                 opsel_r <= RES_MISC;
 965                 misc_sel <= "01" & r.insn(8) & '0';
 966                 v.int_result := '1';
 967                 v.writing_back := '1';
 968                 v.instr_done := '1';
 969                 v.state := IDLE;
 970
 971             when DO_MFFS =>
 972                 v.int_result := '1';
 973                 v.writing_back := '1';
 974                 opsel_r <= RES_MISC;
 975                 case r.insn(20 downto 16) is
 976                     when "00000" =>
 977                         -- mffs
 978                     when "00001" =>
 979                         -- mffsce
 980                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
 981                     when "10100" | "10101" =>
 982                         -- mffscdrn[i] (but we don't implement DRN)
 983                         fpscr_mask := x"000000FF";
 984                     when "10110" =>
 985                         -- mffscrn
 986                         fpscr_mask := x"000000FF";
 987                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
 988                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
 989                     when "10111" =>
 990                         -- mffscrni
 991                         fpscr_mask := x"000000FF";
 992                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
 993                     when "11000" =>
 994                         -- mffsl
 995                         fpscr_mask := x"0007F0FF";
 996                     when others =>
 997                         illegal := '1';
 998                 end case;
 999                 v.instr_done := '1';
1000                 v.state := IDLE;
1001
1002             when DO_MTFSF =>
1003                 if r.insn(25) = '1' then
1004                     flm := x"FF";
1005                 elsif r.insn(16) = '1' then
1006                     flm := x"00";
1007                 else
1008                     flm := r.insn(24 downto 17);
1009                 end if;
1010                 for i in 0 to 7 loop
1011                     k := i * 4;
1012                     if flm(i) = '1' then
1013                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1014                     end if;
1015                 end loop;
1016                 v.instr_done := '1';
1017                 v.state := IDLE;
1018
1019             when DO_FMR =>
1020                 opsel_a <= AIN_B;
1021                 v.result_class := r.b.class;
1022                 v.result_exp := r.b.exponent;
1023                 v.quieten_nan := '0';
1024                 if r.insn(9) = '1' then
1025                     v.result_sign := '0';              -- fabs
1026                 elsif r.insn(8) = '1' then
1027                     v.result_sign := '1';              -- fnabs
1028                 elsif r.insn(7) = '1' then
1029                     v.result_sign := r.b.negative;     -- fmr
1030                 elsif r.insn(6) = '1' then
1031                     v.result_sign := not r.b.negative; -- fneg
1032                 else
1033                     v.result_sign := r.a.negative;     -- fcpsgn
1034                 end if;
1035                 v.writing_back := '1';
1036                 v.instr_done := '1';
1037                 v.state := IDLE;
1038
1039             when DO_FRI =>    -- fri[nzpm]
1040                 opsel_a <= AIN_B;
1041                 v.result_class := r.b.class;
1042                 v.result_sign := r.b.negative;
1043                 v.result_exp := r.b.exponent;
1044                 v.fpscr(FPSCR_FR) := '0';
1045                 v.fpscr(FPSCR_FI) := '0';
1046                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1047                     -- Signalling NAN
1048                     v.fpscr(FPSCR_VXSNAN) := '1';
1049                     invalid := '1';
1050                 end if;
1051                 if r.b.class = FINITE then
1052                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1053                         -- integer already, no rounding required
1054                         arith_done := '1';
1055                     else
1056                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1057                         v.state := FRI_1;
1058                         v.round_mode := '1' & r.insn(7 downto 6);
1059                     end if;
1060                 else
1061                     arith_done := '1';
1062                 end if;
1063
1064             when DO_FRSP =>
1065                 opsel_a <= AIN_B;
1066                 v.result_class := r.b.class;
1067                 v.result_sign := r.b.negative;
1068                 v.result_exp := r.b.exponent;
1069                 v.fpscr(FPSCR_FR) := '0';
1070                 v.fpscr(FPSCR_FI) := '0';
1071                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1072                     -- Signalling NAN
1073                     v.fpscr(FPSCR_VXSNAN) := '1';
1074                     invalid := '1';
1075                 end if;
1076                 set_x := '1';
1077                 if r.b.class = FINITE then
1078                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1079                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1080                         v.state := ROUND_UFLOW;
1081                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1082                         v.state := ROUND_OFLOW;
1083                     else
1084                         v.shift := to_signed(-2, EXP_BITS);
1085                         v.state := ROUNDING;
1086                     end if;
1087                 else
1088                     arith_done := '1';
1089                 end if;
1090
1091             when DO_FCTI =>
1092                 -- instr bit 9: 1=dword 0=word
1093                 -- instr bit 8: 1=unsigned 0=signed
1094                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1095                 opsel_a <= AIN_B;
1096                 v.result_class := r.b.class;
1097                 v.result_sign := r.b.negative;
1098                 v.result_exp := r.b.exponent;
1099                 v.fpscr(FPSCR_FR) := '0';
1100                 v.fpscr(FPSCR_FI) := '0';
1101                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1102                     -- Signalling NAN
1103                     v.fpscr(FPSCR_VXSNAN) := '1';
1104                     invalid := '1';
1105                 end if;
1106
1107                 v.int_result := '1';
1108                 case r.b.class is
1109                     when ZERO =>
1110                         arith_done := '1';
1111                     when FINITE =>
1112                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1113                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1114                             v.state := INT_OFLOW;
1115                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1116                             -- integer already, no rounding required,
1117                             -- shift into final position
1118                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1119                             if r.insn(8) = '1' and r.b.negative = '1' then
1120                                 v.state := INT_OFLOW;
1121                             else
1122                                 v.state := INT_ISHIFT;
1123                             end if;
1124                         else
1125                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1126                             v.state := INT_SHIFT;
1127                         end if;
1128                     when INFINITY | NAN =>
1129                         v.state := INT_OFLOW;
1130                 end case;
1131
1132             when DO_FCFID =>
1133                 v.result_sign := '0';
1134                 opsel_a <= AIN_B;
1135                 if r.insn(8) = '0' and r.b.negative = '1' then
1136                     -- fcfid[s] with negative operand, set R = -B
1137                     opsel_ainv <= '1';
1138                     carry_in <= '1';
1139                     v.result_sign := '1';
1140                 end if;
1141                 v.result_class := r.b.class;
1142                 v.result_exp := to_signed(54, EXP_BITS);
1143                 v.fpscr(FPSCR_FR) := '0';
1144                 v.fpscr(FPSCR_FI) := '0';
1145                 if r.b.class = ZERO then
1146                     arith_done := '1';
1147                 else
1148                     v.state := FINISH;
1149                 end if;
1150
1151             when DO_FADD =>
1152                 -- fadd[s] and fsub[s]
1153                 opsel_a <= AIN_A;
1154                 v.result_sign := r.a.negative;
1155                 v.result_class := r.a.class;
1156                 v.result_exp := r.a.exponent;
1157                 v.fpscr(FPSCR_FR) := '0';
1158                 v.fpscr(FPSCR_FI) := '0';
1159                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1160                 if r.a.class = FINITE and r.b.class = FINITE then
1161                     v.is_subtract := not is_add;
1162                     v.add_bsmall := r.exp_cmp;
1163                     if r.exp_cmp = '0' then
1164                         v.shift := r.a.exponent - r.b.exponent;
1165                         v.result_sign := r.b.negative xnor r.insn(1);
1166                         if r.a.exponent = r.b.exponent then
1167                             v.state := ADD_2;
1168                         else
1169                             v.state := ADD_SHIFT;
1170                         end if;
1171                     else
1172                         opsel_a <= AIN_B;
1173                         v.shift := r.b.exponent - r.a.exponent;
1174                         v.result_exp := r.b.exponent;
1175                         v.state := ADD_SHIFT;
1176                     end if;
1177                 else
1178                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1179                         (r.b.class = NAN and r.b.mantissa(53) = '0') then
1180                         -- Signalling NAN
1181                         v.fpscr(FPSCR_VXSNAN) := '1';
1182                         invalid := '1';
1183                     end if;
1184                     if r.a.class = NAN then
1185                         -- nothing to do, result is A
1186                     elsif r.b.class = NAN then
1187                         v.result_class := NAN;
1188                         v.result_sign := r.b.negative;
1189                         opsel_a <= AIN_B;
1190                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1191                         -- invalid operation, construct QNaN
1192                         v.fpscr(FPSCR_VXISI) := '1';
1193                         qnan_result := '1';
1194                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1195                         -- return -0 for rounding to -infinity
1196                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1197                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1198                         -- nothing to do, result is A
1199                     else
1200                         -- result is +/- B
1201                         v.result_sign := r.b.negative xnor r.insn(1);
1202                         v.result_class := r.b.class;
1203                         v.result_exp := r.b.exponent;
1204                         opsel_a <= AIN_B;
1205                     end if;
1206                     arith_done := '1';
1207                 end if;
1208
1209             when DO_FMUL =>  -- entry state for multiply: classify A and C, then start the multiplier or finish a special case
1210                 -- fmul[s]
1211                 opsel_a <= AIN_A;
1212                 v.result_sign := r.a.negative;  -- defaults describe "result is A"; overridden below where needed
1213                 v.result_class := r.a.class;
1214                 v.result_exp := r.a.exponent;
1215                 v.fpscr(FPSCR_FR) := '0';
1216                 v.fpscr(FPSCR_FI) := '0';
1217                 if r.a.class = FINITE and r.c.class = FINITE then  -- both operands FINITE: a real multiplication is needed
1218                     v.result_sign := r.a.negative xor r.c.negative;  -- sign of the product
1219                     v.result_exp := r.a.exponent + r.c.exponent;  -- exponents are unbiased, so they simply add
1220                     -- Renormalize denorm operands (mantissa bit 54 clear); A is handled first, then C
1221                     if r.a.mantissa(54) = '0' then
1222                         v.state := RENORM_A;
1223                     elsif r.c.mantissa(54) = '0' then
1224                         opsel_a <= AIN_C;
1225                         v.state := RENORM_C;
1226                     else
1227                         f_to_multiply.valid <= '1';  -- both operands normalized: launch the multiply this cycle
1228                         v.state := MULT_1;
1229                     end if;
1230                 else  -- at least one operand is ZERO, INFINITY or NAN: no multiplication is performed
1231                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1232                         (r.c.class = NAN and r.c.mantissa(53) = '0') then
1233                         -- Signalling NAN (mantissa bit 53 clear)
1234                         v.fpscr(FPSCR_VXSNAN) := '1';
1235                         invalid := '1';
1236                     end if;
1237                     if r.a.class = NAN then
1238                         -- result is A (the defaults assigned at the top already select A)
1239                     elsif r.c.class = NAN then
1240                         v.result_class := NAN;  -- result is C
1241                         v.result_sign := r.c.negative;
1242                         opsel_a <= AIN_C;
1243                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1244                         (r.a.class = ZERO and r.c.class = INFINITY) then
1245                         -- invalid operation (infinity times zero), construct QNaN
1246                         v.fpscr(FPSCR_VXIMZ) := '1';
1247                         qnan_result := '1';
1248                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1249                         -- result is +/- A: class comes from A, sign is the product sign
1250                         v.result_sign := r.a.negative xor r.c.negative;
1251                     else
1252                         -- r.c.class is ZERO or INFINITY: class comes from C, sign is the product sign
1253                         v.result_class := r.c.class;
1254                         v.result_sign := r.a.negative xor r.c.negative;
1255                     end if;
1256                     arith_done := '1';  -- special case resolved here; arith_done is consumed outside this excerpt
1257                 end if;
1258
1259             when DO_FDIV =>  -- entry state for divide A / B: classify operands, then start the iteration or finish a special case
1260                 opsel_a <= AIN_A;
1261                 v.result_sign := r.a.negative;  -- NOTE: overwritten by the xor assignment a few lines below
1262                 v.result_class := r.a.class;  -- default: result class follows A
1263                 v.result_exp := r.a.exponent;  -- NOTE: overwritten by the exponent difference a few lines below
1264                 v.fpscr(FPSCR_FR) := '0';
1265                 v.fpscr(FPSCR_FI) := '0';
1266                 v.result_sign := r.a.negative xor r.b.negative;  -- sign of the quotient
1267                 v.result_exp := r.a.exponent - r.b.exponent;  -- exponents are unbiased, so they simply subtract
1268                 v.count := "00";  -- DIV_2/DIV_3 count their passes in r.count
1269                 if r.a.class = FINITE and r.b.class = FINITE then
1270                     -- Renormalize denorm operands (mantissa bit 54 clear); A is handled first, then B
1271                     if r.a.mantissa(54) = '0' then
1272                         v.state := RENORM_A;
1273                     elsif r.b.mantissa(54) = '0' then
1274                         opsel_a <= AIN_B;
1275                         v.state := RENORM_B;
1276                     else
1277                         v.first := '1';
1278                         v.state := DIV_2;
1279                     end if;
1280                 else  -- at least one operand is ZERO, INFINITY or NAN: no division is performed
1281                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1282                         (r.b.class = NAN and r.b.mantissa(53) = '0') then
1283                         -- Signalling NAN (mantissa bit 53 clear)
1284                         v.fpscr(FPSCR_VXSNAN) := '1';
1285                         invalid := '1';
1286                     end if;
1287                     if r.a.class = NAN then
1288                         -- result is A; restore A's own sign (the quotient sign was assigned above)
1289                         v.result_sign := r.a.negative;
1290                     elsif r.b.class = NAN then
1291                         v.result_class := NAN;  -- result is B
1292                         v.result_sign := r.b.negative;
1293                         opsel_a <= AIN_B;
1294                     elsif r.b.class = INFINITY then
1295                         if r.a.class = INFINITY then
1296                             v.fpscr(FPSCR_VXIDI) := '1';  -- infinity / infinity: invalid, construct QNaN
1297                             qnan_result := '1';
1298                         else
1299                             v.result_class := ZERO;  -- zero or finite / infinity gives zero with the quotient sign
1300                         end if;
1301                     elsif r.b.class = ZERO then
1302                         if r.a.class = ZERO then
1303                             v.fpscr(FPSCR_VXZDZ) := '1';  -- zero / zero: invalid, construct QNaN
1304                             qnan_result := '1';
1305                         else
1306                             if r.a.class = FINITE then
1307                                 zero_divide := '1';  -- only finite / zero raises zero_divide; infinity / zero does not
1308                             end if;
1309                             v.result_class := INFINITY;
1310                         end if;
1311                     -- else r.b.class = FINITE, result_class = r.a.class (A is ZERO or INFINITY here)
1312                     end if;
1313                     arith_done := '1';  -- special case resolved here; arith_done is consumed outside this excerpt
1314                 end if;
1315
1316             when DO_FSEL =>  -- select: result is C when A is zero or a non-negative non-NaN, otherwise B
1317                 opsel_a <= AIN_A;  -- overridden in both branches below (the later signal assignment wins)
1318                 v.fpscr(FPSCR_FR) := '0';
1319                 v.fpscr(FPSCR_FI) := '0';
1320                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then  -- zero of either sign counts as >= 0
1321                     v.result_sign := r.c.negative;  -- result is C, copied field by field
1322                     v.result_exp := r.c.exponent;
1323                     v.result_class := r.c.class;
1324                     opsel_a <= AIN_C;
1325                 else  -- A is negative and non-zero, or A is a NaN
1326                     v.result_sign := r.b.negative;  -- result is B, copied field by field
1327                     v.result_exp := r.b.exponent;
1328                     v.result_class := r.b.class;
1329                     opsel_a <= AIN_B;
1330                 end if;
1331                 v.quieten_nan := '0';  -- presumably so a NaN operand is passed through unmodified; confirm where quieten_nan is used
1332                 arith_done := '1';  -- single-state operation; arith_done is consumed outside this excerpt
1333
1334             when DO_FSQRT =>  -- entry state for square root of B: handle special cases or start the computation
1335                 opsel_a <= AIN_B;
1336                 v.result_class := r.b.class;  -- defaults describe "result is B"
1337                 v.result_sign := r.b.negative;
1338                 v.fpscr(FPSCR_FR) := '0';
1339                 v.fpscr(FPSCR_FI) := '0';
1340                 if r.b.class = NAN and r.b.mantissa(53) = '0' then  -- signalling NaN (mantissa bit 53 clear)
1341                     v.fpscr(FPSCR_VXSNAN) := '1';
1342                     invalid := '1';
1343                 end if;
1344                 case r.b.class is
1345                     when FINITE =>
1346                         v.result_exp := r.b.exponent;
1347                         if r.b.negative = '1' then  -- square root of a negative number: invalid, construct QNaN
1348                             v.fpscr(FPSCR_VXSQRT) := '1';
1349                             qnan_result := '1';
1350                             arith_done := '1';
1351                         elsif r.b.mantissa(54) = '0' then  -- denormalized B: renormalize first
1352                             v.state := RENORM_B;
1353                         elsif r.b.exponent(0) = '0' then  -- normalized B with an even exponent: start directly
1354                             v.state := SQRT_1;
1355                         else  -- odd exponent: go via RENORM_B2 with a shift of 1 recorded in v.shift
1356                             v.shift := to_signed(1, EXP_BITS);
1357                             v.state := RENORM_B2;
1358                         end if;
1359                     when NAN | ZERO =>
1360                         -- result is B (the defaults assigned at the top already select B)
1361                         arith_done := '1';
1362                     when INFINITY =>
1363                         if r.b.negative = '1' then  -- square root of -infinity: invalid, construct QNaN
1364                             v.fpscr(FPSCR_VXSQRT) := '1';
1365                             qnan_result := '1';
1366                         -- else result is B (+infinity)
1367                         end if;
1368                         arith_done := '1';
1369                 end case;
1370
1371             when DO_FRE =>  -- entry state for the reciprocal estimate of B
1372                 opsel_a <= AIN_B;
1373                 v.result_class := r.b.class;  -- defaults describe "result is B"
1374                 v.result_sign := r.b.negative;  -- the reciprocal keeps the sign of B in every case below
1375                 v.fpscr(FPSCR_FR) := '0';
1376                 v.fpscr(FPSCR_FI) := '0';
1377                 if r.b.class = NAN and r.b.mantissa(53) = '0' then  -- signalling NaN (mantissa bit 53 clear)
1378                     v.fpscr(FPSCR_VXSNAN) := '1';
1379                     invalid := '1';
1380                 end if;
1381                 case r.b.class is
1382                     when FINITE =>
1383                         v.result_exp := - r.b.exponent;  -- reciprocal negates the (unbiased) exponent
1384                         if r.b.mantissa(54) = '0' then  -- denormalized B: renormalize first
1385                             v.state := RENORM_B;
1386                         else
1387                             v.state := FRE_1;
1388                         end if;
1389                     when NAN =>
1390                         -- result is B (the defaults assigned at the top already select B)
1391                         arith_done := '1';
1392                     when INFINITY =>
1393                         v.result_class := ZERO;  -- 1 / infinity gives zero with the sign of B
1394                         arith_done := '1';
1395                     when ZERO =>
1396                         v.result_class := INFINITY;  -- 1 / zero gives infinity with the sign of B
1397                         zero_divide := '1';
1398                         arith_done := '1';
1399                 end case;
1400
1401             when DO_FRSQRTE =>  -- entry state for the reciprocal square root estimate of B
1402                 opsel_a <= AIN_B;
1403                 v.result_class := r.b.class;  -- defaults describe "result is B"
1404                 v.result_sign := r.b.negative;
1405                 v.fpscr(FPSCR_FR) := '0';
1406                 v.fpscr(FPSCR_FI) := '0';
1407                 if r.b.class = NAN and r.b.mantissa(53) = '0' then  -- signalling NaN (mantissa bit 53 clear)
1408                     v.fpscr(FPSCR_VXSNAN) := '1';
1409                     invalid := '1';
1410                 end if;
1411                 v.shift := to_signed(1, EXP_BITS);  -- set for every class; RENORM_B2 adds r.shift to result_exp when r.is_sqrt = '0'
1412                 case r.b.class is
1413                     when FINITE =>
1414                         v.result_exp := r.b.exponent;
1415                         if r.b.negative = '1' then  -- negative finite operand: invalid, construct QNaN
1416                             v.fpscr(FPSCR_VXSQRT) := '1';
1417                             qnan_result := '1';
1418                             arith_done := '1';
1419                         elsif r.b.mantissa(54) = '0' then  -- denormalized B: renormalize first
1420                             v.state := RENORM_B;
1421                         elsif r.b.exponent(0) = '0' then  -- normalized B with an even exponent: start directly
1422                             v.state := RSQRT_1;
1423                         else  -- odd exponent: go via RENORM_B2 using the shift of 1 set above
1424                             v.state := RENORM_B2;
1425                         end if;
1426                     when NAN =>
1427                         -- result is B (the defaults assigned at the top already select B)
1428                         arith_done := '1';
1429                     when INFINITY =>
1430                         if r.b.negative = '1' then  -- -infinity: invalid, construct QNaN
1431                             v.fpscr(FPSCR_VXSQRT) := '1';
1432                             qnan_result := '1';
1433                         else
1434                             v.result_class := ZERO;  -- +infinity gives +zero
1435                         end if;
1436                         arith_done := '1';
1437                     when ZERO =>
1438                         v.result_class := INFINITY;  -- zero gives infinity with the sign of B
1439                         zero_divide := '1';
1440                         arith_done := '1';
1441                 end case;
1442
1443             when DO_FMADD =>  -- entry state for fused multiply-add (A * C combined with B); re-entered from RENORM_A2/RENORM_C2
1444                 -- fmadd, fmsub, fnmadd, fnmsub
1445                 opsel_a <= AIN_A;
1446                 v.result_sign := r.a.negative;  -- defaults describe "result is A"; overridden below where needed
1447                 v.result_class := r.a.class;
1448                 v.result_exp := r.a.exponent;
1449                 v.fpscr(FPSCR_FR) := '0';
1450                 v.fpscr(FPSCR_FI) := '0';
1451                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);  -- product sign xor B sign xor insn bit 1; drives v.is_subtract below
1452                 if r.a.class = FINITE and r.c.class = FINITE and
1453                     (r.b.class = FINITE or r.b.class = ZERO) then  -- a real product is needed; B may be zero
1454                     v.is_subtract := not is_add;
1455                     mulexp := r.a.exponent + r.c.exponent;  -- exponent of the product
1456                     v.result_exp := mulexp;
1457                     opsel_a <= AIN_B;  -- default for this branch; the renormalize paths below override it
1458                     -- Make sure A and C are normalized
1459                     if r.a.mantissa(54) = '0' then
1460                         opsel_a <= AIN_A;
1461                         v.state := RENORM_A;
1462                     elsif r.c.mantissa(54) = '0' then
1463                         opsel_a <= AIN_C;
1464                         v.state := RENORM_C;
1465                     elsif r.b.class = ZERO then
1466                         -- no addend, degenerates to multiply
1467                         v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);  -- product sign, flipped when insn bit 2 is set
1468                         f_to_multiply.valid <= '1';
1469                         v.is_multiply := '1';
1470                         v.state := MULT_1;
1471                     elsif r.madd_cmp = '0' then
1472                         -- addend is bigger, do multiply first
1473                         v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1474                         f_to_multiply.valid <= '1';
1475                         v.state := FMADD_1;
1476                     else
1477                         -- product is bigger, shift B right and use it as the
1478                         -- addend to the multiplier
1479                         v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);  -- FMADD_2 later subtracts this 64 again
1480                         -- for subtract, multiplier does B - A * C
1481                         v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1482                         v.result_exp := r.b.exponent;
1483                         v.state := FMADD_2;
1484                     end if;
1485                 else  -- some operand is a NaN or infinity, or A or C is zero: resolved without multiplying
1486                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1487                         (r.b.class = NAN and r.b.mantissa(53) = '0') or
1488                         (r.c.class = NAN and r.c.mantissa(53) = '0') then
1489                         -- Signalling NAN (mantissa bit 53 clear)
1490                         v.fpscr(FPSCR_VXSNAN) := '1';
1491                         invalid := '1';
1492                     end if;
1493                     if r.a.class = NAN then  -- NaN operands are chosen in the order A, then B, then C
1494                         -- nothing to do, result is A
1495                     elsif r.b.class = NAN then
1496                         -- result is B
1497                         v.result_class := NAN;
1498                         v.result_sign := r.b.negative;
1499                         opsel_a <= AIN_B;
1500                     elsif r.c.class = NAN then
1501                         -- result is C
1502                         v.result_class := NAN;
1503                         v.result_sign := r.c.negative;
1504                         opsel_a <= AIN_C;
1505                     elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1506                         (r.a.class = INFINITY and r.c.class = ZERO) then
1507                         -- invalid operation (infinity times zero), construct QNaN
1508                         v.fpscr(FPSCR_VXIMZ) := '1';
1509                         qnan_result := '1';
1510                     elsif r.a.class = INFINITY or r.c.class = INFINITY then  -- the product is infinite
1511                         if r.b.class = INFINITY and is_add = '0' then
1512                             -- invalid operation (infinity minus infinity), construct QNaN
1513                             v.fpscr(FPSCR_VXISI) := '1';
1514                             qnan_result := '1';
1515                         else
1516                             -- result is infinity
1517                             v.result_class := INFINITY;
1518                             v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1519                         end if;
1520                     else
1521                         -- Here A is zero, C is zero, or B is infinity
1522                         -- Result is +/-B in all of those cases
1523                         v.result_class := r.b.class;
1524                         v.result_exp := r.b.exponent;
1525                         if v.result_class /= ZERO or is_add = '1' then  -- reads the value just assigned from r.b.class
1526                             v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1527                         else
1528                             -- have to be careful about rule for 0 - 0 result sign
1529                             v.result_sign := (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);  -- negative only when both round_mode bits are set, then flipped by insn bit 2
1530                         end if;
1531                         opsel_a <= AIN_B;
1532                     end if;
1533                     arith_done := '1';  -- special case resolved here; arith_done is consumed outside this excerpt
1534                 end if;
1535
1536             when RENORM_A =>  -- first of two states that renormalize a denormalized A operand
1537                 renormalize := '1';  -- request a renormalization; the logic it drives is outside this excerpt
1538                 v.state := RENORM_A2;
1539
1540             when RENORM_A2 =>  -- second renormalize-A state: store the new A, then pick the next step for the operation
1541                 set_a := '1';  -- presumably latches the renormalized value into A; confirm where set_a is used
1542                 v.result_exp := new_exp;  -- new_exp is computed outside this excerpt
1543                 if r.insn(4) = '1' then  -- this arm leads to multiply/fmadd states, so the other operand is C
1544                     opsel_a <= AIN_C;
1545                     if r.c.mantissa(54) = '1' then  -- C is already normalized
1546                         if r.insn(3) = '0' or r.b.class = ZERO then  -- no addend to consider: plain multiply
1547                             v.first := '1';
1548                             v.state := MULT_1;
1549                         else
1550                             v.madd_cmp := '0';  -- recompute madd_cmp from the updated exponent, then re-enter DO_FMADD
1551                             if new_exp + 1 >= r.b.exponent then
1552                                 v.madd_cmp := '1';
1553                             end if;
1554                             v.state := DO_FMADD;
1555                         end if;
1556                     else
1557                         v.state := RENORM_C;  -- C is denormalized too: renormalize it next
1558                     end if;
1559                 else  -- this arm leads to divide states, so the other operand is B
1560                         opsel_a <= AIN_B;
1561                         if r.b.mantissa(54) = '1' then  -- B is already normalized: start the divide
1562                             v.first := '1';
1563                             v.state := DIV_2;
1564                         else
1565                             v.state := RENORM_B;  -- B is denormalized too: renormalize it next
1566                     end if;  -- NOTE: closes the r.b.mantissa(54) test above despite its indentation
1567                 end if;
1568
1569             when RENORM_B =>  -- first of two states that renormalize a denormalized B operand
1570                 renormalize := '1';  -- request a renormalization; the logic it drives is outside this excerpt
1571                 renorm_sqrt := r.is_sqrt;  -- pass the is_sqrt flag along; its effect on renormalization is outside this excerpt
1572                 v.state := RENORM_B2;
1573
1574             when RENORM_B2 =>  -- second renormalize-B state; also entered directly by DO_FSQRT/DO_FRSQRTE for odd exponents
1575                 set_b := '1';  -- presumably latches the adjusted value into B; confirm where set_b is used
1576                 if r.is_sqrt = '0' then
1577                     v.result_exp := r.result_exp + r.shift;  -- adjust the exponent by the shift recorded earlier
1578                 else
1579                     v.result_exp := new_exp;  -- new_exp is computed outside this excerpt
1580                 end if;
1581                 v.state := LOOKUP;  -- LOOKUP dispatches to DIV_2, SQRT_1, FRE_1 or RSQRT_1
1582
1583             when RENORM_C =>  -- first of two states that renormalize a denormalized C operand
1584                 renormalize := '1';  -- request a renormalization; the logic it drives is outside this excerpt
1585                 v.state := RENORM_C2;
1586
1587             when RENORM_C2 =>  -- second renormalize-C state: store the new C, then continue as multiply or fmadd
1588                 set_c := '1';  -- presumably latches the renormalized value into C; confirm where set_c is used
1589                 v.result_exp := new_exp;  -- new_exp is computed outside this excerpt
1590                 if r.insn(3) = '0' or r.b.class = ZERO then  -- no addend to consider: plain multiply
1591                     v.first := '1';
1592                     v.state := MULT_1;
1593                 else
1594                     v.madd_cmp := '0';  -- recompute madd_cmp from the updated exponent, then re-enter DO_FMADD
1595                     if new_exp + 1 >= r.b.exponent then
1596                         v.madd_cmp := '1';
1597                     end if;
1598                     v.state := DO_FMADD;
1599                 end if;
1600
1601             when ADD_SHIFT =>  -- take the shifter output as the result value and record the sticky bit before adding
1602                 opsel_r <= RES_SHIFT;
1603                 v.x := s_nz;  -- X starts from s_nz (presumably "S register is non-zero"; confirm at its definition)
1604                 set_x := '1';
1605                 longmask := '0';
1606                 v.state := ADD_2;
1607
1608             when ADD_2 =>  -- perform the add or subtract of the selected operand and R
1609                 if r.add_bsmall = '1' then  -- add_bsmall chooses which operand enters on the A input
1610                     opsel_a <= AIN_A;
1611                 else
1612                     opsel_a <= AIN_B;
1613                 end if;
1614                 opsel_b <= BIN_R;
1615                 opsel_binv <= r.is_subtract;  -- invert the B input when subtracting
1616                 carry_in <= r.is_subtract and not r.x;  -- carry in only for a subtract with X clear
1617                 v.shift := to_signed(-1, EXP_BITS);
1618                 v.state := ADD_3;
1619
1620             when ADD_3 =>  -- inspect the sum in R and choose between negate, round, zero result or normalize
1621                 -- check for overflow or negative result (can't get both)
1622                 if r.r(63) = '1' then
1623                     -- result is opposite sign to expected
1624                     v.result_sign := not r.result_sign;
1625                     opsel_ainv <= '1';  -- invert plus carry-in: two's complement negation
1626                     carry_in <= '1';
1627                     v.state := FINISH;
1628                 elsif r.r(55) = '1' then
1629                     -- sum overflowed, shift right
1630                     opsel_r <= RES_SHIFT;
1631                     set_x := '1';
1632                     v.shift := to_signed(-2, EXP_BITS);
1633                     if exp_huge = '1' then
1634                         v.state := ROUND_OFLOW;
1635                     else
1636                         v.state := ROUNDING;
1637                     end if;
1638                 elsif r.r(54) = '1' then  -- bit 54 set and bit 55 clear: go straight to rounding, no shift of R
1639                     set_x := '1';
1640                     v.shift := to_signed(-2, EXP_BITS);
1641                     v.state := ROUNDING;
1642                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then  -- every tested part of R is zero
1643                     -- r.x must be zero at this point
1644                     v.result_class := ZERO;
1645                     if r.is_subtract = '1' then
1646                         -- set result sign depending on rounding mode
1647                         v.result_sign := r.round_mode(1) and r.round_mode(0);  -- negative only when both round_mode bits are set
1648                     end if;
1649                     arith_done := '1';
1650                 else  -- non-zero with bits 55 and 54 clear: renormalize
1651                     renormalize := '1';
1652                     v.state := NORMALIZE;
1653                 end if;
1654
1655             when CMP_1 =>  -- set up the subtraction A - R whose result CMP_2 examines
1656                 opsel_a <= AIN_A;
1657                 opsel_b <= BIN_R;
1658                 opsel_binv <= '1';  -- inverted B input plus carry-in: two's complement subtraction
1659                 carry_in <= '1';
1660                 v.state := CMP_2;
1661
1662             when CMP_2 =>  -- turn the difference in R into the 4-bit compare result and finish the instruction
1663                 if r.r(63) = '1' then
1664                     -- A is smaller in magnitude
1665                     v.cr_result := not r.a.negative & r.a.negative & "00";  -- leftmost bit set for non-negative A, second bit for negative A
1666                 elsif (r_hi_nz or r_lo_nz) = '0' then  -- presumably "difference is zero"; confirm what r_hi_nz/r_lo_nz cover
1667                     v.cr_result := "0010";
1668                 else  -- otherwise the opposite of the first case
1669                     v.cr_result := r.a.negative & not r.a.negative & "00";
1670                 end if;
1671                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;  -- same four bits are mirrored into the FPSCR FL..FU field
1672                 v.instr_done := '1';
1673                 v.state := IDLE;
1674
1675             when MULT_1 =>  -- wait for the multiplier result, capture it, then go to FINISH
1676                 f_to_multiply.valid <= r.first;  -- issue the multiply only if r.first is set (entry states may have issued it already)
1677                 opsel_r <= RES_MULT;
1678                 if multiply_to_f.valid = '1' then  -- stay in this state until the multiplier reports a valid result
1679                     v.state := FINISH;
1680                 end if;
1681
1682             when FMADD_1 =>  -- fmadd path where B dominates: wait for the product, then continue in ADD_SHIFT
1683                 -- Addend is bigger here
1684                 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1685                 -- note v.shift is at most -2 here
1686                 v.shift := r.result_exp - r.b.exponent;  -- product exponent minus addend exponent
1687                 opsel_r <= RES_MULT;
1688                 opsel_s <= S_MULT;
1689                 set_s := '1';
1690                 f_to_multiply.valid <= r.first;
1691                 if multiply_to_f.valid = '1' then  -- stay in this state until the multiplier reports a valid result
1692                     v.state := ADD_SHIFT;
1693                 end if;
1694
1695             when FMADD_2 =>  -- fmadd path where the product may dominate: first of two shift steps for B
1696                 -- Product is potentially bigger here
1697                 set_s := '1';
1698                 opsel_s <= S_SHIFT;
1699                 v.shift := r.shift - to_signed(64, EXP_BITS);  -- removes the 64 that DO_FMADD added to the shift
1700                 v.state := FMADD_3;
1701
1702             when FMADD_3 =>  -- second shift step: take the shifter output as the result value
1703                 opsel_r <= RES_SHIFT;
1704                 v.first := '1';  -- so that FMADD_4 issues the multiply on its first cycle
1705                 v.state := FMADD_4;
1706
1707             when FMADD_4 =>  -- run the multiply with the shifted addend and wait for its result
1708                 msel_add <= MULADD_RS;
1709                 f_to_multiply.valid <= r.first;
1710                 msel_inv <= r.is_subtract;
1711                 opsel_r <= RES_MULT;
1712                 opsel_s <= S_MULT;
1713                 set_s := '1';
1714                 v.shift := to_signed(56, EXP_BITS);
1715                 if multiply_to_f.valid = '1' then
1716                     if multiply_to_f.result(121) = '1' then  -- bit 121 set: go through FMADD_5, which negates the value
1717                         v.state := FMADD_5;
1718                     else
1719                         v.state := FMADD_6;
1720                     end if;
1721                 end if;
1722
1723             when FMADD_5 =>  -- the multiply-add result came out negative: negate it and flip the result sign
1724                 -- negate R:S:X
1725                 v.result_sign := not r.result_sign;
1726                 opsel_ainv <= '1';
1727                 carry_in <= not (s_nz or r.x);  -- carry into R only when both S and X are zero
1728                 opsel_s <= S_NEG;
1729                 set_s := '1';
1730                 v.shift := to_signed(56, EXP_BITS);
1731                 v.state := FMADD_6;
1732
1733             when FMADD_6 =>  -- inspect the multiply-add result: zero, shift S into R, finish, or normalize
1734                 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then  -- every tested part of R is zero
1735                     if s_nz = '0' then
1736                         -- must be a subtraction, and r.x must be zero
1737                         v.result_class := ZERO;
1738                         v.result_sign := r.round_mode(1) and r.round_mode(0);  -- negative only when both round_mode bits are set
1739                         arith_done := '1';
1740                     else
1741                         -- R is all zeroes but there are non-zero bits in S
1742                         -- so shift them into R and set S to 0
1743                         opsel_r <= RES_SHIFT;
1744                         set_s := '1';
1745                         -- stay in state FMADD_6
1746                     end if;
1747                 elsif r.r(56 downto 54) = "001" then  -- leading one already at bit 54: no normalization needed
1748                     v.state := FINISH;
1749                 else
1750                     renormalize := '1';
1751                     v.state := NORMALIZE;
1752                 end if;
1753
1754             when LOOKUP =>  -- one-cycle wait for the table lookup, then dispatch on instruction bits 4, 3 and 2
1755                 opsel_a <= AIN_B;
1756                 -- wait one cycle for inverse_table[B] lookup
1757                 v.first := '1';
1758                 if r.insn(4) = '0' then
1759                     if r.insn(3) = '0' then
1760                         v.state := DIV_2;  -- insn(4) = 0, insn(3) = 0: divide
1761                     else
1762                         v.state := SQRT_1;  -- insn(4) = 0, insn(3) = 1: square root
1763                     end if;
1764                 elsif r.insn(2) = '0' then
1765                     v.state := FRE_1;  -- insn(4) = 1, insn(2) = 0: reciprocal estimate
1766                 else
1767                     v.state := RSQRT_1;  -- insn(4) = 1, insn(2) = 1: reciprocal square root estimate
1768                 end if;
1769
1770             when DIV_2 =>  -- divide iteration, step 1 of 2; alternates with DIV_3
1771                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1772                 msel_1 <= MUL1_B;
1773                 msel_add <= MULADD_CONST;
1774                 msel_inv <= '1';
1775                 if r.count = 0 then
1776                     msel_2 <= MUL2_LUT;  -- first pass: use the lookup-table estimate
1777                 else
1778                     msel_2 <= MUL2_P;  -- later passes: use the previous product
1779                 end if;
1780                 set_y := r.first;
1781                 pshift := '1';
1782                 f_to_multiply.valid <= r.first;  -- issue the multiply only on the first cycle in this state
1783                 if multiply_to_f.valid = '1' then  -- stay in this state until the multiplier reports a valid result
1784                     v.first := '1';
1785                     v.count := r.count + 1;  -- count is incremented here, once per DIV_2/DIV_3 pair
1786                     v.state := DIV_3;
1787                 end if;
1788
1789             when DIV_3 =>  -- divide iteration, step 2 of 2; loops back to DIV_2 until r.count reaches 3
1790                 -- compute Y = P = P * Y
1791                 msel_1 <= MUL1_Y;
1792                 msel_2 <= MUL2_P;
1793                 f_to_multiply.valid <= r.first;  -- issue the multiply only on the first cycle in this state
1794                 pshift := '1';
1795                 if multiply_to_f.valid = '1' then
1796                     v.first := '1';
1797                     if r.count = 3 then  -- three DIV_2/DIV_3 passes done: move on to the quotient
1798                         v.state := DIV_4;
1799                     else
1800                         v.state := DIV_2;
1801                     end if;
1802                 end if;
1803
1804             when DIV_4 =>  -- form the quotient from A and the refined reciprocal
1805                 -- compute R = P = A * Y (quotient)
1806                 msel_1 <= MUL1_A;
1807                 msel_2 <= MUL2_P;
1808                 set_y := r.first;
1809                 f_to_multiply.valid <= r.first;  -- issue the multiply only on the first cycle in this state
1810                 pshift := '1';
1811                 if multiply_to_f.valid = '1' then
1812                     opsel_r <= RES_MULT;  -- capture the product as the result value
1813                     v.first := '1';
1814                     v.state := DIV_5;
1815                 end if;
1816
1817             when DIV_5 =>  -- form the remainder that DIV_6 uses to correct the quotient
1818                 -- compute P = A - B * R (remainder)
1819                 msel_1 <= MUL1_B;
1820                 msel_2 <= MUL2_R;
1821                 msel_add <= MULADD_A;
1822                 msel_inv <= '1';
1823                 f_to_multiply.valid <= r.first;  -- issue the multiply only on the first cycle in this state
1824                 if multiply_to_f.valid = '1' then
1825                     v.state := DIV_6;
1826                 end if;
1827
1828             when DIV_6 =>  -- final quotient correction and sticky-bit computation
1829                 -- test if remainder is 0 or >= B
1830                 if pcmpb_lt = '1' then  -- presumably "P less than B"; confirm at the definition of pcmpb_lt
1831                     -- quotient is correct, set X if remainder non-zero
1832                     v.x := r.p(58) or px_nz;
1833                 else
1834                     -- quotient needs to be incremented by 1
1835                     carry_in <= '1';
1836                     v.x := not pcmpb_eq;  -- X stays clear only when pcmpb_eq is set
1837                 end if;
1838                 v.state := FINISH;
1839
1840             when FRE_1 =>  -- reciprocal estimate: take the value selected by misc_sel "0111" as the result and normalize it
1841                 opsel_r <= RES_MISC;
1842                 misc_sel <= "0111";  -- same selector used by RSQRT_1 and SQRT_1; its decoding is outside this excerpt
1843                 v.shift := to_signed(1, EXP_BITS);
1844                 v.state := NORMALIZE;
1845
1846             when FTDIV_1 =>  -- range-check step of ftdiv; may stay in this state for a second pass
1847                 v.cr_result(1) := exp_tiny or exp_huge;  -- flag an out-of-range exponent in result bit 1
1848                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then  -- done on failure, zero A, or when r.first is clear
1849                     v.instr_done := '1';
1850                     v.state := IDLE;
1851                 else  -- no state change: this state runs again with the new shift and doing_ftdiv values
1852                     v.shift := r.a.exponent;
1853                     v.doing_ftdiv := "10";
1854                 end if;
1855
1856             when RSQRT_1 =>  -- reciprocal square root estimate: table value as result, exponent halved and negated
1857                 opsel_r <= RES_MISC;
1858                 misc_sel <= "0111";  -- same selector used by FRE_1 and SQRT_1; its decoding is outside this excerpt
1859                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);  -- arithmetic shift right by one: exponent / 2, sign preserved
1860                 v.result_exp := - sqrt_exp;
1861                 v.shift := to_signed(1, EXP_BITS);
1862                 v.state := NORMALIZE;
1863
1864             when SQRT_1 =>  -- start of the square root computation proper
1865                 -- put invsqr[B] in R and compute P = invsqr[B] * B
1866                 -- also transfer B (in R) to A
1867                 set_a := '1';
1868                 opsel_r <= RES_MISC;
1869                 misc_sel <= "0111";
1870                 msel_1 <= MUL1_B;
1871                 msel_2 <= MUL2_LUT;
1872                 f_to_multiply.valid <= '1';  -- issued unconditionally, unlike the states that gate on r.first
1873                 v.shift := to_signed(-1, EXP_BITS);  -- shift amount for the right shift described in SQRT_2
1874                 v.count := "00";  -- SQRT_7 counts the refinement passes in r.count
1875                 v.state := SQRT_2;
1876
1877             when SQRT_2 =>  -- single-cycle state between starting the first multiply and collecting it in SQRT_3
1878                 -- shift R right one place
1879                 -- not expecting multiplier result yet
1880                 opsel_r <= RES_SHIFT;
1881                 v.first := '1';
1882                 v.state := SQRT_3;
1883
1884             when SQRT_3 =>  -- save the shifted estimate and collect the first product
1885                 -- put R into Y, wait for product from multiplier
1886                 msel_2 <= MUL2_R;
1887                 set_y := r.first;  -- only on the first cycle in this state
1888                 pshift := '1';
1889                 if multiply_to_f.valid = '1' then
1890                     -- put result into R
1891                     opsel_r <= RES_MULT;
1892                     v.first := '1';
1893                     v.state := SQRT_4;
1894                 end if;
1895
1896             when SQRT_4 =>  -- first step of a refinement pass (SQRT_4 to SQRT_7)
1897                 -- compute 1.5 - Y * P
1898                 msel_1 <= MUL1_Y;
1899                 msel_2 <= MUL2_P;
1900                 msel_add <= MULADD_CONST;
1901                 msel_inv <= '1';
1902                 f_to_multiply.valid <= r.first;  -- issue the multiply only on the first cycle in this state
1903                 pshift := '1';
1904                 if multiply_to_f.valid = '1' then
1905                     v.state := SQRT_5;
1906                 end if;
1907
1908             when SQRT_5 =>  -- single-cycle state: start the first of two back-to-back multiplies
1909                 -- compute Y = Y * P
1910                 msel_1 <= MUL1_Y;
1911                 msel_2 <= MUL2_P;
1912                 f_to_multiply.valid <= '1';  -- issued unconditionally; this state does not wait for a result
1913                 v.first := '1';
1914                 v.state := SQRT_6;
1915
1916             when SQRT_6 =>  -- start the second multiply while the first is still in flight
1917                 -- pipeline in R = R * P
1918                 msel_1 <= MUL1_R;
1919                 msel_2 <= MUL2_P;
1920                 f_to_multiply.valid <= r.first;  -- issue the multiply only on the first cycle in this state
1921                 pshift := '1';
1922                 if multiply_to_f.valid = '1' then  -- a result is valid here (the multiply started in SQRT_5, per SQRT_7's comment)
1923                     v.first := '1';
1924                     v.state := SQRT_7;
1925                 end if;
1926
1927             when SQRT_7 =>  -- end of a refinement pass: store both products, then loop or move on
1928                 -- first multiply is done, put result in Y
1929                 msel_2 <= MUL2_P;
1930                 set_y := r.first;  -- only on the first cycle in this state
1931                 -- wait for second multiply (should be here already)
1932                 pshift := '1';
1933                 if multiply_to_f.valid = '1' then
1934                     -- put result into R
1935                     opsel_r <= RES_MULT;
1936                     v.first := '1';
1937                     v.count := r.count + 1;
1938                     if r.count < 2 then  -- r.count values 0 and 1 loop back, so the pass runs three times in total
1939                         v.state := SQRT_4;
1940                     else
1941                         v.first := '1';  -- redundant: v.first was already set to '1' a few lines above
1942                         v.state := SQRT_8;
1943                     end if;
1944                 end if;
1945
1946             when SQRT_8 =>
1947                 -- compute P = A - R * R, which can be +ve or -ve
1948                 -- we arranged for B to be put into A earlier
1949                 msel_1 <= MUL1_R;
1950                 msel_2 <= MUL2_R;
1951                 msel_add <= MULADD_A;
1952                 msel_inv <= '1';
1953                 pshift := '1';
1954                 f_to_multiply.valid <= r.first;
1955                 if multiply_to_f.valid = '1' then
1956                     v.first := '1';
1957                     v.state := SQRT_9;
1958                 end if;
1959
1960             when SQRT_9 =>
1961                 -- compute P = P * Y
1962                 -- since Y is an estimate of 1/sqrt(B), this makes P an
1963                 -- estimate of the adjustment needed to R.  Since the error
1964                 -- could be negative and we have an unsigned multiplier, the
1965                 -- upper bits can be wrong, but it turns out the lowest 8 bits
1966                 -- are correct and are all we need (given 3 iterations through
1967                 -- SQRT_4 to SQRT_7).
1968                 msel_1 <= MUL1_Y;
1969                 msel_2 <= MUL2_P;
1970                 pshift := '1';
1971                 f_to_multiply.valid <= r.first;
1972                 if multiply_to_f.valid = '1' then
1973                     v.state := SQRT_10;
1974                 end if;
1975
1976             when SQRT_10 =>
1977                 -- Add the bottom 8 bits of P, sign-extended,
1978                 -- divided by 4, onto R.
1979                 -- The division by 4 is because R is 10.54 format
1980                 -- whereas P is 8.56 format.
1981                 opsel_b <= BIN_PS6;
1982                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1983                 v.result_exp := sqrt_exp;
1984                 v.shift := to_signed(1, EXP_BITS);
1985                 v.first := '1';
1986                 v.state := SQRT_11;
1987
1988             when SQRT_11 =>
1989                 -- compute P = A - R * R (remainder)
1990                 -- also put 2 * R + 1 into B for comparison with P
1991                 msel_1 <= MUL1_R;
1992                 msel_2 <= MUL2_R;
1993                 msel_add <= MULADD_A;
1994                 msel_inv <= '1';
1995                 f_to_multiply.valid <= r.first;
1996                 shiftin := '1';
1997                 set_b := r.first;
1998                 if multiply_to_f.valid = '1' then
1999                     v.state := SQRT_12;
2000                 end if;
2001
2002             when SQRT_12 =>
2003                 -- test if remainder is 0 or >= B = 2*R + 1
2004                 if pcmpb_lt = '1' then
2005                     -- square root is correct, set X if remainder non-zero
2006                     v.x := r.p(58) or px_nz;
2007                 else
2008                     -- square root needs to be incremented by 1
2009                     carry_in <= '1';
2010                     v.x := not pcmpb_eq;
2011                 end if;
2012                 v.state := FINISH;
2013
2014             when INT_SHIFT =>
2015                 opsel_r <= RES_SHIFT;
2016                 set_x := '1';
2017                 v.state := INT_ROUND;
2018                 v.shift := to_signed(-2, EXP_BITS);
2019
2020             when INT_ROUND =>
2021                 opsel_r <= RES_SHIFT;
2022                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2023                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2024                 -- Check for negative values that don't round to 0 for fcti*u*
2025                 if r.insn(8) = '1' and r.result_sign = '1' and
2026                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2027                     v.state := INT_OFLOW;
2028                 else
2029                     v.state := INT_FINAL;
2030                 end if;
2031
2032             when INT_ISHIFT =>
2033                 opsel_r <= RES_SHIFT;
2034                 v.state := INT_FINAL;
2035
2036             when INT_FINAL =>
2037                 -- Negate if necessary, and increment for rounding if needed
2038                 opsel_ainv <= r.result_sign;
2039                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2040                 -- Check for possible overflows
2041                 case r.insn(9 downto 8) is
2042                     when "00" =>        -- fctiw[z]
2043                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
2044                     when "01" =>        -- fctiwu[z]
2045                         need_check := r.r(31);
2046                     when "10" =>        -- fctid[z]
2047                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
2048                     when others =>      -- fctidu[z]
2049                         need_check := r.r(63);
2050                 end case;
2051                 if need_check = '1' then
2052                     v.state := INT_CHECK;
2053                 else
2054                     if r.fpscr(FPSCR_FI) = '1' then
2055                         v.fpscr(FPSCR_XX) := '1';
2056                     end if;
2057                     arith_done := '1';
2058                 end if;
2059
2060             when INT_CHECK =>
2061                 if r.insn(9) = '0' then
2062                     msb := r.r(31);
2063                 else
2064                     msb := r.r(63);
2065                 end if;
2066                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2067                 if (r.insn(8) = '0' and msb /= r.result_sign) or
2068                     (r.insn(8) = '1' and msb /= '1') then
2069                     opsel_r <= RES_MISC;
2070                     v.fpscr(FPSCR_VXCVI) := '1';
2071                     invalid := '1';
2072                 else
2073                     if r.fpscr(FPSCR_FI) = '1' then
2074                         v.fpscr(FPSCR_XX) := '1';
2075                     end if;
2076                 end if;
2077                 arith_done := '1';
2078
2079             when INT_OFLOW =>
2080                 opsel_r <= RES_MISC;
2081                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2082                 if r.b.class = NAN then
2083                     misc_sel(0) <= '1';
2084                 end if;
2085                 v.fpscr(FPSCR_VXCVI) := '1';
2086                 invalid := '1';
2087                 arith_done := '1';
2088
2089             when FRI_1 =>
2090                 opsel_r <= RES_SHIFT;
2091                 set_x := '1';
2092                 v.shift := to_signed(-2, EXP_BITS);
2093                 v.state := ROUNDING;
2094
2095             when FINISH =>
2096                 if r.is_multiply = '1' and px_nz = '1' then
2097                     v.x := '1';
2098                 end if;
2099                 if r.r(63 downto 54) /= "0000000001" then
2100                     renormalize := '1';
2101                     v.state := NORMALIZE;
2102                 else
2103                     set_x := '1';
2104                     if exp_tiny = '1' then
2105                         v.shift := new_exp - min_exp;
2106                         v.state := ROUND_UFLOW;
2107                     elsif exp_huge = '1' then
2108                         v.state := ROUND_OFLOW;
2109                     else
2110                         v.shift := to_signed(-2, EXP_BITS);
2111                         v.state := ROUNDING;
2112                     end if;
2113                 end if;
2114
2115             when NORMALIZE =>
2116                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2117                 opsel_r <= RES_SHIFT;
2118                 set_x := '1';
2119                 if exp_tiny = '1' then
2120                     v.shift := new_exp - min_exp;
2121                     v.state := ROUND_UFLOW;
2122                 elsif exp_huge = '1' then
2123                     v.state := ROUND_OFLOW;
2124                 else
2125                     v.shift := to_signed(-2, EXP_BITS);
2126                     v.state := ROUNDING;
2127                 end if;
2128
2129             when ROUND_UFLOW =>
2130                 v.tiny := '1';
2131                 if r.fpscr(FPSCR_UE) = '0' then
2132                     -- disabled underflow exception case
2133                     -- have to denormalize before rounding
2134                     opsel_r <= RES_SHIFT;
2135                     set_x := '1';
2136                     v.shift := to_signed(-2, EXP_BITS);
2137                     v.state := ROUNDING;
2138                 else
2139                     -- enabled underflow exception case
2140                     -- if denormalized, have to normalize before rounding
2141                     v.fpscr(FPSCR_UX) := '1';
2142                     v.result_exp := r.result_exp + bias_exp;
2143                     if r.r(54) = '0' then
2144                         renormalize := '1';
2145                         v.state := NORMALIZE;
2146                     else
2147                         v.shift := to_signed(-2, EXP_BITS);
2148                         v.state := ROUNDING;
2149                     end if;
2150                 end if;
2151
2152             when ROUND_OFLOW =>
2153                 v.fpscr(FPSCR_OX) := '1';
2154                 if r.fpscr(FPSCR_OE) = '0' then
2155                     -- disabled overflow exception
2156                     -- result depends on rounding mode
2157                     v.fpscr(FPSCR_XX) := '1';
2158                     v.fpscr(FPSCR_FI) := '1';
2159                     if r.round_mode(1 downto 0) = "00" or
2160                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2161                         v.result_class := INFINITY;
2162                         v.fpscr(FPSCR_FR) := '1';
2163                     else
2164                         v.fpscr(FPSCR_FR) := '0';
2165                     end if;
2166                     -- construct largest representable number
2167                     v.result_exp := max_exp;
2168                     opsel_r <= RES_MISC;
2169                     misc_sel <= "001" & r.single_prec;
2170                     arith_done := '1';
2171                 else
2172                     -- enabled overflow exception
2173                     v.result_exp := r.result_exp - bias_exp;
2174                     v.shift := to_signed(-2, EXP_BITS);
2175                     v.state := ROUNDING;
2176                 end if;
2177
2178             when ROUNDING =>
2179                 opsel_amask <= '1';
2180                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2181                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2182                 if round(1) = '1' then
2183                     -- set mask to increment the LSB for the precision
2184                     opsel_b <= BIN_MASK;
2185                     carry_in <= '1';
2186                     v.shift := to_signed(-1, EXP_BITS);
2187                     v.state := ROUNDING_2;
2188                 else
2189                     if r.r(54) = '0' then
2190                         -- result after masking could be zero, or could be a
2191                         -- denormalized result that needs to be renormalized
2192                         renormalize := '1';
2193                         v.state := ROUNDING_3;
2194                     else
2195                         arith_done := '1';
2196                     end if;
2197                 end if;
2198                 if round(0) = '1' then
2199                     v.fpscr(FPSCR_XX) := '1';
2200                     if r.tiny = '1' then
2201                         v.fpscr(FPSCR_UX) := '1';
2202                     end if;
2203                 end if;
2204
2205             when ROUNDING_2 =>
2206                 -- Check for overflow during rounding
2207                 v.x := '0';
2208                 if r.r(55) = '1' then
2209                     opsel_r <= RES_SHIFT;
2210                     if exp_huge = '1' then
2211                         v.state := ROUND_OFLOW;
2212                     else
2213                         arith_done := '1';
2214                     end if;
2215                 elsif r.r(54) = '0' then
2216                     -- Do CLZ so we can renormalize the result
2217                     renormalize := '1';
2218                     v.state := ROUNDING_3;
2219                 else
2220                     arith_done := '1';
2221                 end if;
2222
2223             when ROUNDING_3 =>
2224                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2225                 if mant_nz = '0' then
2226                     v.result_class := ZERO;
2227                     if r.is_subtract = '1' then
2228                         -- set result sign depending on rounding mode
2229                         v.result_sign := r.round_mode(1) and r.round_mode(0);
2230                     end if;
2231                     arith_done := '1';
2232                 else
2233                     -- Renormalize result after rounding
2234                     opsel_r <= RES_SHIFT;
2235                     v.denorm := exp_tiny;
2236                     v.shift := new_exp - to_signed(-1022, EXP_BITS);
2237                     if new_exp < to_signed(-1022, EXP_BITS) then
2238                         v.state := DENORM;
2239                     else
2240                         arith_done := '1';
2241                     end if;
2242                 end if;
2243
2244             when DENORM =>
2245                 opsel_r <= RES_SHIFT;
2246                 arith_done := '1';
2247
2248         end case;
2249
2250         if zero_divide = '1' then
2251             v.fpscr(FPSCR_ZX) := '1';
2252         end if;
2253         if qnan_result = '1' then
2254             invalid := '1';
2255             v.result_class := NAN;
2256             v.result_sign := '0';
2257             misc_sel <= "0001";
2258             opsel_r <= RES_MISC;
2259         end if;
2260         if arith_done = '1' then
2261             -- Enabled invalid exception doesn't write result or FPRF
2262             -- Neither does enabled zero-divide exception
2263             if (invalid and r.fpscr(FPSCR_VE)) = '0' and
2264                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2265                 v.writing_back := '1';
2266                 v.update_fprf := '1';
2267             end if;
2268             v.instr_done := '1';
2269             v.state := IDLE;
2270             update_fx := '1';
2271         end if;
2272
2273         -- Multiplier and divide/square root data path
2274         case msel_1 is
2275             when MUL1_A =>
2276                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2277             when MUL1_B =>
2278                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2279             when MUL1_Y =>
2280                 f_to_multiply.data1 <= r.y;
2281             when others =>
2282                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2283         end case;
2284         case msel_2 is
2285             when MUL2_C =>
2286                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2287             when MUL2_LUT =>
2288                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2289             when MUL2_P =>
2290                 f_to_multiply.data2 <= r.p;
2291             when others =>
2292                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2293         end case;
2294         maddend := (others => '0');
2295         case msel_add is
2296             when MULADD_CONST =>
2297                 -- addend is 2.0 or 1.5 in 16.112 format
2298                 if r.is_sqrt = '0' then
2299                     maddend(113) := '1';                -- 2.0
2300                 else
2301                     maddend(112 downto 111) := "11";    -- 1.5
2302                 end if;
2303             when MULADD_A =>
2304                 -- addend is A in 16.112 format
2305                 maddend(121 downto 58) := r.a.mantissa;
2306             when MULADD_RS =>
2307                 -- addend is concatenation of R and S in 16.112 format
2308                 maddend := "000000" & r.r & r.s & "00";
2309             when others =>
2310         end case;
2311         if msel_inv = '1' then
2312             f_to_multiply.addend <= not maddend;
2313         else
2314             f_to_multiply.addend <= maddend;
2315         end if;
2316         f_to_multiply.not_result <= msel_inv;
2317         if set_y = '1' then
2318             v.y := f_to_multiply.data2;
2319         end if;
2320         if multiply_to_f.valid = '1' then
2321             if pshift = '0' then
2322                 v.p := multiply_to_f.result(63 downto 0);
2323             else
2324                 v.p := multiply_to_f.result(119 downto 56);
2325             end if;
2326         end if;
2327
2328         -- Data path.
2329         -- This has A and B input multiplexers, an adder, a shifter,
2330         -- count-leading-zeroes logic, and a result mux.
2331         if longmask = '1' then
2332             mshift := r.shift + to_signed(-29, EXP_BITS);
2333         else
2334             mshift := r.shift;
2335         end if;
2336         if mshift < to_signed(-64, EXP_BITS) then
2337             mask := (others => '1');
2338         elsif mshift >= to_signed(0, EXP_BITS) then
2339             mask := (others => '0');
2340         else
2341             mask := right_mask(unsigned(mshift(5 downto 0)));
2342         end if;
2343         case opsel_a is
2344             when AIN_R =>
2345                 in_a0 := r.r;
2346             when AIN_A =>
2347                 in_a0 := r.a.mantissa;
2348             when AIN_B =>
2349                 in_a0 := r.b.mantissa;
2350             when others =>
2351                 in_a0 := r.c.mantissa;
2352         end case;
2353         if (or (mask and in_a0)) = '1' and set_x = '1' then
2354             v.x := '1';
2355         end if;
2356         if opsel_ainv = '1' then
2357             in_a0 := not in_a0;
2358         end if;
2359         if opsel_amask = '1' then
2360             in_a0 := in_a0 and not mask;
2361         end if;
2362         in_a <= in_a0;
2363         case opsel_b is
2364             when BIN_ZERO =>
2365                 in_b0 := (others => '0');
2366             when BIN_R =>
2367                 in_b0 := r.r;
2368             when BIN_MASK =>
2369                 in_b0 := mask;
2370             when others =>
2371                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2372                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2373         end case;
2374         if opsel_binv = '1' then
2375             in_b0 := not in_b0;
2376         end if;
2377         in_b <= in_b0;
2378         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2379             shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2380                                     std_ulogic_vector(r.shift(6 downto 0)));
2381         else
2382             shift_res := (others => '0');
2383         end if;
2384         case opsel_r is
2385             when RES_SUM =>
2386                 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2387             when RES_SHIFT =>
2388                 result <= shift_res;
2389             when RES_MULT =>
2390                 result <= multiply_to_f.result(121 downto 58);
2391             when others =>
2392                 case misc_sel is
2393                     when "0000" =>
2394                         misc := x"00000000" & (r.fpscr and fpscr_mask);
2395                     when "0001" =>
2396                         -- generated QNaN mantissa
2397                         misc := x"0020000000000000";
2398                     when "0010" =>
2399                         -- mantissa of max representable DP number
2400                         misc := x"007ffffffffffffc";
2401                     when "0011" =>
2402                         -- mantissa of max representable SP number
2403                         misc := x"007fffff80000000";
2404                     when "0100" =>
2405                         -- fmrgow result
2406                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2407                     when "0110" =>
2408                         -- fmrgew result
2409                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2410                     when "0111" =>
2411                         misc := 10x"000" & inverse_est & 35x"000000000";
2412                     when "1000" =>
2413                         -- max positive result for fctiw[z]
2414                         misc := x"000000007fffffff";
2415                     when "1001" =>
2416                         -- max negative result for fctiw[z]
2417                         misc := x"ffffffff80000000";
2418                     when "1010" =>
2419                         -- max positive result for fctiwu[z]
2420                         misc := x"00000000ffffffff";
2421                     when "1011" =>
2422                         -- max negative result for fctiwu[z]
2423                         misc := x"0000000000000000";
2424                     when "1100" =>
2425                         -- max positive result for fctid[z]
2426                         misc := x"7fffffffffffffff";
2427                     when "1101" =>
2428                         -- max negative result for fctid[z]
2429                         misc := x"8000000000000000";
2430                     when "1110" =>
2431                         -- max positive result for fctidu[z]
2432                         misc := x"ffffffffffffffff";
2433                     when "1111" =>
2434                         -- max negative result for fctidu[z]
2435                         misc := x"0000000000000000";
2436                     when others =>
2437                         misc := x"0000000000000000";
2438                 end case;
2439                 result <= misc;
2440         end case;
2441         v.r := result;
2442         if set_s = '1' then
2443             case opsel_s is
2444                 when S_NEG =>
2445                     v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2446                 when S_MULT =>
2447                     v.s := multiply_to_f.result(57 downto 2);
2448                 when S_SHIFT =>
2449                     v.s := shift_res(63 downto 8);
2450                     if shift_res(7 downto 0) /= x"00" then
2451                         v.x := '1';
2452                     end if;
2453                 when others =>
2454                     v.s := (others => '0');
2455             end case;
2456         end if;
2457
2458         if set_a = '1' then
2459             v.a.exponent := new_exp;
2460             v.a.mantissa := shift_res;
2461         end if;
2462         if set_b = '1' then
2463             v.b.exponent := new_exp;
2464             v.b.mantissa := shift_res;
2465         end if;
2466         if set_c = '1' then
2467             v.c.exponent := new_exp;
2468             v.c.mantissa := shift_res;
2469         end if;
2470
2471         if opsel_r = RES_SHIFT then
2472             v.result_exp := new_exp;
2473         end if;
2474
2475         if renormalize = '1' then
2476             clz := count_left_zeroes(r.r);
2477             if renorm_sqrt = '1' then
2478                 -- make denormalized value end up with even exponent
2479                 clz(0) := '1';
2480             end if;
2481             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2482         end if;
2483
2484         if r.int_result = '1' then
2485             fp_result <= r.r;
2486         else
2487             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2488                                  r.single_prec, r.quieten_nan);
2489         end if;
2490         if r.update_fprf = '1' then
2491             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2492                                                              r.r(54) and not r.denorm);
2493         end if;
2494
2495         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2496                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2497         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2498                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
2499         if update_fx = '1' and
2500             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2501             v.fpscr(FPSCR_FX) := '1';
2502         end if;
2503         if r.rc = '1' then
2504             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2505         end if;
2506
2507         if illegal = '1' then
2508             v.instr_done := '0';
2509             v.do_intr := '0';
2510             v.writing_back := '0';
2511             v.busy := '0';
2512             v.state := IDLE;
2513         else
2514             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2515             if v.state /= IDLE or v.do_intr = '1' then
2516                 v.busy := '1';
2517             end if;
2518         end if;
2519
2520         rin <= v;
2521         e_out.illegal <= illegal;
2522     end process;
2523
2524 end architecture behaviour;