fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is
  15     port (
  16         clk : in std_ulogic;                -- registers in this unit update on the rising edge
  17         rst : in std_ulogic;                -- synchronous reset (sampled inside the rising_edge branch of fpu_0)
  18
  19         e_in  : in  Execute1toFPUType;      -- request input; e_in.valid must only be set while the FSM is IDLE (asserted in fpu_0)
  20         e_out : out FPUToExecute1Type;      -- busy / exception / interrupt status back to the requester
  21
  22         w_out : out FPUToWritebackType      -- result data, destination register and CR update
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);  -- FINITE covers both normalized and denormalized values (see decode_dp)
  28
  29     constant EXP_BITS : natural := 13;          -- width of the signed exponent and shift-count fields below
  30
  31     type fpu_reg_type is record                 -- unpacked operand, as returned by decode_dp
  32         class    : fp_number_class;
  33         negative : std_ulogic;                          -- copy of bit 63 of the source value
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format
  36     end record;
  37
  38     type state_t is (IDLE,                                  -- IDLE is the state loaded on reset (see fpu_0)
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,  -- NOTE(review): DO_* look like per-instruction entry states; transitions are in fpu_1 (not visible here) - confirm
  40                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  44                      DO_FRE, DO_FRSQRTE,
  45                      DO_FSEL,
  46                      FRI_1,
  47                      ADD_1, ADD_SHIFT, ADD_2, ADD_3,
  48                      CMP_1, CMP_2,
  49                      MULT_1,
  50                      FMADD_1, FMADD_2, FMADD_3,
  51                      FMADD_4, FMADD_5, FMADD_6,
  52                      LOOKUP,
  53                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  54                      FRE_1,
  55                      RSQRT_1,
  56                      FTDIV_1,
  57                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  58                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  59                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  60                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  61                      INT_FINAL, INT_CHECK, INT_OFLOW,
  62                      FINISH, NORMALIZE,
  63                      ROUND_UFLOW, ROUND_OFLOW,
  64                      ROUNDING, ROUNDING_2, ROUNDING_3,
  65                      DENORM,
  66                      RENORM_A, RENORM_A2,
  67                      RENORM_B, RENORM_B2,
  68                      RENORM_C, RENORM_C2,
  69                      NAN_RESULT, EXC_RESULT);
  70
  71     type reg_type is record                 -- complete register state of the unit (signals r / rin)
  72         state        : state_t;                         -- current FSM state; IDLE after reset
  73         busy         : std_ulogic;                      -- drives e_out.busy; cleared on reset
  74         instr_done   : std_ulogic;                      -- qualifies w_out.valid and w_out.write_cr_enable
  75         do_intr      : std_ulogic;                      -- drives e_out.interrupt and suppresses w_out.valid
  76         op           : insn_type_t;
  77         insn         : std_ulogic_vector(31 downto 0);
  78         dest_fpr     : gspr_index_t;                    -- drives w_out.write_reg
  79         fe_mode      : std_ulogic;
  80         rc           : std_ulogic;                      -- enables the CR write when instr_done is set
  81         is_cmp       : std_ulogic;                      -- also enables the CR write when instr_done is set
  82         single_prec  : std_ulogic;
  83         fpscr        : std_ulogic_vector(31 downto 0);  -- cleared on reset; bit FPSCR_FEX drives e_out.exception
  84         a            : fpu_reg_type;
  85         b            : fpu_reg_type;                    -- b.mantissa supplies the lookup-table address (see lut_access)
  86         c            : fpu_reg_type;
  87         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  88         s            : std_ulogic_vector(55 downto 0);  -- extended fraction
  89         x            : std_ulogic;
  90         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  91         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  92         result_sign  : std_ulogic;
  93         result_class : fp_number_class;
  94         result_exp   : signed(EXP_BITS-1 downto 0);
  95         shift        : signed(EXP_BITS-1 downto 0);
  96         writing_back : std_ulogic;                      -- drives w_out.write_enable; cleared on reset
  97         int_result   : std_ulogic;
  98         cr_result    : std_ulogic_vector(3 downto 0);   -- replicated into all eight fields of w_out.write_cr_data
  99         cr_mask      : std_ulogic_vector(7 downto 0);   -- drives w_out.write_cr_mask
 100         old_exc      : std_ulogic_vector(4 downto 0);
 101         update_fprf  : std_ulogic;
 102         quieten_nan  : std_ulogic;
 103         tiny         : std_ulogic;
 104         denorm       : std_ulogic;
 105         round_mode   : std_ulogic_vector(2 downto 0);
 106         is_subtract  : std_ulogic;
 107         exp_cmp      : std_ulogic;
 108         madd_cmp     : std_ulogic;
 109         add_bsmall   : std_ulogic;
 110         is_multiply  : std_ulogic;
 111         is_sqrt      : std_ulogic;                      -- makes lut_access address the 1/sqrt(x) part of the table
 112         first        : std_ulogic;
 113         count        : unsigned(1 downto 0);
 114         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 115         opsel_a      : std_ulogic_vector(1 downto 0);
 116         use_a        : std_ulogic;
 117         use_b        : std_ulogic;
 118         use_c        : std_ulogic;
 119         invalid      : std_ulogic;
 120         negate       : std_ulogic;
 121         longmask     : std_ulogic;
 122     end record;
 123
 124     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);  -- 1024 entries of 18 bits each
 125
 126     signal r, rin : reg_type;                               -- r is loaded from rin on each rising clock edge when not in reset (fpu_0)
 127
 128     signal fp_result     : std_ulogic_vector(63 downto 0);  -- drives w_out.write_data
 129     signal opsel_b       : std_ulogic_vector(1 downto 0);
 130     signal opsel_r       : std_ulogic_vector(1 downto 0);
 131     signal opsel_s       : std_ulogic_vector(1 downto 0);
 132     signal opsel_ainv    : std_ulogic;
 133     signal opsel_mask    : std_ulogic;
 134     signal opsel_binv    : std_ulogic;
 135     signal in_a          : std_ulogic_vector(63 downto 0);
 136     signal in_b          : std_ulogic_vector(63 downto 0);
 137     signal result        : std_ulogic_vector(63 downto 0);
 138     signal carry_in      : std_ulogic;
 139     signal lost_bits     : std_ulogic;
 140     signal r_hi_nz       : std_ulogic;
 141     signal r_lo_nz       : std_ulogic;
 142     signal s_nz          : std_ulogic;
 143     signal misc_sel      : std_ulogic_vector(3 downto 0);
 144     signal f_to_multiply : MultiplyInputType;               -- m_in of the fpu_multiply_0 instance
 145     signal multiply_to_f : MultiplyOutputType;              -- m_out of the fpu_multiply_0 instance
 146     signal msel_1        : std_ulogic_vector(1 downto 0);
 147     signal msel_2        : std_ulogic_vector(1 downto 0);
 148     signal msel_add      : std_ulogic_vector(1 downto 0);
 149     signal msel_inv      : std_ulogic;
 150     signal inverse_est   : std_ulogic_vector(18 downto 0);  -- registered table output with a leading '1' prepended (lut_access)
 151
 152     -- opsel values (all selectors below are 2-bit encodings)
 153     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";  -- NOTE(review): AIN_* presumably encode r.opsel_a - confirm in fpu_1
 154     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 155     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 156     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 157
 158     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";  -- NOTE(review): BIN_* presumably encode opsel_b - confirm in fpu_1
 159     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 160     constant BIN_RND  : std_ulogic_vector(1 downto 0) := "10";
 161     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 162
 163     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00"; -- NOTE(review): RES_* presumably encode opsel_r - confirm in fpu_1
 164     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 165     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 166     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 167
 168     constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";   -- NOTE(review): S_* presumably encode opsel_s - confirm in fpu_1
 169     constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
 170     constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
 171     constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";
 172
 173     -- msel values (multiplier operand / addend selectors, also 2-bit)
 174     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): MUL1_* presumably encode msel_1 - confirm in fpu_1
 175     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 176     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 177     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 178
 179     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";  -- NOTE(review): MUL2_* presumably encode msel_2 - confirm in fpu_1
 180     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 181     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 182     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 183
 184     constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";  -- NOTE(review): MULADD_* presumably encode msel_add - confirm in fpu_1
 185     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 186     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 187     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 188
 189     -- Inverse lookup table.  Each 256-entry block is indexed by the top 8
 190     -- fraction bits.  The first 256 entries are the reciprocal (1/x) table,
 191     -- and the remaining 768 entries are the reciprocal square root table.
 192     -- Output range is [0.5, 1) in 0.19 format, though the top bit isn't
 193     -- stored since it is always 1 (so each entry holds the low 18 bits).
 194     -- Each output value is the inverse of the center of the input
 195     -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
 196     -- entry 1 is 1 / (1 + 3/512), etc.
 197     signal inverse_table : lookup_table := (
 198         -- 1/x lookup table
 199         -- Unit bit is assumed to be 1, so input range is [1, 2)
 200         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 201         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 202         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 203         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 204         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 205         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 206         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 207         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 208         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 209         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 210         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 211         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 212         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 213         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 214         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 215         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 216         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 217         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 218         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 219         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 220         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 221         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 222         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 223         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 224         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 225         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 226         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 227         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 228         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 229         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 230         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 231         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 232         -- 1/sqrt(x) lookup table
 233         -- Input is in the range [1, 4), i.e. two bits to the left of the
 234         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 235         -- 1.0 ... 1.9999
 236         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 237         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 238         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 239         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 240         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 241         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 242         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 243         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 244         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 245         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 246         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 247         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 248         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 249         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 250         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 251         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 252         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 253         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 254         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 255         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 256         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 257         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 258         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 259         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 260         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 261         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 262         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 263         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 264         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 265         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 266         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 267         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 268         -- 2.0 ... 2.9999
 269         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 270         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 271         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 272         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 273         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 274         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 275         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 276         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 277         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 278         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 279         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 280         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 281         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 282         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 283         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 284         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 285         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 286         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 287         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 288         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 289         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 290         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 291         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 292         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 293         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 294         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 295         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 296         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 297         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 298         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 299         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 300         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 301         -- 3.0 ... 3.9999
 302         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 303         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 304         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 305         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 306         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 307         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 308         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 309         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 310         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 311         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 312         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 313         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 314         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 315         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 316         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 317         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 318         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 319         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 320         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 321         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 322         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 323         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 324         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 325         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 326         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 327         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 328         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 329         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 330         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 331         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 332         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 333         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 334         );
 335
 336     -- Left and right shifter with 120 bit input and 64 bit output.
 337     -- Shifts inp left by shift bits and returns the upper 64 bits of
 338     -- the result.  The shift parameter is interpreted as a signed
 339     -- number in the range -64..63, with negative values indicating
 340     -- right shifts.
 341     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 342                         shift: std_ulogic_vector(6 downto 0))
 343         return std_ulogic_vector is
 344         variable s1 : std_ulogic_vector(94 downto 0);       -- output of the coarse stage (steps of 32)
 345         variable s2 : std_ulogic_vector(70 downto 0);       -- output of the middle stage (steps of 8)
 346         variable result : std_ulogic_vector(63 downto 0);   -- output of the fine stage (steps of 1)
 347     begin
 348         case shift(6 downto 5) is                           -- stage 1: top two bits of the signed count
 349             when "00" =>                                    -- count 0..31: left shift, no coarse offset
 350                 s1 := inp(119 downto 25);
 351             when "01" =>                                    -- count 32..63: left by 32, zero-filled at the bottom
 352                 s1 := inp(87 downto 0) & "0000000";
 353             when "10" =>                                    -- count -64..-33: right shift, 64 zeros at the top
 354                 s1 := x"0000000000000000" & inp(119 downto 89);
 355             when others =>                                  -- count -32..-1: right shift, 32 zeros at the top (also taken for metavalues)
 356                 s1 := x"00000000" & inp(119 downto 57);
 357         end case;
 358         case shift(4 downto 3) is                           -- stage 2: further left offset of 0, 8, 16 or 24
 359             when "00" =>
 360                 s2 := s1(94 downto 24);
 361             when "01" =>
 362                 s2 := s1(86 downto 16);
 363             when "10" =>
 364                 s2 := s1(78 downto 8);
 365             when others =>
 366                 s2 := s1(70 downto 0);
 367         end case;
 368         case shift(2 downto 0) is                           -- stage 3: further left offset of 0..7
 369             when "000" =>
 370                 result := s2(70 downto 7);
 371             when "001" =>
 372                 result := s2(69 downto 6);
 373             when "010" =>
 374                 result := s2(68 downto 5);
 375             when "011" =>
 376                 result := s2(67 downto 4);
 377             when "100" =>
 378                 result := s2(66 downto 3);
 379             when "101" =>
 380                 result := s2(65 downto 2);
 381             when "110" =>
 382                 result := s2(64 downto 1);
 383             when others =>
 384                 result := s2(63 downto 0);
 385         end case;
 386         return result;                                      -- for count 0 this is inp(119 downto 56)
 387     end;
 388
 389     -- Generate a mask with 0-bits on the left and 1-bits on the right which
 390     -- selects the bits that will be lost in doing a right shift.  The shift
 391     -- parameter is the bottom 6 bits of a negative shift count,
 392     -- indicating a right shift.
 393     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 394         variable mask : std_ulogic_vector(63 downto 0) := (others => '0');
 395     begin
 396         -- The low (64 - shift) bits become ones; every other bit stays zero.
 397         for k in mask'range loop
 398             if (63 - k) >= shift then
 399                 mask(k) := '1';
 400             end if;
 401         end loop;
 402         return mask;
 403     end;
 404
 405     -- Split a DP floating-point number into components and work out its class.
 406     -- If is_int = 1, the input is considered an integer
 407     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 408         variable num      : fpu_reg_type;
 409         variable any_exp  : std_ulogic;                     -- some exponent bit is 1
 410         variable all_exp  : std_ulogic;                     -- every exponent bit is 1
 411         variable any_frac : std_ulogic;                     -- some fraction bit is 1
 412         variable summary  : std_ulogic_vector(2 downto 0);
 413     begin
 414         any_exp  := or (fpr(62 downto 52));
 415         all_exp  := and (fpr(62 downto 52));
 416         any_frac := or (fpr(51 downto 0));
 417         num.negative := fpr(63);
 418         if is_int /= '0' then
 419             -- Integer operand: the raw 64 bits are the mantissa, exponent is 0
 420             num.mantissa := fpr;
 421             num.exponent := (others => '0');
 422             num.class := ZERO;
 423             if (fpr(63) or any_exp or any_frac) = '1' then
 424                 num.class := FINITE;
 425             end if;
 426         else
 427             -- FP operand: unit bit is any_exp, two zero bits go below the fraction
 428             num.mantissa := "000000000" & any_exp & fpr(51 downto 0) & "00";
 429             if any_exp = '0' then
 430                 num.exponent := to_signed(-1022, EXP_BITS);     -- zero exponent field
 431             else
 432                 num.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
 433             end if;
 434             summary := all_exp & any_exp & any_frac;
 435             case summary is
 436                 when "000"                 => num.class := ZERO;
 437                 when "001" | "010" | "011" => num.class := FINITE;    -- "001" is denormalized
 438                 when "110"                 => num.class := INFINITY;
 439                 when others                => num.class := NAN;
 440             end case;
 441         end if;
 442         return num;
 443     end;
 444
 445     -- Construct a DP floating-point result from components
 446     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 447                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 448         return std_ulogic_vector is
 449         variable packed : std_ulogic_vector(63 downto 0);
 450     begin
 451         -- Start from a signed zero; class ZERO needs nothing more.
 452         packed := (63 => sign, others => '0');
 453         -- Exponent field, bits 62..52
 454         if class = INFINITY or class = NAN then
 455             packed(62 downto 52) := "11111111111";
 456         elsif class = FINITE then
 457             if mantissa(54) = '1' then
 458                 -- unit bit set: normalized number, store the biased exponent
 459                 packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
 460             end if;
 461         end if;
 462         -- Fraction field, bits 51..0 (FINITE and NAN only)
 463         if class = FINITE or class = NAN then
 464             packed(51 downto 29) := mantissa(53 downto 31);
 465             if class = NAN then
 466                 -- quieten_nan forces the top fraction bit on
 467                 packed(51) := quieten_nan or mantissa(53);
 468             end if;
 469             if single_prec = '0' then
 470                 -- the low 29 fraction bits are kept for double precision only
 471                 packed(28 downto 0) := mantissa(30 downto 2);
 472             end if;
 473         end if;
 474         return packed;
 475     end;
 476
 477     -- Determine whether to increment when rounding
 478     -- Returns rounding_inc & inexact
 479     -- Assumes x includes the bottom 29 bits of the mantissa already
 480     -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
 481     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 482                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 483                          sign: std_ulogic)
 484         return std_ulogic_vector is
 485         variable tail    : std_ulogic_vector(2 downto 0);   -- two bits below the LSB, then x
 486         variable low_bit : std_ulogic;                      -- LSB of the kept mantissa
 487         variable verdict : std_ulogic_vector(1 downto 0);   -- increment & inexact
 488     begin
 489         -- Pick the discarded bits and the LSB for the target precision.
 490         if single_prec /= '0' then
 491             tail := mantissa(30 downto 29) & x;
 492             low_bit := mantissa(31);
 493         else
 494             tail := mantissa(1 downto 0) & x;
 495             low_bit := mantissa(2);
 496         end if;
 497         verdict(1) := '0';              -- no increment unless a mode below asks for it
 498         verdict(0) := or (tail);        -- inexact if any discarded bit is set
 499         case rn(1 downto 0) is
 500             when "01" =>        -- round towards zero: never increment
 501                 null;
 502             when "00" =>        -- round to nearest
 503                 verdict(1) := tail(2);
 504                 if tail = "100" and rn(2) = '0' then
 505                     verdict(1) := low_bit;      -- exact tie: round to even
 506                 end if;
 507             when others =>      -- round towards +/- inf
 508                 if rn(0) = sign then
 509                     verdict(1) := verdict(0);   -- towards greater magnitude when inexact
 510                 end if;
 511         end case;
 512         return verdict;
 513     end;
 514
 515     -- Determine result flags to write into the FPSCR
 516     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 517         return std_ulogic_vector is
 518         variable flags : std_ulogic_vector(0 to 4);     -- ascending 0..4, as a concatenation would be
 519     begin
 520         flags := (not unitbit) & sign & (not sign) & "00";  -- FINITE encoding, used unless overridden
 521         if class = ZERO then
 522             flags := sign & "0010";
 523         elsif class = INFINITY then
 524             flags := '0' & sign & (not sign) & "01";
 525         elsif class = NAN then
 526             flags := "10001";
 527         end if;
 528         return flags;
 529     end;
 530
 531 begin
 532     fpu_multiply_0: entity work.multiply            -- multiplier instance; NOTE(review): its latency and behaviour are defined in work.multiply, not visible here
 533         port map (
 534             clk => clk,
 535             m_in => f_to_multiply,                  -- request record driven by this unit
 536             m_out => multiply_to_f                  -- result record returned to this unit
 537             );
 538
 539     fpu_0: process(clk)
 540     begin
 541         if rising_edge(clk) then
 542             if rst = '1' then
 543                 r.state <= IDLE;
 544                 r.busy <= '0';
 545                 r.instr_done <= '0';
 546                 r.do_intr <= '0';
 547                 r.fpscr <= (others => '0');
 548                 r.writing_back <= '0';
 549             else
 550                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
 551                 r <= rin;
 552             end if;
 553         end if;
 554     end process;
 555
 556     -- synchronous reads from lookup table
 557     lut_access: process(clk)
 558         variable addrhi : std_ulogic_vector(1 downto 0);
 559         variable addr   : std_ulogic_vector(9 downto 0);
 560     begin
 561         if rising_edge(clk) then
 562             if r.is_sqrt = '1' then
 563                 addrhi := r.b.mantissa(55 downto 54);
 564             else
 565                 addrhi := "00";
 566             end if;
 567             addr := addrhi & r.b.mantissa(53 downto 46);
 568             inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
 569         end if;
 570     end process;
 571
 572     e_out.busy <= r.busy;
 573     e_out.exception <= r.fpscr(FPSCR_FEX);
 574     e_out.interrupt <= r.do_intr;
 575
 576     w_out.valid <= r.instr_done and not r.do_intr;
 577     w_out.write_enable <= r.writing_back;
 578     w_out.write_reg <= r.dest_fpr;
 579     w_out.write_data <= fp_result;
 580     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
 581     w_out.write_cr_mask <= r.cr_mask;
 582     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
 583                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;
 584
 585     fpu_1: process(all)
 586         variable v           : reg_type;
 587         variable adec        : fpu_reg_type;
 588         variable bdec        : fpu_reg_type;
 589         variable cdec        : fpu_reg_type;
 590         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 591         variable illegal     : std_ulogic;
 592         variable j, k        : integer;
 593         variable flm         : std_ulogic_vector(7 downto 0);
 594         variable int_input   : std_ulogic;
 595         variable mask        : std_ulogic_vector(63 downto 0);
 596         variable in_a0       : std_ulogic_vector(63 downto 0);
 597         variable in_b0       : std_ulogic_vector(63 downto 0);
 598         variable misc        : std_ulogic_vector(63 downto 0);
 599         variable shift_res   : std_ulogic_vector(63 downto 0);
 600         variable round       : std_ulogic_vector(1 downto 0);
 601         variable update_fx   : std_ulogic;
 602         variable arith_done  : std_ulogic;
 603         variable invalid     : std_ulogic;
 604         variable zero_divide : std_ulogic;
 605         variable mant_nz     : std_ulogic;
 606         variable min_exp     : signed(EXP_BITS-1 downto 0);
 607         variable max_exp     : signed(EXP_BITS-1 downto 0);
 608         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 609         variable new_exp     : signed(EXP_BITS-1 downto 0);
 610         variable exp_tiny    : std_ulogic;
 611         variable exp_huge    : std_ulogic;
 612         variable renormalize : std_ulogic;
 613         variable clz         : std_ulogic_vector(5 downto 0);
 614         variable set_x       : std_ulogic;
 615         variable mshift      : signed(EXP_BITS-1 downto 0);
 616         variable need_check  : std_ulogic;
 617         variable msb         : std_ulogic;
 618         variable is_add      : std_ulogic;
 619         variable set_a       : std_ulogic;
 620         variable set_b       : std_ulogic;
 621         variable set_c       : std_ulogic;
 622         variable set_y       : std_ulogic;
 623         variable set_s       : std_ulogic;
 624         variable qnan_result : std_ulogic;
 625         variable px_nz       : std_ulogic;
 626         variable pcmpb_eq    : std_ulogic;
 627         variable pcmpb_lt    : std_ulogic;
 628         variable pshift      : std_ulogic;
 629         variable renorm_sqrt : std_ulogic;
 630         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 631         variable shiftin     : std_ulogic;
 632         variable mulexp      : signed(EXP_BITS-1 downto 0);
 633         variable maddend     : std_ulogic_vector(127 downto 0);
 634         variable sum         : std_ulogic_vector(63 downto 0);
 635         variable round_inc   : std_ulogic_vector(63 downto 0);
 636     begin
 637         v := r;
 638         illegal := '0';
 639         v.busy := '0';
 640         int_input := '0';
 641
 642         -- capture incoming instruction
 643         if e_in.valid = '1' then
 644             v.insn := e_in.insn;
 645             v.op := e_in.op;
 646             v.fe_mode := or (e_in.fe_mode);
 647             v.dest_fpr := e_in.frt;
 648             v.single_prec := e_in.single;
 649             v.longmask := e_in.single;
 650             v.int_result := '0';
 651             v.rc := e_in.rc;
 652             v.is_cmp := e_in.out_cr;
 653             if e_in.out_cr = '0' then
 654                 v.cr_mask := num_to_fxm(1);
 655             else
 656                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 657             end if;
 658             int_input := '0';
 659             if e_in.op = OP_FPOP_I then
 660                 int_input := '1';
 661             end if;
 662             v.quieten_nan := '1';
 663             v.tiny := '0';
 664             v.denorm := '0';
 665             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 666             v.is_subtract := '0';
 667             v.is_multiply := '0';
 668             v.is_sqrt := '0';
 669             v.add_bsmall := '0';
 670             v.doing_ftdiv := "00";
 671
 672             adec := decode_dp(e_in.fra, int_input);
 673             bdec := decode_dp(e_in.frb, int_input);
 674             cdec := decode_dp(e_in.frc, int_input);
 675             v.a := adec;
 676             v.b := bdec;
 677             v.c := cdec;
 678
 679             v.exp_cmp := '0';
 680             if adec.exponent > bdec.exponent then
 681                 v.exp_cmp := '1';
 682             end if;
 683             v.madd_cmp := '0';
 684             if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
 685                 v.madd_cmp := '1';
 686             end if;
 687         end if;
 688
 689         r_hi_nz <= or (r.r(55 downto 31));
 690         r_lo_nz <= or (r.r(30 downto 2));
 691         s_nz <= or (r.s);
 692
 693         if r.single_prec = '0' then
 694             if r.doing_ftdiv(1) = '0' then
 695                 max_exp := to_signed(1023, EXP_BITS);
 696             else
 697                 max_exp := to_signed(1020, EXP_BITS);
 698             end if;
 699             if r.doing_ftdiv(0) = '0' then
 700                 min_exp := to_signed(-1022, EXP_BITS);
 701             else
 702                 min_exp := to_signed(-1021, EXP_BITS);
 703             end if;
 704             bias_exp := to_signed(1536, EXP_BITS);
 705         else
 706             max_exp := to_signed(127, EXP_BITS);
 707             min_exp := to_signed(-126, EXP_BITS);
 708             bias_exp := to_signed(192, EXP_BITS);
 709         end if;
 710         new_exp := r.result_exp - r.shift;
 711         exp_tiny := '0';
 712         exp_huge := '0';
 713         if new_exp < min_exp then
 714             exp_tiny := '1';
 715         end if;
 716         if new_exp > max_exp then
 717             exp_huge := '1';
 718         end if;
 719
 720         -- Compare P with zero and with B
 721         px_nz := or (r.p(57 downto 4));
 722         pcmpb_eq := '0';
 723         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 724             pcmpb_eq := '1';
 725         end if;
 726         pcmpb_lt := '0';
 727         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 728             pcmpb_lt := '1';
 729         end if;
 730
 731         v.writing_back := '0';
 732         v.instr_done := '0';
 733         v.update_fprf := '0';
 734         v.shift := to_signed(0, EXP_BITS);
 735         v.first := '0';
 736         v.opsel_a := AIN_R;
 737         opsel_ainv <= '0';
 738         opsel_mask <= '0';
 739         opsel_b <= BIN_ZERO;
 740         opsel_binv <= '0';
 741         opsel_r <= RES_SUM;
 742         opsel_s <= S_ZERO;
 743         carry_in <= '0';
 744         misc_sel <= "0000";
 745         fpscr_mask := (others => '1');
 746         update_fx := '0';
 747         arith_done := '0';
 748         invalid := '0';
 749         zero_divide := '0';
 750         renormalize := '0';
 751         set_x := '0';
 752         qnan_result := '0';
 753         set_a := '0';
 754         set_b := '0';
 755         set_c := '0';
 756         set_s := '0';
 757         f_to_multiply.is_32bit <= '0';
 758         f_to_multiply.valid <= '0';
 759         msel_1 <= MUL1_A;
 760         msel_2 <= MUL2_C;
 761         msel_add <= MULADD_ZERO;
 762         msel_inv <= '0';
 763         set_y := '0';
 764         pshift := '0';
 765         renorm_sqrt := '0';
 766         shiftin := '0';
 767         case r.state is
 768             when IDLE =>
 769                 v.use_a := '0';
 770                 v.use_b := '0';
 771                 v.use_c := '0';
 772                 v.invalid := '0';
 773                 v.negate := '0';
 774                 if e_in.valid = '1' then
 775                     case e_in.insn(5 downto 1) is
 776                         when "00000" =>
 777                             if e_in.insn(8) = '1' then
 778                                 if e_in.insn(6) = '0' then
 779                                     v.state := DO_FTDIV;
 780                                 else
 781                                     v.state := DO_FTSQRT;
 782                                 end if;
 783                             elsif e_in.insn(7) = '1' then
 784                                 v.state := DO_MCRFS;
 785                             else
 786                                 v.opsel_a := AIN_B;
 787                                 v.state := DO_FCMP;
 788                             end if;
 789                         when "00110" =>
 790                             if e_in.insn(10) = '0' then
 791                                 if e_in.insn(8) = '0' then
 792                                     v.state := DO_MTFSB;
 793                                 else
 794                                     v.state := DO_MTFSFI;
 795                                 end if;
 796                             else
 797                                 v.state := DO_FMRG;
 798                             end if;
 799                         when "00111" =>
 800                             if e_in.insn(8) = '0' then
 801                                 v.state := DO_MFFS;
 802                             else
 803                                 v.state := DO_MTFSF;
 804                             end if;
 805                         when "01000" =>
 806                             v.opsel_a := AIN_B;
 807                             if e_in.insn(9 downto 8) /= "11" then
 808                                 v.state := DO_FMR;
 809                             else
 810                                 v.state := DO_FRI;
 811                             end if;
 812                         when "01100" =>
 813                             v.opsel_a := AIN_B;
 814                             v.state := DO_FRSP;
 815                         when "01110" =>
 816                             v.opsel_a := AIN_B;
 817                             if int_input = '1' then
 818                                 -- fcfid[u][s]
 819                                 v.state := DO_FCFID;
 820                             else
 821                                 v.state := DO_FCTI;
 822                             end if;
 823                         when "01111" =>
 824                             v.round_mode := "001";
 825                             v.opsel_a := AIN_B;
 826                             v.state := DO_FCTI;
 827                         when "10010" =>
 828                             v.opsel_a := AIN_A;
 829                             if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 830                                 v.opsel_a := AIN_B;
 831                             end if;
 832                             v.state := DO_FDIV;
 833                         when "10100" | "10101" =>
 834                             v.opsel_a := AIN_A;
 835                             v.state := DO_FADD;
 836                         when "10110" =>
 837                             v.is_sqrt := '1';
 838                             v.opsel_a := AIN_B;
 839                             v.state := DO_FSQRT;
 840                         when "10111" =>
 841                             v.state := DO_FSEL;
 842                         when "11000" =>
 843                             v.opsel_a := AIN_B;
 844                             v.state := DO_FRE;
 845                         when "11001" =>
 846                             v.is_multiply := '1';
 847                             v.opsel_a := AIN_A;
 848                             if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 849                                 v.opsel_a := AIN_C;
 850                             end if;
 851                             v.state := DO_FMUL;
 852                         when "11010" =>
 853                             v.is_sqrt := '1';
 854                             v.opsel_a := AIN_B;
 855                             v.state := DO_FRSQRTE;
 856                         when "11100" | "11101" | "11110" | "11111" =>
 857                             if v.a.mantissa(54) = '0' then
 858                                 v.opsel_a := AIN_A;
 859                             elsif v.c.mantissa(54) = '0' then
 860                                 v.opsel_a := AIN_C;
 861                             else
 862                                 v.opsel_a := AIN_B;
 863                             end if;
 864                             v.state := DO_FMADD;
 865                         when others =>
 866                             illegal := '1';
 867                     end case;
 868                 end if;
 869                 v.x := '0';
 870                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 871                 set_s := '1';
 872
 873             when DO_MCRFS =>
 874                 j := to_integer(unsigned(insn_bfa(r.insn)));
 875                 for i in 0 to 7 loop
 876                     if i = j then
 877                         k := (7 - i) * 4;
 878                         v.cr_result := r.fpscr(k + 3 downto k);
 879                         fpscr_mask(k + 3 downto k) := "0000";
 880                     end if;
 881                 end loop;
 882                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 883                 v.instr_done := '1';
 884                 v.state := IDLE;
 885
 886             when DO_FTDIV =>
 887                 v.instr_done := '1';
 888                 v.state := IDLE;
 889                 v.cr_result := "0000";
 890                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
 891                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 892                     v.cr_result(2) := '1';
 893                 end if;
 894                 if r.a.class = NAN or r.a.class = INFINITY or
 895                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
 896                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
 897                     v.cr_result(1) := '1';
 898                 else
 899                     v.doing_ftdiv := "11";
 900                     v.first := '1';
 901                     v.state := FTDIV_1;
 902                     v.instr_done := '0';
 903                 end if;
 904
 905             when DO_FTSQRT =>
 906                 v.instr_done := '1';
 907                 v.state := IDLE;
 908                 v.cr_result := "0000";
 909                 if r.b.class = ZERO or r.b.class = INFINITY or
 910                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 911                     v.cr_result(2) := '1';
 912                 end if;
 913                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
 914                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
 915                     v.cr_result(1) := '0';
 916                 end if;
 917
 918             when DO_FCMP =>
 919                 -- fcmp[uo]
 920                 -- r.opsel_a = AIN_B
 921                 v.instr_done := '1';
 922                 v.state := IDLE;
 923                 update_fx := '1';
 924                 v.result_exp := r.b.exponent;
 925                 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 926                     (r.b.class = NAN and r.b.mantissa(53) = '0') then
 927                     -- Signalling NAN
 928                     v.fpscr(FPSCR_VXSNAN) := '1';
 929                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
 930                         v.fpscr(FPSCR_VXVC) := '1';
 931                     end if;
 932                     invalid := '1';
 933                     v.cr_result := "0001";          -- unordered
 934                 elsif r.a.class = NAN or r.b.class = NAN then
 935                     if r.insn(6) = '1' then
 936                         -- fcmpo
 937                         v.fpscr(FPSCR_VXVC) := '1';
 938                         invalid := '1';
 939                     end if;
 940                     v.cr_result := "0001";          -- unordered
 941                 elsif r.a.class = ZERO and r.b.class = ZERO then
 942                     v.cr_result := "0010";          -- equal
 943                 elsif r.a.negative /= r.b.negative then
 944                     v.cr_result := r.a.negative & r.b.negative & "00";
 945                 elsif r.a.class = ZERO then
 946                     -- A and B are the same sign from here down
 947                     v.cr_result := not r.b.negative & r.b.negative & "00";
 948                 elsif r.a.class = INFINITY then
 949                     if r.b.class = INFINITY then
 950                         v.cr_result := "0010";
 951                     else
 952                         v.cr_result := r.a.negative & not r.a.negative & "00";
 953                     end if;
 954                 elsif r.b.class = ZERO then
 955                     -- A is finite from here down
 956                     v.cr_result := r.a.negative & not r.a.negative & "00";
 957                 elsif r.b.class = INFINITY then
 958                     v.cr_result := not r.b.negative & r.b.negative & "00";
 959                 elsif r.exp_cmp = '1' then
 960                     -- A and B are both finite from here down
 961                     v.cr_result := r.a.negative & not r.a.negative & "00";
 962                 elsif r.a.exponent /= r.b.exponent then
 963                     -- A exponent is smaller than B
 964                     v.cr_result := not r.a.negative & r.a.negative & "00";
 965                 else
 966                     -- Prepare to subtract mantissas, put B in R
 967                     v.cr_result := "0000";
 968                     v.instr_done := '0';
 969                     v.opsel_a := AIN_A;
 970                     v.state := CMP_1;
 971                 end if;
 972                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
 973
 974             when DO_MTFSB =>
 975                 -- mtfsb{0,1}
 976                 j := to_integer(unsigned(insn_bt(r.insn)));
 977                 for i in 0 to 31 loop
 978                     if i = j then
 979                         v.fpscr(31 - i) := r.insn(6);
 980                     end if;
 981                 end loop;
 982                 v.instr_done := '1';
 983                 v.state := IDLE;
 984
 985             when DO_MTFSFI =>
 986                 -- mtfsfi
 987                 j := to_integer(unsigned(insn_bf(r.insn)));
 988                 if r.insn(16) = '0' then
 989                     for i in 0 to 7 loop
 990                         if i = j then
 991                             k := (7 - i) * 4;
 992                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 993                         end if;
 994                     end loop;
 995                 end if;
 996                 v.instr_done := '1';
 997                 v.state := IDLE;
 998
 999             when DO_FMRG =>
1000                 -- fmrgew, fmrgow
1001                 opsel_r <= RES_MISC;
1002                 misc_sel <= "01" & r.insn(8) & '0';
1003                 v.int_result := '1';
1004                 v.writing_back := '1';
1005                 v.instr_done := '1';
1006                 v.state := IDLE;
1007
1008             when DO_MFFS =>
1009                 v.int_result := '1';
1010                 v.writing_back := '1';
1011                 opsel_r <= RES_MISC;
1012                 case r.insn(20 downto 16) is
1013                     when "00000" =>
1014                         -- mffs
1015                     when "00001" =>
1016                         -- mffsce
1017                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1018                     when "10100" | "10101" =>
1019                         -- mffscdrn[i] (but we don't implement DRN)
1020                         fpscr_mask := x"000000FF";
1021                     when "10110" =>
1022                         -- mffscrn
1023                         fpscr_mask := x"000000FF";
1024                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1025                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1026                     when "10111" =>
1027                         -- mffscrni
1028                         fpscr_mask := x"000000FF";
1029                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1030                     when "11000" =>
1031                         -- mffsl
1032                         fpscr_mask := x"0007F0FF";
1033                     when others =>
1034                         illegal := '1';
1035                 end case;
1036                 v.instr_done := '1';
1037                 v.state := IDLE;
1038
1039             when DO_MTFSF =>
1040                 if r.insn(25) = '1' then
1041                     flm := x"FF";
1042                 elsif r.insn(16) = '1' then
1043                     flm := x"00";
1044                 else
1045                     flm := r.insn(24 downto 17);
1046                 end if;
1047                 for i in 0 to 7 loop
1048                     k := i * 4;
1049                     if flm(i) = '1' then
1050                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1051                     end if;
1052                 end loop;
1053                 v.instr_done := '1';
1054                 v.state := IDLE;
1055
1056             when DO_FMR =>
1057                 -- r.opsel_a = AIN_B
1058                 v.result_class := r.b.class;
1059                 v.result_exp := r.b.exponent;
1060                 v.quieten_nan := '0';
1061                 if r.insn(9) = '1' then
1062                     v.result_sign := '0';              -- fabs
1063                 elsif r.insn(8) = '1' then
1064                     v.result_sign := '1';              -- fnabs
1065                 elsif r.insn(7) = '1' then
1066                     v.result_sign := r.b.negative;     -- fmr
1067                 elsif r.insn(6) = '1' then
1068                     v.result_sign := not r.b.negative; -- fneg
1069                 else
1070                     v.result_sign := r.a.negative;     -- fcpsgn
1071                 end if;
1072                 v.writing_back := '1';
1073                 v.instr_done := '1';
1074                 v.state := IDLE;
1075
1076             when DO_FRI =>    -- fri[nzpm]
1077                 -- r.opsel_a = AIN_B
1078                 v.result_class := r.b.class;
1079                 v.result_sign := r.b.negative;
1080                 v.result_exp := r.b.exponent;
1081                 v.fpscr(FPSCR_FR) := '0';
1082                 v.fpscr(FPSCR_FI) := '0';
1083                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1084                     -- Signalling NAN
1085                     v.fpscr(FPSCR_VXSNAN) := '1';
1086                     invalid := '1';
1087                 end if;
1088                 if r.b.class = FINITE then
1089                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1090                         -- integer already, no rounding required
1091                         arith_done := '1';
1092                     else
1093                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1094                         v.state := FRI_1;
1095                         v.round_mode := '1' & r.insn(7 downto 6);
1096                     end if;
1097                 else
1098                     arith_done := '1';
1099                 end if;
1100
1101             when DO_FRSP =>
1102                 -- r.opsel_a = AIN_B, r.shift = 0
1103                 v.result_class := r.b.class;
1104                 v.result_sign := r.b.negative;
1105                 v.result_exp := r.b.exponent;
1106                 v.fpscr(FPSCR_FR) := '0';
1107                 v.fpscr(FPSCR_FI) := '0';
1108                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1109                     -- Signalling NAN
1110                     v.fpscr(FPSCR_VXSNAN) := '1';
1111                     invalid := '1';
1112                 end if;
1113                 set_x := '1';
1114                 if r.b.class = FINITE then
1115                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1116                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1117                         v.state := ROUND_UFLOW;
1118                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1119                         v.state := ROUND_OFLOW;
1120                     else
1121                         v.state := ROUNDING;
1122                     end if;
1123                 else
1124                     arith_done := '1';
1125                 end if;
1126
1127             when DO_FCTI =>
1128                 -- instr bit 9: 1=dword 0=word
1129                 -- instr bit 8: 1=unsigned 0=signed
1130                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1131                 -- r.opsel_a = AIN_B
1132                 v.result_class := r.b.class;
1133                 v.result_sign := r.b.negative;
1134                 v.result_exp := r.b.exponent;
1135                 v.fpscr(FPSCR_FR) := '0';
1136                 v.fpscr(FPSCR_FI) := '0';
1137                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1138                     -- Signalling NAN
1139                     v.fpscr(FPSCR_VXSNAN) := '1';
1140                     invalid := '1';
1141                 end if;
1142
1143                 v.int_result := '1';
1144                 case r.b.class is
1145                     when ZERO =>
1146                         arith_done := '1';
1147                     when FINITE =>
1148                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1149                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1150                             v.state := INT_OFLOW;
1151                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1152                             -- integer already, no rounding required,
1153                             -- shift into final position
1154                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1155                             if r.insn(8) = '1' and r.b.negative = '1' then
1156                                 v.state := INT_OFLOW;
1157                             else
1158                                 v.state := INT_ISHIFT;
1159                             end if;
1160                         else
1161                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1162                             v.state := INT_SHIFT;
1163                         end if;
1164                     when INFINITY | NAN =>
1165                         v.state := INT_OFLOW;
1166                 end case;
1167
1168             when DO_FCFID =>
1169                 -- r.opsel_a = AIN_B
1170                 v.result_sign := '0';
1171                 if r.insn(8) = '0' and r.b.negative = '1' then
1172                     -- fcfid[s] with negative operand, set R = -B
1173                     opsel_ainv <= '1';
1174                     carry_in <= '1';
1175                     v.result_sign := '1';
1176                 end if;
1177                 v.result_class := r.b.class;
1178                 v.result_exp := to_signed(54, EXP_BITS);
1179                 v.fpscr(FPSCR_FR) := '0';
1180                 v.fpscr(FPSCR_FI) := '0';
1181                 if r.b.class = ZERO then
1182                     arith_done := '1';
1183                 else
1184                     v.state := FINISH;
1185                 end if;
1186
1187             when DO_FADD =>
1188                 -- fadd[s] and fsub[s]
1189                 -- r.opsel_a = AIN_A
1190                 v.result_sign := r.a.negative;
1191                 v.result_class := r.a.class;
1192                 v.result_exp := r.a.exponent;
1193                 v.fpscr(FPSCR_FR) := '0';
1194                 v.fpscr(FPSCR_FI) := '0';
1195                 v.use_a := '1';
1196                 v.use_b := '1';
1197                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1198                 if r.a.class = FINITE and r.b.class = FINITE then
1199                     v.is_subtract := not is_add;
1200                     v.add_bsmall := r.exp_cmp;
1201                     v.opsel_a := AIN_B;
1202                     if r.exp_cmp = '0' then
1203                         v.shift := r.a.exponent - r.b.exponent;
1204                         v.result_sign := r.b.negative xnor r.insn(1);
1205                         if r.a.exponent = r.b.exponent then
1206                             v.state := ADD_2;
1207                         else
1208                             v.longmask := '0';
1209                             v.state := ADD_SHIFT;
1210                         end if;
1211                     else
1212                         v.state := ADD_1;
1213                     end if;
1214                 else
1215                     if r.a.class = NAN or r.b.class = NAN then
1216                         v.state := NAN_RESULT;
1217                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1218                         -- invalid operation, construct QNaN
1219                         v.fpscr(FPSCR_VXISI) := '1';
1220                         qnan_result := '1';
1221                         arith_done := '1';
1222                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1223                         -- return -0 for rounding to -infinity
1224                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1225                         arith_done := '1';
1226                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1227                         -- result is A
1228                         v.opsel_a := AIN_A;
1229                         v.state := EXC_RESULT;
1230                     else
1231                         -- result is +/- B
1232                         v.opsel_a := AIN_B;
1233                         v.negate := not r.insn(1);
1234                         v.state := EXC_RESULT;
1235                     end if;
1236                 end if;
1237
1238             when DO_FMUL =>
1239                 -- fmul[s]
1240                 -- r.opsel_a = AIN_A unless C is denorm and A isn't
                     -- Entry state for the product A * C.  Records the result sign
                     -- and which operands are in use, clears FPSCR FR/FI, and then
                     -- either heads towards the multiplier (both operands FINITE)
                     -- or resolves the NaN / infinity / zero cases directly.
1241                 v.result_sign := r.a.negative xor r.c.negative;
1242                 v.result_class := r.a.class;
1243                 v.fpscr(FPSCR_FR) := '0';
1244                 v.fpscr(FPSCR_FI) := '0';
1245                 v.use_a := '1';
1246                 v.use_c := '1';
1247                 if r.a.class = FINITE and r.c.class = FINITE then
                         -- exponent of the product is the sum of the operand exponents
1248                     v.result_exp := r.a.exponent + r.c.exponent;
1249                     -- Renormalize denorm operands
                         -- (an operand with mantissa bit 54 clear; A is checked before C)
1250                     if r.a.mantissa(54) = '0' then
1251                         v.state := RENORM_A;
1252                     elsif r.c.mantissa(54) = '0' then
1253                         v.state := RENORM_C;
1254                     else
                             -- both operands already normalized: start the multiply now
1255                         f_to_multiply.valid <= '1';
1256                         v.state := MULT_1;
1257                     end if;
1258                 else
1259                     if r.a.class = NAN or r.c.class = NAN then
1260                         v.state := NAN_RESULT;
1261                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1262                         (r.a.class = ZERO and r.c.class = INFINITY) then
1263                         -- invalid operation, construct QNaN
                             -- (infinity times zero, flagged in FPSCR VXIMZ)
1264                         v.fpscr(FPSCR_VXIMZ) := '1';
1265                         qnan_result := '1';
1266                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1267                         -- result is +/- A
                             -- (class was taken from A above; sign is sign(A) xor sign(C))
1268                         arith_done := '1';
1269                     else
1270                         -- r.c.class is ZERO or INFINITY
                             -- A is FINITE here: select C and request negation when A is
                             -- negative, then let EXC_RESULT deliver it.
1271                         v.opsel_a := AIN_C;
1272                         v.negate := r.a.negative;
1273                         v.state := EXC_RESULT;
1274                     end if;
1275                 end if;
1276
1277             when DO_FDIV =>
1278                 -- r.opsel_a = AIN_A unless B is denorm and A isn't
                     -- Entry state for the quotient A / B.  Sets the result sign
                     -- (sign(A) xor sign(B)) and exponent (exp(A) - exp(B)), resets
                     -- the iteration counter, and then either starts the division
                     -- sequence (both operands FINITE) or resolves the special
                     -- cases directly.
1279                 v.result_class := r.a.class;
1280                 v.fpscr(FPSCR_FR) := '0';
1281                 v.fpscr(FPSCR_FI) := '0';
1282                 v.use_a := '1';
1283                 v.use_b := '1';
1284                 v.result_sign := r.a.negative xor r.b.negative;
1285                 v.result_exp := r.a.exponent - r.b.exponent;
1286                 v.count := "00";
1287                 if r.a.class = FINITE and r.b.class = FINITE then
1288                     -- Renormalize denorm operands
                         -- (mantissa bit 54 clear; A is checked before B)
1289                     if r.a.mantissa(54) = '0' then
1290                         v.state := RENORM_A;
1291                     elsif r.b.mantissa(54) = '0' then
1292                         v.state := RENORM_B;
1293                     else
                             -- both normalized: go straight to the first divide step
1294                         v.first := '1';
1295                         v.state := DIV_2;
1296                     end if;
1297                 else
1298                     if r.a.class = NAN or r.b.class = NAN then
1299                         v.state := NAN_RESULT;
1300                     elsif r.b.class = INFINITY then
1301                         if r.a.class = INFINITY then
                                 -- infinity / infinity: invalid, flagged in VXIDI
1302                             v.fpscr(FPSCR_VXIDI) := '1';
1303                             qnan_result := '1';
1304                         else
                                 -- zero or finite divided by infinity gives zero
1305                             v.result_class := ZERO;
1306                         end if;
1307                         arith_done := '1';
1308                     elsif r.b.class = ZERO then
1309                         if r.a.class = ZERO then
                                 -- zero / zero: invalid, flagged in VXZDZ
1310                             v.fpscr(FPSCR_VXZDZ) := '1';
1311                             qnan_result := '1';
1312                         else
                                 -- only a FINITE dividend raises zero_divide;
                                 -- infinity / zero is simply infinity
1313                             if r.a.class = FINITE then
1314                                 zero_divide := '1';
1315                             end if;
1316                             v.result_class := INFINITY;
1317                         end if;
1318                         arith_done := '1';
1319                     else -- r.b.class = FINITE, result_class = r.a.class
                             -- (A is ZERO or INFINITY here, so the quotient has A's class)
1320                         arith_done := '1';
1321                     end if;
1322                 end if;
1323
1324             when DO_FSEL =>
                     -- Select between C and B according to A, then hand the chosen
                     -- operand to EXC_RESULT.  C is chosen when A is a zero (either
                     -- sign) or is non-negative and not a NaN; otherwise B.
                     -- NOTE(review): the Power ISA lists no FPSCR changes for fsel;
                     -- confirm that clearing FR/FI here is intended or harmless.
1325                 v.fpscr(FPSCR_FR) := '0';
1326                 v.fpscr(FPSCR_FI) := '0';
1327                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1328                     v.opsel_a := AIN_C;
1329                 else
1330                     v.opsel_a := AIN_B;
1331                 end if;
                     -- presumably so that a selected NaN is passed through unchanged;
                     -- confirm against the consumer of quieten_nan
1332                 v.quieten_nan := '0';
1333                 v.state := EXC_RESULT;
1334
1335             when DO_FSQRT =>
1336                 -- r.opsel_a = AIN_B
                     -- Entry state for the square root of B.  The result class and
                     -- sign default to B's; the case below then dispatches on B's
                     -- class.
1337                 v.result_class := r.b.class;
1338                 v.result_sign := r.b.negative;
1339                 v.fpscr(FPSCR_FR) := '0';
1340                 v.fpscr(FPSCR_FI) := '0';
1341                 v.use_b := '1';
1342                 case r.b.class is
1343                     when FINITE =>
1344                         v.result_exp := r.b.exponent;
1345                         if r.b.negative = '1' then
                                 -- square root of a negative number: invalid (VXSQRT).
                                 -- NOTE(review): neither v.state nor arith_done is set
                                 -- on this path; completion presumably comes from common
                                 -- qnan_result handling outside this view - confirm.
1346                             v.fpscr(FPSCR_VXSQRT) := '1';
1347                             qnan_result := '1';
1348                         elsif r.b.mantissa(54) = '0' then
                                 -- denormalized operand: renormalize first
1349                             v.state := RENORM_B;
1350                         elsif r.b.exponent(0) = '0' then
                                 -- normalized with an even exponent: start the iteration
1351                             v.state := SQRT_1;
1352                         else
                                 -- normalized with an odd exponent: go through RENORM_B2
                                 -- with a shift count of 1 (presumably to make the
                                 -- exponent even - TODO confirm against the shifter)
1353                             v.shift := to_signed(1, EXP_BITS);
1354                             v.state := RENORM_B2;
1355                         end if;
1356                     when NAN =>
1357                         v.state := NAN_RESULT;
1358                     when ZERO =>
1359                         -- result is B
1360                         arith_done := '1';
1361                     when INFINITY =>
1362                         if r.b.negative = '1' then
                                 -- square root of -infinity: invalid (VXSQRT)
1363                             v.fpscr(FPSCR_VXSQRT) := '1';
1364                             qnan_result := '1';
1365                         -- else result is B
1366                         end if;
1367                         arith_done := '1';
1368                 end case;
1369
1370             when DO_FRE =>
1371                 -- r.opsel_a = AIN_B
                     -- Entry state for the reciprocal path on B (state name suggests
                     -- the fre instruction - confirm against the decoder).  The
                     -- result class and sign default to B's; the case below then
                     -- dispatches on B's class.
1372                 v.result_class := r.b.class;
1373                 v.result_sign := r.b.negative;
1374                 v.fpscr(FPSCR_FR) := '0';
1375                 v.fpscr(FPSCR_FI) := '0';
1376                 v.use_b := '1';
1377                 case r.b.class is
1378                     when FINITE =>
                             -- the reciprocal's exponent is the negated operand exponent
1379                         v.result_exp := - r.b.exponent;
1380                         if r.b.mantissa(54) = '0' then
                                 -- denormalized operand: renormalize first
1381                             v.state := RENORM_B;
1382                         else
1383                             v.state := FRE_1;
1384                         end if;
1385                     when NAN =>
1386                         v.state := NAN_RESULT;
1387                     when INFINITY =>
                             -- reciprocal of infinity is zero (sign taken from B above)
1388                         v.result_class := ZERO;
1389                         arith_done := '1';
1390                     when ZERO =>
                             -- reciprocal of zero is infinity and raises zero_divide
1391                         v.result_class := INFINITY;
1392                         zero_divide := '1';
1393                         arith_done := '1';
1394                 end case;
1395
1396             when DO_FRSQRTE =>
1397                 -- r.opsel_a = AIN_B
                     -- Entry state for the reciprocal-square-root path on B (state
                     -- name suggests frsqrte - confirm against the decoder).  The
                     -- result class and sign default to B's, and the shift count is
                     -- preset to 1 for every path out of this state.
1398                 v.result_class := r.b.class;
1399                 v.result_sign := r.b.negative;
1400                 v.fpscr(FPSCR_FR) := '0';
1401                 v.fpscr(FPSCR_FI) := '0';
1402                 v.use_b := '1';
1403                 v.shift := to_signed(1, EXP_BITS);
1404                 case r.b.class is
1405                     when FINITE =>
1406                         v.result_exp := r.b.exponent;
1407                         if r.b.negative = '1' then
                                 -- negative operand: invalid (VXSQRT).
                                 -- NOTE(review): neither v.state nor arith_done is set
                                 -- on this path; completion presumably comes from common
                                 -- qnan_result handling outside this view - confirm.
1408                             v.fpscr(FPSCR_VXSQRT) := '1';
1409                             qnan_result := '1';
1410                         elsif r.b.mantissa(54) = '0' then
                                 -- denormalized operand: renormalize first
1411                             v.state := RENORM_B;
1412                         elsif r.b.exponent(0) = '0' then
                                 -- normalized with an even exponent
1413                             v.state := RSQRT_1;
1414                         else
                                 -- normalized with an odd exponent: via RENORM_B2 using
                                 -- the shift count of 1 set above
1415                             v.state := RENORM_B2;
1416                         end if;
1417                     when NAN =>
1418                         v.state := NAN_RESULT;
1419                     when INFINITY =>
1420                         if r.b.negative = '1' then
                                 -- -infinity: invalid (VXSQRT)
1421                             v.fpscr(FPSCR_VXSQRT) := '1';
1422                             qnan_result := '1';
1423                         else
                                 -- +infinity gives zero
1424                             v.result_class := ZERO;
1425                         end if;
1426                         arith_done := '1';
1427                     when ZERO =>
                             -- zero gives infinity (sign taken from B above) and
                             -- raises zero_divide
1428                         v.result_class := INFINITY;
1429                         zero_divide := '1';
1430                         arith_done := '1';
1431                 end case;
1432
1433             when DO_FMADD =>
1434                 -- fmadd, fmsub, fnmadd, fnmsub
1435                 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1436                 -- else AIN_B
                     -- Entry state for the fused multiply-add A * C +/- B.  This
                     -- state is also re-entered from RENORM_A2 / RENORM_C2 once the
                     -- multiplicands have been renormalized and r.madd_cmp has been
                     -- recomputed.
                     -- Instruction bits used here: insn(1) enters the add/subtract
                     -- decision, insn(2) enters the sign of the final result.
1437                 v.result_sign := r.a.negative;
1438                 v.result_class := r.a.class;
1439                 v.result_exp := r.a.exponent;
1440                 v.fpscr(FPSCR_FR) := '0';
1441                 v.fpscr(FPSCR_FI) := '0';
1442                 v.use_a := '1';
1443                 v.use_b := '1';
1444                 v.use_c := '1';
                     -- is_add = '1' when the product and the addend are effectively
                     -- added; is_subtract below is its complement
1445                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1446                 if r.a.class = FINITE and r.c.class = FINITE and
1447                     (r.b.class = FINITE or r.b.class = ZERO) then
1448                     v.is_subtract := not is_add;
                         -- exponent of the product A * C
1449                     mulexp := r.a.exponent + r.c.exponent;
1450                     v.result_exp := mulexp;
1451                     -- Make sure A and C are normalized
1452                     if r.a.mantissa(54) = '0' then
1453                         v.state := RENORM_A;
1454                     elsif r.c.mantissa(54) = '0' then
1455                         v.state := RENORM_C;
1456                     elsif r.b.class = ZERO then
1457                         -- no addend, degenerates to multiply
                             -- sign is the product sign, flipped when insn(2) is set
1458                         v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1459                         f_to_multiply.valid <= '1';
1460                         v.is_multiply := '1';
1461                         v.state := MULT_1;
1462                     elsif r.madd_cmp = '0' then
1463                         -- addend is bigger, do multiply first
1464                         v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1465                         f_to_multiply.valid <= '1';
1466                         v.state := FMADD_1;
1467                     else
1468                         -- product is bigger, shift B right and use it as the
1469                         -- addend to the multiplier
                             -- (shift count is biased by 64; FMADD_2 removes the bias)
1470                         v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1471                         -- for subtract, multiplier does B - A * C
1472                         v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1473                         v.result_exp := r.b.exponent;
1474                         v.state := FMADD_2;
1475                     end if;
1476                 else
1477                     if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1478                         v.state := NAN_RESULT;
1479                     elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1480                         (r.a.class = INFINITY and r.c.class = ZERO) then
1481                         -- invalid operation, construct QNaN
                             -- (zero times infinity, flagged in VXIMZ)
1482                         v.fpscr(FPSCR_VXIMZ) := '1';
1483                         qnan_result := '1';
1484                     elsif r.a.class = INFINITY or r.c.class = INFINITY then
                             -- the product is an infinity
1485                         if r.b.class = INFINITY and is_add = '0' then
1486                             -- invalid operation, construct QNaN
                                 -- (infinity minus infinity, flagged in VXISI)
1487                             v.fpscr(FPSCR_VXISI) := '1';
1488                             qnan_result := '1';
1489                         else
1490                             -- result is infinity
                                 -- with the product's sign, flipped when insn(2) is set
1491                             v.result_class := INFINITY;
1492                             v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1493                             arith_done := '1';
1494                         end if;
1495                     else
1496                         -- Here A is zero, C is zero, or B is infinity
1497                         -- Result is +/-B in all of those cases
1498                         v.opsel_a := AIN_B;
1499                         if r.b.class /= ZERO or is_add = '1' then
1500                             v.negate := not (r.insn(1) xor r.insn(2));
1501                         else
1502                             -- have to be careful about rule for 0 - 0 result sign
                                 -- (round_mode = "11" selects the negative zero, matching
                                 -- the "-0 for rounding to -infinity" rule used by the
                                 -- add path)
1503                             v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1504                         end if;
1505                         v.state := EXC_RESULT;
1506                     end if;
1507                 end if;
1508
1509             when RENORM_A =>
                     -- First cycle of renormalizing a denormalized A: raise the
                     -- renormalize request and pre-select the operand that
                     -- RENORM_A2 expects on the A input - C when insn(4) = '1'
                     -- (fmul/fmadd), B otherwise (fdiv), as noted in RENORM_A2.
1510                 renormalize := '1';
1511                 v.state := RENORM_A2;
1512                 if r.insn(4) = '1' then
1513                     v.opsel_a := AIN_C;
1514                 else
1515                     v.opsel_a := AIN_B;
1516                 end if;
1517
1518             when RENORM_A2 =>
1519                 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
                     -- Second cycle of renormalizing A: request the update of A
                     -- (set_a), take the adjusted exponent (new_exp) as the result
                     -- exponent, and decide where to go next depending on the
                     -- instruction and on whether the other operand is normalized.
1520                 set_a := '1';
1521                 v.result_exp := new_exp;
1522                 if r.insn(4) = '1' then
                         -- fmul / fmadd family
1523                     if r.c.mantissa(54) = '1' then
1524                         if r.insn(3) = '0' or r.b.class = ZERO then
                                 -- plain multiply (insn(3) = '0'), or a multiply-add
                                 -- whose addend is zero and so degenerates to a multiply
                                 if r.insn(3) = '1' then
                                     -- Multiply-add with zero addend.  DO_FMADD left
                                     -- result_sign = sign(A) when it diverted here, so
                                     -- apply the same settings that DO_FMADD uses on its
                                     -- direct "no addend" path before entering MULT_1.
                                     v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
                                     v.is_multiply := '1';
                                 end if;
1525                             v.first := '1';
1526                             v.state := MULT_1;
1527                         else
                                 -- multiply-add with a non-zero addend: recompute the
                                 -- product-versus-addend comparison with the adjusted
                                 -- exponent, then re-enter DO_FMADD
1528                             v.madd_cmp := '0';
1529                             if new_exp + 1 >= r.b.exponent then
1530                                 v.madd_cmp := '1';
1531                             end if;
1532                             v.opsel_a := AIN_B;
1533                             v.state := DO_FMADD;
1534                         end if;
1535                     else
                             -- C is denormalized as well
1536                         v.state := RENORM_C;
1537                     end if;
1538                 else
                         -- fdiv
1539                     if r.b.mantissa(54) = '1' then
1540                         v.first := '1';
1541                         v.state := DIV_2;
1542                     else
                             -- B is denormalized as well
1543                         v.state := RENORM_B;
1544                     end if;
1545                 end if;
1546
1547             when RENORM_B =>
                     -- First cycle of renormalizing a denormalized B: raise the
                     -- renormalize request, flag the square-root variant when
                     -- r.is_sqrt is set, and continue in RENORM_B2.
1548                 renormalize := '1';
1549                 renorm_sqrt := r.is_sqrt;
1550                 v.state := RENORM_B2;
1551
1552             when RENORM_B2 =>
                     -- Second cycle of renormalizing B (also entered directly from
                     -- DO_FSQRT / DO_FRSQRTE with r.shift = 1 for odd exponents).
                     -- Requests the update of B (set_b), adjusts the result
                     -- exponent, selects B on the A input and moves to LOOKUP.
1553                 set_b := '1';
1554                 if r.is_sqrt = '0' then
                         -- non-sqrt operations: add the shift count to the exponent
1555                     v.result_exp := r.result_exp + r.shift;
1556                 else
                         -- sqrt operations: take the adjusted exponent new_exp
1557                     v.result_exp := new_exp;
1558                 end if;
1559                 v.opsel_a := AIN_B;
1560                 v.state := LOOKUP;
1561
1562             when RENORM_C =>
                     -- First cycle of renormalizing a denormalized C: raise the
                     -- renormalize request and continue in RENORM_C2.
1563                 renormalize := '1';
1564                 v.state := RENORM_C2;
1565
1566             when RENORM_C2 =>
                     -- Second cycle of renormalizing C: request the update of C
                     -- (set_c), take the adjusted exponent (new_exp) as the result
                     -- exponent, then either start the multiply or go back to
                     -- DO_FMADD for a multiply-add with a non-zero addend.
1567                 set_c := '1';
1568                 v.result_exp := new_exp;
1569                 if r.insn(3) = '0' or r.b.class = ZERO then
                         -- plain multiply (insn(3) = '0'), or a multiply-add whose
                         -- addend is zero and so degenerates to a multiply
                         if r.insn(3) = '1' then
                             -- Multiply-add with zero addend.  DO_FMADD left
                             -- result_sign = sign(A) when it diverted here, so apply
                             -- the same settings that DO_FMADD uses on its direct
                             -- "no addend" path before entering MULT_1.
                             v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
                             v.is_multiply := '1';
                         end if;
1570                     v.first := '1';
1571                     v.state := MULT_1;
1572                 else
                         -- recompute the product-versus-addend comparison with the
                         -- adjusted exponent before re-entering DO_FMADD
1573                     v.madd_cmp := '0';
1574                     if new_exp + 1 >= r.b.exponent then
1575                         v.madd_cmp := '1';
1576                     end if;
1577                     v.opsel_a := AIN_B;
1578                     v.state := DO_FMADD;
1579                 end if;
1580
1581             when ADD_1 =>
1582                 -- transferring B to R
                     -- Reached from DO_FADD when r.exp_cmp = '1'.  The shift count
                     -- becomes exp(B) - exp(A) and the result exponent is taken
                     -- from B; longmask is cleared as ADD_SHIFT expects.
1583                 v.shift := r.b.exponent - r.a.exponent;
1584                 v.result_exp := r.b.exponent;
1585                 v.longmask := '0';
1586                 v.state := ADD_SHIFT;
1587
1588             when ADD_SHIFT =>
1589                 -- r.shift = - exponent difference, r.longmask = 0
                     -- Alignment step of the add path (also used by FMADD_1): take
                     -- the shifter output into R, record s_nz in X, restore
                     -- longmask from r.single_prec and select the other operand
                     -- for the addition in ADD_2.
1590                 opsel_r <= RES_SHIFT;
                     -- s_nz presumably flags non-zero bits in S (bits shifted out);
                     -- confirm against its definition
1591                 v.x := s_nz;
1592                 set_x := '1';
1593                 v.longmask := r.single_prec;
1594                 if r.add_bsmall = '1' then
1595                     v.opsel_a := AIN_A;
1596                 else
1597                     v.opsel_a := AIN_B;
1598                 end if;
1599                 v.state := ADD_2;
1600
1601             when ADD_2 =>
1602                 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
                     -- Perform the add or subtract: the second adder input is R,
                     -- inverted for a subtraction.  The carry-in that would complete
                     -- the negation is supplied only when X is clear.
1603                 opsel_b <= BIN_R;
1604                 opsel_binv <= r.is_subtract;
1605                 carry_in <= r.is_subtract and not r.x;
                     -- ADD_3 expects r.shift = -1
1606                 v.shift := to_signed(-1, EXP_BITS);
1607                 v.state := ADD_3;
1608
1609             when ADD_3 =>
1610                 -- check for overflow or negative result (can't get both)
1611                 -- r.shift = -1
                     -- Inspect the sum in R and route it: negate a negative result,
                     -- shift an overflowed one right, round an already-normalized
                     -- one, finish on an exact zero, or otherwise renormalize.
1612                 if r.r(63) = '1' then
1613                     -- result is opposite sign to expected
                         -- negate R (invert plus carry-in) and flip the result sign
1614                     v.result_sign := not r.result_sign;
1615                     opsel_ainv <= '1';
1616                     carry_in <= '1';
1617                     v.state := FINISH;
1618                 elsif r.r(55) = '1' then
1619                     -- sum overflowed, shift right
1620                     opsel_r <= RES_SHIFT;
1621                     set_x := '1';
1622                     if exp_huge = '1' then
1623                         v.state := ROUND_OFLOW;
1624                     else
1625                         v.state := ROUNDING;
1626                     end if;
1627                 elsif r.r(54) = '1' then
                         -- bit 54 set with no overflow: go straight to rounding
1628                     set_x := '1';
1629                     v.state := ROUNDING;
1630                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1631                     -- r.x must be zero at this point
                         -- exact zero result
1632                     v.result_class := ZERO;
1633                     if r.is_subtract = '1' then
1634                         -- set result sign depending on rounding mode
                             -- (negative only when round_mode = "11")
1635                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1636                     end if;
1637                     arith_done := '1';
1638                 else
                         -- non-zero but the leading one is below bit 54: renormalize
1639                     renormalize := '1';
1640                     v.state := NORMALIZE;
1641                 end if;
1642
1643             when CMP_1 =>
1644                 -- r.opsel_a = AIN_A
                     -- First compare step: subtract R from the A input (R inverted,
                     -- carry-in of 1) so that CMP_2 can test the difference.
1645                 opsel_b <= BIN_R;
1646                 opsel_binv <= '1';
1647                 carry_in <= '1';
1648                 v.state := CMP_2;
1649
1650             when CMP_2 =>
                     -- Second compare step: turn the difference in R into the 4-bit
                     -- compare result, copy it into FPSCR FL..FU and finish the
                     -- instruction.  The two upper result bits are presumably
                     -- "less than" and "greater than" - confirm the bit order.
1651                 if r.r(63) = '1' then
1652                     -- A is smaller in magnitude
                         -- so A is the lesser value unless A is negative
1653                     v.cr_result := not r.a.negative & r.a.negative & "00";
1654                 elsif (r_hi_nz or r_lo_nz) = '0' then
                         -- difference is zero
1655                     v.cr_result := "0010";
1656                 else
                         -- A is larger in magnitude
1657                     v.cr_result := r.a.negative & not r.a.negative & "00";
1658                 end if;
1659                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1660                 v.instr_done := '1';
1661                 v.state := IDLE;
1662
1663             when MULT_1 =>
                     -- Wait for the multiplier.  The multiply is started here when
                     -- r.first is set (entry from the renormalize states); the
                     -- multiplier result is selected into R, and FINISH follows
                     -- once the multiplier reports valid.
1664                 f_to_multiply.valid <= r.first;
1665                 opsel_r <= RES_MULT;
1666                 if multiply_to_f.valid = '1' then
1667                     v.state := FINISH;
1668                 end if;
1669
1670             when FMADD_1 =>
1671                 -- Addend is bigger here
                     -- Wait for the product, capturing it in R and S, then hand
                     -- over to the add path's alignment step (ADD_SHIFT).
1672                 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1673                 -- note v.shift is at most -2 here
1674                 v.shift := r.result_exp - r.b.exponent;
1675                 opsel_r <= RES_MULT;
1676                 opsel_s <= S_MULT;
1677                 set_s := '1';
1678                 f_to_multiply.valid <= r.first;
1679                 if multiply_to_f.valid = '1' then
                         -- ADD_SHIFT expects longmask = 0
1680                     v.longmask := '0';
1681                     v.state := ADD_SHIFT;
1682                 end if;
1683
1684             when FMADD_2 =>
1685                 -- Product is potentially bigger here
1686                 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
                     -- Capture the shifter output in S using the biased shift
                     -- count, then remove the bias of 64 for FMADD_3.
1687                 set_s := '1';
1688                 opsel_s <= S_SHIFT;
1689                 v.shift := r.shift - to_signed(64, EXP_BITS);
1690                 v.state := FMADD_3;
1691
1692             when FMADD_3 =>
1693                 -- r.shift = addend exp - product exp
                     -- Take the shifter output into R and arm r.first so that
                     -- FMADD_4 starts the multiply.
1694                 opsel_r <= RES_SHIFT;
1695                 v.first := '1';
1696                 v.state := FMADD_4;
1697
1698             when FMADD_4 =>
                     -- Run the multiply with the MULADD_RS addend selection
                     -- (presumably the aligned addend held in R:S - confirm),
                     -- inverted for a subtraction, and capture the result in R and
                     -- S.  Moves on once the multiplier reports valid.
1699                 msel_add <= MULADD_RS;
1700                 f_to_multiply.valid <= r.first;
1701                 msel_inv <= r.is_subtract;
1702                 opsel_r <= RES_MULT;
1703                 opsel_s <= S_MULT;
1704                 set_s := '1';
1705                 if multiply_to_f.valid = '1' then
1706                     v.state := FMADD_5;
1707                 end if;
1708
1709             when FMADD_5 =>
1710                 -- negate R:S:X if negative
1711                 if r.r(63) = '1' then
                         -- flip the recorded sign and negate: R is inverted and gets
                         -- a carry-in only when S and X are both zero; S is replaced
                         -- through the S_NEG selection
1712                     v.result_sign := not r.result_sign;
1713                     opsel_ainv <= '1';
1714                     carry_in <= not (s_nz or r.x);
1715                     opsel_s <= S_NEG;
1716                     set_s := '1';
1717                 end if;
                     -- FMADD_6 expects r.shift = 56
1718                 v.shift := to_signed(56, EXP_BITS);
1719                 v.state := FMADD_6;
1720
1721             when FMADD_6 =>
1722                 -- r.shift = 56 (or 0, but only if r is now nonzero)
                     -- Classify the multiply-add result: exact zero, bits only in S
                     -- (shift them up into R and look again), already normalized,
                     -- or in need of renormalization.
1723                 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1724                     if s_nz = '0' then
1725                         -- must be a subtraction, and r.x must be zero
                             -- exact zero: negative only when round_mode = "11"
1726                         v.result_class := ZERO;
1727                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1728                         arith_done := '1';
1729                     else
1730                         -- R is all zeroes but there are non-zero bits in S
1731                         -- so shift them into R and set S to 0
1732                         opsel_r <= RES_SHIFT;
1733                         set_s := '1';
1734                         -- stay in state FMADD_6
1735                     end if;
1736                 elsif r.r(56 downto 54) = "001" then
                         -- leading one is exactly at bit 54: no normalization needed
1737                     v.state := FINISH;
1738                 else
1739                     renormalize := '1';
1740                     v.state := NORMALIZE;
1741                 end if;
1742
1743             when LOOKUP =>
1744                 -- r.opsel_a = AIN_B
1745                 -- wait one cycle for inverse_table[B] lookup
                     -- Then dispatch on the instruction bits to the first state of
                     -- the divide, square-root, reciprocal or reciprocal-square-root
                     -- sequence, with r.first armed for that state.
1746                 v.first := '1';
1747                 if r.insn(4) = '0' then
1748                     if r.insn(3) = '0' then
1749                         v.state := DIV_2;
1750                     else
1751                         v.state := SQRT_1;
1752                     end if;
1753                 elsif r.insn(2) = '0' then
1754                     v.state := FRE_1;
1755                 else
1756                     v.state := RSQRT_1;
1757                 end if;
1758
1759             when DIV_2 =>
1760                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
                     -- First half of one refinement iteration of the reciprocal of
                     -- B.  The multiplier computes constant - B * <estimate>, where
                     -- the estimate is the lookup-table value on the first pass
                     -- (r.count = 0) and P on later passes.
1761                 msel_1 <= MUL1_B;
1762                 msel_add <= MULADD_CONST;
1763                 msel_inv <= '1';
1764                 if r.count = 0 then
1765                     msel_2 <= MUL2_LUT;
1766                 else
1767                     msel_2 <= MUL2_P;
1768                 end if;
                     -- Y is captured and the multiply started only on the first
                     -- cycle in this state (r.first)
1769                 set_y := r.first;
1770                 pshift := '1';
1771                 f_to_multiply.valid <= r.first;
1772                 if multiply_to_f.valid = '1' then
1773                     v.first := '1';
1774                     v.count := r.count + 1;
1775                     v.state := DIV_3;
1776                 end if;
1777
1778             when DIV_3 =>
1779                 -- compute Y = P = P * Y
                     -- Second half of a refinement iteration.  After the third
                     -- iteration (r.count = 3) the quotient is formed in DIV_4;
                     -- otherwise another iteration starts in DIV_2.
1780                 msel_1 <= MUL1_Y;
1781                 msel_2 <= MUL2_P;
1782                 f_to_multiply.valid <= r.first;
1783                 pshift := '1';
1784                 if multiply_to_f.valid = '1' then
1785                     v.first := '1';
1786                     if r.count = 3 then
1787                         v.state := DIV_4;
1788                     else
1789                         v.state := DIV_2;
1790                     end if;
1791                 end if;
1792
1793             when DIV_4 =>
1794                 -- compute R = P = A * Y (quotient)
                     -- The refined reciprocal is in P; Y is captured and the
                     -- multiply started on the first cycle in this state.  When the
                     -- multiplier reports valid its result is taken into R.
1795                 msel_1 <= MUL1_A;
1796                 msel_2 <= MUL2_P;
1797                 set_y := r.first;
1798                 f_to_multiply.valid <= r.first;
1799                 pshift := '1';
1800                 if multiply_to_f.valid = '1' then
1801                     opsel_r <= RES_MULT;
1802                     v.first := '1';
1803                     v.state := DIV_5;
1804                 end if;
1805
1806             when DIV_5 =>
1807                 -- compute P = A - B * R (remainder)
                     -- A is the addend and the product B * R is inverted; the
                     -- remainder is examined in DIV_6.
1808                 msel_1 <= MUL1_B;
1809                 msel_2 <= MUL2_R;
1810                 msel_add <= MULADD_A;
1811                 msel_inv <= '1';
1812                 f_to_multiply.valid <= r.first;
1813                 if multiply_to_f.valid = '1' then
1814                     v.state := DIV_6;
1815                 end if;
1816
1817             when DIV_6 =>
1818                 -- test if remainder is 0 or >= B
                     -- Final quotient correction: uses the P-versus-B comparison
                     -- flags (pcmpb_lt / pcmpb_eq) to decide whether the quotient in
                     -- R must be incremented, and sets X when the result is inexact.
1819                 if pcmpb_lt = '1' then
1820                     -- quotient is correct, set X if remainder non-zero
1821                     v.x := r.p(58) or px_nz;
1822                 else
1823                     -- quotient needs to be incremented by 1
                         -- (X is then set unless the remainder equals B exactly)
1824                     carry_in <= '1';
1825                     v.x := not pcmpb_eq;
1826                 end if;
1827                 v.state := FINISH;
1828
1829             when FRE_1 =>
                     -- Take the miscellaneous-result value selected by misc_sel
                     -- "0111" into R (the same selection SQRT_1 uses to put the
                     -- lookup-table value into R), set the shift count to 1 and go
                     -- to NORMALIZE.
1830                 opsel_r <= RES_MISC;
1831                 misc_sel <= "0111";
1832                 v.shift := to_signed(1, EXP_BITS);
1833                 v.state := NORMALIZE;
1834
1835             when FTDIV_1 =>
                     -- Exponent-range test step of the ftdiv sequence.  Bit 1 of
                     -- the compare result is set when the tested exponent is flagged
                     -- tiny or huge.  The instruction completes here if that flag is
                     -- set, if A is zero, or on the second pass (r.first = '0');
                     -- otherwise A's exponent is loaded into the shift register and
                     -- doing_ftdiv is set to "10".
                     -- NOTE(review): v.state is not assigned on the else path, so the
                     -- FSM presumably stays in FTDIV_1 for a second pass; where
                     -- r.first gets cleared is outside this view - confirm.
1836                 v.cr_result(1) := exp_tiny or exp_huge;
1837                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1838                     v.instr_done := '1';
1839                     v.state := IDLE;
1840                 else
1841                     v.shift := r.a.exponent;
1842                     v.doing_ftdiv := "10";
1843                 end if;
1844
1845             when RSQRT_1 =>
                     -- Take the value selected by misc_sel "0111" into R (the same
                     -- selection SQRT_1 uses for the lookup-table value) and set the
                     -- result exponent to minus half of B's exponent.
1846                 opsel_r <= RES_MISC;
1847                 misc_sel <= "0111";
                     -- arithmetic shift right by one: B's exponent halved, with the
                     -- sign bit replicated
1848                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1849                 v.result_exp := - sqrt_exp;
1850                 v.shift := to_signed(1, EXP_BITS);
1851                 v.state := NORMALIZE;
1852
1853             when SQRT_1 =>
1854                 -- put invsqr[B] in R and compute P = invsqr[B] * B
1855                 -- also transfer B (in R) to A
                     -- First step of the square-root iteration: starts the multiply
                     -- immediately, clears the iteration counter and sets the shift
                     -- count to -1 for SQRT_2.
1856                 set_a := '1';
1857                 opsel_r <= RES_MISC;
1858                 misc_sel <= "0111";
1859                 msel_1 <= MUL1_B;
1860                 msel_2 <= MUL2_LUT;
1861                 f_to_multiply.valid <= '1';
1862                 v.shift := to_signed(-1, EXP_BITS);
1863                 v.count := "00";
1864                 v.state := SQRT_2;
1865
1866             when SQRT_2 =>
1867                 -- shift R right one place
1868                 -- not expecting multiplier result yet
1869                 -- r.shift = -1
                     -- r.first is armed so that SQRT_3 captures Y on its first cycle
1870                 opsel_r <= RES_SHIFT;
1871                 v.first := '1';
1872                 v.state := SQRT_3;
1873
1874             when SQRT_3 =>
1875                 -- put R into Y, wait for product from multiplier
                     -- (the product is the one started in SQRT_1; Y is captured on
                     -- the first cycle in this state only)
1876                 msel_2 <= MUL2_R;
1877                 set_y := r.first;
1878                 pshift := '1';
1879                 if multiply_to_f.valid = '1' then
1880                     -- put result into R
1881                     opsel_r <= RES_MULT;
1882                     v.first := '1';
1883                     v.state := SQRT_4;
1884                 end if;
1885
1886             when SQRT_4 =>
1887                 -- compute 1.5 - Y * P
                     -- First step of each refinement pass (SQRT_4..SQRT_7): the
                     -- product Y * P is inverted and added to the multiplier's
                     -- constant addend.
1888                 msel_1 <= MUL1_Y;
1889                 msel_2 <= MUL2_P;
1890                 msel_add <= MULADD_CONST;
1891                 msel_inv <= '1';
1892                 f_to_multiply.valid <= r.first;
1893                 pshift := '1';
1894                 if multiply_to_f.valid = '1' then
1895                     v.state := SQRT_5;
1896                 end if;
1897
1898             when SQRT_5 =>
1899                 -- compute Y = Y * P
1900                 msel_1 <= MUL1_Y;
1901                 msel_2 <= MUL2_P;
1902                 f_to_multiply.valid <= '1';
1903                 v.first := '1';
1904                 v.state := SQRT_6;
1905
1906             when SQRT_6 =>
1907                 -- pipeline in R = R * P
                     -- Issue the second multiply while the one from SQRT_5 is still
                     -- in flight; leave when the multiplier reports a valid result.
1908                 msel_1 <= MUL1_R;
1909                 msel_2 <= MUL2_P;
1910                 f_to_multiply.valid <= r.first;
1911                 pshift := '1';
1912                 if multiply_to_f.valid = '1' then
1913                     v.first := '1';
1914                     v.state := SQRT_7;
1915                 end if;
1916
1917             when SQRT_7 =>
1918                 -- first multiply is done, put result in Y
1919                 msel_2 <= MUL2_P;
1920                 set_y := r.first;
1921                 -- wait for second multiply (should be here already)
1922                 pshift := '1';
1923                 if multiply_to_f.valid = '1' then
1924                     -- put result into R
1925                     opsel_r <= RES_MULT;
1926                     v.first := '1';
                         -- count the completed pass; passes with r.count = 0 and 1
                         -- loop back to SQRT_4, the pass with r.count = 2 moves on
1927                     v.count := r.count + 1;
1928                     if r.count < 2 then
1929                         v.state := SQRT_4;
1930                     else
                             -- (this second assignment to v.first is redundant: it was
                             -- already set a few lines above)
1931                         v.first := '1';
1932                         v.state := SQRT_8;
1933                     end if;
1934                 end if;
1935
1936             when SQRT_8 =>
1937                 -- compute P = A - R * R, which can be +ve or -ve
1938                 -- we arranged for B to be put into A earlier
                     -- (that transfer is the set_a in SQRT_1); A is the addend and
                     -- the product R * R is inverted
1939                 msel_1 <= MUL1_R;
1940                 msel_2 <= MUL2_R;
1941                 msel_add <= MULADD_A;
1942                 msel_inv <= '1';
1943                 pshift := '1';
1944                 f_to_multiply.valid <= r.first;
1945                 if multiply_to_f.valid = '1' then
1946                     v.first := '1';
1947                     v.state := SQRT_9;
1948                 end if;
1949
1950             when SQRT_9 =>
1951                 -- compute P = P * Y
1952                 -- since Y is an estimate of 1/sqrt(B), this makes P an
1953                 -- estimate of the adjustment needed to R.  Since the error
1954                 -- could be negative and we have an unsigned multiplier, the
1955                 -- upper bits can be wrong, but it turns out the lowest 8 bits
1956                 -- are correct and are all we need (given 3 iterations through
1957                 -- SQRT_4 to SQRT_7).
1958                 msel_1 <= MUL1_Y;
1959                 msel_2 <= MUL2_P;
1960                 pshift := '1';
1961                 f_to_multiply.valid <= r.first;
1962                 if multiply_to_f.valid = '1' then
1963                     v.state := SQRT_10;
1964                 end if;
1965
1966             when SQRT_10 =>
1967                 -- Add the bottom 8 bits of P, sign-extended,
1968                 -- divided by 4, onto R.
1969                 -- The division by 4 is because R is 10.54 format
1970                 -- whereas P is 8.56 format.
1971                 opsel_b <= BIN_PS6;
1972                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1973                 v.result_exp := sqrt_exp;
1974                 v.shift := to_signed(1, EXP_BITS);
1975                 v.first := '1';
1976                 v.state := SQRT_11;
1977
1978             when SQRT_11 =>
1979                 -- compute P = A - R * R (remainder)
1980                 -- also put 2 * R + 1 into B for comparison with P
1981                 msel_1 <= MUL1_R;
1982                 msel_2 <= MUL2_R;
1983                 msel_add <= MULADD_A;
1984                 msel_inv <= '1';
1985                 f_to_multiply.valid <= r.first;
1986                 shiftin := '1';
1987                 set_b := r.first;
1988                 if multiply_to_f.valid = '1' then
1989                     v.state := SQRT_12;
1990                 end if;
1991
1992             when SQRT_12 =>
1993                 -- test if remainder is 0 or >= B = 2*R + 1
1994                 if pcmpb_lt = '1' then
1995                     -- square root is correct, set X if remainder non-zero
1996                     v.x := r.p(58) or px_nz;
1997                 else
1998                     -- square root needs to be incremented by 1
1999                     carry_in <= '1';
2000                     v.x := not pcmpb_eq;
2001                 end if;
2002                 v.state := FINISH;
2003
2004             when INT_SHIFT =>
2005                 -- r.shift = b.exponent - 52
2006                 opsel_r <= RES_SHIFT;
2007                 set_x := '1';
2008                 v.state := INT_ROUND;
2009                 v.shift := to_signed(-2, EXP_BITS);
2010
2011             when INT_ROUND =>
2012                 -- r.shift = -2
2013                 opsel_r <= RES_SHIFT;
2014                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2015                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2016                 -- Check for negative values that don't round to 0 for fcti*u*
2017                 if r.insn(8) = '1' and r.result_sign = '1' and
2018                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2019                     v.state := INT_OFLOW;
2020                 else
2021                     v.state := INT_FINAL;
2022                 end if;
2023
2024             when INT_ISHIFT =>
2025                 -- r.shift = b.exponent - 54
2026                 opsel_r <= RES_SHIFT;
2027                 v.state := INT_FINAL;
2028
2029             when INT_FINAL =>
2030                 -- Negate if necessary, and increment for rounding if needed
2031                 opsel_ainv <= r.result_sign;
2032                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2033                 -- Check for possible overflows
2034                 case r.insn(9 downto 8) is
2035                     when "00" =>        -- fctiw[z]
2036                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
2037                     when "01" =>        -- fctiwu[z]
2038                         need_check := r.r(31);
2039                     when "10" =>        -- fctid[z]
2040                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
2041                     when others =>      -- fctidu[z]
2042                         need_check := r.r(63);
2043                 end case;
2044                 if need_check = '1' then
2045                     v.state := INT_CHECK;
2046                 else
2047                     if r.fpscr(FPSCR_FI) = '1' then
2048                         v.fpscr(FPSCR_XX) := '1';
2049                     end if;
2050                     arith_done := '1';
2051                 end if;
2052
2053             when INT_CHECK =>
2054                 if r.insn(9) = '0' then
2055                     msb := r.r(31);
2056                 else
2057                     msb := r.r(63);
2058                 end if;
2059                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2060                 if (r.insn(8) = '0' and msb /= r.result_sign) or
2061                     (r.insn(8) = '1' and msb /= '1') then
2062                     opsel_r <= RES_MISC;
2063                     v.fpscr(FPSCR_VXCVI) := '1';
2064                     invalid := '1';
2065                 else
2066                     if r.fpscr(FPSCR_FI) = '1' then
2067                         v.fpscr(FPSCR_XX) := '1';
2068                     end if;
2069                 end if;
2070                 arith_done := '1';
2071
2072             when INT_OFLOW =>
2073                 opsel_r <= RES_MISC;
2074                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2075                 if r.b.class = NAN then
2076                     misc_sel(0) <= '1';
2077                 end if;
2078                 v.fpscr(FPSCR_VXCVI) := '1';
2079                 invalid := '1';
2080                 arith_done := '1';
2081
2082             when FRI_1 =>
2083                 -- r.shift = b.exponent - 52
2084                 opsel_r <= RES_SHIFT;
2085                 set_x := '1';
2086                 v.state := ROUNDING;
2087
2088             when FINISH =>
2089                 if r.is_multiply = '1' and px_nz = '1' then
2090                     v.x := '1';
2091                 end if;
2092                 if r.r(63 downto 54) /= "0000000001" then
2093                     renormalize := '1';
2094                     v.state := NORMALIZE;
2095                 else
2096                     set_x := '1';
2097                     if exp_tiny = '1' then
2098                         v.shift := new_exp - min_exp;
2099                         v.state := ROUND_UFLOW;
2100                     elsif exp_huge = '1' then
2101                         v.state := ROUND_OFLOW;
2102                     else
2103                         v.state := ROUNDING;
2104                     end if;
2105                 end if;
2106
2107             when NORMALIZE =>
2108                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2109                 -- r.shift = clz(r.r) - 9
2110                 opsel_r <= RES_SHIFT;
2111                 set_x := '1';
2112                 if exp_tiny = '1' then
2113                     v.shift := new_exp - min_exp;
2114                     v.state := ROUND_UFLOW;
2115                 elsif exp_huge = '1' then
2116                     v.state := ROUND_OFLOW;
2117                 else
2118                     v.state := ROUNDING;
2119                 end if;
2120
2121             when ROUND_UFLOW =>
2122                 -- r.shift = - amount by which exponent underflows
2123                 v.tiny := '1';
2124                 if r.fpscr(FPSCR_UE) = '0' then
2125                     -- disabled underflow exception case
2126                     -- have to denormalize before rounding
2127                     opsel_r <= RES_SHIFT;
2128                     set_x := '1';
2129                     v.state := ROUNDING;
2130                 else
2131                     -- enabled underflow exception case
2132                     -- if denormalized, have to normalize before rounding
2133                     v.fpscr(FPSCR_UX) := '1';
2134                     v.result_exp := r.result_exp + bias_exp;
2135                     if r.r(54) = '0' then
2136                         renormalize := '1';
2137                         v.state := NORMALIZE;
2138                     else
2139                         v.state := ROUNDING;
2140                     end if;
2141                 end if;
2142
2143             when ROUND_OFLOW =>
2144                 v.fpscr(FPSCR_OX) := '1';
2145                 if r.fpscr(FPSCR_OE) = '0' then
2146                     -- disabled overflow exception
2147                     -- result depends on rounding mode
2148                     v.fpscr(FPSCR_XX) := '1';
2149                     v.fpscr(FPSCR_FI) := '1';
2150                     if r.round_mode(1 downto 0) = "00" or
2151                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2152                         v.result_class := INFINITY;
2153                         v.fpscr(FPSCR_FR) := '1';
2154                     else
2155                         v.fpscr(FPSCR_FR) := '0';
2156                     end if;
2157                     -- construct largest representable number
2158                     v.result_exp := max_exp;
2159                     opsel_r <= RES_MISC;
2160                     misc_sel <= "001" & r.single_prec;
2161                     arith_done := '1';
2162                 else
2163                     -- enabled overflow exception
2164                     v.result_exp := r.result_exp - bias_exp;
2165                     v.state := ROUNDING;
2166                 end if;
2167
2168             when ROUNDING =>
2169                 opsel_mask <= '1';
2170                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2171                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2172                 if round(1) = '1' then
2173                     -- increment the LSB for the precision
2174                     opsel_b <= BIN_RND;
2175                     v.shift := to_signed(-1, EXP_BITS);
2176                     v.state := ROUNDING_2;
2177                 else
2178                     if r.r(54) = '0' then
2179                         -- result after masking could be zero, or could be a
2180                         -- denormalized result that needs to be renormalized
2181                         renormalize := '1';
2182                         v.state := ROUNDING_3;
2183                     else
2184                         arith_done := '1';
2185                     end if;
2186                 end if;
2187                 if round(0) = '1' then
2188                     v.fpscr(FPSCR_XX) := '1';
2189                     if r.tiny = '1' then
2190                         v.fpscr(FPSCR_UX) := '1';
2191                     end if;
2192                 end if;
2193
2194             when ROUNDING_2 =>
2195                 -- Check for overflow during rounding
2196                 -- r.shift = -1
2197                 v.x := '0';
2198                 if r.r(55) = '1' then
2199                     opsel_r <= RES_SHIFT;
2200                     if exp_huge = '1' then
2201                         v.state := ROUND_OFLOW;
2202                     else
2203                         arith_done := '1';
2204                     end if;
2205                 elsif r.r(54) = '0' then
2206                     -- Do CLZ so we can renormalize the result
2207                     renormalize := '1';
2208                     v.state := ROUNDING_3;
2209                 else
2210                     arith_done := '1';
2211                 end if;
2212
2213             when ROUNDING_3 =>
2214                 -- r.shift = clz(r.r) - 9
2215                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2216                 if mant_nz = '0' then
2217                     v.result_class := ZERO;
2218                     if r.is_subtract = '1' then
2219                         -- set result sign depending on rounding mode
2220                         v.result_sign := r.round_mode(1) and r.round_mode(0);
2221                     end if;
2222                     arith_done := '1';
2223                 else
2224                     -- Renormalize result after rounding
2225                     opsel_r <= RES_SHIFT;
2226                     v.denorm := exp_tiny;
2227                     v.shift := new_exp - to_signed(-1022, EXP_BITS);
2228                     if new_exp < to_signed(-1022, EXP_BITS) then
2229                         v.state := DENORM;
2230                     else
2231                         arith_done := '1';
2232                     end if;
2233                 end if;
2234
2235             when DENORM =>
2236                 -- r.shift = result_exp - -1022
2237                 opsel_r <= RES_SHIFT;
2238                 arith_done := '1';
2239
2240             when NAN_RESULT =>
2241                 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2242                     (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2243                     (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2244                     -- Signalling NAN
2245                     v.fpscr(FPSCR_VXSNAN) := '1';
2246                     invalid := '1';
2247                 end if;
2248                 if r.use_a = '1' and r.a.class = NAN then
2249                     v.opsel_a := AIN_A;
2250                 elsif r.use_b = '1' and r.b.class = NAN then
2251                     v.opsel_a := AIN_B;
2252                 elsif r.use_c = '1' and r.c.class = NAN then
2253                     v.opsel_a := AIN_C;
2254                 end if;
2255                 v.state := EXC_RESULT;
2256
2257             when EXC_RESULT =>
2258                 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2259                 case r.opsel_a is
2260                     when AIN_B =>
2261                         v.result_sign := r.b.negative xor r.negate;
2262                         v.result_exp := r.b.exponent;
2263                         v.result_class := r.b.class;
2264                     when AIN_C =>
2265                         v.result_sign := r.c.negative xor r.negate;
2266                         v.result_exp := r.c.exponent;
2267                         v.result_class := r.c.class;
2268                     when others =>
2269                         v.result_sign := r.a.negative xor r.negate;
2270                         v.result_exp := r.a.exponent;
2271                         v.result_class := r.a.class;
2272                 end case;
2273                 arith_done := '1';
2274
2275         end case;
2276
2277         if zero_divide = '1' then
2278             v.fpscr(FPSCR_ZX) := '1';
2279         end if;
2280         if qnan_result = '1' then
2281             invalid := '1';
2282             v.result_class := NAN;
2283             v.result_sign := '0';
2284             misc_sel <= "0001";
2285             opsel_r <= RES_MISC;
2286             arith_done := '1';
2287         end if;
2288         if invalid = '1' then
2289             v.invalid := '1';
2290         end if;
2291         if arith_done = '1' then
2292             -- Enabled invalid exception doesn't write result or FPRF
2293             -- Neither does enabled zero-divide exception
2294             if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2295                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2296                 v.writing_back := '1';
2297                 v.update_fprf := '1';
2298             end if;
2299             v.instr_done := '1';
2300             v.state := IDLE;
2301             update_fx := '1';
2302         end if;
2303
2304         -- Multiplier and divide/square root data path
2305         case msel_1 is
2306             when MUL1_A =>
2307                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2308             when MUL1_B =>
2309                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2310             when MUL1_Y =>
2311                 f_to_multiply.data1 <= r.y;
2312             when others =>
2313                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2314         end case;
2315         case msel_2 is
2316             when MUL2_C =>
2317                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2318             when MUL2_LUT =>
2319                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2320             when MUL2_P =>
2321                 f_to_multiply.data2 <= r.p;
2322             when others =>
2323                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2324         end case;
2325         maddend := (others => '0');
2326         case msel_add is
2327             when MULADD_CONST =>
2328                 -- addend is 2.0 or 1.5 in 16.112 format
2329                 if r.is_sqrt = '0' then
2330                     maddend(113) := '1';                -- 2.0
2331                 else
2332                     maddend(112 downto 111) := "11";    -- 1.5
2333                 end if;
2334             when MULADD_A =>
2335                 -- addend is A in 16.112 format
2336                 maddend(121 downto 58) := r.a.mantissa;
2337             when MULADD_RS =>
2338                 -- addend is concatenation of R and S in 16.112 format
2339                 maddend := "000000" & r.r & r.s & "00";
2340             when others =>
2341         end case;
2342         if msel_inv = '1' then
2343             f_to_multiply.addend <= not maddend;
2344         else
2345             f_to_multiply.addend <= maddend;
2346         end if;
2347         f_to_multiply.not_result <= msel_inv;
2348         if set_y = '1' then
2349             v.y := f_to_multiply.data2;
2350         end if;
2351         if multiply_to_f.valid = '1' then
2352             if pshift = '0' then
2353                 v.p := multiply_to_f.result(63 downto 0);
2354             else
2355                 v.p := multiply_to_f.result(119 downto 56);
2356             end if;
2357         end if;
2358
2359         -- Data path.
2360         -- This has A and B input multiplexers, an adder, a shifter,
2361         -- count-leading-zeroes logic, and a result mux.
2362         if r.longmask = '1' then
2363             mshift := r.shift + to_signed(-29, EXP_BITS);
2364         else
2365             mshift := r.shift;
2366         end if;
2367         if mshift < to_signed(-64, EXP_BITS) then
2368             mask := (others => '1');
2369         elsif mshift >= to_signed(0, EXP_BITS) then
2370             mask := (others => '0');
2371         else
2372             mask := right_mask(unsigned(mshift(5 downto 0)));
2373         end if;
2374         case r.opsel_a is
2375             when AIN_R =>
2376                 in_a0 := r.r;
2377             when AIN_A =>
2378                 in_a0 := r.a.mantissa;
2379             when AIN_B =>
2380                 in_a0 := r.b.mantissa;
2381             when others =>
2382                 in_a0 := r.c.mantissa;
2383         end case;
2384         if (or (mask and in_a0)) = '1' and set_x = '1' then
2385             v.x := '1';
2386         end if;
2387         if opsel_ainv = '1' then
2388             in_a0 := not in_a0;
2389         end if;
2390         in_a <= in_a0;
2391         case opsel_b is
2392             when BIN_ZERO =>
2393                 in_b0 := (others => '0');
2394             when BIN_R =>
2395                 in_b0 := r.r;
2396             when BIN_RND =>
2397                 round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
2398                 in_b0 := round_inc;
2399             when others =>
2400                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2401                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2402         end case;
2403         if opsel_binv = '1' then
2404             in_b0 := not in_b0;
2405         end if;
2406         in_b <= in_b0;
2407         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2408             shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2409                                     std_ulogic_vector(r.shift(6 downto 0)));
2410         else
2411             shift_res := (others => '0');
2412         end if;
2413         sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2414         if opsel_mask = '1' then
2415             sum(1 downto 0) := "00";
2416             if r.single_prec = '1' then
2417                 sum(30 downto 2) := (others => '0');
2418             end if;
2419         end if;
2420         case opsel_r is
2421             when RES_SUM =>
2422                 result <= sum;
2423             when RES_SHIFT =>
2424                 result <= shift_res;
2425             when RES_MULT =>
2426                 result <= multiply_to_f.result(121 downto 58);
2427             when others =>
2428                 case misc_sel is
2429                     when "0000" =>
2430                         misc := x"00000000" & (r.fpscr and fpscr_mask);
2431                     when "0001" =>
2432                         -- generated QNaN mantissa
2433                         misc := x"0020000000000000";
2434                     when "0010" =>
2435                         -- mantissa of max representable DP number
2436                         misc := x"007ffffffffffffc";
2437                     when "0011" =>
2438                         -- mantissa of max representable SP number
2439                         misc := x"007fffff80000000";
2440                     when "0100" =>
2441                         -- fmrgow result
2442                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2443                     when "0110" =>
2444                         -- fmrgew result
2445                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2446                     when "0111" =>
2447                         misc := 10x"000" & inverse_est & 35x"000000000";
2448                     when "1000" =>
2449                         -- max positive result for fctiw[z]
2450                         misc := x"000000007fffffff";
2451                     when "1001" =>
2452                         -- max negative result for fctiw[z]
2453                         misc := x"ffffffff80000000";
2454                     when "1010" =>
2455                         -- max positive result for fctiwu[z]
2456                         misc := x"00000000ffffffff";
2457                     when "1011" =>
2458                         -- max negative result for fctiwu[z]
2459                         misc := x"0000000000000000";
2460                     when "1100" =>
2461                         -- max positive result for fctid[z]
2462                         misc := x"7fffffffffffffff";
2463                     when "1101" =>
2464                         -- max negative result for fctid[z]
2465                         misc := x"8000000000000000";
2466                     when "1110" =>
2467                         -- max positive result for fctidu[z]
2468                         misc := x"ffffffffffffffff";
2469                     when "1111" =>
2470                         -- max negative result for fctidu[z]
2471                         misc := x"0000000000000000";
2472                     when others =>
2473                         misc := x"0000000000000000";
2474                 end case;
2475                 result <= misc;
2476         end case;
2477         v.r := result;
2478         if set_s = '1' then
2479             case opsel_s is
2480                 when S_NEG =>
2481                     v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2482                 when S_MULT =>
2483                     v.s := multiply_to_f.result(57 downto 2);
2484                 when S_SHIFT =>
2485                     v.s := shift_res(63 downto 8);
2486                     if shift_res(7 downto 0) /= x"00" then
2487                         v.x := '1';
2488                     end if;
2489                 when others =>
2490                     v.s := (others => '0');
2491             end case;
2492         end if;
2493
2494         if set_a = '1' then
2495             v.a.exponent := new_exp;
2496             v.a.mantissa := shift_res;
2497         end if;
2498         if set_b = '1' then
2499             v.b.exponent := new_exp;
2500             v.b.mantissa := shift_res;
2501         end if;
2502         if set_c = '1' then
2503             v.c.exponent := new_exp;
2504             v.c.mantissa := shift_res;
2505         end if;
2506
2507         if opsel_r = RES_SHIFT then
2508             v.result_exp := new_exp;
2509         end if;
2510
2511         if renormalize = '1' then
2512             clz := count_left_zeroes(r.r);
2513             if renorm_sqrt = '1' then
2514                 -- make denormalized value end up with even exponent
2515                 clz(0) := '1';
2516             end if;
2517             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2518         end if;
2519
2520         if r.int_result = '1' then
2521             fp_result <= r.r;
2522         else
2523             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2524                                  r.single_prec, r.quieten_nan);
2525         end if;
2526         if r.update_fprf = '1' then
2527             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2528                                                              r.r(54) and not r.denorm);
2529         end if;
2530
2531         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2532                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2533         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2534                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
2535         if update_fx = '1' and
2536             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2537             v.fpscr(FPSCR_FX) := '1';
2538         end if;
2539         if r.rc = '1' then
2540             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2541         end if;
2542
2543         if illegal = '1' then
2544             v.instr_done := '0';
2545             v.do_intr := '0';
2546             v.writing_back := '0';
2547             v.busy := '0';
2548             v.state := IDLE;
2549         else
2550             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2551             if v.state /= IDLE or v.do_intr = '1' then
2552                 v.busy := '1';
2553             end if;
2554         end if;
2555
2556         rin <= v;
2557         e_out.illegal <= illegal;
2558     end process;
2559
2560 end architecture behaviour;