fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is
     -- Port interface of the floating-point unit.
  15     port (
             -- Clock; the fpu_0 and lut_access processes act on its rising edge.
  16         clk : in std_ulogic;
             -- Reset; tested inside the rising_edge(clk) branch of fpu_0,
             -- i.e. it is a synchronous reset.
  17         rst : in std_ulogic;
  18
             -- Request input.  Its `valid` field is checked by the assertion
             -- in fpu_0, which fails if valid is set while r.state /= IDLE.
  19         e_in  : in  Execute1toFPUType;
             -- Status output; the architecture drives its busy, exception
             -- and interrupt fields from the register r.
  20         e_out : out FPUToExecute1Type;
  21
             -- Result output; the architecture drives valid, write_enable,
             -- write_reg, write_data and the write_cr_* fields.
  22         w_out : out FPUToWritebackType
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
         -- Classification of an operand or result.  decode_dp assigns
         -- FINITE to both normalized and denormalized inputs.
  28
         -- Width in bits of every signed exponent field in this architecture
         -- (fpu_reg_type.exponent, reg_type.result_exp, reg_type.shift).
  29     constant EXP_BITS : natural := 13;
  30
         -- Unpacked form of one operand, as produced by decode_dp.
  31     type fpu_reg_type is record
  32         class    : fp_number_class;
             -- Copy of bit 63 of the source register.
  33         negative : std_ulogic;
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased
             -- For floating-point inputs decode_dp places the unit bit at
             -- bit 54 and the 52 fraction bits at 53 downto 2; for integer
             -- inputs it holds the raw 64-bit register value.
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format
  36     end record;
  37
  38     type state_t is (IDLE,
                          -- IDLE is the state loaded by the reset branch of fpu_0;
                          -- fpu_0 also asserts that e_in.valid is only seen in IDLE.
                          -- NOTE(review): the DO_* names mirror instruction mnemonics
                          -- and look like one entry state per operation; the
                          -- transitions live in fpu_1, outside this view -- confirm.
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
  40                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  44                      DO_FRE, DO_FRSQRTE,
  45                      DO_FSEL,
                          -- NOTE(review): the remaining names look like the
                          -- numbered follow-on steps of multi-cycle operations and
                          -- shared rounding/normalization steps -- confirm in fpu_1.
  46                      FRI_1,
  47                      ADD_1, ADD_SHIFT, ADD_2, ADD_3,
  48                      CMP_1, CMP_2,
  49                      MULT_1,
  50                      FMADD_1, FMADD_2, FMADD_3,
  51                      FMADD_4, FMADD_5, FMADD_6,
  52                      LOOKUP,
  53                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  54                      FRE_1,
  55                      RSQRT_1,
  56                      FTDIV_1,
  57                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  58                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  59                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  60                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  61                      INT_FINAL, INT_CHECK, INT_OFLOW,
  62                      FINISH, NORMALIZE,
  63                      ROUND_UFLOW, ROUND_OFLOW,
  64                      ROUNDING, ROUNDING_2, ROUNDING_3,
  65                      DENORM,
  66                      RENORM_A, RENORM_A2,
  67                      RENORM_B, RENORM_B2,
  68                      RENORM_C, RENORM_C2,
  69                      NAN_RESULT, EXC_RESULT);
  70
  71     type reg_type is record
             -- Complete registered state of the FPU.  The signal r holds the
             -- current value and rin the next value; fpu_0 copies rin into r on
             -- each rising clock edge.  Only state, busy, instr_done, do_intr,
             -- fpscr and writing_back are given values by the reset branch.
  72         state        : state_t;
             -- Drives e_out.busy.
  73         busy         : std_ulogic;
             -- w_out.valid is instr_done and not do_intr.
  74         instr_done   : std_ulogic;
             -- Drives e_out.interrupt and suppresses w_out.valid.
  75         do_intr      : std_ulogic;
  76         op           : insn_type_t;
  77         insn         : std_ulogic_vector(31 downto 0);
             -- Drives w_out.write_reg.
  78         dest_fpr     : gspr_index_t;
  79         fe_mode      : std_ulogic;
             -- w_out.write_cr_enable is instr_done and (rc or is_cmp).
  80         rc           : std_ulogic;
  81         is_cmp       : std_ulogic;
  82         single_prec  : std_ulogic;
             -- Cleared to all zeros on reset; bit FPSCR_FEX drives e_out.exception.
  83         fpscr        : std_ulogic_vector(31 downto 0);
  84         a            : fpu_reg_type;
             -- b.mantissa(53 downto 46), plus bits 55 downto 54 when is_sqrt
             -- is set, form the address used by lut_access.
  85         b            : fpu_reg_type;
  86         c            : fpu_reg_type;
  87         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  88         s            : std_ulogic_vector(55 downto 0);  -- extended fraction
  89         x            : std_ulogic;
  90         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  91         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  92         result_sign  : std_ulogic;
  93         result_class : fp_number_class;
  94         result_exp   : signed(EXP_BITS-1 downto 0);
  95         shift        : signed(EXP_BITS-1 downto 0);
             -- Drives w_out.write_enable.
  96         writing_back : std_ulogic;
  97         int_result   : std_ulogic;
             -- Replicated eight times to form the 32-bit w_out.write_cr_data.
  98         cr_result    : std_ulogic_vector(3 downto 0);
             -- Drives w_out.write_cr_mask.
  99         cr_mask      : std_ulogic_vector(7 downto 0);
 100         old_exc      : std_ulogic_vector(4 downto 0);
 101         update_fprf  : std_ulogic;
 102         quieten_nan  : std_ulogic;
 103         tiny         : std_ulogic;
 104         denorm       : std_ulogic;
 105         round_mode   : std_ulogic_vector(2 downto 0);
 106         is_subtract  : std_ulogic;
 107         exp_cmp      : std_ulogic;
 108         madd_cmp     : std_ulogic;
 109         add_bsmall   : std_ulogic;
 110         is_multiply  : std_ulogic;
             -- When '1', lut_access takes the two high address bits from
             -- b.mantissa(55 downto 54); otherwise they are "00".
 111         is_sqrt      : std_ulogic;
 112         first        : std_ulogic;
 113         count        : unsigned(1 downto 0);
 114         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 115         opsel_a      : std_ulogic_vector(1 downto 0);
 116         use_a        : std_ulogic;
 117         use_b        : std_ulogic;
 118         use_c        : std_ulogic;
 119         invalid      : std_ulogic;
 120         negate       : std_ulogic;
 121         longmask     : std_ulogic;
 122     end record;
 123
 124     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
         -- Storage type of inverse_table: 1024 entries of 18 bits each.
 125
         -- r is the registered FPU state, rin its next value (see fpu_0).
 126     signal r, rin : reg_type;
 127
         -- fp_result drives w_out.write_data.  The remaining select, operand
         -- and status signals below are driven and consumed outside this view.
 128     signal fp_result     : std_ulogic_vector(63 downto 0);
 129     signal opsel_b       : std_ulogic_vector(1 downto 0);
 130     signal opsel_r       : std_ulogic_vector(1 downto 0);
 131     signal opsel_s       : std_ulogic_vector(1 downto 0);
 132     signal opsel_ainv    : std_ulogic;
 133     signal opsel_mask    : std_ulogic;
 134     signal opsel_binv    : std_ulogic;
 135     signal in_a          : std_ulogic_vector(63 downto 0);
 136     signal in_b          : std_ulogic_vector(63 downto 0);
 137     signal result        : std_ulogic_vector(63 downto 0);
 138     signal carry_in      : std_ulogic;
 139     signal lost_bits     : std_ulogic;
 140     signal r_hi_nz       : std_ulogic;
 141     signal r_lo_nz       : std_ulogic;
 142     signal s_nz          : std_ulogic;
 143     signal misc_sel      : std_ulogic_vector(3 downto 0);
         -- Input to and output from the fpu_multiply_0 instance.
 144     signal f_to_multiply : MultiplyInputType;
 145     signal multiply_to_f : MultiplyOutputType;
 146     signal msel_1        : std_ulogic_vector(1 downto 0);
 147     signal msel_2        : std_ulogic_vector(1 downto 0);
 148     signal msel_add      : std_ulogic_vector(1 downto 0);
 149     signal msel_inv      : std_ulogic;
         -- Registered output of lut_access: a leading '1' followed by the
         -- 18-bit entry read from inverse_table (19 bits in total).
 150     signal inverse_est   : std_ulogic_vector(18 downto 0);
 151
 152     -- opsel values
         -- Each group below is a set of four distinct 2-bit select codes.
         -- NOTE(review): by name the AIN_* codes presumably go with r.opsel_a,
         -- BIN_* with opsel_b, RES_* with opsel_r and S_* with opsel_s; the
         -- logic that decodes them is outside this view -- confirm there.
 153     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";
 154     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 155     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 156     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 157
 158     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
 159     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 160     constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
 161     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 162
 163     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";
 164     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 165     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 166     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 167
 168     constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 169     constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
 170     constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
 171     constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";
 172
 173     -- msel values
         -- NOTE(review): by name the MUL1_* codes presumably go with msel_1,
         -- MUL2_* with msel_2 and MULADD_* with msel_add (the multiplier
         -- operand selects) -- confirm against the logic that drives
         -- f_to_multiply, which is outside this view.
 174     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
 175     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 176     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 177     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 178
 179     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";
 180     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 181     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 182     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 183
 184     constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 185     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 186     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 187     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 188
 189     -- Inverse lookup table; within each 256-entry block, indexed by the top 8 fraction bits.
 190     -- The first 256 entries are the reciprocal (1/x) lookup table,
 191     -- and the remaining 768 entries are the reciprocal square root table.
 192     -- Output range is [0.5, 1) in 0.19 format, though the top
 193     -- bit isn't stored since it is always 1.
 194     -- Each output value is the inverse of the center of the input
 195     -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
 196     -- entry 1 is 1 / (1 + 3/512), etc.
 197     signal inverse_table : lookup_table := (
 198         -- 1/x lookup table
 199         -- Unit bit is assumed to be 1, so input range is [1, 2)
 200         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 201         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 202         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 203         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 204         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 205         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 206         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 207         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 208         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 209         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 210         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 211         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 212         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 213         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 214         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 215         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 216         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 217         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 218         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 219         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 220         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 221         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 222         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 223         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 224         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 225         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 226         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 227         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 228         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 229         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 230         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 231         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 232         -- 1/sqrt(x) lookup table
 233         -- Input is in the range [1, 4), i.e. two bits to the left of the
 234         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 235         -- 1.0 ... 1.9999
 236         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 237         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 238         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 239         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 240         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 241         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 242         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 243         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 244         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 245         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 246         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 247         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 248         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 249         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 250         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 251         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 252         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 253         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 254         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 255         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 256         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 257         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 258         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 259         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 260         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 261         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 262         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 263         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 264         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 265         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 266         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 267         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 268         -- 2.0 ... 2.9999
 269         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 270         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 271         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 272         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 273         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 274         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 275         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 276         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 277         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 278         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 279         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 280         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 281         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 282         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 283         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 284         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 285         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 286         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 287         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 288         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 289         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 290         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 291         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 292         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 293         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 294         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 295         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 296         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 297         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 298         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 299         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 300         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 301         -- 3.0 ... 3.9999
 302         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 303         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 304         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 305         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 306         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 307         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 308         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 309         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 310         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 311         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 312         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 313         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 314         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 315         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 316         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 317         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 318         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 319         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 320         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 321         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 322         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 323         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 324         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 325         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 326         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 327         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 328         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 329         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 330         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 331         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 332         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 333         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 334         );
 335
 336     -- Left and right shifter with 120 bit input and 64 bit output.
 337     -- Shifts inp left by shift bits and returns the upper 64 bits of
 338     -- the result.  The shift parameter is interpreted as a signed
 339     -- number in the range -64..63, with negative values indicating
 340     -- right shifts.
 341     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 342                         shift: std_ulogic_vector(6 downto 0))
 343         return std_ulogic_vector is
             -- Three-stage multiplexer shifter.
             --   inp   : 120-bit value to be shifted.
             --   shift : 7-bit two's-complement shift count.
             -- Returns a 64-bit window of the shifted value; for shift = 0
             -- that window is inp(119 downto 56).
             -- s1 and s2 are the intermediate windows after the first and
             -- second stages (95 and 71 bits wide).
 344         variable s1 : std_ulogic_vector(94 downto 0);
 345         variable s2 : std_ulogic_vector(70 downto 0);
 346         variable result : std_ulogic_vector(63 downto 0);
 347     begin
             -- Stage 1: coarse offset chosen by the two top bits of shift.
 348         case shift(6 downto 5) is
 349             when "00" =>
                     -- No coarse shift.
 350                 s1 := inp(119 downto 25);
 351             when "01" =>
                     -- Left by 32; the 7 vacated low bits are zero-filled.
 352                 s1 := inp(87 downto 0) & "0000000";
 353             when "10" =>
                     -- Right by 64; 64 zero bits enter from the left.
 354                 s1 := x"0000000000000000" & inp(119 downto 89);
 355             when others =>
                     -- Right by 32; 32 zero bits enter from the left.
 356                 s1 := x"00000000" & inp(119 downto 57);
 357         end case;
             -- Stage 2: further left shift of 0, 8, 16 or 24 bits.
 358         case shift(4 downto 3) is
 359             when "00" =>
 360                 s2 := s1(94 downto 24);
 361             when "01" =>
 362                 s2 := s1(86 downto 16);
 363             when "10" =>
 364                 s2 := s1(78 downto 8);
 365             when others =>
 366                 s2 := s1(70 downto 0);
 367         end case;
             -- Stage 3: final left shift of 0 to 7 bits.
 368         case shift(2 downto 0) is
 369             when "000" =>
 370                 result := s2(70 downto 7);
 371             when "001" =>
 372                 result := s2(69 downto 6);
 373             when "010" =>
 374                 result := s2(68 downto 5);
 375             when "011" =>
 376                 result := s2(67 downto 4);
 377             when "100" =>
 378                 result := s2(66 downto 3);
 379             when "101" =>
 380                 result := s2(65 downto 2);
 381             when "110" =>
 382                 result := s2(64 downto 1);
 383             when others =>
 384                 result := s2(63 downto 0);
 385         end case;
 386         return result;
 387     end;
 388
 389     -- Generate a mask with 0-bits on the left and 1-bits on the right which
 390     -- selects the bits that will be lost in doing a right shift.  The shift
 391     -- parameter is the bottom 6 bits of a negative shift count,
 392     -- indicating a right shift.
 393     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
             -- Returns a 64-bit vector whose low (64 - shift) bits are '1' and
             -- whose remaining high bits are '0'.  shift = 0 gives all ones;
             -- shift = 63 sets only bit 0.
 394         variable result: std_ulogic_vector(63 downto 0);
 395     begin
 396         result := (others => '0');
 397         for i in 0 to 63 loop
                 -- Index i counts from the left, so bit (63 - i) is set for
                 -- every i from shift up to 63.
 398             if i >= shift then
 399                 result(63 - i) := '1';
 400             end if;
 401         end loop;
 402         return result;
 403     end;
 404
 405     -- Split a DP floating-point number into components and work out its class.
 406     -- If is_int = 1, the input is considered an integer
 407     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
             --   fpr    : raw 64-bit register value.
             --   is_int : '0' to decode fpr as a double-precision number,
             --            any other value to treat it as a 64-bit integer.
             -- Returns the unpacked sign, exponent, mantissa and class.
 408         variable r       : fpu_reg_type;
             -- exp_nz: exponent field non-zero; exp_ao: exponent field all
             -- ones; frac_nz: fraction field non-zero.
 409         variable exp_nz  : std_ulogic;
 410         variable exp_ao  : std_ulogic;
 411         variable frac_nz : std_ulogic;
 412         variable cls     : std_ulogic_vector(2 downto 0);
 413     begin
             -- The sign is taken from bit 63 in both modes.
 414         r.negative := fpr(63);
 415         exp_nz := or (fpr(62 downto 52));
 416         exp_ao := and (fpr(62 downto 52));
 417         frac_nz := or (fpr(51 downto 0));
 418         if is_int = '0' then
                 -- Remove the bias of 1023 from the 11-bit exponent field.
 419             r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
 420             if exp_nz = '0' then
                     -- Zero exponent field (zero or denormal): use -1022.
 421                 r.exponent := to_signed(-1022, EXP_BITS);
 422             end if;
                 -- Unit bit (bit 54) is 1 only when the exponent field is
                 -- non-zero; the fraction occupies bits 53 downto 2.
 423             r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";
 424             cls := exp_ao & exp_nz & frac_nz;
 425             case cls is
 426                 when "000"  => r.class := ZERO;
 427                 when "001"  => r.class := FINITE;    -- denormalized
 428                 when "010"  => r.class := FINITE;
 429                 when "011"  => r.class := FINITE;
 430                 when "110"  => r.class := INFINITY;
                     -- Exponent all ones with a non-zero fraction ("111")
                     -- lands here.
 431                 when others => r.class := NAN;
 432             end case;
 433         else
                 -- Integer mode: keep the raw bits, exponent 0, and classify
                 -- as ZERO only when all 64 bits are zero.
 434             r.mantissa := fpr;
 435             r.exponent := (others => '0');
 436             if (fpr(63) or exp_nz or frac_nz) = '1' then
 437                 r.class := FINITE;
 438             else
 439                 r.class := ZERO;
 440             end if;
 441         end if;
 442         return r;
 443     end;
 444
 445     -- Construct a DP floating-point result from components
 446     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 447                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 448         return std_ulogic_vector is
             --   sign        : copied to bit 63 of the result.
             --   class       : selects how exponent and fraction are filled.
             --   exp         : unbiased exponent, used for normalized FINITE.
             --   mantissa    : unit bit at index 54, fraction at 53 downto 2.
             --                 The parameter is unconstrained -- assumes callers
             --                 pass a range containing 54 downto 2; TODO confirm.
             --   single_prec : when '0' the low 29 fraction bits are copied;
             --                 otherwise they stay zero.
             --   quieten_nan : ORed into bit 51 of a NAN result.
             -- Returns the 64-bit packed value.
 449         variable result : std_ulogic_vector(63 downto 0);
 450     begin
 451         result := (others => '0');
 452         result(63) := sign;
 453         case class is
 454             when ZERO =>
                     -- Only the sign bit is set; everything else stays zero.
 455             when FINITE =>
 456                 if mantissa(54) = '1' then
 457                     -- normalized number
 458                     result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
 459                 end if;
                     -- With the unit bit clear the exponent field is left at
                     -- zero (denormal encoding).
 460                 result(51 downto 29) := mantissa(53 downto 31);
 461                 if single_prec = '0' then
 462                     result(28 downto 0) := mantissa(30 downto 2);
 463                 end if;
 464             when INFINITY =>
 465                 result(62 downto 52) := "11111111111";
 466             when NAN =>
 467                 result(62 downto 52) := "11111111111";
                     -- Top fraction bit: set when quieten_nan is '1' or when
                     -- the incoming mantissa already has it set.
 468                 result(51) := quieten_nan or mantissa(53);
 469                 result(50 downto 29) := mantissa(52 downto 31);
 470                 if single_prec = '0' then
 471                     result(28 downto 0) := mantissa(30 downto 2);
 472                 end if;
 473         end case;
 474         return result;
 475     end;
 476
 477     -- Determine whether to increment when rounding
 478     -- Returns rounding_inc & inexact
 479     -- Assumes x includes the bottom 29 bits of the mantissa already
 480     -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
 481     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 482                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 483                          sign: std_ulogic)
 484         return std_ulogic_vector is
             --   mantissa    : value to be rounded.
             --   x           : extra bit appended below the two bits under the LSB.
             --   single_prec : '0' puts the LSB at bit 2, otherwise at bit 31.
             --   rn          : rn(1 downto 0) selects the rounding case below;
             --                 rn(2) = '1' disables the tie-to-even special case.
             --   sign        : sign of the value, used by the directed modes.
             -- Returns 2 bits: bit 1 = increment the mantissa, bit 0 = inexact.
 485         variable grx : std_ulogic_vector(2 downto 0);
 486         variable ret : std_ulogic_vector(1 downto 0);
 487         variable lsb : std_ulogic;
 488     begin
             -- grx holds the two mantissa bits just below the LSB, then x.
 489         if single_prec = '0' then
 490             grx := mantissa(1 downto 0) & x;
 491             lsb := mantissa(2);
 492         else
 493             grx := mantissa(30 downto 29) & x;
 494             lsb := mantissa(31);
 495         end if;
 496         ret(1) := '0';
             -- Inexact if any bit below the LSB is set.
 497         ret(0) := or (grx);
 498         case rn(1 downto 0) is
 499             when "00" =>        -- round to nearest
 500                 if grx = "100" and rn(2) = '0' then
 501                     ret(1) := lsb; -- tie, round to even
 502                 else
                         -- Otherwise increment exactly when the top bit of
                         -- grx is set.
 503                     ret(1) := grx(2);
 504                 end if;
 505             when "01" =>        -- round towards zero
                     -- Never increments; ret(1) keeps its default of '0'.
 506             when others =>      -- round towards +/- inf
                     -- rn = "10" increments inexact values with sign '0';
                     -- rn = "11" increments inexact values with sign '1'.
 507                 if rn(0) = sign then
 508                     -- round towards greater magnitude
 509                     ret(1) := ret(0);
 510                 end if;
 511         end case;
 512         return ret;
 513     end;
 514
 515     -- Determine result flags to write into the FPSCR
 516     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 517         return std_ulogic_vector is
             --   sign    : sign of the result.
             --   class   : class of the result.
             --   unitbit : unit bit of the result mantissa; only used for
             --             FINITE, where its inverse becomes the top flag bit.
             -- Returns a 5-bit flag vector.
             -- NOTE(review): presumably the FPSCR FPRF field (class bit, then
             -- less-than, greater-than, equal, unordered) -- confirm against
             -- the Power ISA and the caller.
 518     begin
 519         case class is
 520             when ZERO =>
 521                 return sign & "0010";
 522             when FINITE =>
 523                 return (not unitbit) & sign & (not sign) & "00";
 524             when INFINITY =>
 525                 return '0' & sign & (not sign) & "01";
 526             when NAN =>
 527                 return "10001";
 528         end case;
 529     end;
 530
 531 begin
 532     fpu_multiply_0: entity work.multiply
         -- Instance of work.multiply, clocked by clk.  Its input record is
         -- f_to_multiply and its output record is multiply_to_f.
 533         port map (
 534             clk => clk,
 535             m_in => f_to_multiply,
 536             m_out => multiply_to_f
 537             );
 538
 539     fpu_0: process(clk)
 540     begin
 541         if rising_edge(clk) then
 542             if rst = '1' then
 543                 r.state <= IDLE;
 544                 r.busy <= '0';
 545                 r.instr_done <= '0';
 546                 r.do_intr <= '0';
 547                 r.fpscr <= (others => '0');
 548                 r.writing_back <= '0';
 549             else
 550                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
 551                 r <= rin;
 552             end if;
 553         end if;
 554     end process;
 555
 556     -- synchronous reads from lookup table
 557     lut_access: process(clk)
 558         variable addrhi : std_ulogic_vector(1 downto 0);
 559         variable addr   : std_ulogic_vector(9 downto 0);
 560     begin
 561         if rising_edge(clk) then
 562             if r.is_sqrt = '1' then
 563                 addrhi := r.b.mantissa(55 downto 54);
 564             else
 565                 addrhi := "00";
 566             end if;
 567             addr := addrhi & r.b.mantissa(53 downto 46);
 568             inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
 569         end if;
 570     end process;
 571
 572     e_out.busy <= r.busy;
 573     e_out.exception <= r.fpscr(FPSCR_FEX);
 574     e_out.interrupt <= r.do_intr;
 575
 576     w_out.valid <= r.instr_done and not r.do_intr;
 577     w_out.write_enable <= r.writing_back;
 578     w_out.write_reg <= r.dest_fpr;
 579     w_out.write_data <= fp_result;
 580     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
 581     w_out.write_cr_mask <= r.cr_mask;
 582     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
 583                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;
 584
 585     fpu_1: process(all)
 586         variable v           : reg_type;
 587         variable adec        : fpu_reg_type;
 588         variable bdec        : fpu_reg_type;
 589         variable cdec        : fpu_reg_type;
 590         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 591         variable illegal     : std_ulogic;
 592         variable j, k        : integer;
 593         variable flm         : std_ulogic_vector(7 downto 0);
 594         variable int_input   : std_ulogic;
 595         variable mask        : std_ulogic_vector(63 downto 0);
 596         variable in_a0       : std_ulogic_vector(63 downto 0);
 597         variable in_b0       : std_ulogic_vector(63 downto 0);
 598         variable misc        : std_ulogic_vector(63 downto 0);
 599         variable shift_res   : std_ulogic_vector(63 downto 0);
 600         variable round       : std_ulogic_vector(1 downto 0);
 601         variable update_fx   : std_ulogic;
 602         variable arith_done  : std_ulogic;
 603         variable invalid     : std_ulogic;
 604         variable zero_divide : std_ulogic;
 605         variable mant_nz     : std_ulogic;
 606         variable min_exp     : signed(EXP_BITS-1 downto 0);
 607         variable max_exp     : signed(EXP_BITS-1 downto 0);
 608         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 609         variable new_exp     : signed(EXP_BITS-1 downto 0);
 610         variable exp_tiny    : std_ulogic;
 611         variable exp_huge    : std_ulogic;
 612         variable renormalize : std_ulogic;
 613         variable clz         : std_ulogic_vector(5 downto 0);
 614         variable set_x       : std_ulogic;
 615         variable mshift      : signed(EXP_BITS-1 downto 0);
 616         variable need_check  : std_ulogic;
 617         variable msb         : std_ulogic;
 618         variable is_add      : std_ulogic;
 619         variable set_a       : std_ulogic;
 620         variable set_b       : std_ulogic;
 621         variable set_c       : std_ulogic;
 622         variable set_y       : std_ulogic;
 623         variable set_s       : std_ulogic;
 624         variable qnan_result : std_ulogic;
 625         variable px_nz       : std_ulogic;
 626         variable pcmpb_eq    : std_ulogic;
 627         variable pcmpb_lt    : std_ulogic;
 628         variable pshift      : std_ulogic;
 629         variable renorm_sqrt : std_ulogic;
 630         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 631         variable shiftin     : std_ulogic;
 632         variable mulexp      : signed(EXP_BITS-1 downto 0);
 633         variable maddend     : std_ulogic_vector(127 downto 0);
 634         variable sum         : std_ulogic_vector(63 downto 0);
 635     begin
 636         v := r;
 637         illegal := '0';
 638         v.busy := '0';
 639         int_input := '0';
 640
 641         -- capture incoming instruction
 642         if e_in.valid = '1' then
 643             v.insn := e_in.insn;
 644             v.op := e_in.op;
 645             v.fe_mode := or (e_in.fe_mode);
 646             v.dest_fpr := e_in.frt;
 647             v.single_prec := e_in.single;
 648             v.longmask := e_in.single;
 649             v.int_result := '0';
 650             v.rc := e_in.rc;
 651             v.is_cmp := e_in.out_cr;
 652             if e_in.out_cr = '0' then
 653                 v.cr_mask := num_to_fxm(1);
 654             else
 655                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 656             end if;
 657             int_input := '0';
 658             if e_in.op = OP_FPOP_I then
 659                 int_input := '1';
 660             end if;
 661             v.quieten_nan := '1';
 662             v.tiny := '0';
 663             v.denorm := '0';
 664             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 665             v.is_subtract := '0';
 666             v.is_multiply := '0';
 667             v.is_sqrt := '0';
 668             v.add_bsmall := '0';
 669             v.doing_ftdiv := "00";
 670
 671             adec := decode_dp(e_in.fra, int_input);
 672             bdec := decode_dp(e_in.frb, int_input);
 673             cdec := decode_dp(e_in.frc, int_input);
 674             v.a := adec;
 675             v.b := bdec;
 676             v.c := cdec;
 677
 678             v.exp_cmp := '0';
 679             if adec.exponent > bdec.exponent then
 680                 v.exp_cmp := '1';
 681             end if;
 682             v.madd_cmp := '0';
 683             if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
 684                 v.madd_cmp := '1';
 685             end if;
 686         end if;
 687
 688         r_hi_nz <= or (r.r(55 downto 31));
 689         r_lo_nz <= or (r.r(30 downto 2));
 690         s_nz <= or (r.s);
 691
 692         if r.single_prec = '0' then
 693             if r.doing_ftdiv(1) = '0' then
 694                 max_exp := to_signed(1023, EXP_BITS);
 695             else
 696                 max_exp := to_signed(1020, EXP_BITS);
 697             end if;
 698             if r.doing_ftdiv(0) = '0' then
 699                 min_exp := to_signed(-1022, EXP_BITS);
 700             else
 701                 min_exp := to_signed(-1021, EXP_BITS);
 702             end if;
 703             bias_exp := to_signed(1536, EXP_BITS);
 704         else
 705             max_exp := to_signed(127, EXP_BITS);
 706             min_exp := to_signed(-126, EXP_BITS);
 707             bias_exp := to_signed(192, EXP_BITS);
 708         end if;
 709         new_exp := r.result_exp - r.shift;
 710         exp_tiny := '0';
 711         exp_huge := '0';
 712         if new_exp < min_exp then
 713             exp_tiny := '1';
 714         end if;
 715         if new_exp > max_exp then
 716             exp_huge := '1';
 717         end if;
 718
 719         -- Compare P with zero and with B
 720         px_nz := or (r.p(57 downto 4));
 721         pcmpb_eq := '0';
 722         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 723             pcmpb_eq := '1';
 724         end if;
 725         pcmpb_lt := '0';
 726         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 727             pcmpb_lt := '1';
 728         end if;
 729
 730         v.writing_back := '0';
 731         v.instr_done := '0';
 732         v.update_fprf := '0';
 733         v.shift := to_signed(0, EXP_BITS);
 734         v.first := '0';
 735         v.opsel_a := AIN_R;
 736         opsel_ainv <= '0';
 737         opsel_mask <= '0';
 738         opsel_b <= BIN_ZERO;
 739         opsel_binv <= '0';
 740         opsel_r <= RES_SUM;
 741         opsel_s <= S_ZERO;
 742         carry_in <= '0';
 743         misc_sel <= "0000";
 744         fpscr_mask := (others => '1');
 745         update_fx := '0';
 746         arith_done := '0';
 747         invalid := '0';
 748         zero_divide := '0';
 749         renormalize := '0';
 750         set_x := '0';
 751         qnan_result := '0';
 752         set_a := '0';
 753         set_b := '0';
 754         set_c := '0';
 755         set_s := '0';
 756         f_to_multiply.is_32bit <= '0';
 757         f_to_multiply.valid <= '0';
 758         msel_1 <= MUL1_A;
 759         msel_2 <= MUL2_C;
 760         msel_add <= MULADD_ZERO;
 761         msel_inv <= '0';
 762         set_y := '0';
 763         pshift := '0';
 764         renorm_sqrt := '0';
 765         shiftin := '0';
 766         case r.state is
 767             when IDLE =>
 768                 v.use_a := '0';
 769                 v.use_b := '0';
 770                 v.use_c := '0';
 771                 v.invalid := '0';
 772                 v.negate := '0';
 773                 if e_in.valid = '1' then
 774                     case e_in.insn(5 downto 1) is
 775                         when "00000" =>
 776                             if e_in.insn(8) = '1' then
 777                                 if e_in.insn(6) = '0' then
 778                                     v.state := DO_FTDIV;
 779                                 else
 780                                     v.state := DO_FTSQRT;
 781                                 end if;
 782                             elsif e_in.insn(7) = '1' then
 783                                 v.state := DO_MCRFS;
 784                             else
 785                                 v.opsel_a := AIN_B;
 786                                 v.state := DO_FCMP;
 787                             end if;
 788                         when "00110" =>
 789                             if e_in.insn(10) = '0' then
 790                                 if e_in.insn(8) = '0' then
 791                                     v.state := DO_MTFSB;
 792                                 else
 793                                     v.state := DO_MTFSFI;
 794                                 end if;
 795                             else
 796                                 v.state := DO_FMRG;
 797                             end if;
 798                         when "00111" =>
 799                             if e_in.insn(8) = '0' then
 800                                 v.state := DO_MFFS;
 801                             else
 802                                 v.state := DO_MTFSF;
 803                             end if;
 804                         when "01000" =>
 805                             v.opsel_a := AIN_B;
 806                             if e_in.insn(9 downto 8) /= "11" then
 807                                 v.state := DO_FMR;
 808                             else
 809                                 v.state := DO_FRI;
 810                             end if;
 811                         when "01100" =>
 812                             v.opsel_a := AIN_B;
 813                             v.state := DO_FRSP;
 814                         when "01110" =>
 815                             v.opsel_a := AIN_B;
 816                             if int_input = '1' then
 817                                 -- fcfid[u][s]
 818                                 v.state := DO_FCFID;
 819                             else
 820                                 v.state := DO_FCTI;
 821                             end if;
 822                         when "01111" =>
 823                             v.round_mode := "001";
 824                             v.opsel_a := AIN_B;
 825                             v.state := DO_FCTI;
 826                         when "10010" =>
 827                             v.opsel_a := AIN_A;
 828                             if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 829                                 v.opsel_a := AIN_B;
 830                             end if;
 831                             v.state := DO_FDIV;
 832                         when "10100" | "10101" =>
 833                             v.opsel_a := AIN_A;
 834                             v.state := DO_FADD;
 835                         when "10110" =>
 836                             v.is_sqrt := '1';
 837                             v.opsel_a := AIN_B;
 838                             v.state := DO_FSQRT;
 839                         when "10111" =>
 840                             v.state := DO_FSEL;
 841                         when "11000" =>
 842                             v.opsel_a := AIN_B;
 843                             v.state := DO_FRE;
 844                         when "11001" =>
 845                             v.is_multiply := '1';
 846                             v.opsel_a := AIN_A;
 847                             if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 848                                 v.opsel_a := AIN_C;
 849                             end if;
 850                             v.state := DO_FMUL;
 851                         when "11010" =>
 852                             v.is_sqrt := '1';
 853                             v.opsel_a := AIN_B;
 854                             v.state := DO_FRSQRTE;
 855                         when "11100" | "11101" | "11110" | "11111" =>
 856                             if v.a.mantissa(54) = '0' then
 857                                 v.opsel_a := AIN_A;
 858                             elsif v.c.mantissa(54) = '0' then
 859                                 v.opsel_a := AIN_C;
 860                             else
 861                                 v.opsel_a := AIN_B;
 862                             end if;
 863                             v.state := DO_FMADD;
 864                         when others =>
 865                             illegal := '1';
 866                     end case;
 867                 end if;
 868                 v.x := '0';
 869                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 870                 set_s := '1';
 871
 872             when DO_MCRFS =>
 873                 j := to_integer(unsigned(insn_bfa(r.insn)));
 874                 for i in 0 to 7 loop
 875                     if i = j then
 876                         k := (7 - i) * 4;
 877                         v.cr_result := r.fpscr(k + 3 downto k);
 878                         fpscr_mask(k + 3 downto k) := "0000";
 879                     end if;
 880                 end loop;
 881                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 882                 v.instr_done := '1';
 883                 v.state := IDLE;
 884
 885             when DO_FTDIV =>
 886                 v.instr_done := '1';
 887                 v.state := IDLE;
 888                 v.cr_result := "0000";
 889                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
 890                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 891                     v.cr_result(2) := '1';
 892                 end if;
 893                 if r.a.class = NAN or r.a.class = INFINITY or
 894                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
 895                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
 896                     v.cr_result(1) := '1';
 897                 else
 898                     v.doing_ftdiv := "11";
 899                     v.first := '1';
 900                     v.state := FTDIV_1;
 901                     v.instr_done := '0';
 902                 end if;
 903
 904             when DO_FTSQRT =>
 905                 v.instr_done := '1';
 906                 v.state := IDLE;
 907                 v.cr_result := "0000";
 908                 if r.b.class = ZERO or r.b.class = INFINITY or
 909                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 910                     v.cr_result(2) := '1';
 911                 end if;
 912                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
 913                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
 914                     v.cr_result(1) := '0';
 915                 end if;
 916
 917             when DO_FCMP =>
 918                 -- fcmp[uo]
 919                 -- r.opsel_a = AIN_B
 920                 v.instr_done := '1';
 921                 v.state := IDLE;
 922                 update_fx := '1';
 923                 v.result_exp := r.b.exponent;
 924                 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 925                     (r.b.class = NAN and r.b.mantissa(53) = '0') then
 926                     -- Signalling NAN
 927                     v.fpscr(FPSCR_VXSNAN) := '1';
 928                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
 929                         v.fpscr(FPSCR_VXVC) := '1';
 930                     end if;
 931                     invalid := '1';
 932                     v.cr_result := "0001";          -- unordered
 933                 elsif r.a.class = NAN or r.b.class = NAN then
 934                     if r.insn(6) = '1' then
 935                         -- fcmpo
 936                         v.fpscr(FPSCR_VXVC) := '1';
 937                         invalid := '1';
 938                     end if;
 939                     v.cr_result := "0001";          -- unordered
 940                 elsif r.a.class = ZERO and r.b.class = ZERO then
 941                     v.cr_result := "0010";          -- equal
 942                 elsif r.a.negative /= r.b.negative then
 943                     v.cr_result := r.a.negative & r.b.negative & "00";
 944                 elsif r.a.class = ZERO then
 945                     -- A and B are the same sign from here down
 946                     v.cr_result := not r.b.negative & r.b.negative & "00";
 947                 elsif r.a.class = INFINITY then
 948                     if r.b.class = INFINITY then
 949                         v.cr_result := "0010";
 950                     else
 951                         v.cr_result := r.a.negative & not r.a.negative & "00";
 952                     end if;
 953                 elsif r.b.class = ZERO then
 954                     -- A is finite from here down
 955                     v.cr_result := r.a.negative & not r.a.negative & "00";
 956                 elsif r.b.class = INFINITY then
 957                     v.cr_result := not r.b.negative & r.b.negative & "00";
 958                 elsif r.exp_cmp = '1' then
 959                     -- A and B are both finite from here down
 960                     v.cr_result := r.a.negative & not r.a.negative & "00";
 961                 elsif r.a.exponent /= r.b.exponent then
 962                     -- A exponent is smaller than B
 963                     v.cr_result := not r.a.negative & r.a.negative & "00";
 964                 else
 965                     -- Prepare to subtract mantissas, put B in R
 966                     v.cr_result := "0000";
 967                     v.instr_done := '0';
 968                     v.opsel_a := AIN_A;
 969                     v.state := CMP_1;
 970                 end if;
 971                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
 972
 973             when DO_MTFSB =>
 974                 -- mtfsb{0,1}
 975                 j := to_integer(unsigned(insn_bt(r.insn)));
 976                 for i in 0 to 31 loop
 977                     if i = j then
 978                         v.fpscr(31 - i) := r.insn(6);
 979                     end if;
 980                 end loop;
 981                 v.instr_done := '1';
 982                 v.state := IDLE;
 983
 984             when DO_MTFSFI =>
 985                 -- mtfsfi
 986                 j := to_integer(unsigned(insn_bf(r.insn)));
 987                 if r.insn(16) = '0' then
 988                     for i in 0 to 7 loop
 989                         if i = j then
 990                             k := (7 - i) * 4;
 991                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 992                         end if;
 993                     end loop;
 994                 end if;
 995                 v.instr_done := '1';
 996                 v.state := IDLE;
 997
 998             when DO_FMRG =>
 999                 -- fmrgew, fmrgow
1000                 opsel_r <= RES_MISC;
1001                 misc_sel <= "01" & r.insn(8) & '0';
1002                 v.int_result := '1';
1003                 v.writing_back := '1';
1004                 v.instr_done := '1';
1005                 v.state := IDLE;
1006
1007             when DO_MFFS =>
1008                 v.int_result := '1';
1009                 v.writing_back := '1';
1010                 opsel_r <= RES_MISC;
1011                 case r.insn(20 downto 16) is
1012                     when "00000" =>
1013                         -- mffs
1014                     when "00001" =>
1015                         -- mffsce
1016                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1017                     when "10100" | "10101" =>
1018                         -- mffscdrn[i] (but we don't implement DRN)
1019                         fpscr_mask := x"000000FF";
1020                     when "10110" =>
1021                         -- mffscrn
1022                         fpscr_mask := x"000000FF";
1023                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1024                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1025                     when "10111" =>
1026                         -- mffscrni
1027                         fpscr_mask := x"000000FF";
1028                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1029                     when "11000" =>
1030                         -- mffsl
1031                         fpscr_mask := x"0007F0FF";
1032                     when others =>
1033                         illegal := '1';
1034                 end case;
1035                 v.instr_done := '1';
1036                 v.state := IDLE;
1037
1038             when DO_MTFSF =>
1039                 if r.insn(25) = '1' then
1040                     flm := x"FF";
1041                 elsif r.insn(16) = '1' then
1042                     flm := x"00";
1043                 else
1044                     flm := r.insn(24 downto 17);
1045                 end if;
1046                 for i in 0 to 7 loop
1047                     k := i * 4;
1048                     if flm(i) = '1' then
1049                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1050                     end if;
1051                 end loop;
1052                 v.instr_done := '1';
1053                 v.state := IDLE;
1054
1055             when DO_FMR =>
1056                 -- r.opsel_a = AIN_B
1057                 v.result_class := r.b.class;
1058                 v.result_exp := r.b.exponent;
1059                 v.quieten_nan := '0';
1060                 if r.insn(9) = '1' then
1061                     v.result_sign := '0';              -- fabs
1062                 elsif r.insn(8) = '1' then
1063                     v.result_sign := '1';              -- fnabs
1064                 elsif r.insn(7) = '1' then
1065                     v.result_sign := r.b.negative;     -- fmr
1066                 elsif r.insn(6) = '1' then
1067                     v.result_sign := not r.b.negative; -- fneg
1068                 else
1069                     v.result_sign := r.a.negative;     -- fcpsgn
1070                 end if;
1071                 v.writing_back := '1';
1072                 v.instr_done := '1';
1073                 v.state := IDLE;
1074
1075             when DO_FRI =>    -- fri[nzpm]
1076                 -- r.opsel_a = AIN_B
1077                 v.result_class := r.b.class;
1078                 v.result_sign := r.b.negative;
1079                 v.result_exp := r.b.exponent;
1080                 v.fpscr(FPSCR_FR) := '0';
1081                 v.fpscr(FPSCR_FI) := '0';
1082                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1083                     -- Signalling NAN
1084                     v.fpscr(FPSCR_VXSNAN) := '1';
1085                     invalid := '1';
1086                 end if;
1087                 if r.b.class = FINITE then
1088                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1089                         -- integer already, no rounding required
1090                         arith_done := '1';
1091                     else
1092                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1093                         v.state := FRI_1;
1094                         v.round_mode := '1' & r.insn(7 downto 6);
1095                     end if;
1096                 else
1097                     arith_done := '1';
1098                 end if;
1099
1100             when DO_FRSP =>
1101                 -- r.opsel_a = AIN_B, r.shift = 0
1102                 v.result_class := r.b.class;
1103                 v.result_sign := r.b.negative;
1104                 v.result_exp := r.b.exponent;
1105                 v.fpscr(FPSCR_FR) := '0';
1106                 v.fpscr(FPSCR_FI) := '0';
1107                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1108                     -- Signalling NAN
1109                     v.fpscr(FPSCR_VXSNAN) := '1';
1110                     invalid := '1';
1111                 end if;
1112                 set_x := '1';
1113                 if r.b.class = FINITE then
1114                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1115                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1116                         v.state := ROUND_UFLOW;
1117                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1118                         v.state := ROUND_OFLOW;
1119                     else
1120                         v.shift := to_signed(-2, EXP_BITS);
1121                         v.state := ROUNDING;
1122                     end if;
1123                 else
1124                     arith_done := '1';
1125                 end if;
1126
1127             when DO_FCTI =>
1128                 -- instr bit 9: 1=dword 0=word
1129                 -- instr bit 8: 1=unsigned 0=signed
1130                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1131                 -- r.opsel_a = AIN_B
1132                 v.result_class := r.b.class;
1133                 v.result_sign := r.b.negative;
1134                 v.result_exp := r.b.exponent;
1135                 v.fpscr(FPSCR_FR) := '0';
1136                 v.fpscr(FPSCR_FI) := '0';
1137                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1138                     -- Signalling NAN
1139                     v.fpscr(FPSCR_VXSNAN) := '1';
1140                     invalid := '1';
1141                 end if;
1142
1143                 v.int_result := '1';
1144                 case r.b.class is
1145                     when ZERO =>
1146                         arith_done := '1';
1147                     when FINITE =>
1148                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1149                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1150                             v.state := INT_OFLOW;
1151                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1152                             -- integer already, no rounding required,
1153                             -- shift into final position
1154                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1155                             if r.insn(8) = '1' and r.b.negative = '1' then
1156                                 v.state := INT_OFLOW;
1157                             else
1158                                 v.state := INT_ISHIFT;
1159                             end if;
1160                         else
1161                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1162                             v.state := INT_SHIFT;
1163                         end if;
1164                     when INFINITY | NAN =>
1165                         v.state := INT_OFLOW;
1166                 end case;
1167
1168             when DO_FCFID =>
1169                 -- r.opsel_a = AIN_B
1170                 v.result_sign := '0';
1171                 if r.insn(8) = '0' and r.b.negative = '1' then
1172                     -- fcfid[s] with negative operand, set R = -B
1173                     opsel_ainv <= '1';
1174                     carry_in <= '1';
1175                     v.result_sign := '1';
1176                 end if;
1177                 v.result_class := r.b.class;
1178                 v.result_exp := to_signed(54, EXP_BITS);
1179                 v.fpscr(FPSCR_FR) := '0';
1180                 v.fpscr(FPSCR_FI) := '0';
1181                 if r.b.class = ZERO then
1182                     arith_done := '1';
1183                 else
1184                     v.state := FINISH;
1185                 end if;
1186
1187             when DO_FADD =>
1188                 -- fadd[s] and fsub[s]
1189                 -- r.opsel_a = AIN_A
1190                 v.result_sign := r.a.negative;
1191                 v.result_class := r.a.class;
1192                 v.result_exp := r.a.exponent;
1193                 v.fpscr(FPSCR_FR) := '0';
1194                 v.fpscr(FPSCR_FI) := '0';
1195                 v.use_a := '1';
1196                 v.use_b := '1';
1197                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1198                 if r.a.class = FINITE and r.b.class = FINITE then
1199                     v.is_subtract := not is_add;
1200                     v.add_bsmall := r.exp_cmp;
1201                     v.opsel_a := AIN_B;
1202                     if r.exp_cmp = '0' then
1203                         v.shift := r.a.exponent - r.b.exponent;
1204                         v.result_sign := r.b.negative xnor r.insn(1);
1205                         if r.a.exponent = r.b.exponent then
1206                             v.state := ADD_2;
1207                         else
1208                             v.longmask := '0';
1209                             v.state := ADD_SHIFT;
1210                         end if;
1211                     else
1212                         v.state := ADD_1;
1213                     end if;
1214                 else
1215                     if r.a.class = NAN or r.b.class = NAN then
1216                         v.state := NAN_RESULT;
1217                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1218                         -- invalid operation, construct QNaN
1219                         v.fpscr(FPSCR_VXISI) := '1';
1220                         qnan_result := '1';
1221                         arith_done := '1';
1222                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1223                         -- return -0 for rounding to -infinity
1224                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1225                         arith_done := '1';
1226                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1227                         -- result is A
1228                         v.opsel_a := AIN_A;
1229                         v.state := EXC_RESULT;
1230                     else
1231                         -- result is +/- B
1232                         v.opsel_a := AIN_B;
1233                         v.negate := not r.insn(1);
1234                         v.state := EXC_RESULT;
1235                     end if;
1236                 end if;
1237
1238             when DO_FMUL =>
                     -- Dispatch state for a multiply of operands A and C.  Records the
                     -- provisional result sign and class, clears FPSCR[FR]/FPSCR[FI] and
                     -- marks A and C as used, then either starts the multiplier,
                     -- detours through RENORM_A / RENORM_C when an operand's mantissa
                     -- bit 54 is clear, or resolves NaN / infinity / zero operands
                     -- without multiplying.
1239                 -- fmul[s]
1240                 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1241                 v.result_sign := r.a.negative xor r.c.negative;
1242                 v.result_class := r.a.class;
1243                 v.fpscr(FPSCR_FR) := '0';
1244                 v.fpscr(FPSCR_FI) := '0';
1245                 v.use_a := '1';
1246                 v.use_c := '1';
1247                 if r.a.class = FINITE and r.c.class = FINITE then
1248                     v.result_exp := r.a.exponent + r.c.exponent;
1249                     -- Renormalize denorm operands
1250                     if r.a.mantissa(54) = '0' then
1251                         v.state := RENORM_A;
1252                     elsif r.c.mantissa(54) = '0' then
1253                         v.state := RENORM_C;
1254                     else
                             -- both operands are normalized: start the multiplier in
                             -- this cycle; MULT_1 waits for multiply_to_f.valid
1255                         f_to_multiply.valid <= '1';
1256                         v.state := MULT_1;
1257                     end if;
1258                 else
1259                     if r.a.class = NAN or r.c.class = NAN then
1260                         v.state := NAN_RESULT;
1261                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1262                         (r.a.class = ZERO and r.c.class = INFINITY) then
1263                         -- invalid operation, construct QNaN
                             -- NOTE(review): neither arith_done nor v.state is set in this
                             -- branch; presumably the qnan_result handling outside this
                             -- case statement completes the instruction -- confirm.
1264                         v.fpscr(FPSCR_VXIMZ) := '1';
1265                         qnan_result := '1';
1266                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1267                         -- result is +/- A
                             -- (result_class and result_sign were already set above)
1268                         arith_done := '1';
1269                     else
1270                         -- r.c.class is ZERO or INFINITY
                             -- pass C through EXC_RESULT, negated when A is negative
1271                         v.opsel_a := AIN_C;
1272                         v.negate := r.a.negative;
1273                         v.state := EXC_RESULT;
1274                     end if;
1275                 end if;
1276
1277             when DO_FDIV =>
                     -- Dispatch state for a divide of A by B.  Records the provisional
                     -- sign (XOR of operand signs), class (that of A) and exponent
                     -- (difference of the operand exponents), resets the iteration
                     -- counter, then either starts the division sequence at DIV_2,
                     -- detours through RENORM_A / RENORM_B when a mantissa has bit 54
                     -- clear, or resolves NaN / infinity / zero operands directly.
1278                 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1279                 v.result_class := r.a.class;
1280                 v.fpscr(FPSCR_FR) := '0';
1281                 v.fpscr(FPSCR_FI) := '0';
1282                 v.use_a := '1';
1283                 v.use_b := '1';
1284                 v.result_sign := r.a.negative xor r.b.negative;
1285                 v.result_exp := r.a.exponent - r.b.exponent;
1286                 v.count := "00";
1287                 if r.a.class = FINITE and r.b.class = FINITE then
1288                     -- Renormalize denorm operands
1289                     if r.a.mantissa(54) = '0' then
1290                         v.state := RENORM_A;
1291                     elsif r.b.mantissa(54) = '0' then
1292                         v.state := RENORM_B;
1293                     else
                             -- v.first makes DIV_2 issue its multiply on entry
1294                         v.first := '1';
1295                         v.state := DIV_2;
1296                     end if;
1297                 else
1298                     if r.a.class = NAN or r.b.class = NAN then
1299                         v.state := NAN_RESULT;
1300                     elsif r.b.class = INFINITY then
1301                         if r.a.class = INFINITY then
                                 -- infinity / infinity: invalid operation
1302                             v.fpscr(FPSCR_VXIDI) := '1';
1303                             qnan_result := '1';
1304                         else
                                 -- zero or finite / infinity gives a zero result
1305                             v.result_class := ZERO;
1306                         end if;
1307                         arith_done := '1';
1308                     elsif r.b.class = ZERO then
1309                         if r.a.class = ZERO then
                                 -- zero / zero: invalid operation
1310                             v.fpscr(FPSCR_VXZDZ) := '1';
1311                             qnan_result := '1';
1312                         else
                                 -- zero_divide is raised only for a FINITE dividend;
                                 -- infinity / zero yields infinity without it
1313                             if r.a.class = FINITE then
1314                                 zero_divide := '1';
1315                             end if;
1316                             v.result_class := INFINITY;
1317                         end if;
1318                         arith_done := '1';
1319                     else -- r.b.class = FINITE, result_class = r.a.class
1320                         arith_done := '1';
1321                     end if;
1322                 end if;
1323
1324             when DO_FSEL =>
                     -- Select state: routes operand C to the result path when A is
                     -- zero, or is non-negative and not a NaN; otherwise routes B.
                     -- The chosen operand is then delivered via EXC_RESULT.
                     -- NOTE(review): this state clears FPSCR[FR] and FPSCR[FI];
                     -- confirm against the ISA whether fsel is meant to alter FPSCR.
1325                 v.fpscr(FPSCR_FR) := '0';
1326                 v.fpscr(FPSCR_FI) := '0';
1327                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1328                     v.opsel_a := AIN_C;
1329                 else
1330                     v.opsel_a := AIN_B;
1331                 end if;
                     -- presumably keeps a selected NaN operand from being modified
                     -- in EXC_RESULT -- confirm against that state
1332                 v.quieten_nan := '0';
1333                 v.state := EXC_RESULT;
1334
1335             when DO_FSQRT =>
                     -- Dispatch state for square root of B.  The provisional class and
                     -- sign are copied from B, FPSCR[FR]/FPSCR[FI] are cleared, and the
                     -- next step is chosen by B's class.
1336                 -- r.opsel_a = AIN_B
1337                 v.result_class := r.b.class;
1338                 v.result_sign := r.b.negative;
1339                 v.fpscr(FPSCR_FR) := '0';
1340                 v.fpscr(FPSCR_FI) := '0';
1341                 v.use_b := '1';
1342                 case r.b.class is
1343                     when FINITE =>
1344                         v.result_exp := r.b.exponent;
1345                         if r.b.negative = '1' then
                                 -- negative finite operand: invalid operation.
                                 -- NOTE(review): arith_done is not set in this branch
                                 -- (it is in the INFINITY case below); presumably the
                                 -- qnan_result handling outside this case statement
                                 -- completes the instruction -- confirm.
1346                             v.fpscr(FPSCR_VXSQRT) := '1';
1347                             qnan_result := '1';
1348                         elsif r.b.mantissa(54) = '0' then
                                 -- mantissa bit 54 clear: renormalize B first
1349                             v.state := RENORM_B;
1350                         elsif r.b.exponent(0) = '0' then
                                 -- even exponent: go straight to the iteration
1351                             v.state := SQRT_1;
1352                         else
                                 -- odd exponent: go via RENORM_B2 with r.shift = 1
1353                             v.shift := to_signed(1, EXP_BITS);
1354                             v.state := RENORM_B2;
1355                         end if;
1356                     when NAN =>
1357                         v.state := NAN_RESULT;
1358                     when ZERO =>
1359                         -- result is B
1360                         arith_done := '1';
1361                     when INFINITY =>
1362                         if r.b.negative = '1' then
1363                             v.fpscr(FPSCR_VXSQRT) := '1';
1364                             qnan_result := '1';
1365                         -- else result is B
1366                         end if;
1367                         arith_done := '1';
1368                 end case;
1369
1370             when DO_FRE =>
                     -- Dispatch state for the reciprocal estimate of B.  The provisional
                     -- class and sign are copied from B; a FINITE operand proceeds to
                     -- FRE_1 (via RENORM_B if mantissa bit 54 is clear) with the
                     -- exponent negated, while infinity and zero swap classes.
1371                 -- r.opsel_a = AIN_B
1372                 v.result_class := r.b.class;
1373                 v.result_sign := r.b.negative;
1374                 v.fpscr(FPSCR_FR) := '0';
1375                 v.fpscr(FPSCR_FI) := '0';
1376                 v.use_b := '1';
1377                 case r.b.class is
1378                     when FINITE =>
1379                         v.result_exp := - r.b.exponent;
1380                         if r.b.mantissa(54) = '0' then
1381                             v.state := RENORM_B;
1382                         else
1383                             v.state := FRE_1;
1384                         end if;
1385                     when NAN =>
1386                         v.state := NAN_RESULT;
1387                     when INFINITY =>
                             -- reciprocal of infinity is zero, keeping B's sign
1388                         v.result_class := ZERO;
1389                         arith_done := '1';
1390                     when ZERO =>
                             -- reciprocal of zero is infinity and flags zero_divide
1391                         v.result_class := INFINITY;
1392                         zero_divide := '1';
1393                         arith_done := '1';
1394                 end case;
1395
1396             when DO_FRSQRTE =>
                     -- Dispatch state for the reciprocal square root estimate of B.
                     -- The provisional class and sign are copied from B and r.shift is
                     -- preset to 1 before the next step is chosen by B's class.
1397                 -- r.opsel_a = AIN_B
1398                 v.result_class := r.b.class;
1399                 v.result_sign := r.b.negative;
1400                 v.fpscr(FPSCR_FR) := '0';
1401                 v.fpscr(FPSCR_FI) := '0';
1402                 v.use_b := '1';
1403                 v.shift := to_signed(1, EXP_BITS);
1404                 case r.b.class is
1405                     when FINITE =>
1406                         v.result_exp := r.b.exponent;
1407                         if r.b.negative = '1' then
                                 -- negative finite operand: invalid operation.
                                 -- NOTE(review): arith_done is not set in this branch;
                                 -- presumably the qnan_result handling outside this
                                 -- case statement completes the instruction -- confirm.
1408                             v.fpscr(FPSCR_VXSQRT) := '1';
1409                             qnan_result := '1';
1410                         elsif r.b.mantissa(54) = '0' then
1411                             v.state := RENORM_B;
1412                         elsif r.b.exponent(0) = '0' then
                                 -- even exponent: table lookup result is used directly
1413                             v.state := RSQRT_1;
1414                         else
                                 -- odd exponent: go via RENORM_B2 (r.shift = 1)
1415                             v.state := RENORM_B2;
1416                         end if;
1417                     when NAN =>
1418                         v.state := NAN_RESULT;
1419                     when INFINITY =>
1420                         if r.b.negative = '1' then
1421                             v.fpscr(FPSCR_VXSQRT) := '1';
1422                             qnan_result := '1';
1423                         else
                                 -- +infinity gives a zero result
1424                             v.result_class := ZERO;
1425                         end if;
1426                         arith_done := '1';
1427                     when ZERO =>
                             -- zero gives infinity with B's sign and flags zero_divide
1428                         v.result_class := INFINITY;
1429                         zero_divide := '1';
1430                         arith_done := '1';
1431                 end case;
1432
1433             when DO_FMADD =>
                     -- Dispatch state for the fused multiply-add family (A * C combined
                     -- with B).  is_add is the XOR of the three operand signs and
                     -- insn(1); v.is_subtract is its complement.  insn(2) is folded
                     -- into the result sign / negate computations below.
                     -- This state is also re-entered from RENORM_A2 / RENORM_C2 once
                     -- A and C have been renormalized.
1434                 -- fmadd, fmsub, fnmadd, fnmsub
1435                 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1436                 -- else AIN_B
1437                 v.result_sign := r.a.negative;
1438                 v.result_class := r.a.class;
1439                 v.result_exp := r.a.exponent;
1440                 v.fpscr(FPSCR_FR) := '0';
1441                 v.fpscr(FPSCR_FI) := '0';
1442                 v.use_a := '1';
1443                 v.use_b := '1';
1444                 v.use_c := '1';
1445                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1446                 if r.a.class = FINITE and r.c.class = FINITE and
1447                     (r.b.class = FINITE or r.b.class = ZERO) then
1448                     v.is_subtract := not is_add;
                         -- exponent of the product A * C
1449                     mulexp := r.a.exponent + r.c.exponent;
1450                     v.result_exp := mulexp;
1451                     -- Make sure A and C are normalized
1452                     if r.a.mantissa(54) = '0' then
1453                         v.state := RENORM_A;
1454                     elsif r.c.mantissa(54) = '0' then
1455                         v.state := RENORM_C;
1456                     elsif r.b.class = ZERO then
1457                         -- no addend, degenerates to multiply
1458                         v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1459                         f_to_multiply.valid <= '1';
1460                         v.is_multiply := '1';
1461                         v.state := MULT_1;
1462                     elsif r.madd_cmp = '0' then
1463                         -- addend is bigger, do multiply first
1464                         v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1465                         f_to_multiply.valid <= '1';
1466                         v.state := FMADD_1;
1467                     else
1468                         -- product is bigger, shift B right and use it as the
1469                         -- addend to the multiplier
                             -- (the +64 offset is removed again in FMADD_2)
1470                         v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1471                         -- for subtract, multiplier does B - A * C
1472                         v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1473                         v.result_exp := r.b.exponent;
1474                         v.state := FMADD_2;
1475                     end if;
1476                 else
1477                     if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1478                         v.state := NAN_RESULT;
1479                     elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1480                         (r.a.class = INFINITY and r.c.class = ZERO) then
1481                         -- invalid operation, construct QNaN
                             -- NOTE(review): arith_done is not set here; presumably
                             -- handled with qnan_result outside this case -- confirm.
1482                         v.fpscr(FPSCR_VXIMZ) := '1';
1483                         qnan_result := '1';
1484                     elsif r.a.class = INFINITY or r.c.class = INFINITY then
                             -- the product is infinite
1485                         if r.b.class = INFINITY and is_add = '0' then
1486                             -- invalid operation, construct QNaN
1487                             v.fpscr(FPSCR_VXISI) := '1';
1488                             qnan_result := '1';
1489                         else
1490                             -- result is infinity
1491                             v.result_class := INFINITY;
1492                             v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1493                             arith_done := '1';
1494                         end if;
1495                     else
1496                         -- Here A is zero, C is zero, or B is infinity
1497                         -- Result is +/-B in all of those cases
1498                         v.opsel_a := AIN_B;
1499                         if r.b.class /= ZERO or is_add = '1' then
1500                             v.negate := not (r.insn(1) xor r.insn(2));
1501                         else
1502                             -- have to be careful about rule for 0 - 0 result sign
                                 -- (round_mode = "11" selects the negative zero, as in
                                 -- the zero-result cases of ADD_3 and FMADD_6)
1503                             v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1504                         end if;
1505                         v.state := EXC_RESULT;
1506                     end if;
1507                 end if;
1508
1509             when RENORM_A =>
                     -- First renormalization step for operand A: asserts renormalize and
                     -- selects the operand RENORM_A2 expects on the A input next cycle
                     -- (AIN_C when insn(4) is set, otherwise AIN_B).
1510                 renormalize := '1';
1511                 v.state := RENORM_A2;
1512                 if r.insn(4) = '1' then
1513                     v.opsel_a := AIN_C;
1514                 else
1515                     v.opsel_a := AIN_B;
1516                 end if;
1517
1518             when RENORM_A2 =>
                     -- Second renormalization step for A: writes A back (set_a), takes
                     -- new_exp as the result exponent, then decides whether the second
                     -- operand also needs renormalizing or the operation can start.
1519                 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1520                 set_a := '1';
1521                 v.result_exp := new_exp;
1522                 if r.insn(4) = '1' then
1523                     if r.c.mantissa(54) = '1' then
1524                         if r.insn(3) = '0' or r.b.class = ZERO then
                                 -- plain multiply (or multiply-add with a zero addend):
                                 -- v.first makes MULT_1 start the multiplier
1525                             v.first := '1';
1526                             v.state := MULT_1;
1527                         else
                                 -- recompute the product-vs-addend comparison with the
                                 -- updated exponent, then re-enter DO_FMADD
1528                             v.madd_cmp := '0';
1529                             if new_exp + 1 >= r.b.exponent then
1530                                 v.madd_cmp := '1';
1531                             end if;
1532                             v.opsel_a := AIN_B;
1533                             v.state := DO_FMADD;
1534                         end if;
1535                     else
1536                         v.state := RENORM_C;
1537                     end if;
1538                 else
1539                     if r.b.mantissa(54) = '1' then
1540                         v.first := '1';
1541                         v.state := DIV_2;
1542                     else
1543                         v.state := RENORM_B;
1544                     end if;
1545                 end if;
1546
1547             when RENORM_B =>
                     -- First renormalization step for operand B: asserts renormalize
                     -- and passes r.is_sqrt on as renorm_sqrt, then continues in
                     -- RENORM_B2.
1548                 renormalize := '1';
1549                 renorm_sqrt := r.is_sqrt;
1550                 v.state := RENORM_B2;
1551
1552             when RENORM_B2 =>
                     -- Second renormalization step for B: writes B back (set_b) and
                     -- updates the result exponent -- by adding r.shift when r.is_sqrt
                     -- is '0', otherwise by taking new_exp -- then selects B on the
                     -- A input and moves on to LOOKUP.
1553                 set_b := '1';
1554                 if r.is_sqrt = '0' then
1555                     v.result_exp := r.result_exp + r.shift;
1556                 else
1557                     v.result_exp := new_exp;
1558                 end if;
1559                 v.opsel_a := AIN_B;
1560                 v.state := LOOKUP;
1561
1562             when RENORM_C =>
                     -- First renormalization step for operand C: asserts renormalize
                     -- and continues in RENORM_C2.
1563                 renormalize := '1';
1564                 v.state := RENORM_C2;
1565
1566             when RENORM_C2 =>
                     -- Second renormalization step for C: writes C back (set_c), takes
                     -- new_exp as the result exponent, then either starts the multiply
                     -- (insn(3) clear or zero addend) or re-enters DO_FMADD with
                     -- madd_cmp recomputed from the updated exponent.
1567                 set_c := '1';
1568                 v.result_exp := new_exp;
1569                 if r.insn(3) = '0' or r.b.class = ZERO then
1570                     v.first := '1';
1571                     v.state := MULT_1;
1572                 else
1573                     v.madd_cmp := '0';
1574                     if new_exp + 1 >= r.b.exponent then
1575                         v.madd_cmp := '1';
1576                     end if;
1577                     v.opsel_a := AIN_B;
1578                     v.state := DO_FMADD;
1579                 end if;
1580
1581             when ADD_1 =>
                     -- Entered from DO_FADD when r.exp_cmp = '1'.  Sets up the shift
                     -- amount as B's exponent minus A's, adopts B's exponent as the
                     -- result exponent, and continues in ADD_SHIFT.
1582                 -- transferring B to R
1583                 v.shift := r.b.exponent - r.a.exponent;
1584                 v.result_exp := r.b.exponent;
1585                 v.longmask := '0';
1586                 v.state := ADD_SHIFT;
1587
1588             when ADD_SHIFT =>
                     -- Alignment step shared by the add and multiply-add paths: selects
                     -- the shifter output for R, records s_nz in X (with set_x
                     -- asserted), restores longmask from r.single_prec, and picks the
                     -- operand that ADD_2 will combine with R.
1589                 -- r.shift = - exponent difference, r.longmask = 0
1590                 opsel_r <= RES_SHIFT;
1591                 v.x := s_nz;
1592                 set_x := '1';
1593                 v.longmask := r.single_prec;
1594                 if r.add_bsmall = '1' then
1595                     v.opsel_a := AIN_A;
1596                 else
1597                     v.opsel_a := AIN_B;
1598                 end if;
1599                 v.state := ADD_2;
1600
1601             when ADD_2 =>
                     -- Add/subtract step: R is selected as the B-side input and is
                     -- inverted when r.is_subtract is set.  The carry-in is applied
                     -- for a subtract only when r.x is clear.  r.shift is preset to -1
                     -- for ADD_3.
1602                 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1603                 opsel_b <= BIN_R;
1604                 opsel_binv <= r.is_subtract;
1605                 carry_in <= r.is_subtract and not r.x;
1606                 v.shift := to_signed(-1, EXP_BITS);
1607                 v.state := ADD_3;
1608
1609             when ADD_3 =>
                     -- Post-add classification of R.  In priority order: a set bit 63
                     -- means the difference came out negative and is negated; a set
                     -- bit 55 means the sum overflowed and is shifted right; a set
                     -- bit 54 goes straight to rounding; an all-zero R yields a ZERO
                     -- result; anything else is sent to NORMALIZE.
1610                 -- check for overflow or negative result (can't get both)
1611                 -- r.shift = -1
1612                 if r.r(63) = '1' then
1613                     -- result is opposite sign to expected
1614                     v.result_sign := not r.result_sign;
1615                     opsel_ainv <= '1';
1616                     carry_in <= '1';
1617                     v.state := FINISH;
1618                 elsif r.r(55) = '1' then
1619                     -- sum overflowed, shift right
1620                     opsel_r <= RES_SHIFT;
1621                     set_x := '1';
1622                     v.shift := to_signed(-2, EXP_BITS);
1623                     if exp_huge = '1' then
1624                         v.state := ROUND_OFLOW;
1625                     else
1626                         v.state := ROUNDING;
1627                     end if;
1628                 elsif r.r(54) = '1' then
1629                     set_x := '1';
1630                     v.shift := to_signed(-2, EXP_BITS);
1631                     v.state := ROUNDING;
1632                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1633                     -- r.x must be zero at this point
1634                     v.result_class := ZERO;
1635                     if r.is_subtract = '1' then
1636                         -- set result sign depending on rounding mode
                             -- (negative only when round_mode = "11")
1637                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1638                     end if;
1639                     arith_done := '1';
1640                 else
1641                     renormalize := '1';
1642                     v.state := NORMALIZE;
1643                 end if;
1644
1645             when CMP_1 =>
                     -- First compare step: selects R as the inverted B-side input with
                     -- carry-in set (presumably forming A - R on the adder -- confirm
                     -- against the datapath), then continues in CMP_2.
1646                 -- r.opsel_a = AIN_A
1647                 opsel_b <= BIN_R;
1648                 opsel_binv <= '1';
1649                 carry_in <= '1';
1650                 v.state := CMP_2;
1651
1652             when CMP_2 =>
                     -- Second compare step: derives the 4-bit cr_result from the sign
                     -- of the difference in R and from A's sign, copies it into
                     -- FPSCR[FL..FU], and completes the instruction.
1653                 if r.r(63) = '1' then
1654                     -- A is smaller in magnitude
1655                     v.cr_result := not r.a.negative & r.a.negative & "00";
1656                 elsif (r_hi_nz or r_lo_nz) = '0' then
                         -- difference is zero
1657                     v.cr_result := "0010";
1658                 else
                         -- remaining case: difference is non-zero and bit 63 is clear
1659                     v.cr_result := r.a.negative & not r.a.negative & "00";
1660                 end if;
1661                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1662                 v.instr_done := '1';
1663                 v.state := IDLE;
1664
1665             when MULT_1 =>
                     -- Waits for the multiplier.  The multiply is issued here only when
                     -- r.first is set (the RENORM_*2 states set it; DO_FMUL and
                     -- DO_FMADD issue the multiply themselves).  The product is
                     -- selected for R and the state advances to FINISH once
                     -- multiply_to_f.valid is seen.
1666                 f_to_multiply.valid <= r.first;
1667                 opsel_r <= RES_MULT;
1668                 if multiply_to_f.valid = '1' then
1669                     v.state := FINISH;
1670                 end if;
1671
1672             when FMADD_1 =>
                     -- Multiply-add path where the addend B dominates: waits for the
                     -- product while capturing it in R and S, and prepares the shift
                     -- (product exponent minus addend exponent) used by ADD_SHIFT to
                     -- align the product with B.
1673                 -- Addend is bigger here
1674                 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1675                 -- note v.shift is at most -2 here
1676                 v.shift := r.result_exp - r.b.exponent;
1677                 opsel_r <= RES_MULT;
1678                 opsel_s <= S_MULT;
1679                 set_s := '1';
1680                 f_to_multiply.valid <= r.first;
1681                 if multiply_to_f.valid = '1' then
1682                     v.longmask := '0';
1683                     v.state := ADD_SHIFT;
1684                 end if;
1685
1686             when FMADD_2 =>
                     -- Multiply-add path where the product may dominate, step 1:
                     -- loads S from the shifter and removes the +64 offset that
                     -- DO_FMADD added to r.shift.
1687                 -- Product is potentially bigger here
1688                 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1689                 set_s := '1';
1690                 opsel_s <= S_SHIFT;
1691                 v.shift := r.shift - to_signed(64, EXP_BITS);
1692                 v.state := FMADD_3;
1693
1694             when FMADD_3 =>
                     -- Step 2: loads R from the shifter and sets v.first so that
                     -- FMADD_4 issues the multiply on entry.
1695                 -- r.shift = addend exp - product exp
1696                 opsel_r <= RES_SHIFT;
1697                 v.first := '1';
1698                 v.state := FMADD_4;
1699
1700             when FMADD_4 =>
                     -- Step 3: issues the multiply with the MULADD_RS addend selection
                     -- (inverted when r.is_subtract is set), captures the result in R
                     -- and S, and moves on to FMADD_5 when the multiplier reports
                     -- valid.
1701                 msel_add <= MULADD_RS;
1702                 f_to_multiply.valid <= r.first;
1703                 msel_inv <= r.is_subtract;
1704                 opsel_r <= RES_MULT;
1705                 opsel_s <= S_MULT;
1706                 set_s := '1';
1707                 if multiply_to_f.valid = '1' then
1708                     v.state := FMADD_5;
1709                 end if;
1710
1711             when FMADD_5 =>
                     -- Step 4: if R bit 63 is set the combined value is negative, so
                     -- the result sign is flipped and R and S are negated.  The
                     -- carry into R is applied only when S and X are both zero.
                     -- r.shift is preset to 56 for FMADD_6.
1712                 -- negate R:S:X if negative
1713                 if r.r(63) = '1' then
1714                     v.result_sign := not r.result_sign;
1715                     opsel_ainv <= '1';
1716                     carry_in <= not (s_nz or r.x);
1717                     opsel_s <= S_NEG;
1718                     set_s := '1';
1719                 end if;
1720                 v.shift := to_signed(56, EXP_BITS);
1721                 v.state := FMADD_6;
1722
1723             when FMADD_6 =>
                     -- Step 5: classifies the result.  An all-zero R with zero S gives a
                     -- ZERO result; an all-zero R with non-zero S shifts S into R and
                     -- repeats this state; R(56 downto 54) = "001" goes directly to
                     -- FINISH; anything else goes through NORMALIZE.
1724                 -- r.shift = 56 (or 0, but only if r is now nonzero)
1725                 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1726                     if s_nz = '0' then
1727                         -- must be a subtraction, and r.x must be zero
1728                         v.result_class := ZERO;
1729                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1730                         arith_done := '1';
1731                     else
1732                         -- R is all zeroes but there are non-zero bits in S
1733                         -- so shift them into R and set S to 0
1734                         opsel_r <= RES_SHIFT;
1735                         set_s := '1';
1736                         -- stay in state FMADD_6
1737                     end if;
1738                 elsif r.r(56 downto 54) = "001" then
1739                     v.state := FINISH;
1740                 else
1741                     renormalize := '1';
1742                     v.state := NORMALIZE;
1743                 end if;
1744
1745             when LOOKUP =>
                     -- One-cycle wait for the table lookup on B, entered from
                     -- RENORM_B2.  Sets v.first and dispatches on the instruction
                     -- bits: insn(4) = '0' selects DIV_2 (insn(3) = '0') or SQRT_1;
                     -- insn(4) = '1' selects FRE_1 (insn(2) = '0') or RSQRT_1.
1746                 -- r.opsel_a = AIN_B
1747                 -- wait one cycle for inverse_table[B] lookup
1748                 v.first := '1';
1749                 if r.insn(4) = '0' then
1750                     if r.insn(3) = '0' then
1751                         v.state := DIV_2;
1752                     else
1753                         v.state := SQRT_1;
1754                     end if;
1755                 elsif r.insn(2) = '0' then
1756                     v.state := FRE_1;
1757                 else
1758                     v.state := RSQRT_1;
1759                 end if;
1760
1761             when DIV_2 =>
                     -- Division iteration, first half.  On the first pass (r.count = 0)
                     -- the second multiplier input is the lookup table value;
                     -- afterwards it is P.  The multiply is issued when r.first is
                     -- set; when the result arrives the iteration counter is
                     -- incremented and DIV_3 follows.
1762                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1763                 msel_1 <= MUL1_B;
1764                 msel_add <= MULADD_CONST;
1765                 msel_inv <= '1';
1766                 if r.count = 0 then
1767                     msel_2 <= MUL2_LUT;
1768                 else
1769                     msel_2 <= MUL2_P;
1770                 end if;
1771                 set_y := r.first;
1772                 pshift := '1';
1773                 f_to_multiply.valid <= r.first;
1774                 if multiply_to_f.valid = '1' then
1775                     v.first := '1';
1776                     v.count := r.count + 1;
1777                     v.state := DIV_3;
1778                 end if;
1779
1780             when DIV_3 =>
                     -- Division iteration, second half.  When the product arrives,
                     -- loops back to DIV_2 until r.count reaches 3 (DIV_2 increments
                     -- it once per pass), then proceeds to DIV_4.
1781                 -- compute Y = P = P * Y
1782                 msel_1 <= MUL1_Y;
1783                 msel_2 <= MUL2_P;
1784                 f_to_multiply.valid <= r.first;
1785                 pshift := '1';
1786                 if multiply_to_f.valid = '1' then
1787                     v.first := '1';
1788                     if r.count = 3 then
1789                         v.state := DIV_4;
1790                     else
1791                         v.state := DIV_2;
1792                     end if;
1793                 end if;
1794
1795             when DIV_4 =>
                     -- Multiplies A by the refined reciprocal to form the quotient;
                     -- the product is loaded into R when the multiplier reports
                     -- valid, and DIV_5 follows.
1796                 -- compute R = P = A * Y (quotient)
1797                 msel_1 <= MUL1_A;
1798                 msel_2 <= MUL2_P;
1799                 set_y := r.first;
1800                 f_to_multiply.valid <= r.first;
1801                 pshift := '1';
1802                 if multiply_to_f.valid = '1' then
1803                     opsel_r <= RES_MULT;
1804                     v.first := '1';
1805                     v.state := DIV_5;
1806                 end if;
1807
1808             when DIV_5 =>
                     -- Forms the remainder with a multiply-subtract (addend A, inverted
                     -- product of B and R), then continues in DIV_6 once the
                     -- multiplier reports valid.
1809                 -- compute P = A - B * R (remainder)
1810                 msel_1 <= MUL1_B;
1811                 msel_2 <= MUL2_R;
1812                 msel_add <= MULADD_A;
1813                 msel_inv <= '1';
1814                 f_to_multiply.valid <= r.first;
1815                 if multiply_to_f.valid = '1' then
1816                     v.state := DIV_6;
1817                 end if;
1818
1819             when DIV_6 =>
                     -- Final quotient correction: uses the remainder-vs-B comparison
                     -- flags to decide whether to increment the quotient, and sets X
                     -- to record whether any remainder is left over.
1820                 -- test if remainder is 0 or >= B
1821                 if pcmpb_lt = '1' then
1822                     -- quotient is correct, set X if remainder non-zero
1823                     v.x := r.p(58) or px_nz;
1824                 else
1825                     -- quotient needs to be incremented by 1
                         -- (X stays clear only when the remainder equals B exactly)
1826                     carry_in <= '1';
1827                     v.x := not pcmpb_eq;
1828                 end if;
1829                 v.state := FINISH;
1830
1831             when FRE_1 =>
                     -- Reciprocal estimate: loads R through the RES_MISC path with
                     -- misc_sel = "0111" (presumably the lookup table output, as in
                     -- SQRT_1 -- confirm against the misc multiplexer), sets
                     -- r.shift to 1 and hands over to NORMALIZE.
1832                 opsel_r <= RES_MISC;
1833                 misc_sel <= "0111";
1834                 v.shift := to_signed(1, EXP_BITS);
1835                 v.state := NORMALIZE;
1836
1837             when FTDIV_1 =>
                     -- Exponent range test step: cr_result(1) is set when exp_tiny or
                     -- exp_huge is asserted.  The instruction completes if either
                     -- flag is set, if A is zero, or if r.first is clear; otherwise
                     -- r.shift is loaded with A's exponent and doing_ftdiv is set to
                     -- "10".  No next state is assigned in that case, so this state
                     -- is presumably re-evaluated on the following cycle -- confirm
                     -- against the default assignments outside this case statement.
1838                 v.cr_result(1) := exp_tiny or exp_huge;
1839                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1840                     v.instr_done := '1';
1841                     v.state := IDLE;
1842                 else
1843                     v.shift := r.a.exponent;
1844                     v.doing_ftdiv := "10";
1845                 end if;
1846
1847             when RSQRT_1 =>
                     -- Reciprocal square root estimate: loads R through the RES_MISC
                     -- path with misc_sel = "0111", sets the result exponent to the
                     -- negation of B's exponent arithmetically shifted right by one
                     -- (i.e. halved, sign-preserving), and hands over to NORMALIZE.
1848                 opsel_r <= RES_MISC;
1849                 misc_sel <= "0111";
1850                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1851                 v.result_exp := - sqrt_exp;
1852                 v.shift := to_signed(1, EXP_BITS);
1853                 v.state := NORMALIZE;
1854
1855             when SQRT_1 =>
                     -- Start of the square root iteration.  Issues the first multiply
                     -- unconditionally, resets the iteration counter, and presets
                     -- r.shift to -1 for the right shift done in SQRT_2.
1856                 -- put invsqr[B] in R and compute P = invsqr[B] * B
1857                 -- also transfer B (in R) to A
1858                 set_a := '1';
1859                 opsel_r <= RES_MISC;
1860                 misc_sel <= "0111";
1861                 msel_1 <= MUL1_B;
1862                 msel_2 <= MUL2_LUT;
1863                 f_to_multiply.valid <= '1';
1864                 v.shift := to_signed(-1, EXP_BITS);
1865                 v.count := "00";
1866                 v.state := SQRT_2;
1867
1868             when SQRT_2 =>
                     -- Loads R from the shifter and sets v.first for SQRT_3.
1869                 -- shift R right one place
1870                 -- not expecting multiplier result yet
1871                 -- r.shift = -1
1872                 opsel_r <= RES_SHIFT;
1873                 v.first := '1';
1874                 v.state := SQRT_3;
1875
1876             when SQRT_3 =>
                     -- Captures R into Y on the first cycle in this state (set_y follows
                     -- r.first) and waits for the product issued in SQRT_1; no new
                     -- multiply is issued here.
1877                 -- put R into Y, wait for product from multiplier
1878                 msel_2 <= MUL2_R;
1879                 set_y := r.first;
1880                 pshift := '1';
1881                 if multiply_to_f.valid = '1' then
1882                     -- put result into R
1883                     opsel_r <= RES_MULT;
1884                     v.first := '1';
1885                     v.state := SQRT_4;
1886                 end if;
1887
1888             when SQRT_4 =>
                     -- Head of the refinement loop (SQRT_4 .. SQRT_7): issues a
                     -- multiply-subtract of Y and P against the constant addend and
                     -- waits for its result.
1889                 -- compute 1.5 - Y * P
1890                 msel_1 <= MUL1_Y;
1891                 msel_2 <= MUL2_P;
1892                 msel_add <= MULADD_CONST;
1893                 msel_inv <= '1';
1894                 f_to_multiply.valid <= r.first;
1895                 pshift := '1';
1896                 if multiply_to_f.valid = '1' then
1897                     v.state := SQRT_5;
1898                 end if;
1899
1900             when SQRT_5 =>
                     -- Issues the Y * P multiply unconditionally and moves on without
                     -- waiting; SQRT_6 issues the next multiply behind it.
1901                 -- compute Y = Y * P
1902                 msel_1 <= MUL1_Y;
1903                 msel_2 <= MUL2_P;
1904                 f_to_multiply.valid <= '1';
1905                 v.first := '1';
1906                 v.state := SQRT_6;
1907
1906             when SQRT_6 =>
                     -- Issues the R * P multiply (when r.first is set) and waits for a
                     -- valid result from the multiplier; per the SQRT_7 comments this
                     -- is the result of the multiply issued in SQRT_5.
1907                 -- pipeline in R = R * P
1908                 msel_1 <= MUL1_R;
1909                 msel_2 <= MUL2_P;
1910                 f_to_multiply.valid <= r.first;
1911                 pshift := '1';
1912                 if multiply_to_f.valid = '1' then
1913                     v.first := '1';
1914                     v.state := SQRT_7;
1915                 end if;
1918
1917             when SQRT_7 =>
                     -- End of one refinement pass: stores the first product in Y, loads
                     -- the second product into R when it arrives, and increments the
                     -- pass counter.  Loops back to SQRT_4 while r.count < 2 (three
                     -- passes in total), then continues to SQRT_8.
1918                 -- first multiply is done, put result in Y
1919                 msel_2 <= MUL2_P;
1920                 set_y := r.first;
1921                 -- wait for second multiply (should be here already)
1922                 pshift := '1';
1923                 if multiply_to_f.valid = '1' then
1924                     -- put result into R
1925                     opsel_r <= RES_MULT;
1926                     v.first := '1';
1927                     v.count := r.count + 1;
1928                     if r.count < 2 then
1929                         v.state := SQRT_4;
1930                     else
                             -- (v.first is already '1' from the assignment above;
                             -- the repeated assignment below is redundant)
1931                         v.first := '1';
1932                         v.state := SQRT_8;
1933                     end if;
1934                 end if;
1937
1936             when SQRT_8 =>
                     -- Computes the residual of the current root estimate with a
                     -- multiply-subtract (addend A, inverted product R * R) and
                     -- waits for the result before moving to SQRT_9.
1937                 -- compute P = A - R * R, which can be +ve or -ve
1938                 -- we arranged for B to be put into A earlier
1939                 msel_1 <= MUL1_R;
1940                 msel_2 <= MUL2_R;
1941                 msel_add <= MULADD_A;
1942                 msel_inv <= '1';
1943                 pshift := '1';
1944                 f_to_multiply.valid <= r.first;
1945                 if multiply_to_f.valid = '1' then
1946                     v.first := '1';
1947                     v.state := SQRT_9;
1948                 end if;
1951
1952             when SQRT_9 =>
1953                 -- compute P = P * Y
1954                 -- since Y is an estimate of 1/sqrt(B), this makes P an
1955                 -- estimate of the adjustment needed to R.  Since the error
1956                 -- could be negative and we have an unsigned multiplier, the
1957                 -- upper bits can be wrong, but it turns out the lowest 8 bits
1958                 -- are correct and are all we need (given 3 iterations through
1959                 -- SQRT_4 to SQRT_7).
                     -- The multiply is issued while r.first is set; the state
                     -- advances once the multiplier reports a valid product.
1960                 msel_1 <= MUL1_Y;
1961                 msel_2 <= MUL2_P;
1962                 pshift := '1';          -- product bits 119..56 are captured into P
1963                 f_to_multiply.valid <= r.first;
1964                 if multiply_to_f.valid = '1' then
1965                     v.state := SQRT_10;
1966                 end if;
1967
1968             when SQRT_10 =>
1969                 -- Add the bottom 8 bits of P, sign-extended,
1970                 -- divided by 4, onto R.
1971                 -- The division by 4 is because R is 10.54 format
1972                 -- whereas P is 8.56 format.
                     -- BIN_PS6 feeds r.p(7 downto 2), sign-extended to 64 bits,
                     -- into the adder's B input.  The A input and result
                     -- selection are not set here; presumably the defaults
                     -- (outside this view) select R and the adder sum.
1973                 opsel_b <= BIN_PS6;
                     -- Halve B's exponent: shift right by one bit, replicating
                     -- the sign bit.
1974                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1975                 v.result_exp := sqrt_exp;
                     -- Shift count of 1 is used by SQRT_11 to form 2 * R + 1.
1976                 v.shift := to_signed(1, EXP_BITS);
1977                 v.first := '1';
1978                 v.state := SQRT_11;
1979
1980             when SQRT_11 =>
1981                 -- compute P = A - R * R (remainder)
1982                 -- also put 2 * R + 1 into B for comparison with P
                     -- With r.shift = 1 (set in SQRT_10) and shiftin = '1', the
                     -- shifter input is R with a '1' forced into the bit just
                     -- below its LSB, so shift_res is presumably 2 * R + 1
                     -- (shifter_64 is defined elsewhere).  set_b stores
                     -- shift_res into B's mantissa on the first cycle only.
                     -- pshift stays at its default here, which presumably makes
                     -- P take product bits 63..0 (default is outside this view).
1983                 msel_1 <= MUL1_R;
1984                 msel_2 <= MUL2_R;
1985                 msel_add <= MULADD_A;
1986                 msel_inv <= '1';
1987                 f_to_multiply.valid <= r.first;
1988                 shiftin := '1';
1989                 set_b := r.first;
1990                 if multiply_to_f.valid = '1' then
1991                     v.state := SQRT_12;
1992                 end if;
1993
1994             when SQRT_12 =>
1995                 -- test if remainder is 0 or >= B = 2*R + 1
                     -- pcmpb_lt / pcmpb_eq are comparison results computed
                     -- outside this view (presumably P < B and P = B).
1996                 if pcmpb_lt = '1' then
1997                     -- square root is correct, set X if remainder non-zero
1998                     v.x := r.p(58) or px_nz;
1999                 else
2000                     -- square root needs to be incremented by 1
                         -- (carry_in adds 1 in the adder; the result is exact
                         -- only if the remainder equalled B)
2001                     carry_in <= '1';
2002                     v.x := not pcmpb_eq;
2003                 end if;
2004                 v.state := FINISH;
2005
2006             when INT_SHIFT =>
2007                 -- r.shift = b.exponent - 52
                     -- Load R with the shifter output.  set_x records in X
                     -- whether any A-input bits selected by the shift mask are
                     -- non-zero.  The next shift amount (-2) is set up for
                     -- INT_ROUND.
2008                 opsel_r <= RES_SHIFT;
2009                 set_x := '1';
2010                 v.state := INT_ROUND;
2011                 v.shift := to_signed(-2, EXP_BITS);
2012
2013             when INT_ROUND =>
2014                 -- r.shift = -2
                     -- Shift R by r.shift and compute the rounding decision for
                     -- the integer conversion.  fp_rounding is called with '0'
                     -- for its single-precision argument, and its 2-bit result
                     -- is written to FPSCR FR (upper bit) and FI (lower bit).
2015                 opsel_r <= RES_SHIFT;
2016                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2017                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2018                 -- Check for negative values that don't round to 0 for fcti*u*
                     -- (r.insn(8) = '1' identifies the unsigned variants, per the
                     -- case labels in INT_FINAL)
2019                 if r.insn(8) = '1' and r.result_sign = '1' and
2020                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2021                     v.state := INT_OFLOW;
2022                 else
2023                     v.state := INT_FINAL;
2024                 end if;
2025
2026             when INT_ISHIFT =>
2027                 -- r.shift = b.exponent - 54;
                     -- Load R with the shifter output and go straight to
                     -- INT_FINAL; unlike INT_SHIFT there is no rounding step
                     -- and X is not updated.
2028                 opsel_r <= RES_SHIFT;
2029                 v.state := INT_FINAL;
2030
2031             when INT_FINAL =>
2032                 -- Negate if necessary, and increment for rounding if needed
                     -- Assuming the adder inputs are R and zero here (selected
                     -- outside this view): a positive result gets R + FR; a
                     -- negative result gets (not R) + (not FR), i.e. -R when
                     -- FR = '0' and -(R + 1) when FR = '1'.
2033                 opsel_ainv <= r.result_sign;
2034                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2035                 -- Check for possible overflows
                     -- need_check flags magnitudes close enough to the limit of
                     -- the destination type that INT_CHECK must inspect the
                     -- final sum.
2036                 case r.insn(9 downto 8) is
2037                     when "00" =>        -- fctiw[z]
2038                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
2039                     when "01" =>        -- fctiwu[z]
2040                         need_check := r.r(31);
2041                     when "10" =>        -- fctid[z]
2042                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
2043                     when others =>      -- fctidu[z]
2044                         need_check := r.r(63);
2045                 end case;
2046                 if need_check = '1' then
2047                     v.state := INT_CHECK;
2048                 else
                         -- No overflow possible: an inexact conversion (FI set)
                         -- raises XX, and the instruction completes.
2049                     if r.fpscr(FPSCR_FI) = '1' then
2050                         v.fpscr(FPSCR_XX) := '1';
2051                     end if;
2052                     arith_done := '1';
2053                 end if;
2054
2055             when INT_CHECK =>
                     -- Inspect the sign bit of the converted integer to detect
                     -- overflow: bit 31 for the word forms (r.insn(9) = '0'),
                     -- bit 63 for the doubleword forms.
2056                 if r.insn(9) = '0' then
2057                     msb := r.r(31);
2058                 else
2059                     msb := r.r(63);
2060                 end if;
                     -- misc_sel "1xxs" picks the saturated value for this
                     -- instruction and sign (see the result mux).
2061                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
                     -- Signed forms overflow if the MSB disagrees with the
                     -- expected sign; unsigned forms if the MSB is clear.
2062                 if (r.insn(8) = '0' and msb /= r.result_sign) or
2063                     (r.insn(8) = '1' and msb /= '1') then
2064                     opsel_r <= RES_MISC;
2065                     v.fpscr(FPSCR_VXCVI) := '1';
2066                     invalid := '1';
2067                 else
2068                     if r.fpscr(FPSCR_FI) = '1' then
2069                         v.fpscr(FPSCR_XX) := '1';
2070                     end if;
2071                 end if;
2072                 arith_done := '1';
2073
2074             when INT_OFLOW =>
                     -- Conversion result is out of range: return the saturated
                     -- value chosen by misc_sel, set VXCVI and flag the
                     -- operation as invalid.
2075                 opsel_r <= RES_MISC;
2076                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2077                 if r.b.class = NAN then
                         -- a NaN operand always selects the "max negative" entry
2078                     misc_sel(0) <= '1';
2079                 end if;
2080                 v.fpscr(FPSCR_VXCVI) := '1';
2081                 invalid := '1';
2082                 arith_done := '1';
2083
2084             when FRI_1 =>
2085                 -- r.shift = b.exponent - 52
                     -- Load R with the shifter output, recording in X whether
                     -- any A-input bits selected by the shift mask are non-zero,
                     -- then hand over to the common ROUNDING state with the
                     -- shift amount it expects (-2).
2086                 opsel_r <= RES_SHIFT;
2087                 set_x := '1';
2088                 v.shift := to_signed(-2, EXP_BITS);
2089                 v.state := ROUNDING;
2090
2091             when FINISH =>
                     -- Common tail for arithmetic results held in R: decide
                     -- whether R must be normalized and route to the correct
                     -- rounding / underflow / overflow state.
2092                 if r.is_multiply = '1' and px_nz = '1' then
2093                     v.x := '1';
2094                 end if;
                     -- Normalized means bits 63..55 are zero and bit 54 is one.
2095                 if r.r(63 downto 54) /= "0000000001" then
                         -- renormalize loads v.shift with clz(R) - 9 (computed
                         -- after the state case) for use in NORMALIZE
2096                     renormalize := '1';
2097                     v.state := NORMALIZE;
2098                 else
2099                     set_x := '1';
2100                     if exp_tiny = '1' then
2101                         v.shift := new_exp - min_exp;
2102                         v.state := ROUND_UFLOW;
2103                     elsif exp_huge = '1' then
2104                         v.state := ROUND_OFLOW;
2105                     else
2106                         v.shift := to_signed(-2, EXP_BITS);
2107                         v.state := ROUNDING;
2108                     end if;
2109                 end if;
2110
2111             when NORMALIZE =>
2112                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2113                 -- r.shift = clz(r.r) - 9
                     -- After the shift, dispatch on the exponent range exactly
                     -- as FINISH does for an already-normalized value.
2114                 opsel_r <= RES_SHIFT;
2115                 set_x := '1';
2116                 if exp_tiny = '1' then
2117                     v.shift := new_exp - min_exp;
2118                     v.state := ROUND_UFLOW;
2119                 elsif exp_huge = '1' then
2120                     v.state := ROUND_OFLOW;
2121                 else
2122                     v.shift := to_signed(-2, EXP_BITS);
2123                     v.state := ROUNDING;
2124                 end if;
2125
2126             when ROUND_UFLOW =>
2127                 -- r.shift = - amount by which exponent underflows
                     -- v.tiny is remembered so that ROUNDING can set UX if the
                     -- result also turns out to be inexact.
2128                 v.tiny := '1';
2129                 if r.fpscr(FPSCR_UE) = '0' then
2130                     -- disabled underflow exception case
2131                     -- have to denormalize before rounding
2132                     opsel_r <= RES_SHIFT;
2133                     set_x := '1';
2134                     v.shift := to_signed(-2, EXP_BITS);
2135                     v.state := ROUNDING;
2136                 else
2137                     -- enabled underflow exception case
2138                     -- if denormalized, have to normalize before rounding
                         -- UX is set unconditionally and the exponent is
                         -- adjusted upward by bias_exp.
2139                     v.fpscr(FPSCR_UX) := '1';
2140                     v.result_exp := r.result_exp + bias_exp;
2141                     if r.r(54) = '0' then
2142                         renormalize := '1';
2143                         v.state := NORMALIZE;
2144                     else
2145                         v.shift := to_signed(-2, EXP_BITS);
2146                         v.state := ROUNDING;
2147                     end if;
2148                 end if;
2149
2150             when ROUND_OFLOW =>
                     -- Exponent is too large for the destination format: OX is
                     -- always set; what happens next depends on FPSCR[OE].
2151                 v.fpscr(FPSCR_OX) := '1';
2152                 if r.fpscr(FPSCR_OE) = '0' then
2153                     -- disabled overflow exception
2154                     -- result depends on rounding mode
2155                     v.fpscr(FPSCR_XX) := '1';
2156                     v.fpscr(FPSCR_FI) := '1';
                         -- Infinity is returned for mode "00", or when bit 1 of
                         -- the mode is set and bit 0 equals the result sign
                         -- (presumably round-to-nearest and rounding toward the
                         -- infinity of the result's sign -- confirm against the
                         -- encoding of r.round_mode).
2157                     if r.round_mode(1 downto 0) = "00" or
2158                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2159                         v.result_class := INFINITY;
2160                         v.fpscr(FPSCR_FR) := '1';
2161                     else
2162                         v.fpscr(FPSCR_FR) := '0';
2163                     end if;
2164                     -- construct largest representable number
                         -- (misc_sel "0010" = DP mantissa, "0011" = SP mantissa)
2165                     v.result_exp := max_exp;
2166                     opsel_r <= RES_MISC;
2167                     misc_sel <= "001" & r.single_prec;
2168                     arith_done := '1';
2169                 else
2170                     -- enabled overflow exception
                         -- exponent is adjusted downward by bias_exp and the
                         -- mantissa is then rounded normally
2171                     v.result_exp := r.result_exp - bias_exp;
2172                     v.shift := to_signed(-2, EXP_BITS);
2173                     v.state := ROUNDING;
2174                 end if;
2175
2176             when ROUNDING =>
                     -- Apply the rounding decision to R.  opsel_mask clears the
                     -- sum bits covered by the shift mask (the mask is derived
                     -- from r.shift in the data path below).  round is written
                     -- to FPSCR FR (upper bit) and FI (lower bit).
2177                 opsel_mask <= '1';
2178                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2179                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2180                 if round(1) = '1' then
2181                     -- set mask to increment the LSB for the precision
                         -- (adder computes A + mask + 1, then masks the low bits)
2182                     opsel_b <= BIN_MASK;
2183                     carry_in <= '1';
2184                     v.shift := to_signed(-1, EXP_BITS);
2185                     v.state := ROUNDING_2;
2186                 else
2187                     if r.r(54) = '0' then
2188                         -- result after masking could be zero, or could be a
2189                         -- denormalized result that needs to be renormalized
2190                         renormalize := '1';
2191                         v.state := ROUNDING_3;
2192                     else
2193                         arith_done := '1';
2194                     end if;
2195                 end if;
                     -- An inexact result sets XX, and also UX if the value was
                     -- flagged tiny in ROUND_UFLOW.
2196                 if round(0) = '1' then
2197                     v.fpscr(FPSCR_XX) := '1';
2198                     if r.tiny = '1' then
2199                         v.fpscr(FPSCR_UX) := '1';
2200                     end if;
2201                 end if;
2202
2203             when ROUNDING_2 =>
2204                 -- Check for overflow during rounding
2205                 -- r.shift = -1
2206                 v.x := '0';
2207                 if r.r(55) = '1' then
                         -- The increment carried out into bit 55: load R with
                         -- the shifter output (r.shift = -1), which also
                         -- updates result_exp to new_exp, then re-check for
                         -- exponent overflow.
2208                     opsel_r <= RES_SHIFT;
2209                     if exp_huge = '1' then
2210                         v.state := ROUND_OFLOW;
2211                     else
2212                         arith_done := '1';
2213                     end if;
2214                 elsif r.r(54) = '0' then
2215                     -- Do CLZ so we can renormalize the result
2216                     renormalize := '1';
2217                     v.state := ROUNDING_3;
2218                 else
2219                     arith_done := '1';
2220                 end if;
2221
2222             when ROUNDING_3 =>
2223                 -- r.shift = clz(r.r) - 9
                     -- Handle a rounded result whose bit 54 is clear: either it
                     -- is zero, or it must be renormalized.  For single
                     -- precision only r_hi_nz is considered when testing for a
                     -- non-zero mantissa.
2224                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2225                 if mant_nz = '0' then
2226                     v.result_class := ZERO;
2227                     if r.is_subtract = '1' then
2228                         -- set result sign depending on rounding mode
                             -- (negative only when both low mode bits are set)
2229                         v.result_sign := r.round_mode(1) and r.round_mode(0);
2230                     end if;
2231                     arith_done := '1';
2232                 else
2233                     -- Renormalize result after rounding
2234                     opsel_r <= RES_SHIFT;
2235                     v.denorm := exp_tiny;
                         -- If the renormalized exponent falls below -1022, the
                         -- DENORM state shifts the value back by the difference.
2236                     v.shift := new_exp - to_signed(-1022, EXP_BITS);
2237                     if new_exp < to_signed(-1022, EXP_BITS) then
2238                         v.state := DENORM;
2239                     else
2240                         arith_done := '1';
2241                     end if;
2242                 end if;
2243
2244             when DENORM =>
2245                 -- r.shift = result_exp - -1022
                     -- Load R with the shifter output (shift amount prepared in
                     -- ROUNDING_3) and complete the instruction.
2246                 opsel_r <= RES_SHIFT;
2247                 arith_done := '1';
2248
2249             when NAN_RESULT =>
                     -- At least one operand in use is a NaN.  Raise VXSNAN if
                     -- any used NaN operand has mantissa bit 53 clear, then
                     -- choose the operand to propagate, in priority A, B, C.
2250                 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2251                     (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2252                     (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2253                     -- Signalling NAN
2254                     v.fpscr(FPSCR_VXSNAN) := '1';
2255                     invalid := '1';
2256                 end if;
                     -- If none of the tests match, v.opsel_a keeps the value it
                     -- already had.
2257                 if r.use_a = '1' and r.a.class = NAN then
2258                     v.opsel_a := AIN_A;
2259                 elsif r.use_b = '1' and r.b.class = NAN then
2260                     v.opsel_a := AIN_B;
2261                 elsif r.use_c = '1' and r.c.class = NAN then
2262                     v.opsel_a := AIN_C;
2263                 end if;
2264                 v.state := EXC_RESULT;
2265
2266             when EXC_RESULT =>
2267                 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
                     -- Copy sign (xor r.negate), exponent and class of the chosen
                     -- operand into the result fields.  Any r.opsel_a value
                     -- other than AIN_B or AIN_C is treated as operand A.
2268                 case r.opsel_a is
2269                     when AIN_B =>
2270                         v.result_sign := r.b.negative xor r.negate;
2271                         v.result_exp := r.b.exponent;
2272                         v.result_class := r.b.class;
2273                     when AIN_C =>
2274                         v.result_sign := r.c.negative xor r.negate;
2275                         v.result_exp := r.c.exponent;
2276                         v.result_class := r.c.class;
2277                     when others =>
2278                         v.result_sign := r.a.negative xor r.negate;
2279                         v.result_exp := r.a.exponent;
2280                         v.result_class := r.a.class;
2281                 end case;
2282                 arith_done := '1';
2283
2284         end case;
2285
             -- Common post-processing of the flags raised by the state case.
2286         if zero_divide = '1' then
2287             v.fpscr(FPSCR_ZX) := '1';
2288         end if;
             -- A generated quiet NaN overrides whatever result the state
             -- selected: positive NaN with the QNaN mantissa from the misc mux
             -- (misc_sel "0001"), and the instruction completes this cycle.
2289         if qnan_result = '1' then
2290             invalid := '1';
2291             v.result_class := NAN;
2292             v.result_sign := '0';
2293             misc_sel <= "0001";
2294             opsel_r <= RES_MISC;
2295             arith_done := '1';
2296         end if;
             -- v.invalid is only ever set here, never cleared, so it
             -- accumulates across the states of one instruction (its reset is
             -- outside this view).
2297         if invalid = '1' then
2298             v.invalid := '1';
2299         end if;
2300         if arith_done = '1' then
2301             -- Enabled invalid exception doesn't write result or FPRF
2302             -- Neither does enabled zero-divide exception
2303             if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2304                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2305                 v.writing_back := '1';
2306                 v.update_fprf := '1';
2307             end if;
             -- The instruction finishes regardless of whether a result is
             -- written; update_fx enables the FX computation further down.
2308             v.instr_done := '1';
2309             v.state := IDLE;
2310             update_fx := '1';
2311         end if;
2312
2313         -- Multiplier and divide/square root data path
             -- Operand 1 mux.  A, B and R are 64-bit values passed with their
             -- low 62 bits shifted left by two places; Y is passed unshifted.
             -- Any msel_1 value not listed selects R.
2314         case msel_1 is
2315             when MUL1_A =>
2316                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2317             when MUL1_B =>
2318                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2319             when MUL1_Y =>
2320                 f_to_multiply.data1 <= r.y;
2321             when others =>
2322                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2323         end case;
             -- Operand 2 mux.  Any msel_2 value not listed selects R.
2324         case msel_2 is
2325             when MUL2_C =>
2326                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2327             when MUL2_LUT =>
                     -- inverse_est sits below 8 zero bits and above 37 zero bits
2328                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2329             when MUL2_P =>
2330                 f_to_multiply.data2 <= r.p;
2331             when others =>
2332                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2333         end case;
             -- Addend mux; the addend is zero unless one of the cases below
             -- fills it in.
2334         maddend := (others => '0');
2335         case msel_add is
2336             when MULADD_CONST =>
2337                 -- addend is 2.0 or 1.5 in 16.112 format
2338                 if r.is_sqrt = '0' then
2339                     maddend(113) := '1';                -- 2.0
2340                 else
2341                     maddend(112 downto 111) := "11";    -- 1.5
2342                 end if;
2343             when MULADD_A =>
2344                 -- addend is A in 16.112 format
                     -- (mantissa bit 54, the units bit of 10.54 format, lands
                     -- on addend bit 112)
2345                 maddend(121 downto 58) := r.a.mantissa;
2346             when MULADD_RS =>
2347                 -- addend is concatenation of R and S in 16.112 format
2348                 maddend := "000000" & r.r & r.s & "00";
2349             when others =>
2350         end case;
             -- msel_inv sends the bitwise complement of the addend and also
             -- drives not_result on the multiplier interface.
2351         if msel_inv = '1' then
2352             f_to_multiply.addend <= not maddend;
2353         else
2354             f_to_multiply.addend <= maddend;
2355         end if;
2356         f_to_multiply.not_result <= msel_inv;
             -- set_y copies multiplier operand 2 into Y.
             -- NOTE(review): f_to_multiply.data2 is assigned with "<=" above,
             -- so it is a signal; this read presumably relies on the process
             -- being re-evaluated once the signal has updated -- confirm
             -- against the sensitivity list.
2357         if set_y = '1' then
2358             v.y := f_to_multiply.data2;
2359         end if;
             -- Capture a returning product into P: the low 64 bits by default,
             -- or bits 119..56 when pshift is set.
2360         if multiply_to_f.valid = '1' then
2361             if pshift = '0' then
2362                 v.p := multiply_to_f.result(63 downto 0);
2363             else
2364                 v.p := multiply_to_f.result(119 downto 56);
2365             end if;
2366         end if;
2367
2368         -- Data path.
2369         -- This has A and B input multiplexers, an adder, a shifter,
2370         -- count-leading-zeroes logic, and a result mux.
             -- Mask generation: the mask is driven by r.shift, offset by -29
             -- when r.longmask is set.  It is all ones below -64, all zeroes
             -- for non-negative values, and otherwise produced by right_mask
             -- (defined elsewhere) from the low 6 bits of the shift.
2371         if r.longmask = '1' then
2372             mshift := r.shift + to_signed(-29, EXP_BITS);
2373         else
2374             mshift := r.shift;
2375         end if;
2376         if mshift < to_signed(-64, EXP_BITS) then
2377             mask := (others => '1');
2378         elsif mshift >= to_signed(0, EXP_BITS) then
2379             mask := (others => '0');
2380         else
2381             mask := right_mask(unsigned(mshift(5 downto 0)));
2382         end if;
             -- A input mux, controlled by the registered r.opsel_a; any value
             -- other than AIN_R, AIN_A or AIN_B selects C.
2383         case r.opsel_a is
2384             when AIN_R =>
2385                 in_a0 := r.r;
2386             when AIN_A =>
2387                 in_a0 := r.a.mantissa;
2388             when AIN_B =>
2389                 in_a0 := r.b.mantissa;
2390             when others =>
2391                 in_a0 := r.c.mantissa;
2392         end case;
             -- Sticky bit: with set_x, X is set if any masked bit of the
             -- (not yet inverted) A input is non-zero.  X is never cleared here.
2393         if (or (mask and in_a0)) = '1' and set_x = '1' then
2394             v.x := '1';
2395         end if;
2396         if opsel_ainv = '1' then
2397             in_a0 := not in_a0;
2398         end if;
2399         in_a <= in_a0;
             -- B input mux.
2400         case opsel_b is
2401             when BIN_ZERO =>
2402                 in_b0 := (others => '0');
2403             when BIN_R =>
2404                 in_b0 := r.r;
2405             when BIN_MASK =>
2406                 in_b0 := mask;
2407             when others =>
2408                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2409                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2410         end case;
2411         if opsel_binv = '1' then
2412             in_b0 := not in_b0;
2413         end if;
2414         in_b <= in_b0;
             -- Shifter: operates on R concatenated with S (shiftin can force
             -- the top bit of the S part to '1').  Shift amounts outside
             -- -64..63 give an all-zero result.
2415         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2416             shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2417                                     std_ulogic_vector(r.shift(6 downto 0)));
2418         else
2419             shift_res := (others => '0');
2420         end if;
             -- Adder: A + B + carry_in, optionally with the masked bits
             -- cleared.
             -- NOTE(review): in_a and in_b are signals assigned just above,
             -- so the values read here presumably settle on a later
             -- evaluation of the process -- confirm against the sensitivity
             -- list.
2421         sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2422         if opsel_mask = '1' then
2423             sum := sum and not mask;
2424         end if;
             -- Result mux: selects the next value of R from the adder sum,
             -- the shifter output, the multiplier result (bits 121..58), or
             -- one of the miscellaneous values chosen by misc_sel.
2425         case opsel_r is
2426             when RES_SUM =>
2427                 result <= sum;
2428             when RES_SHIFT =>
2429                 result <= shift_res;
2430             when RES_MULT =>
2431                 result <= multiply_to_f.result(121 downto 58);
2432             when others =>
                     -- RES_MISC: misc_sel values "1xxx" are the saturated
                     -- integer-conversion results, indexed by r.insn(9 downto 8)
                     -- and the sign (see INT_CHECK / INT_OFLOW).
2433                 case misc_sel is
2434                     when "0000" =>
                             -- FPSCR value, masked, in the low 32 bits
2435                         misc := x"00000000" & (r.fpscr and fpscr_mask);
2436                     when "0001" =>
2437                         -- generated QNaN mantissa
2438                         misc := x"0020000000000000";
2439                     when "0010" =>
2440                         -- mantissa of max representable DP number
2441                         misc := x"007ffffffffffffc";
2442                     when "0011" =>
2443                         -- mantissa of max representable SP number
2444                         misc := x"007fffff80000000";
2445                     when "0100" =>
2446                         -- fmrgow result
2447                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2448                     when "0110" =>
2449                         -- fmrgew result
2450                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2451                     when "0111" =>
                             -- inverse_est below 10 zero bits and above 35 zero bits
2452                         misc := 10x"000" & inverse_est & 35x"000000000";
2453                     when "1000" =>
2454                         -- max positive result for fctiw[z]
2455                         misc := x"000000007fffffff";
2456                     when "1001" =>
2457                         -- max negative result for fctiw[z]
2458                         misc := x"ffffffff80000000";
2459                     when "1010" =>
2460                         -- max positive result for fctiwu[z]
2461                         misc := x"00000000ffffffff";
2462                     when "1011" =>
2463                         -- max negative result for fctiwu[z]
2464                         misc := x"0000000000000000";
2465                     when "1100" =>
2466                         -- max positive result for fctid[z]
2467                         misc := x"7fffffffffffffff";
2468                     when "1101" =>
2469                         -- max negative result for fctid[z]
2470                         misc := x"8000000000000000";
2471                     when "1110" =>
2472                         -- max positive result for fctidu[z]
2473                         misc := x"ffffffffffffffff";
2474                     when "1111" =>
2475                         -- max negative result for fctidu[z]
2476                         misc := x"0000000000000000";
2477                     when others =>
                             -- remaining encodings (e.g. "0101") yield zero
2478                         misc := x"0000000000000000";
2479                 end case;
2480                 result <= misc;
2481         end case;
             -- R is reloaded from the result mux on every evaluation.
             -- NOTE(review): result is a signal (assigned with "<="), so the
             -- value read here presumably settles on a later evaluation of
             -- the process -- confirm against the sensitivity list.
2482         v.r := result;
             -- S register update, only when set_s is asserted.
2483         if set_s = '1' then
2484             case opsel_s is
2485                 when S_NEG =>
                         -- negate S, with the increment suppressed when X is set
2486                     v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2487                 when S_MULT =>
                         -- product bits below those that RES_MULT puts in R
2488                     v.s := multiply_to_f.result(57 downto 2);
2489                 when S_SHIFT =>
                         -- top 56 bits of the shifter output; any discarded low
                         -- bits make the result inexact (X)
2490                     v.s := shift_res(63 downto 8);
2491                     if shift_res(7 downto 0) /= x"00" then
2492                         v.x := '1';
2493                     end if;
2494                 when others =>
2495                     v.s := (others => '0');
2496             end case;
2497         end if;
2498
             -- Operand registers can be reloaded with the shifted value and
             -- the adjusted exponent.
2499         if set_a = '1' then
2500             v.a.exponent := new_exp;
2501             v.a.mantissa := shift_res;
2502         end if;
2503         if set_b = '1' then
2504             v.b.exponent := new_exp;
2505             v.b.mantissa := shift_res;
2506         end if;
2507         if set_c = '1' then
2508             v.c.exponent := new_exp;
2509             v.c.mantissa := shift_res;
2510         end if;
2511
             -- Whenever R takes the shifter output, the result exponent
             -- follows; this overrides any v.result_exp assignment made in
             -- the state case above.
2512         if opsel_r = RES_SHIFT then
2513             v.result_exp := new_exp;
2514         end if;
2515
             -- renormalize: next shift = (leading zeroes of R) - 9, i.e. the
             -- shift that brings the leading one to bit 54.
2516         if renormalize = '1' then
2517             clz := count_left_zeroes(r.r);
2518             if renorm_sqrt = '1' then
2519                 -- make denormalized value end up with even exponent
2520                 clz(0) := '1';
2521             end if;
2522             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2523         end if;
2524
             -- Result formatting: integer results are passed through from R
             -- unchanged; everything else is packed by pack_dp (defined
             -- elsewhere).
2525         if r.int_result = '1' then
2526             fp_result <= r.r;
2527         else
2528             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2529                                  r.single_prec, r.quieten_nan);
2530         end if;
             -- FPRF (FPSCR C..FU) is refreshed from the registered result on
             -- the evaluation after update_fprf was set.
2531         if r.update_fprf = '1' then
2532             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2533                                                              r.r(54) and not r.denorm);
2534         end if;
2535
             -- Summary bits: VX is the OR of the individual invalid-operation
             -- bits; FEX is set when any exception bit in VX..XX has its
             -- corresponding enable bit in VE..XE set.
2536         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2537                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2538         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2539                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
             -- FX is set when an exception bit is set now that was not set in
             -- r.old_exc; it is not cleared here.
2540         if update_fx = '1' and
2541             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2542             v.fpscr(FPSCR_FX) := '1';
2543         end if;
             -- For record-form instructions the CR result is FPSCR FX..OX.
2544         if r.rc = '1' then
2545             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2546         end if;
2547
             -- An illegal instruction cancels completion, interrupt,
             -- writeback and busy, and returns the state machine to IDLE.
             -- Otherwise an interrupt is requested when a finishing
             -- instruction has FEX set and r.fe_mode is set; the unit is busy
             -- while not idle or while raising that interrupt.
2548         if illegal = '1' then
2549             v.instr_done := '0';
2550             v.do_intr := '0';
2551             v.writing_back := '0';
2552             v.busy := '0';
2553             v.state := IDLE;
2554         else
2555             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2556             if v.state /= IDLE or v.do_intr = '1' then
2557                 v.busy := '1';
2558             end if;
2559         end if;
2560
             -- Hand the computed next state to the register stage and report
             -- illegal instructions to execute1.
2561         rin <= v;
2562         e_out.illegal <= illegal;
2563     end process;
2564
2565 end architecture behaviour;