fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is                           -- FPU top level; the record port types are declared outside this file (presumably work.common -- confirm)
  15     port (
  16         clk : in std_ulogic;            -- clock; the state register and the lookup-table read are updated on its rising edge
  17         rst : in std_ulogic;            -- synchronous reset (sampled inside the rising-edge branch of process fpu_0)
  18
  19         e_in  : in  Execute1toFPUType;  -- request input; e_in.valid is checked by an assertion to occur only while the state is IDLE
  20         e_out : out FPUToExecute1Type;  -- status output; the busy and exception fields are driven from the state register
  21
  22         w_out : out FPUToWritebackType  -- result output: valid, instruction tag, register write and CR write fields
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);  -- operand/result category; decode_dp classifies denormals as FINITE
  28
  29     constant EXP_BITS : natural := 13;  -- width of every signed exponent field declared below
  30
  31     type fpu_reg_type is record         -- unpacked operand, as returned by decode_dp
  32         class    : fp_number_class;
  33         negative : std_ulogic;          -- sign; decode_dp copies bit 63 of the source word
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased (decode_dp subtracts 1023; zero for integer operands)
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format: unit bit at bit 54, fraction below it (raw 64-bit value for integer operands)
  36     end record;
  37
  38     type state_t is (IDLE,              -- FPU state machine states; IDLE is the reset state and the only state in which e_in.valid may be asserted
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,  -- DO_* names follow instruction mnemonics; presumably per-instruction entry states (transitions are outside this view -- confirm)
  40                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  44                      DO_FRE, DO_FRSQRTE,
  45                      DO_FSEL,
  46                      FRI_1,            -- the remaining names look like later steps of multi-cycle operations -- confirm against the state machine
  47                      ADD_1, ADD_SHIFT, ADD_2, ADD_3,
  48                      CMP_1, CMP_2,
  49                      MULT_1,
  50                      FMADD_1, FMADD_2, FMADD_3,
  51                      FMADD_4, FMADD_5, FMADD_6,
  52                      LOOKUP,
  53                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  54                      FRE_1,
  55                      RSQRT_1,
  56                      FTDIV_1,
  57                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  58                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  59                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  60                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  61                      INT_FINAL, INT_CHECK, INT_OFLOW,
  62                      FINISH, NORMALIZE,
  63                      ROUND_UFLOW, ROUND_OFLOW,
  64                      ROUNDING, ROUNDING_2, ROUNDING_3,
  65                      DENORM,
  66                      RENORM_A, RENORM_A2,
  67                      RENORM_B, RENORM_B2,
  68                      RENORM_C, RENORM_C2,
  69                      NAN_RESULT, EXC_RESULT);
  70
  71     type reg_type is record             -- complete registered state of the FPU (signals r and rin have this type)
  72         state        : state_t;         -- reset to IDLE
  73         busy         : std_ulogic;      -- drives e_out.busy; reset to '0'
  74         instr_done   : std_ulogic;      -- gates w_out.valid and w_out.write_cr_enable; reset to '0'
  75         do_intr      : std_ulogic;      -- when set, w_out.valid is suppressed; reset to '0'
  76         illegal      : std_ulogic;
  77         op           : insn_type_t;
  78         insn         : std_ulogic_vector(31 downto 0);
  79         nia          : std_ulogic_vector(63 downto 0);
  80         instr_tag    : instr_tag_t;     -- forwarded to w_out.instr_tag
  81         dest_fpr     : gspr_index_t;    -- forwarded to w_out.write_reg
  82         fe_mode      : std_ulogic;
  83         rc           : std_ulogic;      -- either rc or is_cmp enables the CR write when instr_done is set
  84         is_cmp       : std_ulogic;
  85         single_prec  : std_ulogic;
  86         fpscr        : std_ulogic_vector(31 downto 0);  -- reset to all zeros; bit FPSCR_FEX drives e_out.exception
  87         a            : fpu_reg_type;
  88         b            : fpu_reg_type;    -- b.mantissa supplies the address bits for the inverse lookup table
  89         c            : fpu_reg_type;
  90         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  91         s            : std_ulogic_vector(55 downto 0);  -- extended fraction
  92         x            : std_ulogic;
  93         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  94         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  95         result_sign  : std_ulogic;
  96         result_class : fp_number_class;
  97         result_exp   : signed(EXP_BITS-1 downto 0);
  98         shift        : signed(EXP_BITS-1 downto 0);
  99         writing_back : std_ulogic;      -- drives w_out.write_enable; reset to '0'
 100         int_result   : std_ulogic;
 101         cr_result    : std_ulogic_vector(3 downto 0);   -- copied into all eight 4-bit fields of w_out.write_cr_data
 102         cr_mask      : std_ulogic_vector(7 downto 0);   -- forwarded to w_out.write_cr_mask
 103         old_exc      : std_ulogic_vector(4 downto 0);
 104         update_fprf  : std_ulogic;
 105         quieten_nan  : std_ulogic;
 106         tiny         : std_ulogic;
 107         denorm       : std_ulogic;
 108         round_mode   : std_ulogic_vector(2 downto 0);
 109         is_subtract  : std_ulogic;
 110         exp_cmp      : std_ulogic;
 111         madd_cmp     : std_ulogic;
 112         add_bsmall   : std_ulogic;
 113         is_multiply  : std_ulogic;
 114         is_sqrt      : std_ulogic;      -- when '1', lut_access addresses the 1/sqrt(x) blocks of the lookup table
 115         first        : std_ulogic;
 116         count        : unsigned(1 downto 0);
 117         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 118         opsel_a      : std_ulogic_vector(1 downto 0);   -- same width as the AIN_* constants; presumably the A-input selector -- confirm
 119         use_a        : std_ulogic;
 120         use_b        : std_ulogic;
 121         use_c        : std_ulogic;
 122         invalid      : std_ulogic;
 123         negate       : std_ulogic;
 124         longmask     : std_ulogic;
 125     end record;
 126
 127     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);  -- 1024 x 18-bit ROM contents (see inverse_table)
 128
 129     signal r, rin : reg_type;           -- r: registered state; rin: next state, loaded into r on each rising clock edge when rst is not '1'
 130
 131     signal fp_result     : std_ulogic_vector(63 downto 0);  -- drives w_out.write_data
 132     signal opsel_b       : std_ulogic_vector(1 downto 0);   -- 2 bits, like the BIN_* constants (presumed pairing -- confirm)
 133     signal opsel_r       : std_ulogic_vector(1 downto 0);   -- 2 bits, like the RES_* constants (presumed pairing -- confirm)
 134     signal opsel_s       : std_ulogic_vector(1 downto 0);   -- 2 bits, like the S_* constants (presumed pairing -- confirm)
 135     signal opsel_ainv    : std_ulogic;
 136     signal opsel_mask    : std_ulogic;
 137     signal opsel_binv    : std_ulogic;
 138     signal in_a          : std_ulogic_vector(63 downto 0);
 139     signal in_b          : std_ulogic_vector(63 downto 0);
 140     signal result        : std_ulogic_vector(63 downto 0);
 141     signal carry_in      : std_ulogic;
 142     signal lost_bits     : std_ulogic;
 143     signal r_hi_nz       : std_ulogic;
 144     signal r_lo_nz       : std_ulogic;
 145     signal s_nz          : std_ulogic;
 146     signal misc_sel      : std_ulogic_vector(3 downto 0);
 147     signal f_to_multiply : MultiplyInputType;   -- connected to port m_in of the multiply instance
 148     signal multiply_to_f : MultiplyOutputType;  -- connected to port m_out of the multiply instance
 149     signal msel_1        : std_ulogic_vector(1 downto 0);   -- 2 bits, like the MUL1_* constants (presumed pairing -- confirm)
 150     signal msel_2        : std_ulogic_vector(1 downto 0);   -- 2 bits, like the MUL2_* constants (presumed pairing -- confirm)
 151     signal msel_add      : std_ulogic_vector(1 downto 0);   -- 2 bits, like the MULADD_* constants (presumed pairing -- confirm)
 152     signal msel_inv      : std_ulogic;
 153     signal inverse_est   : std_ulogic_vector(18 downto 0);  -- registered lookup result: a leading '1' followed by the 18-bit table entry
 154
 155     -- opsel values: 2-bit selector encodings; each group enumerates all four codes
 156     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";  -- AIN_*: presumably values for r.opsel_a -- confirm
 157     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 158     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 159     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 160
 161     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";  -- BIN_*: presumably values for opsel_b -- confirm
 162     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 163     constant BIN_RND  : std_ulogic_vector(1 downto 0) := "10";
 164     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 165
 166     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";  -- RES_*: presumably values for opsel_r -- confirm
 167     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 168     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 169     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 170
 171     constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";  -- S_*: presumably values for opsel_s -- confirm
 172     constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
 173     constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
 174     constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";
 175
 176     -- msel values: 2-bit selector encodings, presumably for the multiplier inputs -- confirm
 177     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";  -- MUL1_*: presumably values for msel_1 -- confirm
 178     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 179     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 180     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 181
 182     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";  -- MUL2_*: presumably values for msel_2 -- confirm
 183     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 184     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 185     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 186
 187     constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";  -- MULADD_*: presumably values for msel_add -- confirm
 188     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 189     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 190     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 191
 192     -- Inverse lookup table: 1024 entries of 18 bits, addressed by a 10-bit index
 193     -- made of a 2-bit block select and the top 8 fraction bits of the operand.
 194     -- Block 0 (256 entries) is the reciprocal (1/x) table; blocks 1..3 (768 entries)
 195     -- are the reciprocal square root table.  Output range is [0.5, 1) in 0.19 format,
 196     -- though the top bit isn't stored since it is always 1 (lut_access prepends it).
 197     -- Each output value is the inverse of the center of the input
 198     -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
 199     -- entry 1 is 1 / (1 + 3/512), etc.
 200     signal inverse_table : lookup_table := (  -- declared as a signal; in the visible code it is only read (by lut_access)
 201         -- 1/x lookup table
 202         -- Unit bit is assumed to be 1, so input range is [1, 2)
 203         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 204         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 205         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 206         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 207         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 208         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 209         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 210         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 211         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 212         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 213         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 214         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 215         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 216         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 217         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 218         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 219         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 220         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 221         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 222         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 223         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 224         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 225         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 226         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 227         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 228         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 229         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 230         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 231         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 232         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 233         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 234         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 235         -- 1/sqrt(x) lookup table
 236         -- Input is in the range [1, 4), i.e. two bits to the left of the
 237         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 238         -- 1.0 ... 1.9999
 239         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 240         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 241         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 242         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 243         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 244         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 245         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 246         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 247         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 248         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 249         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 250         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 251         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 252         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 253         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 254         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 255         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 256         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 257         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 258         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 259         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 260         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 261         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 262         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 263         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 264         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 265         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 266         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 267         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 268         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 269         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 270         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 271         -- 2.0 ... 2.9999
 272         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 273         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 274         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 275         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 276         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 277         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 278         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 279         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 280         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 281         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 282         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 283         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 284         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 285         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 286         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 287         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 288         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 289         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 290         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 291         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 292         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 293         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 294         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 295         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 296         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 297         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 298         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 299         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 300         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 301         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 302         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 303         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 304         -- 3.0 ... 3.9999
 305         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 306         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 307         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 308         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 309         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 310         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 311         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 312         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 313         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 314         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 315         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 316         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 317         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 318         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 319         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 320         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 321         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 322         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 323         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 324         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 325         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 326         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 327         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 328         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 329         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 330         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 331         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 332         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 333         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 334         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 335         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 336         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 337         );
 338
 339     -- Left and right shifter with 120 bit input and 64 bit output.
 340     -- Shifts inp left by shift bits and returns the upper 64 bits of
 341     -- the result.  The shift parameter is interpreted as a signed
 342     -- number in the range -64..63, with negative values indicating
 343     -- right shifts.
 344     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 345                         shift: std_ulogic_vector(6 downto 0))
 346         return std_ulogic_vector is
 347         -- Implemented as three cascaded multiplexer stages (32-bit, 8-bit and
 348         -- 1-bit steps) that narrow the 120-bit input to 95, 71 and finally 64 bits.
 349         -- A shift of zero returns inp(119 downto 56).
 350         variable coarse : std_ulogic_vector(94 downto 0);   -- output of the 32-bit-step stage
 351         variable medium : std_ulogic_vector(70 downto 0);   -- output of the 8-bit-step stage
 352         variable fine   : std_ulogic_vector(63 downto 0);   -- output of the 1-bit-step stage
 353     begin
 354         -- Stage 1: shift(6 downto 5) picks a 95-bit window in steps of 32 bits.
 355         -- "00" and "01" (left shifts) take a window of inp, padding with zeros
 356         -- on the right when it runs off the bottom.  "10" and the final else
 357         -- branch (right shifts) put zeros ahead of the top bits of inp.
 358         if shift(6 downto 5) = "00" then
 359             coarse := inp(119 downto 25);
 360         elsif shift(6 downto 5) = "01" then
 361             coarse := inp(87 downto 0) & "0000000";
 362         elsif shift(6 downto 5) = "10" then
 363             coarse := x"0000000000000000" & inp(119 downto 89);
 364         else
 365             coarse := x"00000000" & inp(119 downto 57);
 366         end if;
 367
 368         -- Stage 2: shift(4 downto 3) moves a 71-bit window down by 0, 8 or 16
 369         -- bits from the top of coarse.  Any other selector value keeps the
 370         -- default, which is the lowest window (24 bits down).
 371         medium := coarse(70 downto 0);
 372         for step in 0 to 2 loop
 373             if shift(4 downto 3) = std_ulogic_vector(to_unsigned(step, 2)) then
 374                 medium := coarse(94 - 8 * step downto 24 - 8 * step);
 375             end if;
 376         end loop;
 377
 378         -- Stage 3: shift(2 downto 0) moves a 64-bit window down by 0 to 6 bits
 379         -- from the top of medium.  Any other selector value keeps the
 380         -- default, which is the lowest window (7 bits down).
 381         fine := medium(63 downto 0);
 382         for step in 0 to 6 loop
 383             if shift(2 downto 0) = std_ulogic_vector(to_unsigned(step, 3)) then
 384                 fine := medium(70 - step downto 7 - step);
 385             end if;
 386         end loop;
 387
 388         -- The result always has the index range 63 downto 0.
 389         return fine;
 390     end;
 391
 392     -- Generate a mask with 0-bits on the left and 1-bits on the right which
 393     -- selects the bits that will be lost in doing a right shift.  The shift
 394     -- parameter is the bottom 6 bits of a negative shift count,
 395     -- indicating a right shift.
 396     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 397         variable mask : std_ulogic_vector(63 downto 0) := (others => '0');
 398     begin
 399         -- Bit b is set when its distance from the top bit, 63 - b, is at least shift.
 400         for b in mask'range loop
 401             if (63 - b) >= shift then
 402                 mask(b) := '1';
 403             end if;
 404         end loop;
 405         return mask;
 406     end;
 407
 408     -- Split a DP floating-point number into components and work out its class.
 409     -- If is_int = 1, the input is considered an integer
 410     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 411         variable num     : fpu_reg_type;
 412         variable any_exp_bit, all_exp_bits, any_frac_bit : std_ulogic;   -- reductions over the exponent and fraction fields
 413         variable summary : std_ulogic_vector(2 downto 0);
 414     begin
 415         any_exp_bit  := or (fpr(62 downto 52));
 416         all_exp_bits := and (fpr(62 downto 52));
 417         any_frac_bit := or (fpr(51 downto 0));
 418         num.negative := fpr(63);
 419         if is_int /= '0' then
 420             -- Integer operand: all 64 bits are kept and the class is ZERO only if every bit is clear.
 421             num.mantissa := fpr;
 422             num.exponent := (others => '0');
 423             num.class := ZERO;
 424             if (fpr(63) or any_exp_bit or any_frac_bit) = '1' then
 425                 num.class := FINITE;
 426             end if;
 427         else        -- floating-point operand
 428             if any_exp_bit = '0' then
 429                 num.exponent := to_signed(-1022, EXP_BITS);    -- zero exponent field: minimum exponent
 430             else
 431                 num.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
 432             end if;
 433             num.mantissa := "000000000" & any_exp_bit & fpr(51 downto 0) & "00";   -- unit bit is 0 when the exponent field is zero
 434             summary := all_exp_bits & any_exp_bit & any_frac_bit;
 435             if summary = "000" then
 436                 num.class := ZERO;
 437             elsif summary = "001" or summary = "010" or summary = "011" then
 438                 num.class := FINITE;        -- "001" is a denormalized number
 439             elsif summary = "110" then
 440                 num.class := INFINITY;
 441             else
 442                 num.class := NAN;
 443             end if;
 444         end if;
 445         return num;
 446     end;
 447
 448     -- Construct a DP floating-point result from components
 449     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 450                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 451         return std_ulogic_vector is
 452         variable word : std_ulogic_vector(63 downto 0) := (others => '0');
 453     begin
 454         word(63) := sign;
 455
 456         -- Exponent field: all ones for INFINITY and NAN, the biased exponent for a
 457         -- FINITE number whose unit bit (mantissa bit 54) is set, otherwise left at zero.
 458         if class = INFINITY or class = NAN then
 459             word(62 downto 52) := "11111111111";
 460         elsif class = FINITE and mantissa(54) = '1' then
 461             word(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
 462         end if;
 463
 464         -- Upper 23 fraction bits, written for FINITE and NAN in both precisions.
 465         -- For a NAN the top fraction bit is also set whenever quieten_nan is '1'.
 466         if class = FINITE then
 467             word(51 downto 29) := mantissa(53 downto 31);
 468         elsif class = NAN then
 469             word(51) := quieten_nan or mantissa(53);
 470             word(50 downto 29) := mantissa(52 downto 31);
 471         end if;
 472
 473         -- Lower 29 fraction bits are only filled in when single_prec is '0'.
 474         if (class = FINITE or class = NAN) and single_prec = '0' then
 475             word(28 downto 0) := mantissa(30 downto 2);
 476         end if;
 477         return word;
 478     end;
 479
 480     -- Determine whether to increment when rounding
 481     -- Returns rounding_inc & inexact
 482     -- Assumes x includes the bottom 29 bits of the mantissa already
 483     -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
 484     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 485                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 486                          sign: std_ulogic)
 487         return std_ulogic_vector is
 488         variable tail     : std_ulogic_vector(2 downto 0);   -- two bits below the kept LSB, then x
 489         variable keep_lsb : std_ulogic;                      -- lowest mantissa bit that is kept
 490         variable verdict  : std_ulogic_vector(1 downto 0);   -- (1) = increment, (0) = inexact
 491     begin
 492         -- Select the bits around the rounding point for the chosen precision.
 493         if single_prec /= '0' then
 494             keep_lsb := mantissa(31);
 495             tail := mantissa(30 downto 29) & x;
 496         else
 497             keep_lsb := mantissa(2);
 498             tail := mantissa(1 downto 0) & x;
 499         end if;
 500
 501         verdict(0) := or (tail);
 502         verdict(1) := '0';
 503         if rn(1 downto 0) = "00" then
 504             -- round to nearest: an exact tie follows the kept LSB unless rn(2) is set
 505             if tail = "100" and rn(2) = '0' then
 506                 verdict(1) := keep_lsb;
 507             else
 508                 verdict(1) := tail(2);
 509             end if;
 510         elsif rn(1 downto 0) /= "01" and rn(0) = sign then
 511             -- round towards +/- inf in the direction of greater magnitude
 512             verdict(1) := verdict(0);
 513         end if;
 514         -- "01" (round towards zero) falls through with no increment.
 515         return verdict;
 516     end;
 517
 518     -- Determine result flags to write into the FPSCR
 519     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 520         return std_ulogic_vector is
 521     begin
 522         -- One five-bit pattern per class; the leading bit is set for a NAN, a negative ZERO or a FINITE value with unitbit = '0'.
 523         if class = ZERO then
 524             return sign & "0010";
 525         elsif class = FINITE then
 526             return (not unitbit) & sign & (not sign) & "00";
 527         elsif class = INFINITY then
 528             return '0' & sign & (not sign) & "01";
 529         else
 530             return "10001";
 531         end if;
 532     end;
 533
 534 begin
 535     fpu_multiply_0: entity work.multiply
         -- Multiplier instance: clocked by clk, fed from f_to_multiply,
         -- with its output returned on multiply_to_f.
 536         port map (
 537             clk => clk,
 538             m_in => f_to_multiply,
 539             m_out => multiply_to_f
 540             );
 541
 542     fpu_0: process(clk)
 543     begin
 544         if rising_edge(clk) then
 545             if rst /= '1' then
 546                 assert (r.state = IDLE) or (e_in.valid /= '1') severity failure;  -- new work only when idle
 547                 r <= rin;                     -- normal operation: latch the next state
 548             else
 549                 -- synchronous reset of the control fields only
 550                 r.writing_back <= '0';
 551                 r.fpscr <= (others => '0');
 552                 r.do_intr <= '0';
 553                 r.instr_done <= '0';
 554                 r.busy <= '0';
 555                 r.state <= IDLE;
 556             end if;
 557         end if;
 558     end process;
 558
 559     -- Clocked read of inverse_table into inverse_est
 560     lut_access: process(clk)
 561         variable index : std_ulogic_vector(9 downto 0);
 562     begin
 563         if rising_edge(clk) then
 564             -- The low 8 index bits are always B's mantissa bits 53..46.
 565             index := "00" & r.b.mantissa(53 downto 46);
 566             -- When is_sqrt is set, mantissa bits 55..54 supply the two
 567             -- high index bits; otherwise they stay zero.
 568             if r.is_sqrt = '1' then
 569                 index(9 downto 8) := r.b.mantissa(55 downto 54);
 570             end if;
 571             inverse_est <= '1' & inverse_table(to_integer(unsigned(index)));
 572         end if;
 573     end process;
 574
 575     e_out.busy <= r.busy;
 576     e_out.exception <= r.fpscr(FPSCR_FEX);
 577
         -- Writeback interface.  A finished instruction is reported as valid
         -- unless it is raising an interrupt instead.
 578     w_out.valid <= r.instr_done and not r.do_intr;
 579     w_out.instr_tag <= r.instr_tag;
 580     w_out.write_enable <= r.writing_back;
 581     w_out.write_reg <= r.dest_fpr;
 582     w_out.write_data <= fp_result;
         -- CR is written on completion of a record-form (rc) or compare-type
         -- (is_cmp) instruction.
 583     w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
 584     w_out.write_cr_mask <= r.cr_mask;
         -- The 4-bit cr_result is replicated eight times to fill the data word
         -- (presumably write_cr_mask then selects the field actually written).
 585     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
 586                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;
         -- Interrupt reporting: fixed vector 16#700#, srr0 = address of the instruction.
 587     w_out.interrupt <= r.do_intr;
 588     w_out.intr_vec <= 16#700#;
 589     w_out.srr0 <= r.nia;
         -- srr1 gets index 47-44 set for an illegal instruction and index 47-43
         -- set otherwise; all other bits are zero.
 590     w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0');
 591
 592     fpu_1: process(all)
 593         variable v           : reg_type;
 594         variable adec        : fpu_reg_type;
 595         variable bdec        : fpu_reg_type;
 596         variable cdec        : fpu_reg_type;
 597         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 598         variable illegal     : std_ulogic;
 599         variable j, k        : integer;
 600         variable flm         : std_ulogic_vector(7 downto 0);
 601         variable int_input   : std_ulogic;
 602         variable mask        : std_ulogic_vector(63 downto 0);
 603         variable in_a0       : std_ulogic_vector(63 downto 0);
 604         variable in_b0       : std_ulogic_vector(63 downto 0);
 605         variable misc        : std_ulogic_vector(63 downto 0);
 606         variable shift_res   : std_ulogic_vector(63 downto 0);
 607         variable round       : std_ulogic_vector(1 downto 0);
 608         variable update_fx   : std_ulogic;
 609         variable arith_done  : std_ulogic;
 610         variable invalid     : std_ulogic;
 611         variable zero_divide : std_ulogic;
 612         variable mant_nz     : std_ulogic;
 613         variable min_exp     : signed(EXP_BITS-1 downto 0);
 614         variable max_exp     : signed(EXP_BITS-1 downto 0);
 615         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 616         variable new_exp     : signed(EXP_BITS-1 downto 0);
 617         variable exp_tiny    : std_ulogic;
 618         variable exp_huge    : std_ulogic;
 619         variable renormalize : std_ulogic;
 620         variable clz         : std_ulogic_vector(5 downto 0);
 621         variable set_x       : std_ulogic;
 622         variable mshift      : signed(EXP_BITS-1 downto 0);
 623         variable need_check  : std_ulogic;
 624         variable msb         : std_ulogic;
 625         variable is_add      : std_ulogic;
 626         variable set_a       : std_ulogic;
 627         variable set_b       : std_ulogic;
 628         variable set_c       : std_ulogic;
 629         variable set_y       : std_ulogic;
 630         variable set_s       : std_ulogic;
 631         variable qnan_result : std_ulogic;
 632         variable px_nz       : std_ulogic;
 633         variable pcmpb_eq    : std_ulogic;
 634         variable pcmpb_lt    : std_ulogic;
 635         variable pshift      : std_ulogic;
 636         variable renorm_sqrt : std_ulogic;
 637         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 638         variable shiftin     : std_ulogic;
 639         variable mulexp      : signed(EXP_BITS-1 downto 0);
 640         variable maddend     : std_ulogic_vector(127 downto 0);
 641         variable sum         : std_ulogic_vector(63 downto 0);
 642         variable round_inc   : std_ulogic_vector(63 downto 0);
 643     begin
 644         v := r;
 645         illegal := '0';
 646         v.busy := '0';
 647         int_input := '0';
 648
 649         -- capture incoming instruction
 650         if e_in.valid = '1' then
 651             v.insn := e_in.insn;
 652             v.nia := e_in.nia;
 653             v.op := e_in.op;
 654             v.instr_tag := e_in.itag;
 655             v.fe_mode := or (e_in.fe_mode);
 656             v.dest_fpr := e_in.frt;
 657             v.single_prec := e_in.single;
 658             v.longmask := e_in.single;
 659             v.int_result := '0';
 660             v.rc := e_in.rc;
 661             v.is_cmp := e_in.out_cr;
 662             if e_in.out_cr = '0' then
 663                 v.cr_mask := num_to_fxm(1);
 664             else
 665                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 666             end if;
 667             int_input := '0';
 668             if e_in.op = OP_FPOP_I then
 669                 int_input := '1';
 670             end if;
 671             v.quieten_nan := '1';
 672             v.tiny := '0';
 673             v.denorm := '0';
 674             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 675             v.is_subtract := '0';
 676             v.is_multiply := '0';
 677             v.is_sqrt := '0';
 678             v.add_bsmall := '0';
 679             v.doing_ftdiv := "00";
 680
 681             adec := decode_dp(e_in.fra, int_input);
 682             bdec := decode_dp(e_in.frb, int_input);
 683             cdec := decode_dp(e_in.frc, int_input);
 684             v.a := adec;
 685             v.b := bdec;
 686             v.c := cdec;
 687
 688             v.exp_cmp := '0';
 689             if adec.exponent > bdec.exponent then
 690                 v.exp_cmp := '1';
 691             end if;
 692             v.madd_cmp := '0';
 693             if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
 694                 v.madd_cmp := '1';
 695             end if;
 696         end if;
 697
 698         r_hi_nz <= or (r.r(55 downto 31));
 699         r_lo_nz <= or (r.r(30 downto 2));
 700         s_nz <= or (r.s);
 701
 702         if r.single_prec = '0' then
 703             if r.doing_ftdiv(1) = '0' then
 704                 max_exp := to_signed(1023, EXP_BITS);
 705             else
 706                 max_exp := to_signed(1020, EXP_BITS);
 707             end if;
 708             if r.doing_ftdiv(0) = '0' then
 709                 min_exp := to_signed(-1022, EXP_BITS);
 710             else
 711                 min_exp := to_signed(-1021, EXP_BITS);
 712             end if;
 713             bias_exp := to_signed(1536, EXP_BITS);
 714         else
 715             max_exp := to_signed(127, EXP_BITS);
 716             min_exp := to_signed(-126, EXP_BITS);
 717             bias_exp := to_signed(192, EXP_BITS);
 718         end if;
 719         new_exp := r.result_exp - r.shift;
 720         exp_tiny := '0';
 721         exp_huge := '0';
 722         if new_exp < min_exp then
 723             exp_tiny := '1';
 724         end if;
 725         if new_exp > max_exp then
 726             exp_huge := '1';
 727         end if;
 728
 729         -- Compare P with zero and with B
 730         px_nz := or (r.p(57 downto 4));
 731         pcmpb_eq := '0';
 732         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 733             pcmpb_eq := '1';
 734         end if;
 735         pcmpb_lt := '0';
 736         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 737             pcmpb_lt := '1';
 738         end if;
 739
 740         v.writing_back := '0';
 741         v.instr_done := '0';
 742         v.update_fprf := '0';
 743         v.shift := to_signed(0, EXP_BITS);
 744         v.first := '0';
 745         v.opsel_a := AIN_R;
 746         opsel_ainv <= '0';
 747         opsel_mask <= '0';
 748         opsel_b <= BIN_ZERO;
 749         opsel_binv <= '0';
 750         opsel_r <= RES_SUM;
 751         opsel_s <= S_ZERO;
 752         carry_in <= '0';
 753         misc_sel <= "0000";
 754         fpscr_mask := (others => '1');
 755         update_fx := '0';
 756         arith_done := '0';
 757         invalid := '0';
 758         zero_divide := '0';
 759         renormalize := '0';
 760         set_x := '0';
 761         qnan_result := '0';
 762         set_a := '0';
 763         set_b := '0';
 764         set_c := '0';
 765         set_s := '0';
 766         f_to_multiply.is_32bit <= '0';
 767         f_to_multiply.valid <= '0';
 768         msel_1 <= MUL1_A;
 769         msel_2 <= MUL2_C;
 770         msel_add <= MULADD_ZERO;
 771         msel_inv <= '0';
 772         set_y := '0';
 773         pshift := '0';
 774         renorm_sqrt := '0';
 775         shiftin := '0';
 776         case r.state is
 777             when IDLE =>
                     -- Idle: clear the per-instruction operand/flag state and, when a new
                     -- instruction is presented (e_in.valid = '1'), choose the first
                     -- execution state from instruction bits 5..1 plus a few secondary
                     -- bits.  v.opsel_a records which operand (A, B or C) is selected
                     -- for the chosen state.
 778                 v.use_a := '0';
 779                 v.use_b := '0';
 780                 v.use_c := '0';
 781                 v.invalid := '0';
 782                 v.negate := '0';
 783                 if e_in.valid = '1' then
 784                     case e_in.insn(5 downto 1) is
 785                         when "00000" =>
                                 -- bit 8 set: DO_FTDIV / DO_FTSQRT chosen by bit 6;
                                 -- else bit 7 set: DO_MCRFS; otherwise a compare.
 786                             if e_in.insn(8) = '1' then
 787                                 if e_in.insn(6) = '0' then
 788                                     v.state := DO_FTDIV;
 789                                 else
 790                                     v.state := DO_FTSQRT;
 791                                 end if;
 792                             elsif e_in.insn(7) = '1' then
 793                                 v.state := DO_MCRFS;
 794                             else
 795                                 v.opsel_a := AIN_B;
 796                                 v.state := DO_FCMP;
 797                             end if;
 798                         when "00110" =>
                                 -- bit 10 clear: FPSCR bit/field writes, chosen by bit 8;
                                 -- bit 10 set: DO_FMRG.
 799                             if e_in.insn(10) = '0' then
 800                                 if e_in.insn(8) = '0' then
 801                                     v.state := DO_MTFSB;
 802                                 else
 803                                     v.state := DO_MTFSFI;
 804                                 end if;
 805                             else
 806                                 v.state := DO_FMRG;
 807                             end if;
 808                         when "00111" =>
 809                             if e_in.insn(8) = '0' then
 810                                 v.state := DO_MFFS;
 811                             else
 812                                 v.state := DO_MTFSF;
 813                             end if;
 814                         when "01000" =>
                                 -- bits 9..8 = "11" selects DO_FRI, anything else DO_FMR
 815                             v.opsel_a := AIN_B;
 816                             if e_in.insn(9 downto 8) /= "11" then
 817                                 v.state := DO_FMR;
 818                             else
 819                                 v.state := DO_FRI;
 820                             end if;
 821                         when "01100" =>
 822                             v.opsel_a := AIN_B;
 823                             v.state := DO_FRSP;
 824                         when "01110" =>
 825                             v.opsel_a := AIN_B;
 826                             if int_input = '1' then
 827                                 -- fcfid[u][s]
 828                                 v.state := DO_FCFID;
 829                             else
 830                                 v.state := DO_FCTI;
 831                             end if;
 832                         when "01111" =>
                                 -- same conversion state, but with round_mode forced to "001"
 833                             v.round_mode := "001";
 834                             v.opsel_a := AIN_B;
 835                             v.state := DO_FCTI;
 836                         when "10010" =>
                                 -- select A, unless B has mantissa bit 54 clear while A has it set
 837                             v.opsel_a := AIN_A;
 838                             if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 839                                 v.opsel_a := AIN_B;
 840                             end if;
 841                             v.state := DO_FDIV;
 842                         when "10100" | "10101" =>
 843                             v.opsel_a := AIN_A;
 844                             v.state := DO_FADD;
 845                         when "10110" =>
 846                             v.is_sqrt := '1';
 847                             v.opsel_a := AIN_B;
 848                             v.state := DO_FSQRT;
 849                         when "10111" =>
 850                             v.state := DO_FSEL;
 851                         when "11000" =>
 852                             v.opsel_a := AIN_B;
 853                             v.state := DO_FRE;
 854                         when "11001" =>
                                 -- select A, unless C has mantissa bit 54 clear while A has it set
 855                             v.is_multiply := '1';
 856                             v.opsel_a := AIN_A;
 857                             if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
 858                                 v.opsel_a := AIN_C;
 859                             end if;
 860                             v.state := DO_FMUL;
 861                         when "11010" =>
 862                             v.is_sqrt := '1';
 863                             v.opsel_a := AIN_B;
 864                             v.state := DO_FRSQRTE;
 865                         when "11100" | "11101" | "11110" | "11111" =>
                                 -- select the first of A, C whose mantissa bit 54 is clear, else B
 866                             if v.a.mantissa(54) = '0' then
 867                                 v.opsel_a := AIN_A;
 868                             elsif v.c.mantissa(54) = '0' then
 869                                 v.opsel_a := AIN_C;
 870                             else
 871                                 v.opsel_a := AIN_B;
 872                             end if;
 873                             v.state := DO_FMADD;
 874                         when others =>
 875                             illegal := '1';
 876                     end case;
 877                 end if;
                     -- Done every idle cycle: clear x, snapshot the FPSCR exception
                     -- bits VX..XX into old_exc, and assert set_s.
 878                 v.x := '0';
 879                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 880                 set_s := '1';
 881
 882             when DO_MCRFS =>
                     -- Copy the 4-bit FPSCR field selected by the instruction's BFA
                     -- field into cr_result (field 0 is the most significant nibble),
                     -- then clear that field in the FPSCR, except for bits that are
                     -- set in the constant x"6007F8FF", which are always preserved.
 883                 j := to_integer(unsigned(insn_bfa(r.insn)));
 884                 for i in 0 to 7 loop
 885                     if i = j then
 886                         k := (7 - i) * 4;
 887                         v.cr_result := r.fpscr(k + 3 downto k);
 888                         fpscr_mask(k + 3 downto k) := "0000";
 889                     end if;
 890                 end loop;
 891                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 892                 v.instr_done := '1';
 893                 v.state := IDLE;
 894
 895             when DO_FTDIV =>
                     -- ftdiv: report whether dividing A by B needs special handling.
                     -- cr_result(2) is set when A is infinite or B is zero, infinite or
                     -- denormalized.  cr_result(1) is set immediately for a NaN or
                     -- infinite A, a NaN, zero or infinite B, or a finite A whose
                     -- exponent is <= -970; otherwise the instruction carries on in
                     -- state FTDIV_1 with doing_ftdiv = "11", which narrows the
                     -- min_exp/max_exp limits computed at the top of the process.
 896                 v.instr_done := '1';
 897                 v.state := IDLE;
 898                 v.cr_result := "0000";
                     -- A finite operand is denormalized when its unit bit, mantissa
                     -- bit 54 in the 10.54 format, is clear.  Bit 53 is the top
                     -- fraction bit and says nothing about normalization.
 899                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
 900                     (r.b.class = FINITE and r.b.mantissa(54) = '0') then
 901                     v.cr_result(2) := '1';
 902                 end if;
 903                 if r.a.class = NAN or r.a.class = INFINITY or
 904                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
 905                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
 906                     v.cr_result(1) := '1';
 907                 else
 908                     v.doing_ftdiv := "11";
 909                     v.first := '1';
 910                     v.state := FTDIV_1;
 911                     v.instr_done := '0';
 912                 end if;
 913
 914             when DO_FTSQRT =>
                     -- ftsqrt: report whether a square root of B needs special handling.
                     -- cr_result(2) is set when B is zero, infinite or denormalized;
                     -- cr_result(1) is set when B is a NaN, infinite, zero, negative,
                     -- or has an exponent <= -970.  Completes in this cycle.
 915                 v.instr_done := '1';
 916                 v.state := IDLE;
 917                 v.cr_result := "0000";
                     -- A finite operand is denormalized when its unit bit, mantissa
                     -- bit 54 in the 10.54 format, is clear.
 918                 if r.b.class = ZERO or r.b.class = INFINITY or
 919                     (r.b.class = FINITE and r.b.mantissa(54) = '0') then
 920                     v.cr_result(2) := '1';
 921                 end if;
 922                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
 923                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
                         -- cr_result was cleared above, so the flag must be driven to '1'
 924                     v.cr_result(1) := '1';
 925                 end if;
 926
 927             when DO_FCMP =>
 928                 -- fcmp[uo]
 929                 -- r.opsel_a = AIN_B
                     -- Compare A with B and put the outcome in cr_result, which is also
                     -- copied into FPSCR bits FL..FU at the end of this arm.  The cases
                     -- are tested in priority order: NaNs, zeros, differing signs,
                     -- infinities, then exponents.  Only when both operands are finite
                     -- with equal sign and exponent is the decision deferred to CMP_1.
                     -- insn bit 6 set means the ordered form (fcmpo).
 930                 v.instr_done := '1';
 931                 v.state := IDLE;
 932                 update_fx := '1';
 933                 v.result_exp := r.b.exponent;
 934                 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 935                     (r.b.class = NAN and r.b.mantissa(53) = '0') then
 936                     -- Signalling NAN
 937                     v.fpscr(FPSCR_VXSNAN) := '1';
                         -- the ordered form also sets VXVC, but only when VE is clear
 938                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
 939                         v.fpscr(FPSCR_VXVC) := '1';
 940                     end if;
 941                     invalid := '1';
 942                     v.cr_result := "0001";          -- unordered
 943                 elsif r.a.class = NAN or r.b.class = NAN then
 944                     if r.insn(6) = '1' then
 945                         -- fcmpo
 946                         v.fpscr(FPSCR_VXVC) := '1';
 947                         invalid := '1';
 948                     end if;
 949                     v.cr_result := "0001";          -- unordered
 950                 elsif r.a.class = ZERO and r.b.class = ZERO then
 951                     v.cr_result := "0010";          -- equal
 952                 elsif r.a.negative /= r.b.negative then
                         -- signs differ: bit 3 is set if A is the negative one, else bit 2
 953                     v.cr_result := r.a.negative & r.b.negative & "00";
 954                 elsif r.a.class = ZERO then
 955                     -- A and B are the same sign from here down
 956                     v.cr_result := not r.b.negative & r.b.negative & "00";
 957                 elsif r.a.class = INFINITY then
 958                     if r.b.class = INFINITY then
 959                         v.cr_result := "0010";
 960                     else
 961                         v.cr_result := r.a.negative & not r.a.negative & "00";
 962                     end if;
 963                 elsif r.b.class = ZERO then
 964                     -- A is finite from here down
 965                     v.cr_result := r.a.negative & not r.a.negative & "00";
 966                 elsif r.b.class = INFINITY then
 967                     v.cr_result := not r.b.negative & r.b.negative & "00";
 968                 elsif r.exp_cmp = '1' then
 969                     -- A and B are both finite from here down
                         -- exp_cmp = '1' means A's exponent was greater than B's at capture
 970                     v.cr_result := r.a.negative & not r.a.negative & "00";
 971                 elsif r.a.exponent /= r.b.exponent then
 972                     -- A exponent is smaller than B
 973                     v.cr_result := not r.a.negative & r.a.negative & "00";
 974                 else
 975                     -- Prepare to subtract mantissas, put B in R
 976                     v.cr_result := "0000";
 977                     v.instr_done := '0';
 978                     v.opsel_a := AIN_A;
 979                     v.state := CMP_1;
 980                 end if;
 981                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
 982
 983             when DO_MTFSB =>
 984                 -- mtfsb{0,1}
                     -- Write instruction bit 6 into the single FPSCR bit selected by the
                     -- BT field; BT value i addresses fpscr(31 - i).
 985                 j := to_integer(unsigned(insn_bt(r.insn)));
 986                 for i in 0 to 31 loop
 987                     if i = j then
 988                         v.fpscr(31 - i) := r.insn(6);
 989                     end if;
 990                 end loop;
 991                 v.instr_done := '1';
 992                 v.state := IDLE;
 993
 994             when DO_MTFSFI =>
 995                 -- mtfsfi
                     -- When instruction bit 16 is 0, write the 4-bit immediate from
                     -- insn_u into the FPSCR field selected by insn_bf (field 0 is the
                     -- most significant nibble).  When bit 16 is 1 nothing is written.
 996                 j := to_integer(unsigned(insn_bf(r.insn)));
 997                 if r.insn(16) = '0' then
 998                     for i in 0 to 7 loop
 999                         if i = j then
1000                             k := (7 - i) * 4;
1001                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
1002                         end if;
1003                     end loop;
1004                 end if;
1005                 v.instr_done := '1';
1006                 v.state := IDLE;
1007
1008             when DO_FMRG =>
1009                 -- fmrgew, fmrgow
                     -- Route the MISC source to the result with misc_sel = "01" &
                     -- insn(8) & '0' (insn bit 8 presumably distinguishes the two
                     -- merge forms), flag the result as an integer and write it back.
1010                 opsel_r <= RES_MISC;
1011                 misc_sel <= "01" & r.insn(8) & '0';
1012                 v.int_result := '1';
1013                 v.writing_back := '1';
1014                 v.instr_done := '1';
1015                 v.state := IDLE;
1016
1017             when DO_MFFS =>
                     -- mffs family: write back an integer-format result taken from the
                     -- MISC source.  Instruction bits 20..16 select the variant; each
                     -- variant may narrow fpscr_mask (consumed outside this arm,
                     -- presumably to mask the FPSCR value returned) and/or update
                     -- FPSCR bits.  Unknown variants are flagged illegal.
1018                 v.int_result := '1';
1019                 v.writing_back := '1';
1020                 opsel_r <= RES_MISC;
1021                 case r.insn(20 downto 16) is
1022                     when "00000" =>
1023                         -- mffs
1024                     when "00001" =>
1025                         -- mffsce
                             -- clears FPSCR bits VE down to XE
1026                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1027                     when "10100" | "10101" =>
1028                         -- mffscdrn[i] (but we don't implement DRN)
1029                         fpscr_mask := x"000000FF";
1030                     when "10110" =>
1031                         -- mffscrn
                             -- new RN bits come from the same bit positions of operand B
1032                         fpscr_mask := x"000000FF";
1033                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1034                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1035                     when "10111" =>
1036                         -- mffscrni
                             -- new RN bits come from instruction bits 12..11
1037                         fpscr_mask := x"000000FF";
1038                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1039                     when "11000" =>
1040                         -- mffsl
1041                         fpscr_mask := x"0007F0FF";
1042                     when others =>
1043                         illegal := '1';
1044                 end case;
1045                 v.instr_done := '1';
1046                 v.state := IDLE;
1047
1048             when DO_MTFSF =>
                     -- Build an 8-bit field mask flm: all fields when insn bit 25 is
                     -- set, none when insn bit 16 is set, otherwise insn bits 24..17.
                     -- For every set flm bit i, FPSCR bits 4i+3..4i are loaded from the
                     -- same bit positions of operand B's mantissa.
1049                 if r.insn(25) = '1' then
1050                     flm := x"FF";
1051                 elsif r.insn(16) = '1' then
1052                     flm := x"00";
1053                 else
1054                     flm := r.insn(24 downto 17);
1055                 end if;
1056                 for i in 0 to 7 loop
1057                     k := i * 4;
1058                     if flm(i) = '1' then
1059                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1060                     end if;
1061                 end loop;
1062                 v.instr_done := '1';
1063                 v.state := IDLE;
1064
1065             when DO_FMR =>
1066                 -- r.opsel_a = AIN_B
                     -- Register move family: the result takes B's class and exponent,
                     -- NaN quietening is turned off, and only the sign is chosen here
                     -- from the first set bit among insn bits 9, 8, 7, 6 (none set:
                     -- the sign of A).
1067                 v.result_class := r.b.class;
1068                 v.result_exp := r.b.exponent;
1069                 v.quieten_nan := '0';
1070                 if r.insn(9) = '1' then
1071                     v.result_sign := '0';              -- fabs
1072                 elsif r.insn(8) = '1' then
1073                     v.result_sign := '1';              -- fnabs
1074                 elsif r.insn(7) = '1' then
1075                     v.result_sign := r.b.negative;     -- fmr
1076                 elsif r.insn(6) = '1' then
1077                     v.result_sign := not r.b.negative; -- fneg
1078                 else
1079                     v.result_sign := r.a.negative;     -- fcpsgn
1080                 end if;
1081                 v.writing_back := '1';
1082                 v.instr_done := '1';
1083                 v.state := IDLE;
1084
1085             when DO_FRI =>    -- fri[nzpm]
1086                 -- r.opsel_a = AIN_B
                     -- Round B to an integral value.  The result starts as a copy of B's
                     -- class, sign and exponent; FR and FI are cleared.  Non-finite and
                     -- zero inputs, and finite inputs with exponent >= 52, finish at once
                     -- (arith_done).  Otherwise a shift of (exponent - 52) is set up
                     -- and rounding continues in state FRI_1.
1087                 v.result_class := r.b.class;
1088                 v.result_sign := r.b.negative;
1089                 v.result_exp := r.b.exponent;
1090                 v.fpscr(FPSCR_FR) := '0';
1091                 v.fpscr(FPSCR_FI) := '0';
1092                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1093                     -- Signalling NAN
1094                     v.fpscr(FPSCR_VXSNAN) := '1';
1095                     invalid := '1';
1096                 end if;
1097                 if r.b.class = FINITE then
1098                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1099                         -- integer already, no rounding required
1100                         arith_done := '1';
1101                     else
1102                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1103                         v.state := FRI_1;
                             -- rounding mode comes from insn bits 7..6, with the top
                             -- round_mode bit set
1104                         v.round_mode := '1' & r.insn(7 downto 6);
1105                     end if;
1106                 else
1107                     arith_done := '1';
1108                 end if;
1109
1110             when DO_FRSP =>
1111                 -- r.opsel_a = AIN_B, r.shift = 0
                     -- Round B to the single-precision range.  The result starts as a
                     -- copy of B; FR and FI are cleared and set_x is asserted.  A finite
                     -- B goes to ROUND_UFLOW when its exponent is below -126 (with a
                     -- negative shift of exponent + 126), to ROUND_OFLOW when above 127,
                     -- and to ROUNDING otherwise.  Non-finite and zero inputs finish at once.
1112                 v.result_class := r.b.class;
1113                 v.result_sign := r.b.negative;
1114                 v.result_exp := r.b.exponent;
1115                 v.fpscr(FPSCR_FR) := '0';
1116                 v.fpscr(FPSCR_FI) := '0';
1117                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1118                     -- Signalling NAN
1119                     v.fpscr(FPSCR_VXSNAN) := '1';
1120                     invalid := '1';
1121                 end if;
1122                 set_x := '1';
1123                 if r.b.class = FINITE then
1124                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1125                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1126                         v.state := ROUND_UFLOW;
1127                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1128                         v.state := ROUND_OFLOW;
1129                     else
1130                         v.state := ROUNDING;
1131                     end if;
1132                 else
1133                     arith_done := '1';
1134                 end if;
1135
1136             when DO_FCTI =>
                     -- Convert B to an integer result (int_result is set).  Zero
                     -- finishes at once; infinities and NaNs go to INT_OFLOW; finite
                     -- values are dispatched on their exponent as described below.
1137                 -- instr bit 9: 1=dword 0=word
1138                 -- instr bit 8: 1=unsigned 0=signed
1139                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1140                 -- r.opsel_a = AIN_B
1141                 v.result_class := r.b.class;
1142                 v.result_sign := r.b.negative;
1143                 v.result_exp := r.b.exponent;
1144                 v.fpscr(FPSCR_FR) := '0';
1145                 v.fpscr(FPSCR_FI) := '0';
1146                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1147                     -- Signalling NAN
1148                     v.fpscr(FPSCR_VXSNAN) := '1';
1149                     invalid := '1';
1150                 end if;
1151
1152                 v.int_result := '1';
1153                 case r.b.class is
1154                     when ZERO =>
1155                         arith_done := '1';
1156                     when FINITE =>
                             -- exponent >= 64, or >= 32 for a word conversion, cannot fit
1157                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1158                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1159                             v.state := INT_OFLOW;
1160                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1161                             -- integer already, no rounding required,
1162                             -- shift into final position
1163                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
                                 -- a negative value in an unsigned conversion overflows
1164                             if r.insn(8) = '1' and r.b.negative = '1' then
1165                                 v.state := INT_OFLOW;
1166                             else
1167                                 v.state := INT_ISHIFT;
1168                             end if;
1169                         else
                                 -- exponent below 52: set up a shift of (exponent - 52) for INT_SHIFT
1170                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1171                             v.state := INT_SHIFT;
1172                         end if;
1173                     when INFINITY | NAN =>
1174                         v.state := INT_OFLOW;
1175                 end case;
1176
1177             when DO_FCFID =>
1178                 -- r.opsel_a = AIN_B
                     -- Convert the integer in B to floating point.  The result exponent
                     -- starts at 54 and FR/FI are cleared.  A zero input finishes at
                     -- once; anything else continues in state FINISH.
1179                 v.result_sign := '0';
1180                 if r.insn(8) = '0' and r.b.negative = '1' then
1181                     -- fcfid[s] with negative operand, set R = -B
                         -- (operand inverted plus a carry-in of 1)
1182                     opsel_ainv <= '1';
1183                     carry_in <= '1';
1184                     v.result_sign := '1';
1185                 end if;
1186                 v.result_class := r.b.class;
1187                 v.result_exp := to_signed(54, EXP_BITS);
1188                 v.fpscr(FPSCR_FR) := '0';
1189                 v.fpscr(FPSCR_FI) := '0';
1190                 if r.b.class = ZERO then
1191                     arith_done := '1';
1192                 else
1193                     v.state := FINISH;
1194                 end if;
1195
1196             when DO_FADD =>
1197                 -- fadd[s] and fsub[s]
1198                 -- r.opsel_a = AIN_A
                     -- The result starts as a copy of A's sign, class and exponent.
                     -- is_add is '1' when the operation is effectively an addition of
                     -- magnitudes: the two operand signs and insn bit 1 are XORed.
                     -- Two finite operands are dispatched on the exponent comparison
                     -- captured in exp_cmp; all other class combinations are resolved
                     -- here or handed to NAN_RESULT / EXC_RESULT.
1199                 v.result_sign := r.a.negative;
1200                 v.result_class := r.a.class;
1201                 v.result_exp := r.a.exponent;
1202                 v.fpscr(FPSCR_FR) := '0';
1203                 v.fpscr(FPSCR_FI) := '0';
1204                 v.use_a := '1';
1205                 v.use_b := '1';
1206                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1207                 if r.a.class = FINITE and r.b.class = FINITE then
1208                     v.is_subtract := not is_add;
1209                     v.add_bsmall := r.exp_cmp;
1210                     v.opsel_a := AIN_B;
1211                     if r.exp_cmp = '0' then
                             -- A's exponent is not greater than B's: the shift is
                             -- A.exp - B.exp (<= 0) and the sign follows B, flipped
                             -- when insn bit 1 is clear
1212                         v.shift := r.a.exponent - r.b.exponent;
1213                         v.result_sign := r.b.negative xnor r.insn(1);
1214                         if r.a.exponent = r.b.exponent then
1215                             v.state := ADD_2;
1216                         else
1217                             v.longmask := '0';
1218                             v.state := ADD_SHIFT;
1219                         end if;
1220                     else
1221                         v.state := ADD_1;
1222                     end if;
1223                 else
1224                     if r.a.class = NAN or r.b.class = NAN then
1225                         v.state := NAN_RESULT;
1226                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1227                         -- invalid operation, construct QNaN
1228                         v.fpscr(FPSCR_VXISI) := '1';
1229                         qnan_result := '1';
1230                         arith_done := '1';
1231                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1232                         -- return -0 for rounding to -infinity
1233                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1234                         arith_done := '1';
1235                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1236                         -- result is A
1237                         v.opsel_a := AIN_A;
1238                         v.state := EXC_RESULT;
1239                     else
1240                         -- result is +/- B
                             -- (negated when insn bit 1 is clear)
1241                         v.opsel_a := AIN_B;
1242                         v.negate := not r.insn(1);
1243                         v.state := EXC_RESULT;
1244                     end if;
1245                 end if;
1246
1247             when DO_FMUL =>
1248                 -- fmul[s]
1249                 -- r.opsel_a = AIN_A unless C is denorm and A isn't
                     -- Entry state for multiply (operands A and C).  The result
                     -- sign is the XOR of the operand signs; FPSCR[FR] and
                     -- FPSCR[FI] are cleared.
1250                 v.result_sign := r.a.negative xor r.c.negative;
1251                 v.result_class := r.a.class;
1252                 v.fpscr(FPSCR_FR) := '0';
1253                 v.fpscr(FPSCR_FI) := '0';
1254                 v.use_a := '1';
1255                 v.use_c := '1';
1256                 if r.a.class = FINITE and r.c.class = FINITE then
                         -- product exponent is the sum of the operand exponents
1257                     v.result_exp := r.a.exponent + r.c.exponent;
1258                     -- Renormalize denorm operands
                         -- (an operand with mantissa bit 54 clear is renormalized
                         -- first; A takes priority over C)
1259                     if r.a.mantissa(54) = '0' then
1260                         v.state := RENORM_A;
1261                     elsif r.c.mantissa(54) = '0' then
1262                         v.state := RENORM_C;
1263                     else
                             -- both operands normalized: assert the multiplier
                             -- valid signal and wait for the product in MULT_1
1264                         f_to_multiply.valid <= '1';
1265                         v.state := MULT_1;
1266                     end if;
1267                 else
1268                     if r.a.class = NAN or r.c.class = NAN then
1269                         v.state := NAN_RESULT;
1270                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1271                         (r.a.class = ZERO and r.c.class = INFINITY) then
1272                         -- invalid operation, construct QNaN
                             -- NOTE(review): arith_done is not set here, unlike the
                             -- VXISI case in DO_FADD; presumably qnan_result is
                             -- completed by logic outside this state -- confirm
1273                         v.fpscr(FPSCR_VXIMZ) := '1';
1274                         qnan_result := '1';
1275                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1276                         -- result is +/- A
                             -- (class from A, sign already set to the sign XOR)
1277                         arith_done := '1';
1278                     else
1279                         -- r.c.class is ZERO or INFINITY
                             -- result is C, negated when A is negative
1280                         v.opsel_a := AIN_C;
1281                         v.negate := r.a.negative;
1282                         v.state := EXC_RESULT;
1283                     end if;
1284                 end if;
1285
1286             when DO_FDIV =>
1287                 -- r.opsel_a = AIN_A unless B is denorm and A isn't
                     -- Entry state for division of A by B.  Result sign is the
                     -- XOR of the operand signs and the result exponent is A's
                     -- exponent minus B's; FPSCR[FR] and FPSCR[FI] are cleared.
1288                 v.result_class := r.a.class;
1289                 v.fpscr(FPSCR_FR) := '0';
1290                 v.fpscr(FPSCR_FI) := '0';
1291                 v.use_a := '1';
1292                 v.use_b := '1';
1293                 v.result_sign := r.a.negative xor r.b.negative;
1294                 v.result_exp := r.a.exponent - r.b.exponent;
                     -- reset the iteration counter read by DIV_2 and DIV_3
1295                 v.count := "00";
1296                 if r.a.class = FINITE and r.b.class = FINITE then
1297                     -- Renormalize denorm operands
1298                     if r.a.mantissa(54) = '0' then
1299                         v.state := RENORM_A;
1300                     elsif r.b.mantissa(54) = '0' then
1301                         v.state := RENORM_B;
1302                     else
                             -- both operands normalized: start the DIV_2 sequence
1303                         v.first := '1';
1304                         v.state := DIV_2;
1305                     end if;
1306                 else
1307                     if r.a.class = NAN or r.b.class = NAN then
1308                         v.state := NAN_RESULT;
1309                     elsif r.b.class = INFINITY then
1310                         if r.a.class = INFINITY then
                                 -- infinity / infinity: invalid operation
1311                             v.fpscr(FPSCR_VXIDI) := '1';
1312                             qnan_result := '1';
1313                         else
                                 -- zero or finite divided by infinity gives zero
1314                             v.result_class := ZERO;
1315                         end if;
1316                         arith_done := '1';
1317                     elsif r.b.class = ZERO then
1318                         if r.a.class = ZERO then
                                 -- zero / zero: invalid operation
1319                             v.fpscr(FPSCR_VXZDZ) := '1';
1320                             qnan_result := '1';
1321                         else
                                 -- zero_divide is raised only for a finite
                                 -- dividend; infinity / zero just gives infinity
1322                             if r.a.class = FINITE then
1323                                 zero_divide := '1';
1324                             end if;
1325                             v.result_class := INFINITY;
1326                         end if;
1327                         arith_done := '1';
1328                     else -- r.b.class = FINITE, result_class = r.a.class
1329                         arith_done := '1';
1330                     end if;
1331                 end if;
1332
1333             when DO_FSEL =>
                     -- Select state: passes C to the result when A is zero, or
                     -- is non-negative and not a NaN; otherwise passes B.
                     -- FPSCR[FR] and FPSCR[FI] are cleared.
1334                 v.fpscr(FPSCR_FR) := '0';
1335                 v.fpscr(FPSCR_FI) := '0';
1336                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1337                     v.opsel_a := AIN_C;
1338                 else
1339                     v.opsel_a := AIN_B;
1340                 end if;
                     -- quieten_nan is cleared before EXC_RESULT
                     -- (presumably so a NaN operand is passed through unchanged
                     -- -- confirm in EXC_RESULT)
1341                 v.quieten_nan := '0';
1342                 v.state := EXC_RESULT;
1343
1344             when DO_FSQRT =>
1345                 -- r.opsel_a = AIN_B
                     -- Entry state for square root of B.  Result class and sign
                     -- default to B's; FPSCR[FR] and FPSCR[FI] are cleared.
1346                 v.result_class := r.b.class;
1347                 v.result_sign := r.b.negative;
1348                 v.fpscr(FPSCR_FR) := '0';
1349                 v.fpscr(FPSCR_FI) := '0';
1350                 v.use_b := '1';
1351                 case r.b.class is
1352                     when FINITE =>
1353                         v.result_exp := r.b.exponent;
1354                         if r.b.negative = '1' then
                                 -- square root of a negative finite number:
                                 -- invalid operation, construct QNaN
                                 -- NOTE(review): arith_done is not set on this
                                 -- path, unlike the INFINITY case below; presumably
                                 -- handled where qnan_result is consumed -- confirm
1355                             v.fpscr(FPSCR_VXSQRT) := '1';
1356                             qnan_result := '1';
1357                         elsif r.b.mantissa(54) = '0' then
                                 -- mantissa bit 54 clear: renormalize B first
1358                             v.state := RENORM_B;
1359                         elsif r.b.exponent(0) = '0' then
                                 -- even exponent: go straight to SQRT_1
1360                             v.state := SQRT_1;
1361                         else
                                 -- odd exponent: set a shift count of 1 and go
                                 -- through RENORM_B2 before the lookup
1362                             v.shift := to_signed(1, EXP_BITS);
1363                             v.state := RENORM_B2;
1364                         end if;
1365                     when NAN =>
1366                         v.state := NAN_RESULT;
1367                     when ZERO =>
1368                         -- result is B
1369                         arith_done := '1';
1370                     when INFINITY =>
1371                         if r.b.negative = '1' then
                                 -- square root of -infinity: invalid operation
1372                             v.fpscr(FPSCR_VXSQRT) := '1';
1373                             qnan_result := '1';
1374                         -- else result is B
1375                         end if;
1376                         arith_done := '1';
1377                 end case;
1378
1379             when DO_FRE =>
1380                 -- r.opsel_a = AIN_B
                     -- Entry state for the reciprocal estimate of B.  Result
                     -- class and sign default to B's; FPSCR[FR] and FPSCR[FI]
                     -- are cleared.
1381                 v.result_class := r.b.class;
1382                 v.result_sign := r.b.negative;
1383                 v.fpscr(FPSCR_FR) := '0';
1384                 v.fpscr(FPSCR_FI) := '0';
1385                 v.use_b := '1';
1386                 case r.b.class is
1387                     when FINITE =>
                             -- result exponent starts as the negated exponent of B
1388                         v.result_exp := - r.b.exponent;
1389                         if r.b.mantissa(54) = '0' then
                                 -- mantissa bit 54 clear: renormalize B first
1390                             v.state := RENORM_B;
1391                         else
1392                             v.state := FRE_1;
1393                         end if;
1394                     when NAN =>
1395                         v.state := NAN_RESULT;
1396                     when INFINITY =>
                             -- reciprocal of infinity is zero (sign of B kept)
1397                         v.result_class := ZERO;
1398                         arith_done := '1';
1399                     when ZERO =>
                             -- reciprocal of zero is infinity and raises
                             -- zero_divide
1400                         v.result_class := INFINITY;
1401                         zero_divide := '1';
1402                         arith_done := '1';
1403                 end case;
1404
1405             when DO_FRSQRTE =>
1406                 -- r.opsel_a = AIN_B
                     -- Entry state for the reciprocal square root estimate of B.
                     -- Result class and sign default to B's; FPSCR[FR] and
                     -- FPSCR[FI] are cleared.
1407                 v.result_class := r.b.class;
1408                 v.result_sign := r.b.negative;
1409                 v.fpscr(FPSCR_FR) := '0';
1410                 v.fpscr(FPSCR_FI) := '0';
1411                 v.use_b := '1';
                     -- the shift count is set to 1 on every path through this
                     -- state; RENORM_B2 reads it on the odd-exponent path
1412                 v.shift := to_signed(1, EXP_BITS);
1413                 case r.b.class is
1414                     when FINITE =>
1415                         v.result_exp := r.b.exponent;
1416                         if r.b.negative = '1' then
                                 -- negative finite operand: invalid operation
1417                             v.fpscr(FPSCR_VXSQRT) := '1';
1418                             qnan_result := '1';
1419                         elsif r.b.mantissa(54) = '0' then
                                 -- mantissa bit 54 clear: renormalize B first
1420                             v.state := RENORM_B;
1421                         elsif r.b.exponent(0) = '0' then
                                 -- even exponent: go straight to RSQRT_1
1422                             v.state := RSQRT_1;
1423                         else
                                 -- odd exponent: go through RENORM_B2 first
1424                             v.state := RENORM_B2;
1425                         end if;
1426                     when NAN =>
1427                         v.state := NAN_RESULT;
1428                     when INFINITY =>
1429                         if r.b.negative = '1' then
                                 -- -infinity: invalid operation
1430                             v.fpscr(FPSCR_VXSQRT) := '1';
1431                             qnan_result := '1';
1432                         else
                                 -- +infinity: result is zero
1433                             v.result_class := ZERO;
1434                         end if;
1435                         arith_done := '1';
1436                     when ZERO =>
                             -- zero operand: result is infinity with the sign
                             -- of B, and zero_divide is raised
1437                         v.result_class := INFINITY;
1438                         zero_divide := '1';
1439                         arith_done := '1';
1440                 end case;
1441
1442             when DO_FMADD =>
1443                 -- fmadd, fmsub, fnmadd, fnmsub
1444                 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1445                 -- else AIN_B
                     -- Entry state for fused multiply-add (A * C combined with B).
                     -- This state is also re-entered from RENORM_A2 and RENORM_C2
                     -- once A and C have been renormalized.
1446                 v.result_sign := r.a.negative;
1447                 v.result_class := r.a.class;
1448                 v.result_exp := r.a.exponent;
1449                 v.fpscr(FPSCR_FR) := '0';
1450                 v.fpscr(FPSCR_FI) := '0';
1451                 v.use_a := '1';
1452                 v.use_b := '1';
1453                 v.use_c := '1';
                     -- is_add is the XOR of all three operand signs with
                     -- r.insn(1); its complement becomes v.is_subtract
1454                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1455                 if r.a.class = FINITE and r.c.class = FINITE and
1456                     (r.b.class = FINITE or r.b.class = ZERO) then
1457                     v.is_subtract := not is_add;
                         -- mulexp: exponent of the product A * C
1458                     mulexp := r.a.exponent + r.c.exponent;
1459                     v.result_exp := mulexp;
1460                     -- Make sure A and C are normalized
1461                     if r.a.mantissa(54) = '0' then
1462                         v.state := RENORM_A;
1463                     elsif r.c.mantissa(54) = '0' then
1464                         v.state := RENORM_C;
1465                     elsif r.b.class = ZERO then
1466                         -- no addend, degenerates to multiply
                             -- (product sign is additionally inverted by r.insn(2))
1467                         v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1468                         f_to_multiply.valid <= '1';
1469                         v.is_multiply := '1';
1470                         v.state := MULT_1;
1471                     elsif r.madd_cmp = '0' then
1472                         -- addend is bigger, do multiply first
1473                         v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1474                         f_to_multiply.valid <= '1';
1475                         v.state := FMADD_1;
1476                     else
1477                         -- product is bigger, shift B right and use it as the
1478                         -- addend to the multiplier
                             -- (shift count is biased by 64 here; FMADD_2 removes
                             -- the bias again)
1479                         v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1480                         -- for subtract, multiplier does B - A * C
1481                         v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1482                         v.result_exp := r.b.exponent;
1483                         v.state := FMADD_2;
1484                     end if;
1485                 else
                         -- At least one operand is a NaN or infinity, or A or C
                         -- is zero
1486                     if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1487                         v.state := NAN_RESULT;
1488                     elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1489                         (r.a.class = INFINITY and r.c.class = ZERO) then
1490                         -- invalid operation, construct QNaN
1491                         v.fpscr(FPSCR_VXIMZ) := '1';
1492                         qnan_result := '1';
1493                     elsif r.a.class = INFINITY or r.c.class = INFINITY then
                             -- the product is infinite
1494                         if r.b.class = INFINITY and is_add = '0' then
1495                             -- invalid operation, construct QNaN
1496                             v.fpscr(FPSCR_VXISI) := '1';
1497                             qnan_result := '1';
1498                         else
1499                             -- result is infinity
1500                             v.result_class := INFINITY;
1501                             v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1502                             arith_done := '1';
1503                         end if;
1504                     else
1505                         -- Here A is zero, C is zero, or B is infinity
1506                         -- Result is +/-B in all of those cases
1507                         v.opsel_a := AIN_B;
1508                         if r.b.class /= ZERO or is_add = '1' then
                                 -- B is negated when r.insn(1) and r.insn(2) are
                                 -- equal
1509                             v.negate := not (r.insn(1) xor r.insn(2));
1510                         else
1511                             -- have to be careful about rule for 0 - 0 result sign
                                 -- (negate is chosen from B's sign, the AND of
                                 -- both round_mode bits, and r.insn(2))
1512                             v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1513                         end if;
1514                         v.state := EXC_RESULT;
1515                     end if;
1516                 end if;
1517
1518             when RENORM_A =>
                     -- First step of renormalizing A: raises renormalize and
                     -- selects the next operand for the A-input, which is C
                     -- when r.insn(4) = '1' and B otherwise (RENORM_A2 expects
                     -- AIN_C for fmul/fmadd and AIN_B for fdiv).
1519                 renormalize := '1';
1520                 v.state := RENORM_A2;
1521                 if r.insn(4) = '1' then
1522                     v.opsel_a := AIN_C;
1523                 else
1524                     v.opsel_a := AIN_B;
1525                 end if;
1526
1527             when RENORM_A2 =>
1528                 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
                     -- Second step of renormalizing A: raises set_a, takes the
                     -- adjusted exponent from new_exp, then decides whether the
                     -- other operand also needs renormalizing.
1529                 set_a := '1';
1530                 v.result_exp := new_exp;
1531                 if r.insn(4) = '1' then
                         -- fmul/fmadd path: the other multiplicand is C
1532                     if r.c.mantissa(54) = '1' then
                             -- C is already normalized
1533                         if r.insn(3) = '0' or r.b.class = ZERO then
                                 -- plain multiply (presumably r.insn(3) = '0'
                                 -- identifies fmul -- confirm), or no addend
1534                             v.first := '1';
1535                             v.state := MULT_1;
1536                         else
                                 -- recompute madd_cmp from the new product
                                 -- exponent, then re-enter DO_FMADD with B
                                 -- selected on the A-input
1537                             v.madd_cmp := '0';
1538                             if new_exp + 1 >= r.b.exponent then
1539                                 v.madd_cmp := '1';
1540                             end if;
1541                             v.opsel_a := AIN_B;
1542                             v.state := DO_FMADD;
1543                         end if;
1544                     else
1545                         v.state := RENORM_C;
1546                     end if;
1547                 else
                         -- fdiv path: the other operand is B
1548                     if r.b.mantissa(54) = '1' then
1549                         v.first := '1';
1550                         v.state := DIV_2;
1551                     else
1552                         v.state := RENORM_B;
1553                     end if;
1554                 end if;
1555
1556             when RENORM_B =>
                     -- First step of renormalizing B: raises renormalize and
                     -- passes r.is_sqrt on as renorm_sqrt.
1557                 renormalize := '1';
1558                 renorm_sqrt := r.is_sqrt;
1559                 v.state := RENORM_B2;
1560
1561             when RENORM_B2 =>
                     -- Second step of renormalizing B: raises set_b, updates the
                     -- result exponent, selects B on the A-input and moves on
                     -- to LOOKUP.
1562                 set_b := '1';
1563                 if r.is_sqrt = '0' then
                         -- non-sqrt operations add the shift count to the
                         -- current result exponent
1564                     v.result_exp := r.result_exp + r.shift;
1565                 else
                         -- sqrt operations take the exponent from new_exp
1566                     v.result_exp := new_exp;
1567                 end if;
1568                 v.opsel_a := AIN_B;
1569                 v.state := LOOKUP;
1570
1571             when RENORM_C =>
                     -- First step of renormalizing C: raises renormalize and
                     -- continues in RENORM_C2.
1572                 renormalize := '1';
1573                 v.state := RENORM_C2;
1574
1575             when RENORM_C2 =>
                     -- Second step of renormalizing C: raises set_c, takes the
                     -- adjusted exponent from new_exp, then starts the multiply
                     -- or returns to DO_FMADD.
1576                 set_c := '1';
1577                 v.result_exp := new_exp;
1578                 if r.insn(3) = '0' or r.b.class = ZERO then
                         -- plain multiply (presumably r.insn(3) = '0' identifies
                         -- fmul -- confirm), or fmadd with a zero addend
1579                     v.first := '1';
1580                     v.state := MULT_1;
1581                 else
                         -- recompute madd_cmp from the new product exponent,
                         -- then re-enter DO_FMADD with B on the A-input
1582                     v.madd_cmp := '0';
1583                     if new_exp + 1 >= r.b.exponent then
1584                         v.madd_cmp := '1';
1585                     end if;
1586                     v.opsel_a := AIN_B;
1587                     v.state := DO_FMADD;
1588                 end if;
1589
1590             when ADD_1 =>
1591                 -- transferring B to R
                     -- Reached from DO_FADD when r.exp_cmp = '1'.  Sets the shift
                     -- count to B's exponent minus A's, loads B's exponent as
                     -- the result exponent and clears longmask for ADD_SHIFT.
1592                 v.shift := r.b.exponent - r.a.exponent;
1593                 v.result_exp := r.b.exponent;
1594                 v.longmask := '0';
1595                 v.state := ADD_SHIFT;
1596
1597             when ADD_SHIFT =>
1598                 -- r.shift = - exponent difference, r.longmask = 0
                     -- Alignment step: selects the shifter output as the result
                     -- (RES_SHIFT), loads X from s_nz and raises set_x, and sets
                     -- longmask from r.single_prec for the following states.
1599                 opsel_r <= RES_SHIFT;
1600                 v.x := s_nz;
1601                 set_x := '1';
1602                 v.longmask := r.single_prec;
                     -- choose the other operand for the A-input in ADD_2:
                     -- A when add_bsmall = '1', B otherwise
1603                 if r.add_bsmall = '1' then
1604                     v.opsel_a := AIN_A;
1605                 else
1606                     v.opsel_a := AIN_B;
1607                 end if;
1608                 v.state := ADD_2;
1609
1610             when ADD_2 =>
1611                 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
                     -- Add/subtract step: the B-input is R, inverted when
                     -- r.is_subtract = '1'.  The carry-in is raised for a
                     -- subtraction only when r.x = '0'.  The shift count is set
                     -- to -1 for ADD_3.
1612                 opsel_b <= BIN_R;
1613                 opsel_binv <= r.is_subtract;
1614                 carry_in <= r.is_subtract and not r.x;
1615                 v.shift := to_signed(-1, EXP_BITS);
1616                 v.state := ADD_3;
1617
1618             when ADD_3 =>
1619                 -- check for overflow or negative result (can't get both)
1620                 -- r.shift = -1
                     -- Post-add classification of R.  Cases are tested in this
                     -- order: negative result, overflow into bit 55, already
                     -- normalized (bit 54 set), exact zero, else normalize.
1621                 if r.r(63) = '1' then
1622                     -- result is opposite sign to expected
                         -- negate R (invert the A-input and add 1) and flip the
                         -- result sign
1623                     v.result_sign := not r.result_sign;
1624                     opsel_ainv <= '1';
1625                     carry_in <= '1';
1626                     v.state := FINISH;
1627                 elsif r.r(55) = '1' then
1628                     -- sum overflowed, shift right
1629                     opsel_r <= RES_SHIFT;
1630                     set_x := '1';
1631                     if exp_huge = '1' then
1632                         v.state := ROUND_OFLOW;
1633                     else
1634                         v.state := ROUNDING;
1635                     end if;
1636                 elsif r.r(54) = '1' then
                         -- bit 54 set and no overflow: go straight to rounding
1637                     set_x := '1';
1638                     v.state := ROUNDING;
1639                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1640                     -- r.x must be zero at this point
                         -- the result is exactly zero
1641                     v.result_class := ZERO;
1642                     if r.is_subtract = '1' then
1643                         -- set result sign depending on rounding mode
                             -- (negative only when both round_mode bits are '1')
1644                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1645                     end if;
1646                     arith_done := '1';
1647                 else
                         -- non-zero result with bit 54 clear: normalize it
1648                     renormalize := '1';
1649                     v.state := NORMALIZE;
1650                 end if;
1651
1652             when CMP_1 =>
1653                 -- r.opsel_a = AIN_A
                     -- Compare step 1: the B-input is R, inverted, with the
                     -- carry-in set, so the adder subtracts R from the A-input.
                     -- CMP_2 examines the difference.
1654                 opsel_b <= BIN_R;
1655                 opsel_binv <= '1';
1656                 carry_in <= '1';
1657                 v.state := CMP_2;
1658
1659             when CMP_2 =>
                     -- Compare step 2: derives the 4-bit CR result from the
                     -- difference in R and the sign of A, copies it into the
                     -- FPSCR FL..FU field and completes the instruction.
1660                 if r.r(63) = '1' then
1661                     -- A is smaller in magnitude
                         -- top two bits are (not A.negative, A.negative)
1662                     v.cr_result := not r.a.negative & r.a.negative & "00";
1663                 elsif (r_hi_nz or r_lo_nz) = '0' then
                         -- difference is zero: only bit 1 of the result is set
1664                     v.cr_result := "0010";
1665                 else
                         -- positive non-zero difference: top two bits are
                         -- (A.negative, not A.negative)
1666                     v.cr_result := r.a.negative & not r.a.negative & "00";
1667                 end if;
1668                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1669                 v.instr_done := '1';
1670                 v.state := IDLE;
1671
1672             when MULT_1 =>
                     -- Waits for the multiplier.  f_to_multiply.valid follows
                     -- r.first, the multiplier output is selected as the result
                     -- (RES_MULT), and the state advances to FINISH once
                     -- multiply_to_f.valid is seen.
1673                 f_to_multiply.valid <= r.first;
1674                 opsel_r <= RES_MULT;
1675                 if multiply_to_f.valid = '1' then
1676                     v.state := FINISH;
1677                 end if;
1678
1679             when FMADD_1 =>
1680                 -- Addend is bigger here
                     -- Waits for the product while setting up the alignment
                     -- shift.  The multiplier output is routed to both R
                     -- (RES_MULT) and S (S_MULT, set_s).
1681                 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1682                 -- note v.shift is at most -2 here
                     -- (shift count = product exponent minus addend exponent)
1683                 v.shift := r.result_exp - r.b.exponent;
1684                 opsel_r <= RES_MULT;
1685                 opsel_s <= S_MULT;
1686                 set_s := '1';
1687                 f_to_multiply.valid <= r.first;
1688                 if multiply_to_f.valid = '1' then
                         -- product available: continue with the shared add path
1689                     v.longmask := '0';
1690                     v.state := ADD_SHIFT;
1691                 end if;
1692
1693             when FMADD_2 =>
1694                 -- Product is potentially bigger here
1695                 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
                     -- Loads S from the shifter output (S_SHIFT, set_s) and
                     -- removes the bias of 64 from the shift count for FMADD_3.
1696                 set_s := '1';
1697                 opsel_s <= S_SHIFT;
1698                 v.shift := r.shift - to_signed(64, EXP_BITS);
1699                 v.state := FMADD_3;
1700
1701             when FMADD_3 =>
1702                 -- r.shift = addend exp - product exp
                     -- Loads R from the shifter output (RES_SHIFT) and sets
                     -- first so that FMADD_4 asserts f_to_multiply.valid.
1703                 opsel_r <= RES_SHIFT;
1704                 v.first := '1';
1705                 v.state := FMADD_4;
1706
1707             when FMADD_4 =>
                     -- Multiply with MULADD_RS selected as the multiplier addend
                     -- and msel_inv following r.is_subtract.  The multiplier
                     -- output is routed to both R (RES_MULT) and S (S_MULT,
                     -- set_s).  Stays here until multiply_to_f.valid is seen.
1708                 msel_add <= MULADD_RS;
1709                 f_to_multiply.valid <= r.first;
1710                 msel_inv <= r.is_subtract;
1711                 opsel_r <= RES_MULT;
1712                 opsel_s <= S_MULT;
1713                 set_s := '1';
1714                 if multiply_to_f.valid = '1' then
1715                     v.state := FMADD_5;
1716                 end if;
1717
1718             when FMADD_5 =>
1719                 -- negate R:S:X if negative
1720                 if r.r(63) = '1' then
                         -- flip the result sign, invert the A-input and select
                         -- S_NEG for S; the carry into R is raised only when S
                         -- and X are both zero
1721                     v.result_sign := not r.result_sign;
1722                     opsel_ainv <= '1';
1723                     carry_in <= not (s_nz or r.x);
1724                     opsel_s <= S_NEG;
1725                     set_s := '1';
1726                 end if;
                     -- shift count of 56 is expected by FMADD_6
1727                 v.shift := to_signed(56, EXP_BITS);
1728                 v.state := FMADD_6;
1729
1730             when FMADD_6 =>
1731                 -- r.shift = 56 (or 0, but only if r is now nonzero)
                     -- Classifies the sum held in R and S: exact zero, R empty
                     -- but S non-zero, already normalized, or needing
                     -- normalization.
1732                 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1733                     if s_nz = '0' then
1734                         -- must be a subtraction, and r.x must be zero
                             -- exact zero result; sign is negative only when
                             -- both round_mode bits are '1'
1735                         v.result_class := ZERO;
1736                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1737                         arith_done := '1';
1738                     else
1739                         -- R is all zeroes but there are non-zero bits in S
1740                         -- so shift them into R and set S to 0
1741                         opsel_r <= RES_SHIFT;
1742                         set_s := '1';
1743                         -- stay in state FMADD_6
1744                     end if;
1745                 elsif r.r(56 downto 54) = "001" then
                         -- leading one already at bit 54
1746                     v.state := FINISH;
1747                 else
1748                     renormalize := '1';
1749                     v.state := NORMALIZE;
1750                 end if;
1751
1752             when LOOKUP =>
1753                 -- r.opsel_a = AIN_B
1754                 -- wait one cycle for inverse_table[B] lookup
                     -- Sets first and dispatches on instruction bits 4, 3 and 2:
                     --   insn(4) = '0', insn(3) = '0'  -> DIV_2
                     --   insn(4) = '0', insn(3) = '1'  -> SQRT_1
                     --   insn(4) = '1', insn(2) = '0'  -> FRE_1
                     --   insn(4) = '1', insn(2) = '1'  -> RSQRT_1
1755                 v.first := '1';
1756                 if r.insn(4) = '0' then
1757                     if r.insn(3) = '0' then
1758                         v.state := DIV_2;
1759                     else
1760                         v.state := SQRT_1;
1761                     end if;
1762                 elsif r.insn(2) = '0' then
1763                     v.state := FRE_1;
1764                 else
1765                     v.state := RSQRT_1;
1766                 end if;
1767
1768             when DIV_2 =>
1769                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
                     -- Multiplier operands: B times either the lookup-table
                     -- value (first pass, count = 0) or P (later passes), with
                     -- the constant addend selected and msel_inv set.
1770                 msel_1 <= MUL1_B;
1771                 msel_add <= MULADD_CONST;
1772                 msel_inv <= '1';
1773                 if r.count = 0 then
1774                     msel_2 <= MUL2_LUT;
1775                 else
1776                     msel_2 <= MUL2_P;
1777                 end if;
                     -- set_y and the multiplier valid signal both follow r.first
1778                 set_y := r.first;
1779                 pshift := '1';
1780                 f_to_multiply.valid <= r.first;
1781                 if multiply_to_f.valid = '1' then
                         -- product ready: count this pass and continue in DIV_3
1782                     v.first := '1';
1783                     v.count := r.count + 1;
1784                     v.state := DIV_3;
1785                 end if;
1786
1787             when DIV_3 =>
1788                 -- compute Y = P = P * Y
                     -- Multiplies Y by P.  When the product is ready, leaves the
                     -- loop for DIV_4 once r.count has reached 3; otherwise goes
                     -- back to DIV_2 for another pass.
1789                 msel_1 <= MUL1_Y;
1790                 msel_2 <= MUL2_P;
1791                 f_to_multiply.valid <= r.first;
1792                 pshift := '1';
1793                 if multiply_to_f.valid = '1' then
1794                     v.first := '1';
1795                     if r.count = 3 then
1796                         v.state := DIV_4;
1797                     else
1798                         v.state := DIV_2;
1799                     end if;
1800                 end if;
1801
1802             when DIV_4 =>
1803                 -- compute R = P = A * Y (quotient)
                     -- Multiplier operands are A and P; set_y follows r.first.
                     -- When the product is ready it is routed to R (RES_MULT)
                     -- and the state advances to DIV_5.
1804                 msel_1 <= MUL1_A;
1805                 msel_2 <= MUL2_P;
1806                 set_y := r.first;
1807                 f_to_multiply.valid <= r.first;
1808                 pshift := '1';
1809                 if multiply_to_f.valid = '1' then
1810                     opsel_r <= RES_MULT;
1811                     v.first := '1';
1812                     v.state := DIV_5;
1813                 end if;
1814
1815             when DIV_5 =>
1816                 -- compute P = A - B * R (remainder)
                     -- Multiplier operands are B and R, with A selected as the
                     -- addend and msel_inv set.  Advances to DIV_6 when the
                     -- product is ready.
1817                 msel_1 <= MUL1_B;
1818                 msel_2 <= MUL2_R;
1819                 msel_add <= MULADD_A;
1820                 msel_inv <= '1';
1821                 f_to_multiply.valid <= r.first;
1822                 if multiply_to_f.valid = '1' then
1823                     v.state := DIV_6;
1824                 end if;
1825
1826             when DIV_6 =>
1827                 -- test if remainder is 0 or >= B
                     -- Final quotient correction; sets X from the remainder and
                     -- hands over to FINISH.
1828                 if pcmpb_lt = '1' then
1829                     -- quotient is correct, set X if remainder non-zero
1830                     v.x := r.p(58) or px_nz;
1831                 else
1832                     -- quotient needs to be incremented by 1
                         -- X is set unless pcmpb_eq is '1'
1833                     carry_in <= '1';
1834                     v.x := not pcmpb_eq;
1835                 end if;
1836                 v.state := FINISH;
1837
1838             when FRE_1 =>
                     -- Loads R from the miscellaneous result mux with
                     -- misc_sel = "0111" (presumably the lookup-table estimate
                     -- -- confirm in the RES_MISC decode), sets the shift count
                     -- to 1 and goes to NORMALIZE.
1839                 opsel_r <= RES_MISC;
1840                 misc_sel <= "0111";
1841                 v.shift := to_signed(1, EXP_BITS);
1842                 v.state := NORMALIZE;
1843
1844             when FTDIV_1 =>
                     -- Bit 1 of the CR result is set when the exponent under
                     -- test is flagged tiny or huge.  The instruction completes
                     -- on that condition, or when A is zero, or when r.first is
                     -- '0'.  Otherwise A's exponent is loaded into the shift
                     -- register and doing_ftdiv is set to "10"; v.state is not
                     -- changed on that path.
1845                 v.cr_result(1) := exp_tiny or exp_huge;
1846                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1847                     v.instr_done := '1';
1848                     v.state := IDLE;
1849                 else
1850                     v.shift := r.a.exponent;
1851                     v.doing_ftdiv := "10";
1852                 end if;
1853
1854             when RSQRT_1 =>
                     -- Loads R from the miscellaneous result mux with
                     -- misc_sel = "0111" (presumably the lookup-table estimate
                     -- -- confirm in the RES_MISC decode).
1855                 opsel_r <= RES_MISC;
1856                 misc_sel <= "0111";
                     -- sqrt_exp is B's exponent arithmetically shifted right by
                     -- one bit (sign bit replicated), i.e. halved; the result
                     -- exponent is its negation
1857                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1858                 v.result_exp := - sqrt_exp;
1859                 v.shift := to_signed(1, EXP_BITS);
1860                 v.state := NORMALIZE;
1861
1862             when SQRT_1 =>
1863                 -- put invsqr[B] in R and compute P = invsqr[B] * B
1864                 -- also transfer B (in R) to A
                     -- R is loaded from the miscellaneous mux (misc_sel "0111")
                     -- while the multiplier is started on B times the
                     -- lookup-table value.
1865                 set_a := '1';
1866                 opsel_r <= RES_MISC;
1867                 misc_sel <= "0111";
1868                 msel_1 <= MUL1_B;
1869                 msel_2 <= MUL2_LUT;
1870                 f_to_multiply.valid <= '1';
                     -- shift count of -1 is used by SQRT_2; the iteration
                     -- counter read by SQRT_7 is reset
1871                 v.shift := to_signed(-1, EXP_BITS);
1872                 v.count := "00";
1873                 v.state := SQRT_2;
1874
1875             when SQRT_2 =>
1876                 -- shift R right one place
1877                 -- not expecting multiplier result yet
1878                 -- r.shift = -1
                     -- R is reloaded from the shifter output (RES_SHIFT); first
                     -- is set so that SQRT_3 raises set_y.
1879                 opsel_r <= RES_SHIFT;
1880                 v.first := '1';
1881                 v.state := SQRT_3;
1882
1883             when SQRT_3 =>
1884                 -- put R into Y, wait for product from multiplier
                     -- set_y follows r.first with MUL2_R selected.  Stays in
                     -- this state until multiply_to_f.valid is seen.
1885                 msel_2 <= MUL2_R;
1886                 set_y := r.first;
1887                 pshift := '1';
1888                 if multiply_to_f.valid = '1' then
1889                     -- put result into R
1890                     opsel_r <= RES_MULT;
1891                     v.first := '1';
1892                     v.state := SQRT_4;
1893                 end if;
1894
1895             when SQRT_4 =>
1896                 -- compute 1.5 - Y * P
                     -- Start of the refinement loop (re-entered from SQRT_7).
                     -- Multiplier operands are Y and P, with the constant addend
                     -- selected and msel_inv set.  Advances to SQRT_5 when the
                     -- product is ready.
1897                 msel_1 <= MUL1_Y;
1898                 msel_2 <= MUL2_P;
1899                 msel_add <= MULADD_CONST;
1900                 msel_inv <= '1';
1901                 f_to_multiply.valid <= r.first;
1902                 pshift := '1';
1903                 if multiply_to_f.valid = '1' then
1904                     v.state := SQRT_5;
1905                 end if;
1906
1907             when SQRT_5 =>
1908                 -- compute Y = Y * P
                     -- Starts the Y * P multiply unconditionally (valid is
                     -- driven to '1', not r.first) and moves straight on to
                     -- SQRT_6 without waiting for the product.
1909                 msel_1 <= MUL1_Y;
1910                 msel_2 <= MUL2_P;
1911                 f_to_multiply.valid <= '1';
1912                 v.first := '1';
1913                 v.state := SQRT_6;
1914
1915             when SQRT_6 =>
1916                 -- pipeline in R = R * P
                     -- Issues a second multiply (R times P) behind the one
                     -- started in SQRT_5.  Advances to SQRT_7 when
                     -- multiply_to_f.valid is seen.
1917                 msel_1 <= MUL1_R;
1918                 msel_2 <= MUL2_P;
1919                 f_to_multiply.valid <= r.first;
1920                 pshift := '1';
1921                 if multiply_to_f.valid = '1' then
1922                     v.first := '1';
1923                     v.state := SQRT_7;
1924                 end if;
1925
1926             when SQRT_7 =>
1927                 -- first multiply is done, put result in Y
1928                 msel_2 <= MUL2_P;
1929                 set_y := r.first;
1930                 -- wait for second multiply (should be here already)
1931                 pshift := '1';
1932                 if multiply_to_f.valid = '1' then
1933                     -- put result into R
1934                     opsel_r <= RES_MULT;
1935                     v.first := '1';
                         -- count this pass; the comparison below uses the old
                         -- count, so the loop body runs for r.count = 0, 1, 2
1936                     v.count := r.count + 1;
1937                     if r.count < 2 then
                             -- more refinement passes to do
1938                         v.state := SQRT_4;
1939                     else
                             -- NOTE(review): v.first was already set to '1' a
                             -- few lines above, so this assignment is redundant
1940                         v.first := '1';
1941                         v.state := SQRT_8;
1942                     end if;
1943                 end if;
1944
1945             when SQRT_8 =>  -- forms the signed error term of the current root estimate
1946                 -- compute P = A - R * R, which can be +ve or -ve
1947                 -- we arranged for B to be put into A earlier
1948                 msel_1 <= MUL1_R;  -- operand 1 = r.r(61 downto 0) & "00"
1949                 msel_2 <= MUL2_R;  -- operand 2 = r.r(61 downto 0) & "00"
1950                 msel_add <= MULADD_A;  -- addend = r.a.mantissa placed at bits 121 downto 58
1951                 msel_inv <= '1';  -- inverted addend plus inverted result, i.e. addend - product
1952                 pshift := '1';  -- P captures result bits 119 downto 56
1953                 f_to_multiply.valid <= r.first;
1954                 if multiply_to_f.valid = '1' then
1955                     v.first := '1';  -- lets SQRT_9 assert its multiply request
1956                     v.state := SQRT_9;
1957                 end if;
1958
1959             when SQRT_9 =>  -- scales the error term by Y to get the correction for R
1960                 -- compute P = P * Y
1961                 -- since Y is an estimate of 1/sqrt(B), this makes P an
1962                 -- estimate of the adjustment needed to R.  Since the error
1963                 -- could be negative and we have an unsigned multiplier, the
1964                 -- upper bits can be wrong, but it turns out the lowest 8 bits
1965                 -- are correct and are all we need (given 3 iterations through
1966                 -- SQRT_4 to SQRT_7).
1967                 msel_1 <= MUL1_Y;  -- operand 1 = r.y
1968                 msel_2 <= MUL2_P;  -- operand 2 = r.p; NOTE(review): msel_add/msel_inv rely on defaults set outside this excerpt
1969                 pshift := '1';  -- P captures result bits 119 downto 56
1970                 f_to_multiply.valid <= r.first;
1971                 if multiply_to_f.valid = '1' then
1972                     v.state := SQRT_10;
1973                 end if;
1974
1975             when SQRT_10 =>  -- applies the correction to R and sets up the result exponent
1976                 -- Add the bottom 8 bits of P, sign-extended,
1977                 -- divided by 4, onto R.
1978                 -- The division by 4 is because R is 10.54 format
1979                 -- whereas P is 8.56 format.
1980                 opsel_b <= BIN_PS6;  -- adder B input = r.p(7 downto 2) sign-extended to 64 bits
1981                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);  -- B exponent arithmetically shifted right one bit
1982                 v.result_exp := sqrt_exp;
1983                 v.shift := to_signed(1, EXP_BITS);  -- NOTE(review): presumably the shift SQRT_11 uses to form 2*R for B; confirm shifter direction
1984                 v.first := '1';  -- lets SQRT_11 assert its multiply request and set_b strobe
1985                 v.state := SQRT_11;
1986
1987             when SQRT_11 =>  -- computes the final remainder and the comparison value in B
1988                 -- compute P = A - R * R (remainder)
1989                 -- also put 2 * R + 1 into B for comparison with P
1990                 msel_1 <= MUL1_R;  -- operand 1 = r.r(61 downto 0) & "00"
1991                 msel_2 <= MUL2_R;  -- operand 2 = r.r(61 downto 0) & "00"
1992                 msel_add <= MULADD_A;  -- addend = r.a.mantissa placed at bits 121 downto 58
1993                 msel_inv <= '1';  -- inverted addend plus inverted result, i.e. addend - product
1994                 f_to_multiply.valid <= r.first;  -- pshift is not set here, so P captures result bits 63 downto 0 unless a default says otherwise
1995                 shiftin := '1';  -- ORs a '1' into the top bit of the S half of the shifter input
1996                 set_b := r.first;  -- B mantissa := shifter output and B exponent := new_exp, while r.first is set
1997                 if multiply_to_f.valid = '1' then
1998                     v.state := SQRT_12;
1999                 end if;
2000
2001             when SQRT_12 =>  -- final correction of the root and computation of the sticky bit X
2002                 -- test if remainder is 0 or >= B = 2*R + 1
2003                 if pcmpb_lt = '1' then  -- NOTE(review): pcmpb_lt / pcmpb_eq are computed outside this excerpt; presumably P < B and P = B
2004                     -- square root is correct, set X if remainder non-zero
2005                     v.x := r.p(58) or px_nz;  -- NOTE(review): px_nz is defined outside this excerpt
2006                 else
2007                     -- square root needs to be incremented by 1
2008                     carry_in <= '1';  -- adds one in the adder (sum = in_a + in_b + carry_in)
2009                     v.x := not pcmpb_eq;  -- result is inexact unless the remainder equals B exactly
2010                 end if;
2011                 v.state := FINISH;
2012
2013             when INT_SHIFT =>  -- first shift step of the float-to-integer path that ends in INT_ROUND
2014                 -- r.shift = b.exponent - 52
2015                 opsel_r <= RES_SHIFT;  -- R := shifter output (this selection also loads result_exp with new_exp)
2016                 set_x := '1';  -- X is set if any A-mux bit selected by the shift mask is non-zero
2017                 v.state := INT_ROUND;
2018                 v.shift := to_signed(-2, EXP_BITS);  -- shift amount consumed by INT_ROUND
2019
2020             when INT_ROUND =>  -- computes the rounding decision and rejects negative inputs to unsigned conversions
2021                 -- r.shift = -2
2022                 opsel_r <= RES_SHIFT;  -- R := shifter output
2023                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);  -- third argument is fixed at '0' here; ROUNDING passes r.single_prec
2024                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;  -- the two bits of round go straight into FPSCR FR and FI
2025                 -- Check for negative values that don't round to 0 for fcti*u*
2026                 if r.insn(8) = '1' and r.result_sign = '1' and
2027                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then  -- uses the FR value just computed above (v, not r)
2028                     v.state := INT_OFLOW;
2029                 else
2030                     v.state := INT_FINAL;
2031                 end if;
2032
2033             when INT_ISHIFT =>  -- single shift step that goes directly to INT_FINAL, skipping INT_ROUND
2034                 -- r.shift = b.exponent - 54;
2035                 opsel_r <= RES_SHIFT;  -- R := shifter output; X is not updated here (set_x is left alone)
2036                 v.state := INT_FINAL;
2037
2038             when INT_FINAL =>  -- produces the signed integer value and decides whether an overflow check is needed
2039                 -- Negate if necessary, and increment for rounding if needed
2040                 opsel_ainv <= r.result_sign;  -- invert the adder A input for negative results
2041                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;  -- combines the rounding increment with the +1 of two's-complement negation
2042                 -- Check for possible overflows
2043                 case r.insn(9 downto 8) is  -- bit 9 selects doubleword vs word, bit 8 selects unsigned vs signed (per the arm comments)
2044                     when "00" =>        -- fctiw[z]
2045                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
2046                     when "01" =>        -- fctiwu[z]
2047                         need_check := r.r(31);
2048                     when "10" =>        -- fctid[z]
2049                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
2050                     when others =>      -- fctidu[z]
2051                         need_check := r.r(63);
2052                 end case;
2053                 if need_check = '1' then
2054                     v.state := INT_CHECK;  -- the value written to R this cycle is inspected there
2055                 else
2056                     if r.fpscr(FPSCR_FI) = '1' then
2057                         v.fpscr(FPSCR_XX) := '1';  -- inexact result raises the sticky XX flag
2058                     end if;
2059                     arith_done := '1';
2060                 end if;
2061
2062             when INT_CHECK =>  -- overflow check on the converted integer; replaces it with a saturated constant if needed
2063                 if r.insn(9) = '0' then
2064                     msb := r.r(31);  -- word conversions: top bit of the 32-bit result
2065                 else
2066                     msb := r.r(63);  -- doubleword conversions: top bit of the 64-bit result
2067                 end if;
2068                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;  -- selects one of the "1xxx" saturation constants in the result mux
2069                 if (r.insn(8) = '0' and msb /= r.result_sign) or  -- signed: top bit disagrees with the expected sign
2070                     (r.insn(8) = '1' and msb /= '1') then  -- unsigned: NOTE(review): top bit clear here is treated as overflow, presumably a wrap; confirm
2071                     opsel_r <= RES_MISC;  -- R := the saturation constant chosen by misc_sel
2072                     v.fpscr(FPSCR_VXCVI) := '1';
2073                     invalid := '1';
2074                 else
2075                     if r.fpscr(FPSCR_FI) = '1' then
2076                         v.fpscr(FPSCR_XX) := '1';  -- inexact result raises the sticky XX flag
2077                     end if;
2078                 end if;
2079                 arith_done := '1';
2080
2081             when INT_OFLOW =>  -- unconditional invalid-conversion result: saturated constant plus VXCVI
2082                 opsel_r <= RES_MISC;  -- R := the constant chosen by misc_sel
2083                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;  -- picks the saturation value by width, signedness and sign
2084                 if r.b.class = NAN then
2085                     misc_sel(0) <= '1';  -- NaN input: always use the "max negative" constant regardless of sign
2086                 end if;
2087                 v.fpscr(FPSCR_VXCVI) := '1';
2088                 invalid := '1';
2089                 arith_done := '1';
2090
2091             when FRI_1 =>  -- shift step of the DO_FRI path (per the state name); hands over to ROUNDING
2092                 -- r.shift = b.exponent - 52
2093                 opsel_r <= RES_SHIFT;  -- R := shifter output (this selection also loads result_exp with new_exp)
2094                 set_x := '1';  -- X is set if any A-mux bit selected by the shift mask is non-zero
2095                 v.state := ROUNDING;
2096
2097             when FINISH =>  -- common exit of the arithmetic paths: normalize if needed, else pick the rounding path
2098                 if r.is_multiply = '1' and px_nz = '1' then
2099                     v.x := '1';  -- NOTE(review): px_nz is defined outside this excerpt; presumably non-zero low bits of the product
2100                 end if;
2101                 if r.r(63 downto 54) /= "0000000001" then  -- leading one is not at bit 54
2102                     renormalize := '1';  -- loads v.shift with clz(r.r) - 9 for NORMALIZE
2103                     v.state := NORMALIZE;
2104                 else
2105                     set_x := '1';
2106                     if exp_tiny = '1' then  -- NOTE(review): exp_tiny / exp_huge / new_exp / min_exp are computed outside this excerpt
2107                         v.shift := new_exp - min_exp;
2108                         v.state := ROUND_UFLOW;
2109                     elsif exp_huge = '1' then
2110                         v.state := ROUND_OFLOW;
2111                     else
2112                         v.state := ROUNDING;
2113                     end if;
2114                 end if;
2115
2116             when NORMALIZE =>  -- applies the normalizing shift, then picks the rounding path like FINISH does
2117                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2118                 -- r.shift = clz(r.r) - 9
2119                 opsel_r <= RES_SHIFT;  -- R := shifter output; result_exp := new_exp
2120                 set_x := '1';  -- X is set if any A-mux bit selected by the shift mask is non-zero
2121                 if exp_tiny = '1' then
2122                     v.shift := new_exp - min_exp;  -- shift amount consumed by ROUND_UFLOW
2123                     v.state := ROUND_UFLOW;
2124                 elsif exp_huge = '1' then
2125                     v.state := ROUND_OFLOW;
2126                 else
2127                     v.state := ROUNDING;
2128                 end if;
2129
2130             when ROUND_UFLOW =>  -- exponent underflow handling; behaviour depends on the FPSCR UE enable bit
2131                 -- r.shift = - amount by which exponent underflows
2132                 v.tiny := '1';  -- remembered so ROUNDING can set UX when the result is inexact
2133                 if r.fpscr(FPSCR_UE) = '0' then
2134                     -- disabled underflow exception case
2135                     -- have to denormalize before rounding
2136                     opsel_r <= RES_SHIFT;  -- R := shifter output; result_exp := new_exp
2137                     set_x := '1';  -- bits removed by the shift mask feed the sticky bit X
2138                     v.state := ROUNDING;
2139                 else
2140                     -- enabled underflow exception case
2141                     -- if denormalized, have to normalize before rounding
2142                     v.fpscr(FPSCR_UX) := '1';
2143                     v.result_exp := r.result_exp + bias_exp;  -- NOTE(review): bias_exp is defined outside this excerpt
2144                     if r.r(54) = '0' then  -- no leading one at bit 54
2145                         renormalize := '1';  -- loads v.shift with clz(r.r) - 9 for NORMALIZE
2146                         v.state := NORMALIZE;
2147                     else
2148                         v.state := ROUNDING;
2149                     end if;
2150                 end if;
2151
2152             when ROUND_OFLOW =>  -- exponent overflow handling; behaviour depends on the FPSCR OE enable bit
2153                 v.fpscr(FPSCR_OX) := '1';
2154                 if r.fpscr(FPSCR_OE) = '0' then
2155                     -- disabled overflow exception
2156                     -- result depends on rounding mode
2157                     v.fpscr(FPSCR_XX) := '1';
2158                     v.fpscr(FPSCR_FI) := '1';
2159                     if r.round_mode(1 downto 0) = "00" or
2160                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then  -- NOTE(review): presumably round-to-nearest, or directed rounding away from zero for this sign
2161                         v.result_class := INFINITY;
2162                         v.fpscr(FPSCR_FR) := '1';
2163                     else
2164                         v.fpscr(FPSCR_FR) := '0';  -- result stays finite: the largest representable number built below
2165                     end if;
2166                     -- construct largest representable number
2167                     v.result_exp := max_exp;  -- NOTE(review): max_exp is defined outside this excerpt
2168                     opsel_r <= RES_MISC;
2169                     misc_sel <= "001" & r.single_prec;  -- "0010" = max DP mantissa, "0011" = max SP mantissa
2170                     arith_done := '1';
2171                 else
2172                     -- enabled overflow exception
2173                     v.result_exp := r.result_exp - bias_exp;  -- exponent adjusted by bias_exp before normal rounding
2174                     v.state := ROUNDING;
2175                 end if;
2176
2177             when ROUNDING =>  -- truncates R to the target precision and decides whether an increment is needed
2178                 opsel_mask <= '1';  -- clears sum(1 downto 0), and sum(30 downto 2) as well for single precision
2179                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2180                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;  -- the two bits of round go straight into FPSCR FR and FI
2181                 if round(1) = '1' then
2182                     -- increment the LSB for the precision
2183                     opsel_b <= BIN_RND;  -- adder B input has a single '1' at bit 31 (single) or bit 2 (double)
2184                     v.shift := to_signed(-1, EXP_BITS);  -- shift amount consumed by ROUNDING_2
2185                     v.state := ROUNDING_2;
2186                 else
2187                     if r.r(54) = '0' then
2188                         -- result after masking could be zero, or could be a
2189                         -- denormalized result that needs to be renormalized
2190                         renormalize := '1';  -- loads v.shift with clz(r.r) - 9 for ROUNDING_3
2191                         v.state := ROUNDING_3;
2192                     else
2193                         arith_done := '1';
2194                     end if;
2195                 end if;
2196                 if round(0) = '1' then  -- the bit that was written to FPSCR FI above
2197                     v.fpscr(FPSCR_XX) := '1';
2198                     if r.tiny = '1' then
2199                         v.fpscr(FPSCR_UX) := '1';  -- inexact and tiny (flag set in ROUND_UFLOW) means underflow
2200                     end if;
2201                 end if;
2202
2203             when ROUNDING_2 =>  -- examines R after the rounding increment made in ROUNDING
2204                 -- Check for overflow during rounding
2205                 -- r.shift = -1
2206                 v.x := '0';
2207                 if r.r(55) = '1' then  -- increment carried out of bit 54
2208                     opsel_r <= RES_SHIFT;  -- R := shifter output (r.shift = -1); result_exp := new_exp
2209                     if exp_huge = '1' then
2210                         v.state := ROUND_OFLOW;
2211                     else
2212                         arith_done := '1';
2213                     end if;
2214                 elsif r.r(54) = '0' then
2215                     -- Do CLZ so we can renormalize the result
2216                     renormalize := '1';  -- loads v.shift with clz(r.r) - 9 for ROUNDING_3
2217                     v.state := ROUNDING_3;
2218                 else
2219                     arith_done := '1';
2220                 end if;
2221
2222             when ROUNDING_3 =>  -- handles a rounded result with no leading one at bit 54: zero or denormal
2223                 -- r.shift = clz(r.r) - 9
2224                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);  -- NOTE(review): r_hi_nz / r_lo_nz come from outside this excerpt; low part is ignored for single precision
2225                 if mant_nz = '0' then
2226                     v.result_class := ZERO;
2227                     if r.is_subtract = '1' then
2228                         -- set result sign depending on rounding mode
2229                         v.result_sign := r.round_mode(1) and r.round_mode(0);  -- negative zero only when both mode bits are set
2230                     end if;
2231                     arith_done := '1';
2232                 else
2233                     -- Renormalize result after rounding
2234                     opsel_r <= RES_SHIFT;  -- R := shifter output; result_exp := new_exp
2235                     v.denorm := exp_tiny;  -- later masks the bit-54 argument passed to result_flags
2236                     v.shift := new_exp - to_signed(-1022, EXP_BITS);  -- shift amount consumed by DENORM
2237                     if new_exp < to_signed(-1022, EXP_BITS) then
2238                         v.state := DENORM;
2239                     else
2240                         arith_done := '1';
2241                     end if;
2242                 end if;
2243
2244             when DENORM =>  -- final shift for results whose exponent fell below -1022 in ROUNDING_3
2245                 -- r.shift = result_exp - -1022
2246                 opsel_r <= RES_SHIFT;  -- R := shifter output; result_exp := new_exp
2247                 arith_done := '1';
2248
2249             when NAN_RESULT =>  -- flags signalling NaNs and chooses which NaN operand becomes the result
2250                 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or  -- mantissa bit 53 clear marks a signalling NaN
2251                     (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2252                     (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2253                     -- Signalling NAN
2254                     v.fpscr(FPSCR_VXSNAN) := '1';
2255                     invalid := '1';
2256                 end if;
2257                 if r.use_a = '1' and r.a.class = NAN then  -- operand priority is A, then B, then C
2258                     v.opsel_a := AIN_A;
2259                 elsif r.use_b = '1' and r.b.class = NAN then
2260                     v.opsel_a := AIN_B;
2261                 elsif r.use_c = '1' and r.c.class = NAN then
2262                     v.opsel_a := AIN_C;
2263                 end if;  -- if none matches, v.opsel_a keeps whatever value it already had
2264                 v.state := EXC_RESULT;
2265
2266             when EXC_RESULT =>  -- copies sign, exponent and class of the selected operand into the result
2267                 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2268                 case r.opsel_a is
2269                     when AIN_B =>
2270                         v.result_sign := r.b.negative xor r.negate;  -- r.negate flips the sign of the chosen operand
2271                         v.result_exp := r.b.exponent;
2272                         v.result_class := r.b.class;
2273                     when AIN_C =>
2274                         v.result_sign := r.c.negative xor r.negate;
2275                         v.result_exp := r.c.exponent;
2276                         v.result_class := r.c.class;
2277                     when others =>  -- AIN_A and any other selector value fall back to operand A
2278                         v.result_sign := r.a.negative xor r.negate;
2279                         v.result_exp := r.a.exponent;
2280                         v.result_class := r.a.class;
2281                 end case;
2282                 arith_done := '1';
2283
2284         end case;
2285
2286         if zero_divide = '1' then  -- post-case handling of the flags raised by the state machine above
2287             v.fpscr(FPSCR_ZX) := '1';
2288         end if;
2289         if qnan_result = '1' then  -- the state asked for a generated quiet NaN result
2290             invalid := '1';
2291             v.result_class := NAN;
2292             v.result_sign := '0';
2293             misc_sel <= "0001";  -- generated QNaN mantissa in the result mux
2294             opsel_r <= RES_MISC;  -- being later in the process, this overrides any opsel_r assignment made in the case arms
2295             arith_done := '1';
2296         end if;
2297         if invalid = '1' then
2298             v.invalid := '1';  -- only ever set here; where it is cleared is outside this excerpt
2299         end if;
2300         if arith_done = '1' then
2301             -- Enabled invalid exception doesn't write result or FPRF
2302             -- Neither does enabled zero-divide exception
2303             if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2304                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2305                 v.writing_back := '1';
2306                 v.update_fprf := '1';
2307             end if;
2308             v.instr_done := '1';
2309             v.state := IDLE;  -- overrides any next state chosen in the case arms
2310             update_fx := '1';  -- allows FPSCR FX to be set further down
2311         end if;
2312
2313         -- Multiplier and divide/square root data path
2314         case msel_1 is  -- first multiplier operand
2315             when MUL1_A =>
2316                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";  -- mantissa shifted left two bits
2317             when MUL1_B =>
2318                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2319             when MUL1_Y =>
2320                 f_to_multiply.data1 <= r.y;  -- Y is passed through unshifted
2321             when others =>  -- includes MUL1_R
2322                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2323         end case;
2324         case msel_2 is  -- second multiplier operand
2325             when MUL2_C =>
2326                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2327             when MUL2_LUT =>
2328                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";  -- NOTE(review): inverse_est is produced outside this excerpt
2329             when MUL2_P =>
2330                 f_to_multiply.data2 <= r.p;  -- P is passed through unshifted
2331             when others =>  -- includes MUL2_R
2332                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2333         end case;
2334         maddend := (others => '0');  -- default: no addend
2335         case msel_add is
2336             when MULADD_CONST =>
2337                 -- addend is 2.0 or 1.5 in 16.112 format
2338                 if r.is_sqrt = '0' then
2339                     maddend(113) := '1';                -- 2.0
2340                 else
2341                     maddend(112 downto 111) := "11";    -- 1.5
2342                 end if;
2343             when MULADD_A =>
2344                 -- addend is A in 16.112 format
2345                 maddend(121 downto 58) := r.a.mantissa;
2346             when MULADD_RS =>
2347                 -- addend is concatenation of R and S in 16.112 format
2348                 maddend := "000000" & r.r & r.s & "00";
2349             when others =>  -- any other selector leaves the addend at zero
2350         end case;
2351         if msel_inv = '1' then
2352             f_to_multiply.addend <= not maddend;  -- inverted addend goes together with not_result below
2353         else
2354             f_to_multiply.addend <= maddend;
2355         end if;
2356         f_to_multiply.not_result <= msel_inv;
2357         if set_y = '1' then
2358             v.y := f_to_multiply.data2;  -- NOTE(review): reads back a signal assigned above in this process; confirm it reflects the current msel_2 choice
2359         end if;
2360         if multiply_to_f.valid = '1' then  -- capture a finished product into P
2361             if pshift = '0' then
2362                 v.p := multiply_to_f.result(63 downto 0);  -- low 64 bits of the result
2363             else
2364                 v.p := multiply_to_f.result(119 downto 56);  -- window starting 56 bits higher
2365             end if;
2366         end if;
2367
2368         -- Data path.
2369         -- This has A and B input multiplexers, an adder, a shifter,
2370         -- count-leading-zeroes logic, and a result mux.
2371         if r.longmask = '1' then
2372             mshift := r.shift + to_signed(-29, EXP_BITS);  -- NOTE(review): 29 equals the width of sum(30 downto 2) cleared for single precision below; presumably related, confirm
2373         else
2374             mshift := r.shift;
2375         end if;
2376         if mshift < to_signed(-64, EXP_BITS) then  -- sticky-bit mask derived from the (adjusted) shift count
2377             mask := (others => '1');
2378         elsif mshift >= to_signed(0, EXP_BITS) then
2379             mask := (others => '0');
2380         else
2381             mask := right_mask(unsigned(mshift(5 downto 0)));  -- NOTE(review): right_mask is defined outside this excerpt
2382         end if;
2383         case r.opsel_a is  -- adder A input mux; note it is driven by the registered r.opsel_a
2384             when AIN_R =>
2385                 in_a0 := r.r;
2386             when AIN_A =>
2387                 in_a0 := r.a.mantissa;
2388             when AIN_B =>
2389                 in_a0 := r.b.mantissa;
2390             when others =>  -- includes AIN_C
2391                 in_a0 := r.c.mantissa;
2392         end case;
2393         if (or (mask and in_a0)) = '1' and set_x = '1' then  -- evaluated before the optional inversion below
2394             v.x := '1';
2395         end if;
2396         if opsel_ainv = '1' then
2397             in_a0 := not in_a0;
2398         end if;
2399         in_a <= in_a0;
2400         case opsel_b is  -- adder B input mux
2401             when BIN_ZERO =>
2402                 in_b0 := (others => '0');
2403             when BIN_R =>
2404                 in_b0 := r.r;
2405             when BIN_RND =>
2406                 round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');  -- one LSB at the target precision
2407                 in_b0 := round_inc;
2408             when others =>
2409                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2410                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2411         end case;
2412         if opsel_binv = '1' then
2413             in_b0 := not in_b0;
2414         end if;
2415         in_b <= in_b0;
2416         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2417             shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),  -- input is R concatenated with S; shiftin can force the top S bit
2418                                     std_ulogic_vector(r.shift(6 downto 0)));
2419         else
2420             shift_res := (others => '0');  -- out-of-range shift counts yield zero
2421         end if;
2422         sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);  -- NOTE(review): reads signals in_a / in_b assigned above in this process
2423         if opsel_mask = '1' then
2424             sum(1 downto 0) := "00";  -- drop the bits below double precision
2425             if r.single_prec = '1' then
2426                 sum(30 downto 2) := (others => '0');  -- additionally drop the bits below single precision
2427             end if;
2428         end if;
2429         case opsel_r is  -- result mux: selects the value that is written to R below
2430             when RES_SUM =>
2431                 result <= sum;
2432             when RES_SHIFT =>
2433                 result <= shift_res;
2434             when RES_MULT =>
2435                 result <= multiply_to_f.result(121 downto 58);
2436             when others =>  -- includes RES_MISC: constant or special value chosen by misc_sel
2437                 case misc_sel is
2438                     when "0000" =>
2439                         misc := x"00000000" & (r.fpscr and fpscr_mask);  -- FPSCR contents filtered by fpscr_mask, zero-extended
2440                     when "0001" =>
2441                         -- generated QNaN mantissa
2442                         misc := x"0020000000000000";
2443                     when "0010" =>
2444                         -- mantissa of max representable DP number
2445                         misc := x"007ffffffffffffc";
2446                     when "0011" =>
2447                         -- mantissa of max representable SP number
2448                         misc := x"007fffff80000000";
2449                     when "0100" =>
2450                         -- fmrgow result
2451                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2452                     when "0110" =>
2453                         -- fmrgew result
2454                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2455                     when "0111" =>
2456                         misc := 10x"000" & inverse_est & 35x"000000000";  -- inverse_est placed below ten zero bits and above 35 zero bits
2457                     when "1000" =>
2458                         -- max positive result for fctiw[z]
2459                         misc := x"000000007fffffff";
2460                     when "1001" =>
2461                         -- max negative result for fctiw[z]
2462                         misc := x"ffffffff80000000";
2463                     when "1010" =>
2464                         -- max positive result for fctiwu[z]
2465                         misc := x"00000000ffffffff";
2466                     when "1011" =>
2467                         -- max negative result for fctiwu[z]
2468                         misc := x"0000000000000000";
2469                     when "1100" =>
2470                         -- max positive result for fctid[z]
2471                         misc := x"7fffffffffffffff";
2472                     when "1101" =>
2473                         -- max negative result for fctid[z]
2474                         misc := x"8000000000000000";
2475                     when "1110" =>
2476                         -- max positive result for fctidu[z]
2477                         misc := x"ffffffffffffffff";
2478                     when "1111" =>
2479                         -- max negative result for fctidu[z]
2480                         misc := x"0000000000000000";
2481                     when others =>  -- "0101" and any non-binary selector value
2482                         misc := x"0000000000000000";
2483                 end case;
2484                 result <= misc;
2485         end case;
2486         v.r := result;  -- R is loaded every evaluation; NOTE(review): result is a signal assigned earlier in this process
2487         if set_s = '1' then  -- optional update of the S register
2488             case opsel_s is
2489                 when S_NEG =>
2490                     v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));  -- inverts S and adds one only when X is clear
2491                 when S_MULT =>
2492                     v.s := multiply_to_f.result(57 downto 2);  -- product bits just below those taken for R by RES_MULT
2493                 when S_SHIFT =>
2494                     v.s := shift_res(63 downto 8);
2495                     if shift_res(7 downto 0) /= x"00" then
2496                         v.x := '1';  -- bits that do not fit in S become sticky
2497                     end if;
2498                 when others =>
2499                     v.s := (others => '0');
2500             end case;
2501         end if;
2502
2503         if set_a = '1' then  -- operand registers can be reloaded from the shifter
2504             v.a.exponent := new_exp;
2505             v.a.mantissa := shift_res;
2506         end if;
2507         if set_b = '1' then
2508             v.b.exponent := new_exp;
2509             v.b.mantissa := shift_res;
2510         end if;
2511         if set_c = '1' then
2512             v.c.exponent := new_exp;
2513             v.c.mantissa := shift_res;
2514         end if;
2515
2516         if opsel_r = RES_SHIFT then  -- NOTE(review): reads the opsel_r signal assigned earlier in this process
2517             v.result_exp := new_exp;  -- a shifted result always takes the adjusted exponent
2518         end if;
2519
2520         if renormalize = '1' then
2521             clz := count_left_zeroes(r.r);
2522             if renorm_sqrt = '1' then
2523                 -- make denormalized value end up with even exponent
2524                 clz(0) := '1';
2525             end if;
2526             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);  -- shift that leaves nine leading zeroes, i.e. the leading one at bit 54
2527         end if;
2528
2529         if r.int_result = '1' then  -- choose between the raw integer in R and a packed floating-point value
2530             fp_result <= r.r;
2531         else
2532             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2533                                  r.single_prec, r.quieten_nan);
2534         end if;
2535         if r.update_fprf = '1' then  -- uses the registered flag, so it takes effect the evaluation after it was set
2536             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2537                                                              r.r(54) and not r.denorm);
2538         end if;
2539
2540         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or  -- VX summarises both ranges of VX* bits
2541                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2542         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and  -- FEX: some exception bit is set with its enable bit
2543                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
2544         if update_fx = '1' and
2545             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then  -- an exception bit is set that was clear in r.old_exc
2546             v.fpscr(FPSCR_FX) := '1';
2547         end if;
2548         if r.rc = '1' then
2549             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);  -- CR result mirrors the FX..OX slice of the FPSCR
2550         end if;
2551
2552         v.illegal := illegal;
2553         if illegal = '1' then  -- illegal instruction: cancel completion and writeback, request an interrupt
2554             v.instr_done := '0';
2555             v.do_intr := '1';
2556             v.writing_back := '0';
2557             v.busy := '0';
2558             v.state := IDLE;
2559         else
2560             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;  -- interrupt on completion with FEX set when r.fe_mode is set
2561             if v.state /= IDLE or v.do_intr = '1' then
2562                 v.busy := '1';  -- NOTE(review): busy is only ever set here; its default is assigned outside this excerpt
2563             end if;
2564         end if;
2565
2566         rin <= v;  -- hand the computed next-state record to rin
2567     end process;
2568
2569 end architecture behaviour;