fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is
  15     port (
  16         clk : in std_ulogic;                -- clock; the registers in this unit update on its rising edge
  17         rst : in std_ulogic;                -- synchronous reset, sampled on the rising edge of clk
  18
  19         e_in  : in  Execute1toFPUType;      -- request input; e_in.valid may only be asserted while the state machine is IDLE
  20         e_out : out FPUToExecute1Type;      -- carries the busy, exception and interrupt indications
  21
  22         w_out : out FPUToWritebackType      -- carries the result data, destination register and CR update
  23         );
  24 end entity fpu;
  25
  26 architecture behaviour of fpu is
  27     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);   -- FINITE covers normalized and denormalized values
  28
  29     constant EXP_BITS : natural := 13;      -- width of the internal signed exponent
  30
  31     type fpu_reg_type is record             -- an operand in unpacked form, as produced by decode_dp
  32         class    : fp_number_class;
  33         negative : std_ulogic;              -- sign: bit 63 of the source register
  34         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased
  35         mantissa : std_ulogic_vector(63 downto 0);      -- 10.54 format
  36     end record;
  37
  38     type state_t is (IDLE,                  -- state entered on reset; e_in.valid may only be asserted in this state
  39                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,   -- NOTE(review): the DO_* states look like per-instruction entry points; confirm in fpu_1
  40                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  41                      DO_FCFID, DO_FCTI,
  42                      DO_FRSP, DO_FRI,
  43                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT,
  44                      DO_FRE, DO_FRSQRTE,
  45                      DO_FSEL,
  46                      FRI_1,
  47                      ADD_SHIFT, ADD_2, ADD_3,
  48                      CMP_1, CMP_2,
  49                      MULT_1,
  50                      LOOKUP,
  51                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  52                      FRE_1,
  53                      RSQRT_1,
  54                      FTDIV_1,
  55                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  56                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  57                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  58                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  59                      INT_FINAL, INT_CHECK, INT_OFLOW,
  60                      FINISH, NORMALIZE,
  61                      ROUND_UFLOW, ROUND_OFLOW,
  62                      ROUNDING, ROUNDING_2, ROUNDING_3,
  63                      DENORM,
  64                      RENORM_A, RENORM_A2,
  65                      RENORM_B, RENORM_B2,
  66                      RENORM_C, RENORM_C2);
  67
  68     type reg_type is record                 -- all of the FPU's registered state (signals r and rin)
  69         state        : state_t;             -- current state-machine state; IDLE after reset
  70         busy         : std_ulogic;          -- driven out as e_out.busy
  71         instr_done   : std_ulogic;          -- qualifies w_out.valid and w_out.write_cr_enable
  72         do_intr      : std_ulogic;          -- driven out as e_out.interrupt; when set, w_out.valid is suppressed
  73         op           : insn_type_t;
  74         insn         : std_ulogic_vector(31 downto 0);
  75         dest_fpr     : gspr_index_t;        -- driven out as w_out.write_reg
  76         fe_mode      : std_ulogic;
  77         rc           : std_ulogic;          -- with instr_done, enables the CR write
  78         is_cmp       : std_ulogic;          -- with instr_done, also enables the CR write
  79         single_prec  : std_ulogic;
  80         fpscr        : std_ulogic_vector(31 downto 0);  -- bit FPSCR_FEX is driven out as e_out.exception
  81         a            : fpu_reg_type;
  82         b            : fpu_reg_type;        -- b.mantissa supplies the index for the inverse lookup table
  83         c            : fpu_reg_type;
  84         r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
  85         x            : std_ulogic;
  86         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  87         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
  88         result_sign  : std_ulogic;
  89         result_class : fp_number_class;
  90         result_exp   : signed(EXP_BITS-1 downto 0);
  91         shift        : signed(EXP_BITS-1 downto 0);
  92         writing_back : std_ulogic;          -- driven out as w_out.write_enable
  93         int_result   : std_ulogic;
  94         cr_result    : std_ulogic_vector(3 downto 0);   -- replicated into all 8 fields of w_out.write_cr_data
  95         cr_mask      : std_ulogic_vector(7 downto 0);   -- driven out as w_out.write_cr_mask
  96         old_exc      : std_ulogic_vector(4 downto 0);
  97         update_fprf  : std_ulogic;
  98         quieten_nan  : std_ulogic;
  99         tiny         : std_ulogic;
 100         denorm       : std_ulogic;
 101         round_mode   : std_ulogic_vector(2 downto 0);
 102         is_subtract  : std_ulogic;
 103         exp_cmp      : std_ulogic;
 104         add_bsmall   : std_ulogic;
 105         is_multiply  : std_ulogic;
 106         is_sqrt      : std_ulogic;          -- selects the 1/sqrt(x) blocks of inverse_table in lut_access
 107         first        : std_ulogic;
 108         count        : unsigned(1 downto 0);
 109         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 110     end record;
 111
 112     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);   -- 1024 entries of 18 bits
 113
 114     signal r, rin : reg_type;               -- r: registered state; rin: value loaded into r on a clock edge when not in reset
 115
 116     signal fp_result     : std_ulogic_vector(63 downto 0);   -- driven out as w_out.write_data
 117     signal opsel_a       : std_ulogic_vector(1 downto 0);
 118     signal opsel_b       : std_ulogic_vector(1 downto 0);
 119     signal opsel_r       : std_ulogic_vector(1 downto 0);
 120     signal opsel_ainv    : std_ulogic;
 121     signal opsel_amask   : std_ulogic;
 122     signal opsel_binv    : std_ulogic;
 123     signal in_a          : std_ulogic_vector(63 downto 0);
 124     signal in_b          : std_ulogic_vector(63 downto 0);
 125     signal result        : std_ulogic_vector(63 downto 0);
 126     signal carry_in      : std_ulogic;
 127     signal lost_bits     : std_ulogic;
 128     signal r_hi_nz       : std_ulogic;
 129     signal r_lo_nz       : std_ulogic;
 130     signal misc_sel      : std_ulogic_vector(3 downto 0);
 131     signal f_to_multiply : MultiplyInputType;    -- connected to m_in of the multiply instance
 132     signal multiply_to_f : MultiplyOutputType;   -- connected to m_out of the multiply instance
 133     signal msel_1        : std_ulogic_vector(1 downto 0);
 134     signal msel_2        : std_ulogic_vector(1 downto 0);
 135     signal msel_add      : std_ulogic_vector(1 downto 0);
 136     signal msel_inv      : std_ulogic;
 137     signal inverse_est   : std_ulogic_vector(18 downto 0);   -- registered lookup result: '1' prepended to the 18-bit table entry
 138
 139     -- opsel values
 140     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";   -- NOTE(review): AIN_* presumably encode opsel_a (same width); confirm in fpu_1
 141     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 142     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 143     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 144
 145     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";   -- NOTE(review): BIN_* presumably encode opsel_b; confirm in fpu_1
 146     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 147     constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
 148     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 149
 150     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";  -- NOTE(review): RES_* presumably encode opsel_r; confirm in fpu_1
 151     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 152     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 153     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 154
 155     -- msel values
 156     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";     -- NOTE(review): MUL1_* presumably encode msel_1; confirm in fpu_1
 157     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 158     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 159     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 160
 161     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";   -- NOTE(review): MUL2_* presumably encode msel_2; confirm in fpu_1
 162     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 163     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 164     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 165
 166     constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";    -- NOTE(review): MULADD_* presumably encode msel_add; "11" has no named value here
 167     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 168     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 169
 170     -- Inverse lookup table, indexed by the top 8 fraction bits (preceded, for the reciprocal square root, by the 2 bits left of the binary point).
 171     -- The first 256 entries are the reciprocal (1/x) lookup table,
 172     -- and the remaining 768 entries are the reciprocal square root table.
 173     -- Output range is [0.5, 1) in 0.19 format, though the top
 174     -- bit isn't stored since it is always 1.
 175     -- Each output value is the inverse of the center of the input
 176     -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
 177     -- entry 1 is 1 / (1 + 3/512), etc.
 178     signal inverse_table : lookup_table := (
 179         -- 1/x lookup table
 180         -- Unit bit is assumed to be 1, so input range is [1, 2)
 181         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 182         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 183         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 184         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 185         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 186         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 187         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 188         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 189         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 190         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 191         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 192         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 193         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 194         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 195         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 196         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 197         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 198         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 199         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 200         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 201         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 202         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 203         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 204         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 205         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 206         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 207         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 208         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 209         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 210         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 211         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 212         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 213         -- 1/sqrt(x) lookup table
 214         -- Input is in the range [1, 4), i.e. two bits to the left of the
 215         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 216         -- 1.0 ... 1.9999
 217         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 218         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 219         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 220         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 221         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 222         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 223         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 224         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 225         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 226         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 227         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 228         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 229         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 230         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 231         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 232         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 233         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 234         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 235         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 236         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 237         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 238         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 239         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 240         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 241         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 242         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 243         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 244         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 245         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 246         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 247         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 248         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 249         -- 2.0 ... 2.9999
 250         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 251         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 252         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 253         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 254         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 255         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 256         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 257         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 258         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 259         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 260         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 261         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 262         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 263         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 264         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 265         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 266         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 267         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 268         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 269         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 270         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 271         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 272         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 273         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 274         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 275         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 276         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 277         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 278         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 279         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 280         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 281         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 282         -- 3.0 ... 3.9999
 283         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 284         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 285         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 286         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 287         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 288         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 289         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 290         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 291         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 292         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 293         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 294         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 295         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 296         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 297         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 298         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 299         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 300         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 301         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 302         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 303         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 304         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 305         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 306         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 307         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 308         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 309         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 310         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 311         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 312         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 313         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 314         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 315         );
 316
 317     -- Left and right shifter with 120 bit input and 64 bit output.
 318     -- Shifts inp left by shift bits and returns the upper 64 bits of
 319     -- the result.  The shift parameter is interpreted as a signed
 320     -- number in the range -64..63, with negative values indicating
 321     -- right shifts.  The shift is done in three stages (32s, 8s, 1s).
 322     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 323                         shift: std_ulogic_vector(6 downto 0))
 324         return std_ulogic_vector is
 325         variable s1 : std_ulogic_vector(94 downto 0);       -- after the coarse stage
 326         variable s2 : std_ulogic_vector(70 downto 0);       -- after the by-8 stage
 327         variable result : std_ulogic_vector(63 downto 0);   -- after the by-1 stage
 328     begin
 329         case shift(6 downto 5) is       -- coarse stage: a multiple of 32
 330             when "00" =>                -- shift 0..31: no coarse shift
 331                 s1 := inp(119 downto 25);
 332             when "01" =>                -- shift 32..63: left by 32, zero-filled at the bottom
 333                 s1 := inp(87 downto 0) & "0000000";
 334             when "10" =>                -- shift -64..-33: right by 64, zero-filled at the top
 335                 s1 := x"0000000000000000" & inp(119 downto 89);
 336             when others =>              -- shift -32..-1: right by 32, zero-filled at the top
 337                 s1 := x"00000000" & inp(119 downto 57);
 338         end case;
 339         case shift(4 downto 3) is       -- middle stage: left by 0, 8, 16 or 24
 340             when "00" =>
 341                 s2 := s1(94 downto 24);
 342             when "01" =>
 343                 s2 := s1(86 downto 16);
 344             when "10" =>
 345                 s2 := s1(78 downto 8);
 346             when others =>
 347                 s2 := s1(70 downto 0);
 348         end case;
 349         case shift(2 downto 0) is       -- fine stage: left by 0..7
 350             when "000" =>
 351                 result := s2(70 downto 7);
 352             when "001" =>
 353                 result := s2(69 downto 6);
 354             when "010" =>
 355                 result := s2(68 downto 5);
 356             when "011" =>
 357                 result := s2(67 downto 4);
 358             when "100" =>
 359                 result := s2(66 downto 3);
 360             when "101" =>
 361                 result := s2(65 downto 2);
 362             when "110" =>
 363                 result := s2(64 downto 1);
 364             when others =>
 365                 result := s2(63 downto 0);
 366         end case;
 367         return result;
 368     end;
 369
 370     -- Build the mask of the bits that will be lost in a right shift:
 371     -- 0-bits on the left and 1-bits on the right.  The shift argument
 372     -- is the bottom 6 bits of a negative (i.e. right) shift count, so
 373     -- the mask has (64 - shift) low-order 1-bits.
 374     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 375         variable mask : std_ulogic_vector(63 downto 0) := (others => '0');
 376     begin
 377         -- bit b is set when its distance from the top bit, 63 - b, is >= shift
 378         for b in mask'range loop
 379             if (63 - b) >= shift then
 380                 mask(b) := '1';
 381             end if;
 382         end loop;
 383         return mask;
 384     end;
 385
 386     -- Split a DP floating-point number into components and work out its class.
 387     -- If is_int = 1, the input is considered an integer
 388     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
 389         variable r       : fpu_reg_type;
 390         variable exp_nz  : std_ulogic;                      -- exponent field is non-zero
 391         variable exp_ao  : std_ulogic;                      -- exponent field is all ones
 392         variable frac_nz : std_ulogic;                      -- fraction field is non-zero
 393         variable cls     : std_ulogic_vector(2 downto 0);   -- exp_ao & exp_nz & frac_nz
 394     begin
 395         r.negative := fpr(63);
 396         exp_nz := or (fpr(62 downto 52));
 397         exp_ao := and (fpr(62 downto 52));
 398         frac_nz := or (fpr(51 downto 0));
 399         if is_int = '0' then
 400             r.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);   -- remove the bias of 1023
 401             if exp_nz = '0' then
 402                 r.exponent := to_signed(-1022, EXP_BITS);   -- zero exponent field (zero or denormal): use -1022
 403             end if;
 404             r.mantissa := "000000000" & exp_nz & fpr(51 downto 0) & "00";   -- unit bit (bit 54) is exp_nz; 2 zero bits below the fraction
 405             cls := exp_ao & exp_nz & frac_nz;
 406             case cls is
 407                 when "000"  => r.class := ZERO;
 408                 when "001"  => r.class := FINITE;    -- denormalized
 409                 when "010"  => r.class := FINITE;
 410                 when "011"  => r.class := FINITE;
 411                 when "110"  => r.class := INFINITY;
 412                 when others => r.class := NAN;       -- includes "111" and any value containing metavalues
 413             end case;
 414         else
 415             r.mantissa := fpr;                       -- integer input: all 64 bits used as-is
 416             r.exponent := (others => '0');
 417             if (fpr(63) or exp_nz or frac_nz) = '1' then     -- true if any of the 64 bits is '1'
 418                 r.class := FINITE;
 419             else
 420                 r.class := ZERO;
 421             end if;
 422         end if;
 423         return r;
 424     end;
 425
 426     -- Construct a DP floating-point result from components
 427     function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 428                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
 429         return std_ulogic_vector is
 430         variable result : std_ulogic_vector(63 downto 0);
 431     begin
 432         result := (others => '0');
 433         result(63) := sign;
 434         case class is
 435             when ZERO =>                                 -- only the sign bit is set
 436             when FINITE =>
 437                 if mantissa(54) = '1' then               -- unit bit set
 438                     -- normalized number
 439                     result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);   -- re-bias, truncated to 11 bits
 440                 end if;                                  -- otherwise the exponent field stays zero
 441                 result(51 downto 29) := mantissa(53 downto 31);
 442                 if single_prec = '0' then                -- for single precision the low 29 fraction bits stay zero
 443                     result(28 downto 0) := mantissa(30 downto 2);
 444                 end if;
 445             when INFINITY =>
 446                 result(62 downto 52) := "11111111111";   -- all-ones exponent, zero fraction
 447             when NAN =>
 448                 result(62 downto 52) := "11111111111";
 449                 result(51) := quieten_nan or mantissa(53);   -- top fraction bit is forced to 1 when quieten_nan is set
 450                 result(50 downto 29) := mantissa(52 downto 31);
 451                 if single_prec = '0' then
 452                     result(28 downto 0) := mantissa(30 downto 2);
 453                 end if;
 454         end case;
 455         return result;
 456     end;
 457
 458     -- Decide whether rounding has to increment the mantissa.
 459     -- The return value is rounding_inc & inexact.
 460     -- When single_prec = 1, x is assumed to include the bottom 29 bits of
 461     -- the mantissa already (usually arranged by setting set_x = 1 earlier).
 462     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 463                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 464                          sign: std_ulogic)
 465         return std_ulogic_vector is
 466         variable tail    : std_ulogic_vector(2 downto 0);   -- two bits below the kept LSB, then x
 467         variable verdict : std_ulogic_vector(1 downto 0);   -- increment & inexact
 468         variable ulp     : std_ulogic;                      -- lowest mantissa bit that is kept
 469     begin
 470         if single_prec = '0' then
 471             ulp  := mantissa(2);
 472             tail := mantissa(1 downto 0) & x;
 473         else
 474             ulp  := mantissa(31);
 475             tail := mantissa(30 downto 29) & x;
 476         end if;
 477         verdict(0) := or (tail);    -- inexact if any discarded bit is set
 478         verdict(1) := '0';
 479         case rn(1 downto 0) is
 480             when "00" =>        -- round to nearest
 481                 verdict(1) := tail(2);
 482                 if tail = "100" and rn(2) = '0' then
 483                     verdict(1) := ulp;  -- exact tie: round to even
 484                 end if;
 485             when "01" =>        -- round towards zero
 486                 null;
 487             when others =>      -- round towards +/- inf
 488                 if rn(0) = sign then
 489                     -- rounding towards greater magnitude: increment iff inexact
 490                     verdict(1) := verdict(0);
 491                 end if;
 492         end case;
 493         return verdict;
 494     end;
 495
 496     -- Work out the 5-bit result flags value to be written into the FPSCR
 497     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 498         return std_ulogic_vector is
 499         -- ascending range, as a bare concatenation or string literal would have
 500         variable flags : std_ulogic_vector(0 to 4);
 501     begin
 502         case class is
 503             when NAN      => flags := "10001";
 504             when ZERO     => flags := sign & "0010";
 505             when INFINITY => flags := '0' & sign & (not sign) & "01";
 506             -- a FINITE value whose unit bit is clear gets the leftmost flag set
 507             when FINITE   => flags := (not unitbit) & sign & (not sign) & "00";
 508         end case;
 509         return flags;
 510     end;
 511
 512 begin
 513     fpu_multiply_0: entity work.multiply    -- multiplier instance used by the FPU, clocked by clk
 514         port map (
 515             clk => clk,
 516             m_in => f_to_multiply,          -- operands and control from the FPU
 517             m_out => multiply_to_f          -- multiplier output back to the FPU
 518             );
 519
 520     fpu_0: process(clk)                     -- state register: loads r from rin on each rising clock edge unless in reset
 521     begin
 522         if rising_edge(clk) then
 523             if rst = '1' then               -- synchronous reset
 524                 r.state <= IDLE;
 525                 r.busy <= '0';
 526                 r.instr_done <= '0';
 527                 r.do_intr <= '0';
 528                 r.fpscr <= (others => '0');
 529                 r.writing_back <= '0';      -- the other fields of r are not assigned during reset
 530             else
 531                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;   -- a request must not arrive while the state machine is not IDLE
 532                 r <= rin;
 533             end if;
 534         end if;
 535     end process;
 536
 537     -- Registered (synchronous) read of the inverse lookup table
 538     lut_access: process(clk)
 539         variable blk : std_ulogic_vector(1 downto 0);
 540         variable idx : std_ulogic_vector(9 downto 0);
 541     begin
 542         if rising_edge(clk) then
 543             -- block 0 is the 1/x table; square roots pick a block with mantissa bits 55..54
 544             blk := "00";
 545             if r.is_sqrt = '1' then
 546                 blk := r.b.mantissa(55 downto 54);
 547             end if;
 548             idx := blk & r.b.mantissa(53 downto 46);
 549             inverse_est <= '1' & inverse_table(to_integer(unsigned(idx)));
 550         end if;
 551     end process;
 552
 553     -- Drive the output records towards execute1 (e_out) and writeback (w_out)
 554     e_out.interrupt <= r.do_intr;
 555     e_out.exception <= r.fpscr(FPSCR_FEX);
 556     e_out.busy <= r.busy;
 557     w_out.write_data <= fp_result;
 558     w_out.write_reg <= r.dest_fpr;
 559     w_out.write_enable <= r.writing_back;
 560     w_out.valid <= not r.do_intr and r.instr_done;  -- no valid when an interrupt is raised
 561     w_out.write_cr_mask <= r.cr_mask;
 562     w_out.write_cr_enable <= (r.is_cmp or r.rc) and r.instr_done;
 563     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &  -- the same
 564                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;   -- value x8
 565
 566     fpu_1: process(all)
 567         variable v           : reg_type;
 568         variable adec        : fpu_reg_type;
 569         variable bdec        : fpu_reg_type;
 570         variable cdec        : fpu_reg_type;
 571         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 572         variable illegal     : std_ulogic;
 573         variable j, k        : integer;
 574         variable flm         : std_ulogic_vector(7 downto 0);
 575         variable int_input   : std_ulogic;
 576         variable mask        : std_ulogic_vector(63 downto 0);
 577         variable in_a0       : std_ulogic_vector(63 downto 0);
 578         variable in_b0       : std_ulogic_vector(63 downto 0);
 579         variable misc        : std_ulogic_vector(63 downto 0);
 580         variable shift_res   : std_ulogic_vector(63 downto 0);
 581         variable round       : std_ulogic_vector(1 downto 0);
 582         variable update_fx   : std_ulogic;
 583         variable arith_done  : std_ulogic;
 584         variable invalid     : std_ulogic;
 585         variable zero_divide : std_ulogic;
 586         variable mant_nz     : std_ulogic;
 587         variable min_exp     : signed(EXP_BITS-1 downto 0);
 588         variable max_exp     : signed(EXP_BITS-1 downto 0);
 589         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 590         variable new_exp     : signed(EXP_BITS-1 downto 0);
 591         variable exp_tiny    : std_ulogic;
 592         variable exp_huge    : std_ulogic;
 593         variable renormalize : std_ulogic;
 594         variable clz         : std_ulogic_vector(5 downto 0);
 595         variable set_x       : std_ulogic;
 596         variable mshift      : signed(EXP_BITS-1 downto 0);
 597         variable need_check  : std_ulogic;
 598         variable msb         : std_ulogic;
 599         variable is_add      : std_ulogic;
 600         variable qnan_result : std_ulogic;
 601         variable longmask    : std_ulogic;
 602         variable set_a       : std_ulogic;
 603         variable set_b       : std_ulogic;
 604         variable set_c       : std_ulogic;
 605         variable px_nz       : std_ulogic;
 606         variable maddend     : std_ulogic_vector(127 downto 0);
 607         variable set_y       : std_ulogic;
 608         variable pcmpb_eq    : std_ulogic;
 609         variable pcmpb_lt    : std_ulogic;
 610         variable pshift      : std_ulogic;
 611         variable renorm_sqrt : std_ulogic;
 612         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 613         variable shiftin     : std_ulogic;
 614     begin
 615         v := r;
 616         illegal := '0';
 617         v.busy := '0';
 618         int_input := '0';
 619
 620         -- capture incoming instruction
 621         if e_in.valid = '1' then
 622             v.insn := e_in.insn;
 623             v.op := e_in.op;
 624             v.fe_mode := or (e_in.fe_mode);
 625             v.dest_fpr := e_in.frt;
 626             v.single_prec := e_in.single;
 627             v.int_result := '0';
 628             v.rc := e_in.rc;
 629             v.is_cmp := e_in.out_cr;
 630             if e_in.out_cr = '0' then
 631                 v.cr_mask := num_to_fxm(1);
 632             else
 633                 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
 634             end if;
 635             int_input := '0';
 636             if e_in.op = OP_FPOP_I then
 637                 int_input := '1';
 638             end if;
 639             v.quieten_nan := '1';
 640             v.tiny := '0';
 641             v.denorm := '0';
 642             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 643             v.is_subtract := '0';
 644             v.is_multiply := '0';
 645             v.is_sqrt := '0';
 646             v.add_bsmall := '0';
 647             v.doing_ftdiv := "00";
 648
 649             adec := decode_dp(e_in.fra, int_input);
 650             bdec := decode_dp(e_in.frb, int_input);
 651             cdec := decode_dp(e_in.frc, int_input);
 652             v.a := adec;
 653             v.b := bdec;
 654             v.c := cdec;
 655
 656             v.exp_cmp := '0';
 657             if adec.exponent > bdec.exponent then
 658                 v.exp_cmp := '1';
 659             end if;
 660         end if;
 661
 662         r_hi_nz <= or (r.r(55 downto 31));
 663         r_lo_nz <= or (r.r(30 downto 2));
 664
 665         if r.single_prec = '0' then
 666             if r.doing_ftdiv(1) = '0' then
 667                 max_exp := to_signed(1023, EXP_BITS);
 668             else
 669                 max_exp := to_signed(1020, EXP_BITS);
 670             end if;
 671             if r.doing_ftdiv(0) = '0' then
 672                 min_exp := to_signed(-1022, EXP_BITS);
 673             else
 674                 min_exp := to_signed(-1021, EXP_BITS);
 675             end if;
 676             bias_exp := to_signed(1536, EXP_BITS);
 677         else
 678             max_exp := to_signed(127, EXP_BITS);
 679             min_exp := to_signed(-126, EXP_BITS);
 680             bias_exp := to_signed(192, EXP_BITS);
 681         end if;
 682         new_exp := r.result_exp - r.shift;
 683         exp_tiny := '0';
 684         exp_huge := '0';
 685         if new_exp < min_exp then
 686             exp_tiny := '1';
 687         end if;
 688         if new_exp > max_exp then
 689             exp_huge := '1';
 690         end if;
 691
 692         -- Compare P with zero and with B
 693         px_nz := or (r.p(57 downto 4));
 694         pcmpb_eq := '0';
 695         if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
 696             pcmpb_eq := '1';
 697         end if;
 698         pcmpb_lt := '0';
 699         if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
 700             pcmpb_lt := '1';
 701         end if;
 702
 703         v.writing_back := '0';
 704         v.instr_done := '0';
 705         v.update_fprf := '0';
 706         v.shift := to_signed(0, EXP_BITS);
 707         v.first := '0';
 708         opsel_a <= AIN_R;
 709         opsel_ainv <= '0';
 710         opsel_amask <= '0';
 711         opsel_b <= BIN_ZERO;
 712         opsel_binv <= '0';
 713         opsel_r <= RES_SUM;
 714         carry_in <= '0';
 715         misc_sel <= "0000";
 716         fpscr_mask := (others => '1');
 717         update_fx := '0';
 718         arith_done := '0';
 719         invalid := '0';
 720         zero_divide := '0';
 721         renormalize := '0';
 722         set_x := '0';
 723         qnan_result := '0';
 724         longmask := r.single_prec;
 725         set_a := '0';
 726         set_b := '0';
 727         set_c := '0';
 728         f_to_multiply.is_32bit <= '0';
 729         f_to_multiply.valid <= '0';
 730         msel_1 <= MUL1_A;
 731         msel_2 <= MUL2_C;
 732         msel_add <= MULADD_ZERO;
 733         msel_inv <= '0';
 734         set_y := '0';
 735         pshift := '0';
 736         renorm_sqrt := '0';
 737         shiftin := '0';
 738         case r.state is
 739             when IDLE =>
 740                 if e_in.valid = '1' then
 741                     case e_in.insn(5 downto 1) is
 742                         when "00000" =>
 743                             if e_in.insn(8) = '1' then
 744                                 if e_in.insn(6) = '0' then
 745                                     v.state := DO_FTDIV;
 746                                 else
 747                                     v.state := DO_FTSQRT;
 748                                 end if;
 749                             elsif e_in.insn(7) = '1' then
 750                                 v.state := DO_MCRFS;
 751                             else
 752                                 v.state := DO_FCMP;
 753                             end if;
 754                         when "00110" =>
 755                             if e_in.insn(10) = '0' then
 756                                 if e_in.insn(8) = '0' then
 757                                     v.state := DO_MTFSB;
 758                                 else
 759                                     v.state := DO_MTFSFI;
 760                                 end if;
 761                             else
 762                                 v.state := DO_FMRG;
 763                             end if;
 764                         when "00111" =>
 765                             if e_in.insn(8) = '0' then
 766                                 v.state := DO_MFFS;
 767                             else
 768                                 v.state := DO_MTFSF;
 769                             end if;
 770                         when "01000" =>
 771                             if e_in.insn(9 downto 8) /= "11" then
 772                                 v.state := DO_FMR;
 773                             else
 774                                 v.state := DO_FRI;
 775                             end if;
 776                         when "01100" =>
 777                             v.state := DO_FRSP;
 778                         when "01110" =>
 779                             if int_input = '1' then
 780                                 -- fcfid[u][s]
 781                                 v.state := DO_FCFID;
 782                             else
 783                                 v.state := DO_FCTI;
 784                             end if;
 785                         when "01111" =>
 786                             v.round_mode := "001";
 787                             v.state := DO_FCTI;
 788                         when "10010" =>
 789                             v.state := DO_FDIV;
 790                         when "10100" | "10101" =>
 791                             v.state := DO_FADD;
 792                         when "10110" =>
 793                             v.is_sqrt := '1';
 794                             v.state := DO_FSQRT;
 795                         when "10111" =>
 796                             v.state := DO_FSEL;
 797                         when "11000" =>
 798                             v.state := DO_FRE;
 799                         when "11001" =>
 800                             v.is_multiply := '1';
 801                             v.state := DO_FMUL;
 802                         when "11010" =>
 803                             v.is_sqrt := '1';
 804                             v.state := DO_FRSQRTE;
 805                         when others =>
 806                             illegal := '1';
 807                     end case;
 808                 end if;
 809                 v.x := '0';
 810                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
 811
 812             when DO_MCRFS =>
 813                 j := to_integer(unsigned(insn_bfa(r.insn)));
 814                 for i in 0 to 7 loop
 815                     if i = j then
 816                         k := (7 - i) * 4;
 817                         v.cr_result := r.fpscr(k + 3 downto k);
 818                         fpscr_mask(k + 3 downto k) := "0000";
 819                     end if;
 820                 end loop;
 821                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
 822                 v.instr_done := '1';
 823                 v.state := IDLE;
 824
 825             when DO_FTDIV =>
 826                 v.instr_done := '1';
 827                 v.state := IDLE;
 828                 v.cr_result := "0000";
 829                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
 830                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 831                     v.cr_result(2) := '1';
 832                 end if;
 833                 if r.a.class = NAN or r.a.class = INFINITY or
 834                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
 835                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
 836                     v.cr_result(1) := '1';
 837                 else
 838                     v.doing_ftdiv := "11";
 839                     v.first := '1';
 840                     v.state := FTDIV_1;
 841                     v.instr_done := '0';
 842                 end if;
 843
 844             when DO_FTSQRT =>
 845                 v.instr_done := '1';
 846                 v.state := IDLE;
 847                 v.cr_result := "0000";
 848                 if r.b.class = ZERO or r.b.class = INFINITY or
 849                     (r.b.class = FINITE and r.b.mantissa(53) = '0') then
 850                     v.cr_result(2) := '1';
 851                 end if;
 852                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
 853                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
 854                     v.cr_result(1) := '0';
 855                 end if;
 856
 857             when DO_FCMP =>
 858                 -- fcmp[uo]
 859                 v.instr_done := '1';
 860                 v.state := IDLE;
 861                 update_fx := '1';
 862                 opsel_a <= AIN_B;
 863                 opsel_r <= RES_SUM;
 864                 v.result_exp := r.b.exponent;
 865                 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
 866                     (r.b.class = NAN and r.b.mantissa(53) = '0') then
 867                     -- Signalling NAN
 868                     v.fpscr(FPSCR_VXSNAN) := '1';
 869                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
 870                         v.fpscr(FPSCR_VXVC) := '1';
 871                     end if;
 872                     invalid := '1';
 873                     v.cr_result := "0001";          -- unordered
 874                 elsif r.a.class = NAN or r.b.class = NAN then
 875                     if r.insn(6) = '1' then
 876                         -- fcmpo
 877                         v.fpscr(FPSCR_VXVC) := '1';
 878                         invalid := '1';
 879                     end if;
 880                     v.cr_result := "0001";          -- unordered
 881                 elsif r.a.class = ZERO and r.b.class = ZERO then
 882                     v.cr_result := "0010";          -- equal
 883                 elsif r.a.negative /= r.b.negative then
 884                     v.cr_result := r.a.negative & r.b.negative & "00";
 885                 elsif r.a.class = ZERO then
 886                     -- A and B are the same sign from here down
 887                     v.cr_result := not r.b.negative & r.b.negative & "00";
 888                 elsif r.a.class = INFINITY then
 889                     if r.b.class = INFINITY then
 890                         v.cr_result := "0010";
 891                     else
 892                         v.cr_result := r.a.negative & not r.a.negative & "00";
 893                     end if;
 894                 elsif r.b.class = ZERO then
 895                     -- A is finite from here down
 896                     v.cr_result := r.a.negative & not r.a.negative & "00";
 897                 elsif r.b.class = INFINITY then
 898                     v.cr_result := not r.b.negative & r.b.negative & "00";
 899                 elsif r.exp_cmp = '1' then
 900                     -- A and B are both finite from here down
 901                     v.cr_result := r.a.negative & not r.a.negative & "00";
 902                 elsif r.a.exponent /= r.b.exponent then
 903                     -- A exponent is smaller than B
 904                     v.cr_result := not r.a.negative & r.a.negative & "00";
 905                 else
 906                     -- Prepare to subtract mantissas, put B in R
 907                     v.cr_result := "0000";
 908                     v.instr_done := '0';
 909                     v.state := CMP_1;
 910                 end if;
 911                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
 912
 913             when DO_MTFSB =>
 914                 -- mtfsb{0,1}
 915                 j := to_integer(unsigned(insn_bt(r.insn)));
 916                 for i in 0 to 31 loop
 917                     if i = j then
 918                         v.fpscr(31 - i) := r.insn(6);
 919                     end if;
 920                 end loop;
 921                 v.instr_done := '1';
 922                 v.state := IDLE;
 923
 924             when DO_MTFSFI =>
 925                 -- mtfsfi
 926                 j := to_integer(unsigned(insn_bf(r.insn)));
 927                 if r.insn(16) = '0' then
 928                     for i in 0 to 7 loop
 929                         if i = j then
 930                             k := (7 - i) * 4;
 931                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
 932                         end if;
 933                     end loop;
 934                 end if;
 935                 v.instr_done := '1';
 936                 v.state := IDLE;
 937
 938             when DO_FMRG =>
 939                 -- fmrgew, fmrgow
 940                 opsel_r <= RES_MISC;
 941                 misc_sel <= "01" & r.insn(8) & '0';
 942                 v.int_result := '1';
 943                 v.writing_back := '1';
 944                 v.instr_done := '1';
 945                 v.state := IDLE;
 946
 947             when DO_MFFS =>
 948                 v.int_result := '1';
 949                 v.writing_back := '1';
 950                 opsel_r <= RES_MISC;
 951                 case r.insn(20 downto 16) is
 952                     when "00000" =>
 953                         -- mffs
 954                     when "00001" =>
 955                         -- mffsce
 956                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
 957                     when "10100" | "10101" =>
 958                         -- mffscdrn[i] (but we don't implement DRN)
 959                         fpscr_mask := x"000000FF";
 960                     when "10110" =>
 961                         -- mffscrn
 962                         fpscr_mask := x"000000FF";
 963                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
 964                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
 965                     when "10111" =>
 966                         -- mffscrni
 967                         fpscr_mask := x"000000FF";
 968                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
 969                     when "11000" =>
 970                         -- mffsl
 971                         fpscr_mask := x"0007F0FF";
 972                     when others =>
 973                         illegal := '1';
 974                 end case;
 975                 v.instr_done := '1';
 976                 v.state := IDLE;
 977
 978             when DO_MTFSF =>
 979                 if r.insn(25) = '1' then
 980                     flm := x"FF";
 981                 elsif r.insn(16) = '1' then
 982                     flm := x"00";
 983                 else
 984                     flm := r.insn(24 downto 17);
 985                 end if;
 986                 for i in 0 to 7 loop
 987                     k := i * 4;
 988                     if flm(i) = '1' then
 989                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
 990                     end if;
 991                 end loop;
 992                 v.instr_done := '1';
 993                 v.state := IDLE;
 994
 995             when DO_FMR =>
 996                 opsel_a <= AIN_B;
 997                 v.result_class := r.b.class;
 998                 v.result_exp := r.b.exponent;
 999                 v.quieten_nan := '0';
1000                 if r.insn(9) = '1' then
1001                     v.result_sign := '0';              -- fabs
1002                 elsif r.insn(8) = '1' then
1003                     v.result_sign := '1';              -- fnabs
1004                 elsif r.insn(7) = '1' then
1005                     v.result_sign := r.b.negative;     -- fmr
1006                 elsif r.insn(6) = '1' then
1007                     v.result_sign := not r.b.negative; -- fneg
1008                 else
1009                     v.result_sign := r.a.negative;     -- fcpsgn
1010                 end if;
1011                 v.writing_back := '1';
1012                 v.instr_done := '1';
1013                 v.state := IDLE;
1014
1015             when DO_FRI =>    -- fri[nzpm]
1016                 opsel_a <= AIN_B;
1017                 v.result_class := r.b.class;
1018                 v.result_sign := r.b.negative;
1019                 v.result_exp := r.b.exponent;
1020                 v.fpscr(FPSCR_FR) := '0';
1021                 v.fpscr(FPSCR_FI) := '0';
1022                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1023                     -- Signalling NAN
1024                     v.fpscr(FPSCR_VXSNAN) := '1';
1025                     invalid := '1';
1026                 end if;
1027                 if r.b.class = FINITE then
1028                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1029                         -- integer already, no rounding required
1030                         arith_done := '1';
1031                     else
1032                         v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1033                         v.state := FRI_1;
1034                         v.round_mode := '1' & r.insn(7 downto 6);
1035                     end if;
1036                 else
1037                     arith_done := '1';
1038                 end if;
1039
1040             when DO_FRSP =>
1041                 opsel_a <= AIN_B;
1042                 v.result_class := r.b.class;
1043                 v.result_sign := r.b.negative;
1044                 v.result_exp := r.b.exponent;
1045                 v.fpscr(FPSCR_FR) := '0';
1046                 v.fpscr(FPSCR_FI) := '0';
1047                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1048                     -- Signalling NAN
1049                     v.fpscr(FPSCR_VXSNAN) := '1';
1050                     invalid := '1';
1051                 end if;
1052                 set_x := '1';
1053                 if r.b.class = FINITE then
1054                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1055                         v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1056                         v.state := ROUND_UFLOW;
1057                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1058                         v.state := ROUND_OFLOW;
1059                     else
1060                         v.shift := to_signed(-2, EXP_BITS);
1061                         v.state := ROUNDING;
1062                     end if;
1063                 else
1064                     arith_done := '1';
1065                 end if;
1066
1067             when DO_FCTI =>
1068                 -- instr bit 9: 1=dword 0=word
1069                 -- instr bit 8: 1=unsigned 0=signed
1070                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1071                 opsel_a <= AIN_B;
1072                 v.result_class := r.b.class;
1073                 v.result_sign := r.b.negative;
1074                 v.result_exp := r.b.exponent;
1075                 v.fpscr(FPSCR_FR) := '0';
1076                 v.fpscr(FPSCR_FI) := '0';
1077                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1078                     -- Signalling NAN
1079                     v.fpscr(FPSCR_VXSNAN) := '1';
1080                     invalid := '1';
1081                 end if;
1082
1083                 v.int_result := '1';
1084                 case r.b.class is
1085                     when ZERO =>
1086                         arith_done := '1';
1087                     when FINITE =>
1088                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1089                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1090                             v.state := INT_OFLOW;
1091                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1092                             -- integer already, no rounding required,
1093                             -- shift into final position
1094                             v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1095                             if r.insn(8) = '1' and r.b.negative = '1' then
1096                                 v.state := INT_OFLOW;
1097                             else
1098                                 v.state := INT_ISHIFT;
1099                             end if;
1100                         else
1101                             v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1102                             v.state := INT_SHIFT;
1103                         end if;
1104                     when INFINITY | NAN =>
1105                         v.state := INT_OFLOW;
1106                 end case;
1107
1108             when DO_FCFID =>
1109                 v.result_sign := '0';
1110                 opsel_a <= AIN_B;
1111                 if r.insn(8) = '0' and r.b.negative = '1' then
1112                     -- fcfid[s] with negative operand, set R = -B
1113                     opsel_ainv <= '1';
1114                     carry_in <= '1';
1115                     v.result_sign := '1';
1116                 end if;
1117                 v.result_class := r.b.class;
1118                 v.result_exp := to_signed(54, EXP_BITS);
1119                 v.fpscr(FPSCR_FR) := '0';
1120                 v.fpscr(FPSCR_FI) := '0';
1121                 if r.b.class = ZERO then
1122                     arith_done := '1';
1123                 else
1124                     v.state := FINISH;
1125                 end if;
1126
1127             when DO_FADD =>
1128                 -- fadd[s] and fsub[s]
1129                 opsel_a <= AIN_A;
1130                 v.result_sign := r.a.negative;
1131                 v.result_class := r.a.class;
1132                 v.result_exp := r.a.exponent;
1133                 v.fpscr(FPSCR_FR) := '0';
1134                 v.fpscr(FPSCR_FI) := '0';
1135                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1136                 if r.a.class = FINITE and r.b.class = FINITE then
1137                     v.is_subtract := not is_add;
1138                     v.add_bsmall := r.exp_cmp;
1139                     if r.exp_cmp = '0' then
1140                         v.shift := r.a.exponent - r.b.exponent;
1141                         v.result_sign := r.b.negative xnor r.insn(1);
1142                         if r.a.exponent = r.b.exponent then
1143                             v.state := ADD_2;
1144                         else
1145                             v.state := ADD_SHIFT;
1146                         end if;
1147                     else
1148                         opsel_a <= AIN_B;
1149                         v.shift := r.b.exponent - r.a.exponent;
1150                         v.result_exp := r.b.exponent;
1151                         v.state := ADD_SHIFT;
1152                     end if;
1153                 else
1154                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1155                         (r.b.class = NAN and r.b.mantissa(53) = '0') then
1156                         -- Signalling NAN
1157                         v.fpscr(FPSCR_VXSNAN) := '1';
1158                         invalid := '1';
1159                     end if;
1160                     if r.a.class = NAN then
1161                         -- nothing to do, result is A
1162                     elsif r.b.class = NAN then
1163                         v.result_class := NAN;
1164                         v.result_sign := r.b.negative;
1165                         opsel_a <= AIN_B;
1166                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1167                         -- invalid operation, construct QNaN
1168                         v.fpscr(FPSCR_VXISI) := '1';
1169                         qnan_result := '1';
1170                     elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1171                         -- return -0 for rounding to -infinity
1172                         v.result_sign := r.round_mode(1) and r.round_mode(0);
1173                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1174                         -- nothing to do, result is A
1175                     else
1176                         -- result is +/- B
1177                         v.result_sign := r.b.negative xnor r.insn(1);
1178                         v.result_class := r.b.class;
1179                         v.result_exp := r.b.exponent;
1180                         opsel_a <= AIN_B;
1181                     end if;
1182                     arith_done := '1';
1183                 end if;
1184
1185             when DO_FMUL =>
1186                 -- fmul[s]
1187                 opsel_a <= AIN_A;
1188                 v.result_sign := r.a.negative;
1189                 v.result_class := r.a.class;
1190                 v.result_exp := r.a.exponent;
1191                 v.fpscr(FPSCR_FR) := '0';
1192                 v.fpscr(FPSCR_FI) := '0';
1193                 if r.a.class = FINITE and r.c.class = FINITE then
1194                     v.result_sign := r.a.negative xor r.c.negative;
1195                     v.result_exp := r.a.exponent + r.c.exponent;
1196                     -- Renormalize denorm operands
1197                     if r.a.mantissa(54) = '0' then
1198                         v.state := RENORM_A;
1199                     elsif r.c.mantissa(54) = '0' then
1200                         opsel_a <= AIN_C;
1201                         v.state := RENORM_C;
1202                     else
1203                         f_to_multiply.valid <= '1';
1204                         v.state := MULT_1;
1205                     end if;
1206                 else
1207                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1208                         (r.c.class = NAN and r.c.mantissa(53) = '0') then
1209                         -- Signalling NAN
1210                         v.fpscr(FPSCR_VXSNAN) := '1';
1211                         invalid := '1';
1212                     end if;
1213                     if r.a.class = NAN then
1214                     -- result is A
1215                     elsif r.c.class = NAN then
1216                         v.result_class := NAN;
1217                         v.result_sign := r.c.negative;
1218                         opsel_a <= AIN_C;
1219                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1220                         (r.a.class = ZERO and r.c.class = INFINITY) then
1221                         -- invalid operation, construct QNaN
1222                         v.fpscr(FPSCR_VXIMZ) := '1';
1223                         qnan_result := '1';
1224                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1225                         -- result is +/- A
1226                         v.result_sign := r.a.negative xor r.c.negative;
1227                     else
1228                         -- r.c.class is ZERO or INFINITY
1229                         v.result_class := r.c.class;
1230                         v.result_sign := r.a.negative xor r.c.negative;
1231                     end if;
1232                     arith_done := '1';
1233                 end if;
1234
1235             when DO_FDIV =>  -- Divide A by B: set up sign/exponent, then start the divide or resolve a special case
1236                 opsel_a <= AIN_A;
1237                 v.result_sign := r.a.negative;  -- NOTE(review): redundant, overwritten unconditionally a few lines below
1238                 v.result_class := r.a.class;  -- default result class is A's; special cases below override it
1239                 v.result_exp := r.a.exponent;  -- NOTE(review): redundant, overwritten unconditionally a few lines below
1240                 v.fpscr(FPSCR_FR) := '0';
1241                 v.fpscr(FPSCR_FI) := '0';
1242                 v.result_sign := r.a.negative xor r.b.negative;  -- quotient sign is the XOR of the operand signs
1243                 v.result_exp := r.a.exponent - r.b.exponent;  -- quotient exponent is the difference of the exponents
1244                 v.count := "00";  -- iteration counter tested and incremented in DIV_2 / DIV_3
1245                 if r.a.class = FINITE and r.b.class = FINITE then
1246                     -- Renormalize denorm operands
1247                     if r.a.mantissa(54) = '0' then
1248                         v.state := RENORM_A;
1249                     elsif r.b.mantissa(54) = '0' then
1250                         opsel_a <= AIN_B;
1251                         v.state := RENORM_B;
1252                     else
1253                         v.first := '1';  -- both operands have bit 54 set: go straight to the divide
1254                         v.state := DIV_2;
1255                     end if;
1256                 else
1257                     if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1258                         (r.b.class = NAN and r.b.mantissa(53) = '0') then
1259                         -- Signalling NAN
1260                         v.fpscr(FPSCR_VXSNAN) := '1';
1261                         invalid := '1';
1262                     end if;
1263                     if r.a.class = NAN then
1264                         -- result is A
1265                         v.result_sign := r.a.negative;  -- use A's own sign instead of the XOR computed above
1266                     elsif r.b.class = NAN then
1267                         v.result_class := NAN;
1268                         v.result_sign := r.b.negative;
1269                         opsel_a <= AIN_B;
1270                     elsif r.b.class = INFINITY then  -- neither operand is a NaN from here on
1271                         if r.a.class = INFINITY then
1272                             v.fpscr(FPSCR_VXIDI) := '1';  -- infinity / infinity
1273                             qnan_result := '1';
1274                         else
1275                             v.result_class := ZERO;  -- zero or finite A divided by infinity
1276                         end if;
1277                     elsif r.b.class = ZERO then
1278                         if r.a.class = ZERO then
1279                             v.fpscr(FPSCR_VXZDZ) := '1';  -- zero / zero
1280                             qnan_result := '1';
1281                         else
1282                             if r.a.class = FINITE then  -- A is FINITE or INFINITY here; only FINITE flags zero_divide
1283                                 zero_divide := '1';
1284                             end if;
1285                             v.result_class := INFINITY;
1286                         end if;
1287                     -- else r.b.class = FINITE, result_class = r.a.class
1288                     end if;
1289                     arith_done := '1';  -- every special case completes in this state
1290                 end if;
1291
1292             when DO_FSEL =>  -- Select C or B as the result depending on the sign/class of A
1293                 opsel_a <= AIN_A;
1294                 v.fpscr(FPSCR_FR) := '0';
1295                 v.fpscr(FPSCR_FI) := '0';
1296                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then  -- A is a zero of either sign, or a non-negative non-NaN
1297                     v.result_sign := r.c.negative;
1298                     v.result_exp := r.c.exponent;
1299                     v.result_class := r.c.class;
1300                     opsel_a <= AIN_C;
1301                 else  -- A is a NaN, or negative and non-zero: take B
1302                     v.result_sign := r.b.negative;
1303                     v.result_exp := r.b.exponent;
1304                     v.result_class := r.b.class;
1305                     opsel_a <= AIN_B;
1306                 end if;
1307                 v.quieten_nan := '0';  -- presumably so a selected NaN is passed through unmodified -- TODO confirm
1308                 arith_done := '1';
1309
1310             when DO_FSQRT =>  -- Square root of B: handle special classes, otherwise start the iteration
1311                 opsel_a <= AIN_B;
1312                 v.result_class := r.b.class;  -- default: result has B's class and sign
1313                 v.result_sign := r.b.negative;
1314                 v.fpscr(FPSCR_FR) := '0';
1315                 v.fpscr(FPSCR_FI) := '0';
1316                 if r.b.class = NAN and r.b.mantissa(53) = '0' then  -- same test the FDIV arm labels "Signalling NAN"
1317                     v.fpscr(FPSCR_VXSNAN) := '1';
1318                     invalid := '1';
1319                 end if;
1320                 case r.b.class is
1321                     when FINITE =>
1322                         v.result_exp := r.b.exponent;
1323                         if r.b.negative = '1' then  -- negative finite operand: invalid, result is a QNaN
1324                             v.fpscr(FPSCR_VXSQRT) := '1';
1325                             qnan_result := '1';
1326                             arith_done := '1';
1327                         elsif r.b.mantissa(54) = '0' then  -- bit 54 clear: renormalize B first
1328                             v.state := RENORM_B;
1329                         elsif r.b.exponent(0) = '0' then  -- even exponent: start directly
1330                             v.state := SQRT_1;
1331                         else  -- odd exponent: go through RENORM_B2 with a shift count of 1
1332                             v.shift := to_signed(1, EXP_BITS);
1333                             v.state := RENORM_B2;
1334                         end if;
1335                     when NAN | ZERO =>
1336                         -- result is B
1337                         arith_done := '1';
1338                     when INFINITY =>
1339                         if r.b.negative = '1' then  -- negative infinity: invalid, result is a QNaN
1340                             v.fpscr(FPSCR_VXSQRT) := '1';
1341                             qnan_result := '1';
1342                         -- else result is B
1343                         end if;
1344                         arith_done := '1';
1345                 end case;
1346
1347             when DO_FRE =>  -- Reciprocal estimate of B: handle special classes, otherwise go to FRE_1
1348                 opsel_a <= AIN_B;
1349                 v.result_class := r.b.class;  -- default: result has B's class and sign
1350                 v.result_sign := r.b.negative;
1351                 v.fpscr(FPSCR_FR) := '0';
1352                 v.fpscr(FPSCR_FI) := '0';
1353                 if r.b.class = NAN and r.b.mantissa(53) = '0' then  -- same test the FDIV arm labels "Signalling NAN"
1354                     v.fpscr(FPSCR_VXSNAN) := '1';
1355                     invalid := '1';
1356                 end if;
1357                 case r.b.class is
1358                     when FINITE =>
1359                         v.result_exp := - r.b.exponent;  -- exponent of the reciprocal is the negated exponent
1360                         if r.b.mantissa(54) = '0' then  -- bit 54 clear: renormalize B first
1361                             v.state := RENORM_B;
1362                         else
1363                             v.state := FRE_1;
1364                         end if;
1365                     when NAN =>
1366                         -- result is B
1367                         arith_done := '1';
1368                     when INFINITY =>
1369                         v.result_class := ZERO;  -- sign stays B's sign (assigned above)
1370                         arith_done := '1';
1371                     when ZERO =>
1372                         v.result_class := INFINITY;  -- reciprocal of zero: infinity, flagged as zero-divide
1373                         zero_divide := '1';
1374                         arith_done := '1';
1375                 end case;
1376
1377             when DO_FRSQRTE =>  -- Reciprocal square-root estimate of B: handle special classes, otherwise go to RSQRT_1
1378                 opsel_a <= AIN_B;
1379                 v.result_class := r.b.class;  -- default: result has B's class and sign
1380                 v.result_sign := r.b.negative;
1381                 v.fpscr(FPSCR_FR) := '0';
1382                 v.fpscr(FPSCR_FI) := '0';
1383                 if r.b.class = NAN and r.b.mantissa(53) = '0' then  -- same test the FDIV arm labels "Signalling NAN"
1384                     v.fpscr(FPSCR_VXSNAN) := '1';
1385                     invalid := '1';
1386                 end if;
1387                 v.shift := to_signed(1, EXP_BITS);  -- set for every class; the odd-exponent path below enters RENORM_B2 with it
1388                 case r.b.class is
1389                     when FINITE =>
1390                         v.result_exp := r.b.exponent;
1391                         if r.b.negative = '1' then  -- negative finite operand: invalid, result is a QNaN
1392                             v.fpscr(FPSCR_VXSQRT) := '1';
1393                             qnan_result := '1';
1394                             arith_done := '1';
1395                         elsif r.b.mantissa(54) = '0' then  -- bit 54 clear: renormalize B first
1396                             v.state := RENORM_B;
1397                         elsif r.b.exponent(0) = '0' then  -- even exponent: start directly
1398                             v.state := RSQRT_1;
1399                         else  -- odd exponent: go through RENORM_B2 first
1400                             v.state := RENORM_B2;
1401                         end if;
1402                     when NAN =>
1403                         -- result is B
1404                         arith_done := '1';
1405                     when INFINITY =>
1406                         if r.b.negative = '1' then  -- negative infinity: invalid, result is a QNaN
1407                             v.fpscr(FPSCR_VXSQRT) := '1';
1408                             qnan_result := '1';
1409                         else
1410                             v.result_class := ZERO;  -- positive infinity gives zero
1411                         end if;
1412                         arith_done := '1';
1413                     when ZERO =>
1414                         v.result_class := INFINITY;  -- zero operand: infinity, flagged as zero-divide
1415                         zero_divide := '1';
1416                         arith_done := '1';
1417                 end case;
1418
1419             when RENORM_A =>  -- First cycle of renormalizing operand A (entered when A's mantissa bit 54 is clear)
1420                 renormalize := '1';
1421                 v.state := RENORM_A2;
1422
1423             when RENORM_A2 =>  -- Second cycle: write back A and the adjusted exponent, then move on to the other operand
1424                 set_a := '1';
1425                 v.result_exp := new_exp;
1426                 if r.insn(4) = '1' then  -- this branch continues with C and MULT_1 (the multiply path)
1427                     opsel_a <= AIN_C;
1428                     if r.c.mantissa(54) = '1' then  -- C needs no renormalization: start the multiply
1429                         v.first := '1';
1430                         v.state := MULT_1;
1431                     else
1432                         v.state := RENORM_C;
1433                     end if;
1434                 else  -- this branch continues with B and DIV_2 (the divide path)
1435                         opsel_a <= AIN_B;
1436                         if r.b.mantissa(54) = '1' then  -- B needs no renormalization: start the divide
1437                             v.first := '1';
1438                             v.state := DIV_2;
1439                         else
1440                             v.state := RENORM_B;
1441                     end if;
1442                 end if;
1443
1444             when RENORM_B =>  -- First cycle of renormalizing operand B
1445                 renormalize := '1';
1446                 renorm_sqrt := r.is_sqrt;  -- pass the square-root flag to the renormalization logic (defined outside this view)
1447                 v.state := RENORM_B2;
1448
1449             when RENORM_B2 =>  -- Second cycle: write back B, adjust the result exponent, then go to LOOKUP
1450                 set_b := '1';
1451                 if r.is_sqrt = '0' then
1452                     v.result_exp := r.result_exp + r.shift;  -- non-sqrt case: add the shift count
1453                 else
1454                     v.result_exp := new_exp;  -- sqrt case: take new_exp (computed outside this view)
1455                 end if;
1456                 v.state := LOOKUP;
1457
1458             when RENORM_C =>  -- First cycle of renormalizing operand C
1459                 renormalize := '1';
1460                 v.state := RENORM_C2;
1461
1462             when RENORM_C2 =>  -- Second cycle: write back C and the adjusted exponent, then start the multiply
1463                 set_c := '1';
1464                 v.result_exp := new_exp;
1465                 v.first := '1';  -- MULT_1 drives f_to_multiply.valid from r.first
1466                 v.state := MULT_1;
1467
1468             when ADD_SHIFT =>  -- Take the shifter output as the result and record X, before the add in ADD_2
1469                 opsel_r <= RES_SHIFT;
1470                 set_x := '1';
1471                 longmask := '0';
1472                 v.state := ADD_2;
1473
1474             when ADD_2 =>  -- Combine the operand picked by r.add_bsmall with R (inverted when subtracting)
1475                 if r.add_bsmall = '1' then
1476                     opsel_a <= AIN_A;
1477                 else
1478                     opsel_a <= AIN_B;
1479                 end if;
1480                 opsel_b <= BIN_R;
1481                 opsel_binv <= r.is_subtract;  -- invert the B-side input for a subtract
1482                 carry_in <= r.is_subtract and not r.x;  -- carry in for a subtract only when X is clear
1483                 v.shift := to_signed(-1, EXP_BITS);
1484                 v.state := ADD_3;
1485
1486             when ADD_3 =>  -- Inspect the sum in R and choose how to finish the add/subtract
1487                 -- check for overflow or negative result (can't get both)
1488                 if r.r(63) = '1' then
1489                     -- result is opposite sign to expected
1490                     v.result_sign := not r.result_sign;
1491                     opsel_ainv <= '1';  -- inverted A-side input plus carry-in: presumably negates the value -- TODO confirm datapath
1492                     carry_in <= '1';
1493                     v.state := FINISH;
1494                 elsif r.r(55) = '1' then
1495                     -- sum overflowed, shift right
1496                     opsel_r <= RES_SHIFT;
1497                     set_x := '1';
1498                     v.shift := to_signed(-2, EXP_BITS);
1499                     if exp_huge = '1' then
1500                         v.state := ROUND_OFLOW;
1501                     else
1502                         v.state := ROUNDING;
1503                     end if;
1504                 elsif r.r(54) = '1' then  -- bit 54 set and no overflow: go straight to rounding
1505                     set_x := '1';
1506                     v.shift := to_signed(-2, EXP_BITS);
1507                     v.state := ROUNDING;
1508                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then  -- none of the non-zero indicators is set
1509                     -- r.x must be zero at this point
1510                     v.result_class := ZERO;
1511                     if r.is_subtract = '1' then
1512                         -- set result sign depending on rounding mode
1513                         v.result_sign := r.round_mode(1) and r.round_mode(0);  -- negative only when both mode bits are set
1514                     end if;
1515                     arith_done := '1';
1516                 else  -- non-zero but bit 54 clear: renormalize
1517                     renormalize := '1';
1518                     v.state := NORMALIZE;
1519                 end if;
1520
1521             when CMP_1 =>  -- Set up A with inverted R and carry-in (presumably A minus R) for the magnitude test in CMP_2
1522                 opsel_a <= AIN_A;
1523                 opsel_b <= BIN_R;
1524                 opsel_binv <= '1';
1525                 carry_in <= '1';
1526                 v.state := CMP_2;
1527
1528             when CMP_2 =>  -- Turn the comparison result in R into the 4-bit CR field and finish the instruction
1529                 if r.r(63) = '1' then
1530                     -- A is smaller in magnitude
1531                     v.cr_result := not r.a.negative & r.a.negative & "00";
1532                 elsif (r_hi_nz or r_lo_nz) = '0' then  -- no non-zero indicator set: result is "0010"
1533                     v.cr_result := "0010";
1534                 else  -- remaining case: bit pattern is the mirror of the first branch
1535                     v.cr_result := r.a.negative & not r.a.negative & "00";
1536                 end if;
1537                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;  -- copy the same four bits into the FPSCR
1538                 v.instr_done := '1';
1539                 v.state := IDLE;
1540
1541             when MULT_1 =>  -- Wait for the multiplier result, then go to FINISH
1542                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1543                 opsel_r <= RES_MULT;
1544                 if multiply_to_f.valid = '1' then  -- stay in this state until the multiplier reports a valid result
1545                     v.state := FINISH;
1546                 end if;
1547
1548             when LOOKUP =>  -- One-cycle wait after renormalizing B, then dispatch on instruction bits 4..2
1549                 opsel_a <= AIN_B;
1550                 -- wait one cycle for inverse_table[B] lookup
1551                 v.first := '1';
1552                 if r.insn(4) = '0' then
1553                     if r.insn(3) = '0' then  -- insn(4 downto 3) = "00": divide
1554                         v.state := DIV_2;
1555                     else  -- insn(4 downto 3) = "01": square root
1556                         v.state := SQRT_1;
1557                     end if;
1558                 elsif r.insn(2) = '0' then  -- insn(4) = '1', insn(2) = '0': reciprocal estimate
1559                     v.state := FRE_1;
1560                 else  -- insn(4) = '1', insn(2) = '1': reciprocal square-root estimate
1561                     v.state := RSQRT_1;
1562                 end if;
1563
1564             when DIV_2 =>  -- Divide iteration, first half; alternates with DIV_3
1565                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1566                 msel_1 <= MUL1_B;
1567                 msel_add <= MULADD_CONST;
1568                 msel_inv <= '1';
1569                 if r.count = 0 then  -- first pass uses the lookup-table value, later passes use P
1570                     msel_2 <= MUL2_LUT;
1571                 else
1572                     msel_2 <= MUL2_P;
1573                 end if;
1574                 set_y := r.first;
1575                 pshift := '1';
1576                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1577                 if multiply_to_f.valid = '1' then
1578                     v.first := '1';
1579                     v.count := r.count + 1;  -- count is incremented here only; DIV_3 tests it
1580                     v.state := DIV_3;
1581                 end if;
1582
1583             when DIV_3 =>  -- Divide iteration, second half; loops back to DIV_2 until r.count reaches 3
1584                 -- compute Y = P = P * Y
1585                 msel_1 <= MUL1_Y;
1586                 msel_2 <= MUL2_P;
1587                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1588                 pshift := '1';
1589                 if multiply_to_f.valid = '1' then
1590                     v.first := '1';
1591                     if r.count = 3 then  -- after the third DIV_2/DIV_3 pass, go on to the quotient
1592                         v.state := DIV_4;
1593                     else
1594                         v.state := DIV_2;
1595                     end if;
1596                 end if;
1597
1598             when DIV_4 =>  -- Multiply A by the reciprocal estimate to form the quotient
1599                 -- compute R = P = A * Y (quotient)
1600                 msel_1 <= MUL1_A;
1601                 msel_2 <= MUL2_P;
1602                 set_y := r.first;
1603                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1604                 pshift := '1';
1605                 if multiply_to_f.valid = '1' then
1606                     opsel_r <= RES_MULT;  -- select the multiplier output as the result only once it is valid
1607                     v.first := '1';
1608                     v.state := DIV_5;
1609                 end if;
1610
1611             when DIV_5 =>  -- Form the remainder used by DIV_6 to correct the quotient
1612                 -- compute P = A - B * R (remainder)
1613                 msel_1 <= MUL1_B;
1614                 msel_2 <= MUL2_R;
1615                 msel_add <= MULADD_A;
1616                 msel_inv <= '1';
1617                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1618                 if multiply_to_f.valid = '1' then
1619                     v.state := DIV_6;
1620                 end if;
1621
1622             when DIV_6 =>  -- Final quotient correction from the remainder, then FINISH
1623                 -- test if remainder is 0 or >= B
1624                 if pcmpb_lt = '1' then
1625                     -- quotient is correct, set X if remainder non-zero
1626                     v.x := r.p(58) or px_nz;
1627                 else
1628                     -- quotient needs to be incremented by 1
1629                     carry_in <= '1';
1630                     v.x := not pcmpb_eq;  -- X is clear only when pcmpb_eq is set
1631                 end if;
1632                 v.state := FINISH;
1633
1634             when FRE_1 =>  -- Take the misc-mux value selected by "0111" as the estimate, then normalize
1635                 opsel_r <= RES_MISC;
1636                 misc_sel <= "0111";  -- same selector SQRT_1 uses to put the table value in R
1637                 v.shift := to_signed(1, EXP_BITS);
1638                 v.state := NORMALIZE;
1639
1640             when FTDIV_1 =>  -- Exponent-range test step for ftdiv: sets CR result bit 1
1641                 v.cr_result(1) := exp_tiny or exp_huge;
1642                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then  -- any of these ends the instruction
1643                     v.instr_done := '1';
1644                     v.state := IDLE;
1645                 else  -- no v.state assignment in this branch; only the shift count and doing_ftdiv are updated
1646                     v.shift := r.a.exponent;
1647                     v.doing_ftdiv := "10";
1648                 end if;
1649
1650             when RSQRT_1 =>  -- Take the misc-mux value selected by "0111" as the estimate, set the exponent, then normalize
1651                 opsel_r <= RES_MISC;
1652                 misc_sel <= "0111";  -- same selector SQRT_1 uses to put the table value in R
1653                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);  -- B's exponent shifted right one bit, sign bit replicated
1654                 v.result_exp := - sqrt_exp;  -- result exponent is the negated halved exponent
1655                 v.shift := to_signed(1, EXP_BITS);
1656                 v.state := NORMALIZE;
1657
1658             when SQRT_1 =>  -- Start of the square-root sequence: load the table estimate and issue the first multiply
1659                 -- put invsqr[B] in R and compute P = invsqr[B] * B
1660                 -- also transfer B (in R) to A
1661                 set_a := '1';
1662                 opsel_r <= RES_MISC;
1663                 misc_sel <= "0111";
1664                 msel_1 <= MUL1_B;
1665                 msel_2 <= MUL2_LUT;
1666                 f_to_multiply.valid <= '1';  -- issued unconditionally here, not gated by r.first
1667                 v.shift := to_signed(-1, EXP_BITS);  -- shift count for the shift performed in SQRT_2
1668                 v.count := "00";  -- iteration counter tested and incremented in SQRT_7
1669                 v.state := SQRT_2;
1670
1671             when SQRT_2 =>  -- One-cycle shift of R while the multiply issued in SQRT_1 is in flight
1672                 -- shift R right one place
1673                 -- not expecting multiplier result yet
1674                 opsel_r <= RES_SHIFT;
1675                 v.first := '1';
1676                 v.state := SQRT_3;
1677
1678             when SQRT_3 =>  -- Latch R into Y and collect the first product
1679                 -- put R into Y, wait for product from multiplier
1680                 msel_2 <= MUL2_R;
1681                 set_y := r.first;  -- Y is written only while r.first is set
1682                 pshift := '1';
1683                 if multiply_to_f.valid = '1' then
1684                     -- put result into R
1685                     opsel_r <= RES_MULT;
1686                     v.first := '1';
1687                     v.state := SQRT_4;
1688                 end if;
1689
1690             when SQRT_4 =>  -- Start of one refinement pass (SQRT_4 to SQRT_7)
1691                 -- compute 1.5 - Y * P
1692                 msel_1 <= MUL1_Y;
1693                 msel_2 <= MUL2_P;
1694                 msel_add <= MULADD_CONST;
1695                 msel_inv <= '1';
1696                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1697                 pshift := '1';
1698                 if multiply_to_f.valid = '1' then
1699                     v.state := SQRT_5;
1700                 end if;
1701
1702             when SQRT_5 =>  -- Issue the Y * P multiply and move on without waiting for its result
1703                 -- compute Y = Y * P
1704                 msel_1 <= MUL1_Y;
1705                 msel_2 <= MUL2_P;
1706                 f_to_multiply.valid <= '1';  -- issued unconditionally; this state lasts a single cycle
1707                 v.first := '1';  -- lets SQRT_6 issue the second multiply
1708                 v.state := SQRT_6;
1709
1710             when SQRT_6 =>  -- Issue the R * P multiply behind the one from SQRT_5, then wait for a result
1711                 -- pipeline in R = R * P
1712                 msel_1 <= MUL1_R;
1713                 msel_2 <= MUL2_P;
1714                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1715                 pshift := '1';
1716                 if multiply_to_f.valid = '1' then
1717                     v.first := '1';
1718                     v.state := SQRT_7;
1719                 end if;
1720
1721             when SQRT_7 =>  -- End of one refinement pass: store Y, collect R, then loop or leave
1722                 -- first multiply is done, put result in Y
1723                 msel_2 <= MUL2_P;
1724                 set_y := r.first;
1725                 -- wait for second multiply (should be here already)
1726                 pshift := '1';
1727                 if multiply_to_f.valid = '1' then
1728                     -- put result into R
1729                     opsel_r <= RES_MULT;
1730                     v.first := '1';
1731                     v.count := r.count + 1;
1732                     if r.count < 2 then  -- tests the pre-increment count: passes with count 0 and 1 loop back
1733                         v.state := SQRT_4;
1734                     else
1735                         v.first := '1';  -- NOTE(review): redundant, v.first is already set to '1' a few lines above
1736                         v.state := SQRT_8;
1737                     end if;
1738                 end if;
1739
1740             when SQRT_8 =>  -- Compute the error of the current root estimate
1741                 -- compute P = A - R * R, which can be +ve or -ve
1742                 -- we arranged for B to be put into A earlier
1743                 msel_1 <= MUL1_R;
1744                 msel_2 <= MUL2_R;
1745                 msel_add <= MULADD_A;
1746                 msel_inv <= '1';
1747                 pshift := '1';
1748                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1749                 if multiply_to_f.valid = '1' then
1750                     v.first := '1';
1751                     v.state := SQRT_9;
1752                 end if;
1753
1754             when SQRT_9 =>  -- Scale the error by Y to get the adjustment applied in SQRT_10
1755                 -- compute P = P * Y
1756                 -- since Y is an estimate of 1/sqrt(B), this makes P an
1757                 -- estimate of the adjustment needed to R.  Since the error
1758                 -- could be negative and we have an unsigned multiplier, the
1759                 -- upper bits can be wrong, but it turns out the lowest 8 bits
1760                 -- are correct and are all we need (given 3 iterations through
1761                 -- SQRT_4 to SQRT_7).
1762                 msel_1 <= MUL1_Y;
1763                 msel_2 <= MUL2_P;
1764                 pshift := '1';
1765                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1766                 if multiply_to_f.valid = '1' then
1767                     v.state := SQRT_10;
1768                 end if;
1769
1770             when SQRT_10 =>  -- Apply the adjustment to R and set the result exponent
1771                 -- Add the bottom 8 bits of P, sign-extended,
1772                 -- divided by 4, onto R.
1773                 -- The division by 4 is because R is 10.54 format
1774                 -- whereas P is 8.56 format.
1775                 opsel_b <= BIN_PS6;
1776                 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);  -- B's exponent shifted right one bit, sign bit replicated
1777                 v.result_exp := sqrt_exp;
1778                 v.shift := to_signed(1, EXP_BITS);
1779                 v.first := '1';  -- lets SQRT_11 issue its multiply and write B
1780                 v.state := SQRT_11;
1781
1782             when SQRT_11 =>  -- Compute the final remainder and the comparison value for SQRT_12
1783                 -- compute P = A - R * R (remainder)
1784                 -- also put 2 * R + 1 into B for comparison with P
1785                 msel_1 <= MUL1_R;
1786                 msel_2 <= MUL2_R;
1787                 msel_add <= MULADD_A;
1788                 msel_inv <= '1';
1789                 f_to_multiply.valid <= r.first;  -- request the multiply only while r.first is set
1790                 shiftin := '1';
1791                 set_b := r.first;  -- B is written only while r.first is set
1792                 if multiply_to_f.valid = '1' then
1793                     v.state := SQRT_12;
1794                 end if;
1795
1796             when SQRT_12 =>  -- Final root correction from the remainder, then FINISH
1797                 -- test if remainder is 0 or >= B = 2*R + 1
1798                 if pcmpb_lt = '1' then
1799                     -- square root is correct, set X if remainder non-zero
1800                     v.x := r.p(58) or px_nz;
1801                 else
1802                     -- square root needs to be incremented by 1
1803                     carry_in <= '1';
1804                     v.x := not pcmpb_eq;  -- X is clear only when pcmpb_eq is set
1805                 end if;
1806                 v.state := FINISH;
1807
1808             when INT_SHIFT =>  -- Take the shifter output as the result and record X, before integer rounding
1809                 opsel_r <= RES_SHIFT;
1810                 set_x := '1';
1811                 v.state := INT_ROUND;
1812                 v.shift := to_signed(-2, EXP_BITS);  -- shift count for the RES_SHIFT selected in INT_ROUND
1813
1814             when INT_ROUND =>  -- Compute the rounding decision for a float-to-integer conversion
1815                 opsel_r <= RES_SHIFT;
1816                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);  -- third argument is '0' here, r.single_prec in ROUNDING
1817                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;  -- both rounding bits are stored in FPSCR FR/FI
1818                 -- Check for negative values that don't round to 0 for fcti*u*
1819                 if r.insn(8) = '1' and r.result_sign = '1' and
1820                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then  -- uses the FR value just assigned above
1821                     v.state := INT_OFLOW;
1822                 else
1823                     v.state := INT_FINAL;
1824                 end if;
1825
1826             when INT_ISHIFT =>  -- Take the shifter output as the result, then go straight to INT_FINAL (no rounding step)
1827                 opsel_r <= RES_SHIFT;
1828                 v.state := INT_FINAL;
1829
1830             when INT_FINAL =>  -- Produce the final integer and decide whether an overflow check is needed
1831                 -- Negate if necessary, and increment for rounding if needed
1832                 opsel_ainv <= r.result_sign;
1833                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
1834                 -- Check for possible overflows
1835                 case r.insn(9 downto 8) is  -- insn(9) selects 64-bit vs 32-bit, insn(8) selects unsigned vs signed
1836                     when "00" =>        -- fctiw[z]
1837                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
1838                     when "01" =>        -- fctiwu[z]
1839                         need_check := r.r(31);
1840                     when "10" =>        -- fctid[z]
1841                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
1842                     when others =>      -- fctidu[z]
1843                         need_check := r.r(63);
1844                 end case;
1845                 if need_check = '1' then
1846                     v.state := INT_CHECK;
1847                 else
1848                     if r.fpscr(FPSCR_FI) = '1' then  -- inexact conversion also sets XX
1849                         v.fpscr(FPSCR_XX) := '1';
1850                     end if;
1851                     arith_done := '1';
1852                 end if;
1853
1854             when INT_CHECK =>  -- Overflow check on the integer produced by INT_FINAL
1855                 if r.insn(9) = '0' then  -- pick the top bit of the 32-bit or 64-bit result
1856                     msb := r.r(31);
1857                 else
1858                     msb := r.r(63);
1859                 end if;
1860                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;  -- selector for the replacement value, used only if RES_MISC is chosen below
1861                 if (r.insn(8) = '0' and msb /= r.result_sign) or
1862                     (r.insn(8) = '1' and msb /= '1') then  -- signed: top bit disagrees with the sign; unsigned: top bit is clear
1863                     opsel_r <= RES_MISC;
1864                     v.fpscr(FPSCR_VXCVI) := '1';
1865                     invalid := '1';
1866                 else
1867                     if r.fpscr(FPSCR_FI) = '1' then  -- inexact conversion also sets XX
1868                         v.fpscr(FPSCR_XX) := '1';
1869                     end if;
1870                 end if;
1871                 arith_done := '1';
1872
1873             when INT_OFLOW =>  -- Conversion is invalid: substitute the misc-mux value and flag VXCVI
1874                 opsel_r <= RES_MISC;
1875                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
1876                 if r.b.class = NAN then  -- for a NaN operand the low selector bit is forced to '1' regardless of sign
1877                     misc_sel(0) <= '1';
1878                 end if;
1879                 v.fpscr(FPSCR_VXCVI) := '1';
1880                 invalid := '1';
1881                 arith_done := '1';
1882
1883             when FRI_1 =>  -- Take the shifter output as the result and record X, then go to ROUNDING
1884                 opsel_r <= RES_SHIFT;
1885                 set_x := '1';
1886                 v.shift := to_signed(-2, EXP_BITS);
1887                 v.state := ROUNDING;
1888
1889             when FINISH =>  -- Decide whether R needs normalizing, then pick the rounding path
1890                 if r.is_multiply = '1' and px_nz = '1' then  -- for multiplies, px_nz also sets X
1891                     v.x := '1';
1892                 end if;
1893                 if r.r(63 downto 54) /= "0000000001" then  -- leading one is not at bit 54: renormalize
1894                     renormalize := '1';
1895                     v.state := NORMALIZE;
1896                 else
1897                     set_x := '1';
1898                     if exp_tiny = '1' then
1899                         v.shift := new_exp - min_exp;
1900                         v.state := ROUND_UFLOW;
1901                     elsif exp_huge = '1' then
1902                         v.state := ROUND_OFLOW;
1903                     else
1904                         v.shift := to_signed(-2, EXP_BITS);
1905                         v.state := ROUNDING;
1906                     end if;
1907                 end if;
1908
1909             when NORMALIZE =>  -- Apply the normalizing shift, then pick the rounding path (same choice as in FINISH)
1910                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
1911                 opsel_r <= RES_SHIFT;
1912                 set_x := '1';
1913                 if exp_tiny = '1' then
1914                     v.shift := new_exp - min_exp;
1915                     v.state := ROUND_UFLOW;
1916                 elsif exp_huge = '1' then
1917                     v.state := ROUND_OFLOW;
1918                 else
1919                     v.shift := to_signed(-2, EXP_BITS);
1920                     v.state := ROUNDING;
1921                 end if;
1922
1923             when ROUND_UFLOW =>  -- Result exponent is below the minimum: prepare for rounding according to FPSCR UE
1924                 v.tiny := '1';  -- ROUNDING reads r.tiny to set UX when the result is inexact
1925                 if r.fpscr(FPSCR_UE) = '0' then
1926                     -- disabled underflow exception case
1927                     -- have to denormalize before rounding
1928                     opsel_r <= RES_SHIFT;
1929                     set_x := '1';
1930                     v.shift := to_signed(-2, EXP_BITS);
1931                     v.state := ROUNDING;
1932                 else
1933                     -- enabled underflow exception case
1934                     -- if denormalized, have to normalize before rounding
1935                     v.fpscr(FPSCR_UX) := '1';
1936                     v.result_exp := r.result_exp + bias_exp;  -- exponent is adjusted upwards by bias_exp
1937                     if r.r(54) = '0' then
1938                         renormalize := '1';
1939                         v.state := NORMALIZE;
1940                     else
1941                         v.shift := to_signed(-2, EXP_BITS);
1942                         v.state := ROUNDING;
1943                     end if;
1944                 end if;
1945
1946             when ROUND_OFLOW =>  -- Result exponent is above the maximum: produce the overflow result according to FPSCR OE
1947                 v.fpscr(FPSCR_OX) := '1';
1948                 if r.fpscr(FPSCR_OE) = '0' then
1949                     -- disabled overflow exception
1950                     -- result depends on rounding mode
1951                     v.fpscr(FPSCR_XX) := '1';
1952                     v.fpscr(FPSCR_FI) := '1';
1953                     if r.round_mode(1 downto 0) = "00" or
1954                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then  -- mode "00", or a "1x" mode whose low bit equals the result sign
1955                         v.result_class := INFINITY;
1956                         v.fpscr(FPSCR_FR) := '1';
1957                     else  -- otherwise the result class is left unchanged and the value constructed below is used
1958                         v.fpscr(FPSCR_FR) := '0';
1959                     end if;
1960                     -- construct largest representable number
1961                     v.result_exp := max_exp;
1962                     opsel_r <= RES_MISC;
1963                     misc_sel <= "001" & r.single_prec;  -- low selector bit distinguishes single from double precision
1964                     arith_done := '1';
1965                 else
1966                     -- enabled overflow exception
1967                     v.result_exp := r.result_exp - bias_exp;  -- exponent is adjusted downwards by bias_exp
1968                     v.shift := to_signed(-2, EXP_BITS);
1969                     v.state := ROUNDING;
1970                 end if;
1971
1972             when ROUNDING =>    -- Apply the rounding decision returned by fp_rounding to R
1973                 opsel_amask <= '1';    -- A-mux output has the bits covered by the mask cleared
1974                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
1975                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;    -- round(1) goes to FR, round(0) to FI
1976                 if round(1) = '1' then
1977                     -- set mask to increment the LSB for the precision
1978                     opsel_b <= BIN_MASK;    -- B input is the mask itself
1979                     carry_in <= '1';    -- sum path yields A + mask + 1; assumes opsel_r stays at RES_SUM (default set outside this excerpt)
1980                     v.shift := to_signed(-1, EXP_BITS);    -- shift amount used by ROUNDING_2 if the increment carries into bit 55
1981                     v.state := ROUNDING_2;
1982                 else
1983                     if r.r(54) = '0' then
1984                         -- result after masking could be zero, or could be a
1985                         -- denormalized result that needs to be renormalized
1986                         renormalize := '1';    -- CLZ logic computes v.shift for ROUNDING_3
1987                         v.state := ROUNDING_3;
1988                     else
1989                         arith_done := '1';
1990                     end if;
1991                 end if;
1992                 if round(0) = '1' then    -- round(0) is the bit stored in FI above
1993                     v.fpscr(FPSCR_XX) := '1';
1994                     if r.tiny = '1' then    -- r.tiny was set by ROUND_UFLOW
1995                         v.fpscr(FPSCR_UX) := '1';
1996                     end if;
1997                 end if;
1998
1999             when ROUNDING_2 =>    -- Entered from ROUNDING after the LSB increment
2000                 -- Check for overflow during rounding
2001                 v.x := '0';
2002                 if r.r(55) = '1' then    -- increment carried out of bit 54 into bit 55
2003                     opsel_r <= RES_SHIFT;    -- shift R by r.shift (-1, set in ROUNDING); result_exp is reloaded from new_exp
2004                     if exp_huge = '1' then    -- exp_huge is computed outside this excerpt
2005                         v.state := ROUND_OFLOW;
2006                     else
2007                         arith_done := '1';
2008                     end if;
2009                 elsif r.r(54) = '0' then
2010                     -- Do CLZ so we can renormalize the result
2011                     renormalize := '1';
2012                     v.state := ROUNDING_3;
2013                 else
2014                     arith_done := '1';    -- bit 54 set, bit 55 clear: R is already normalized
2015                 end if;
2016
2017             when ROUNDING_3 =>    -- Finish a rounded result whose bit 54 was clear: zero or renormalize
2018                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);    -- r_lo_nz is ignored for single precision; both flags come from outside this excerpt
2019                 if mant_nz = '0' then
2020                     v.result_class := ZERO;
2021                     if r.is_subtract = '1' then
2022                         -- set result sign depending on rounding mode
2023                         v.result_sign := r.round_mode(1) and r.round_mode(0);    -- negative only when both mode bits are 1
2024                     end if;
2025                     arith_done := '1';
2026                 else
2027                     -- Renormalize result after rounding
2028                     opsel_r <= RES_SHIFT;    -- shift R by r.shift (computed by the CLZ logic in the previous state)
2029                     v.denorm := exp_tiny;    -- exp_tiny is computed outside this excerpt; r.denorm later gates the FPRF flags
2030                     v.shift := new_exp - to_signed(-1022, EXP_BITS);    -- distance of new_exp from -1022; consumed by DENORM
2031                     if new_exp < to_signed(-1022, EXP_BITS) then
2032                         v.state := DENORM;
2033                     else
2034                         arith_done := '1';
2035                     end if;
2036                 end if;
2037
2038             when DENORM =>    -- Final shift of R by the r.shift computed in ROUNDING_3, then finish
2039                 opsel_r <= RES_SHIFT;    -- R is loaded from the shifter; result_exp is reloaded from new_exp
2040                 arith_done := '1';
2041
2042         end case;
2043
2044         if zero_divide = '1' then    -- zero_divide is set outside this excerpt
2045             v.fpscr(FPSCR_ZX) := '1';
2046         end if;
2047         if qnan_result = '1' then    -- qnan_result is set outside this excerpt; forces a generated quiet NaN
2048             invalid := '1';
2049             v.result_class := NAN;
2050             v.result_sign := '0';
2051             misc_sel <= "0001";    -- generated QNaN mantissa in the misc mux
2052             opsel_r <= RES_MISC;    -- placed after the state case, so this overrides any opsel_r chosen by the state
2053         end if;
2054         if arith_done = '1' then    -- common completion path for the states above
2055             -- Enabled invalid exception doesn't write result or FPRF
2056             -- Neither does enabled zero-divide exception
2057             if (invalid and r.fpscr(FPSCR_VE)) = '0' and
2058                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2059                 v.writing_back := '1';
2060                 v.update_fprf := '1';    -- FPRF flags are computed from the registered result when r.update_fprf is seen
2061             end if;
2062             v.instr_done := '1';
2063             v.state := IDLE;
2064             update_fx := '1';    -- lets FX be set below if an exception bit not in r.old_exc is now set
2065         end if;
2066
2067         -- Multiplier and divide/square root data path
2068         case msel_1 is    -- first multiplier operand
2069             when MUL1_A =>
2070                 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";    -- A mantissa shifted left 2 bits
2071             when MUL1_B =>
2072                 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";    -- B mantissa shifted left 2 bits
2073             when MUL1_Y =>
2074                 f_to_multiply.data1 <= r.y;    -- Y register, unshifted
2075             when others =>
2076                 f_to_multiply.data1 <= r.r(61 downto 0) & "00";    -- R shifted left 2 bits
2077         end case;
2078         case msel_2 is    -- second multiplier operand
2079             when MUL2_C =>
2080                 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";    -- C mantissa shifted left 2 bits
2081             when MUL2_LUT =>
2082                 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";    -- inverse_est below 8 zero bits, zero-padded on the right
2083             when MUL2_P =>
2084                 f_to_multiply.data2 <= r.p;    -- P register, unshifted
2085             when others =>
2086                 f_to_multiply.data2 <= r.r(61 downto 0) & "00";    -- R shifted left 2 bits
2087         end case;
2088         maddend := (others => '0');    -- default addend is zero
2089         case msel_add is
2090             when MULADD_CONST =>
2091                 -- addend is 2.0 or 1.5 in 16.112 format
2092                 if r.is_sqrt = '0' then
2093                     maddend(113) := '1';                -- 2.0
2094                 else
2095                     maddend(112 downto 111) := "11";    -- 1.5
2096                 end if;
2097             when MULADD_A =>
2098                 -- addend is A in 16.112 format
2099                 maddend(121 downto 58) := r.a.mantissa;    -- mantissa bit 54 lands on addend bit 112
2100             when others =>    -- any other selection leaves the addend at zero
2101         end case;
2102         if msel_inv = '1' then
2103             f_to_multiply.addend <= not maddend;    -- addend is bitwise inverted when msel_inv is set
2104         else
2105             f_to_multiply.addend <= maddend;
2106         end if;
2107         f_to_multiply.not_result <= msel_inv;    -- the same control is passed to the multiplier as not_result
2108         if set_y = '1' then
2109             v.y := f_to_multiply.data2;    -- capture the second multiplier operand into Y
2110         end if;
2111         if multiply_to_f.valid = '1' then    -- P is updated only when the multiplier reports a valid result
2112             if pshift = '0' then
2113                 v.p := multiply_to_f.result(63 downto 0);    -- low 64 bits of the product
2114             else
2115                 v.p := multiply_to_f.result(119 downto 56);    -- product bits 119..56
2116             end if;
2117         end if;
2118
2119         -- Data path.
2120         -- This has A and B input multiplexers, an adder, a shifter,
2121         -- count-leading-zeroes logic, and a result mux.
2122         if longmask = '1' then    -- longmask is driven outside this excerpt
2123             mshift := r.shift + to_signed(-29, EXP_BITS);    -- mask shift is 29 lower than r.shift when longmask is set
2124         else
2125             mshift := r.shift;
2126         end if;
2127         if mshift < to_signed(-64, EXP_BITS) then
2128             mask := (others => '1');    -- below -64: every bit is masked
2129         elsif mshift >= to_signed(0, EXP_BITS) then
2130             mask := (others => '0');    -- zero or positive: nothing is masked
2131         else
2132             mask := right_mask(unsigned(mshift(5 downto 0)));    -- otherwise right_mask (defined elsewhere) of the low 6 bits of mshift
2133         end if;
2134         case opsel_a is    -- A input: R, or the mantissa of operand A, B or C
2135             when AIN_R =>
2136                 in_a0 := r.r;
2137             when AIN_A =>
2138                 in_a0 := r.a.mantissa;
2139             when AIN_B =>
2140                 in_a0 := r.b.mantissa;
2141             when others =>
2142                 in_a0 := r.c.mantissa;
2143         end case;
2144         if (or (mask and in_a0)) = '1' and set_x = '1' then    -- tested before inversion and masking are applied
2145             v.x := '1';    -- sticky bit: some selected bit under the mask is 1
2146         end if;
2147         if opsel_ainv = '1' then
2148             in_a0 := not in_a0;    -- optional bitwise inversion
2149         end if;
2150         if opsel_amask = '1' then
2151             in_a0 := in_a0 and not mask;    -- optional clearing of the masked bits, applied after inversion
2152         end if;
2153         in_a <= in_a0;
2154         case opsel_b is    -- B input: zero, R, the mask, or a sign-extended slice of P
2155             when BIN_ZERO =>
2156                 in_b0 := (others => '0');
2157             when BIN_R =>
2158                 in_b0 := r.r;
2159             when BIN_MASK =>
2160                 in_b0 := mask;
2161             when others =>
2162                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2163                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));    -- P bits 7..2, sign-extended
2164         end case;
2165         if opsel_binv = '1' then
2166             in_b0 := not in_b0;    -- optional bitwise inversion
2167         end if;
2168         in_b <= in_b0;
2169         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then    -- shifter is used only for r.shift in -64..63
2170             shift_res := shifter_64(r.r & shiftin & 55x"00000000000000",    -- input is R, then shiftin, then 55 zero bits
2171                                     std_ulogic_vector(r.shift(6 downto 0)));    -- shift amount is the low 7 bits of r.shift
2172         else
2173             shift_res := (others => '0');    -- out-of-range shift amounts produce zero
2174         end if;
2175         case opsel_r is    -- result mux: selects the value loaded into R
2176             when RES_SUM =>
2177                 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);    -- adder: A + B + carry_in
2178             when RES_SHIFT =>
2179                 result <= shift_res;    -- shifter output
2180             when RES_MULT =>
2181                 result <= multiply_to_f.result(121 downto 58);    -- product bits 121..58
2182             when others =>    -- remaining selection (RES_MISC): constants and special values chosen by misc_sel
2183                 case misc_sel is
2184                     when "0000" =>
2185                         misc := x"00000000" & (r.fpscr and fpscr_mask);    -- masked FPSCR in the low half, zeros above
2186                     when "0001" =>
2187                         -- generated QNaN mantissa
2188                         misc := x"0020000000000000";
2189                     when "0010" =>
2190                         -- mantissa of max representable DP number
2191                         misc := x"007ffffffffffffc";
2192                     when "0011" =>
2193                         -- mantissa of max representable SP number
2194                         misc := x"007fffff80000000";
2195                     when "0100" =>
2196                         -- fmrgow result
2197                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);    -- low words of A and B
2198                     when "0110" =>
2199                         -- fmrgew result
2200                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);    -- high words of A and B
2201                     when "0111" =>
2202                         misc := 10x"000" & inverse_est & 35x"000000000";    -- inverse_est below 10 zero bits, 35 zero bits to its right
2203                     when "1000" =>
2204                         -- max positive result for fctiw[z]
2205                         misc := x"000000007fffffff";
2206                     when "1001" =>
2207                         -- max negative result for fctiw[z]
2208                         misc := x"ffffffff80000000";
2209                     when "1010" =>
2210                         -- max positive result for fctiwu[z]
2211                         misc := x"00000000ffffffff";
2212                     when "1011" =>
2213                         -- max negative result for fctiwu[z]
2214                         misc := x"0000000000000000";
2215                     when "1100" =>
2216                         -- max positive result for fctid[z]
2217                         misc := x"7fffffffffffffff";
2218                     when "1101" =>
2219                         -- max negative result for fctid[z]
2220                         misc := x"8000000000000000";
2221                     when "1110" =>
2222                         -- max positive result for fctidu[z]
2223                         misc := x"ffffffffffffffff";
2224                     when "1111" =>
2225                         -- max negative result for fctidu[z]
2226                         misc := x"0000000000000000";
2227                     when others =>    -- "0101" and any non-binary value fall through to zero
2228                         misc := x"0000000000000000";
2229                 end case;
2230                 result <= misc;
2231         end case;
2232         v.r := result;    -- R is loaded from the selected result
2233
2234         if set_a = '1' then    -- set_a/set_b/set_c load an operand register from new_exp and the shifter output
2235             v.a.exponent := new_exp;
2236             v.a.mantissa := shift_res;
2237         end if;
2238         if set_b = '1' then
2239             v.b.exponent := new_exp;
2240             v.b.mantissa := shift_res;
2241         end if;
2242         if set_c = '1' then
2243             v.c.exponent := new_exp;
2244             v.c.mantissa := shift_res;
2245         end if;
2246
2247         if opsel_r = RES_SHIFT then    -- when R takes the shifter output, the result exponent follows new_exp
2248             v.result_exp := new_exp;
2249         end if;
2250
2251         if renormalize = '1' then    -- count-leading-zeroes step requested by a state
2252             clz := count_left_zeroes(r.r);
2253             if renorm_sqrt = '1' then
2254                 -- make denormalized value end up with even exponent
2255                 clz(0) := '1';    -- forces the count to be odd
2256             end if;
2257             v.shift := resize(signed('0' & clz) - 9, EXP_BITS);    -- presumably 9 because a value with bit 54 as its top set bit has 9 leading zeros
2258         end if;
2259
2260         if r.int_result = '1' then    -- integer results are passed through unpacked
2261             fp_result <= r.r;
2262         else
2263             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,    -- otherwise built by pack_dp (defined elsewhere)
2264                                  r.single_prec, r.quieten_nan);
2265         end if;
2266         if r.update_fprf = '1' then    -- registered request from the completion logic
2267             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2268                                                              r.r(54) and not r.denorm);    -- third argument: bit 54 set and not flagged as denorm
2269         end if;
2270
2271         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or    -- VX is the OR of all bits in both VX* ranges
2272                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2273         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and    -- FEX: an exception bit is set together with its enable bit
2274                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
2275         if update_fx = '1' and
2276             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then    -- an exception bit is set that is not set in r.old_exc
2277             v.fpscr(FPSCR_FX) := '1';
2278         end if;
2279         if r.rc = '1' then
2280             v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);    -- CR result is the FPSCR field FX..OX
2281         end if;
2282
2283         if illegal = '1' then    -- illegal instruction: cancel completion, interrupt, writeback and busy; return to IDLE
2284             v.instr_done := '0';
2285             v.do_intr := '0';
2286             v.writing_back := '0';
2287             v.busy := '0';
2288             v.state := IDLE;
2289         else
2290             v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;    -- interrupt when an instruction completes with FEX set and fe_mode on
2291             if v.state /= IDLE or v.do_intr = '1' then
2292                 v.busy := '1';    -- busy while not returning to IDLE or while an interrupt is pending
2293             end if;
2294         end if;
2295
2296         rin <= v;    -- next-state record for the register
2297         e_out.illegal <= illegal;
2298     end process;
2299
2300 end architecture behaviour;