fpu.vhdl

   1 -- Floating-point unit for Microwatt
   2
   3 library ieee;
   4 use ieee.std_logic_1164.all;
   5 use ieee.numeric_std.all;
   6
   7 library work;
   8 use work.insn_helpers.all;
   9 use work.decode_types.all;
  10 use work.crhelpers.all;
  11 use work.helpers.all;
  12 use work.common.all;
  13
  14 entity fpu is                             -- port interface of the Microwatt floating-point unit
  15     port (
  16         clk : in std_ulogic;              -- single-bit input; presumably the clock (its use is outside this excerpt)
  17         rst : in std_ulogic;              -- single-bit input; presumably reset -- TODO confirm polarity in the architecture body
  18         flush_in : in std_ulogic;         -- single-bit input; presumably a pipeline flush request -- TODO confirm
  19
  20         e_in  : in  Execute1ToFPUType;    -- input record; the type is not declared in this file (presumably in a work package)
  21         e_out : out FPUToExecute1Type;    -- output record; by its type name it goes back to execute1 -- verify against the instantiation
  22
  23         w_out : out FPUToWritebackType    -- output record; by its type name it goes to writeback -- verify against the instantiation
  24         );
  25 end entity fpu;
  26
  27 architecture behaviour of fpu is
  28     type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
  29
  30     constant EXP_BITS : natural := 13;             -- width of the signed exponent and shift fields declared as (EXP_BITS-1 downto 0)
  31     constant UNIT_BIT : natural := 56;             -- bit 56 is the lowest integer bit of a 64-bit value in 8.56 format
  32     constant QNAN_BIT : natural := UNIT_BIT - 1;   -- = 55, the bit directly below UNIT_BIT
  33     constant SP_LSB   : natural := UNIT_BIT - 23;  -- = 33, i.e. 23 bits below UNIT_BIT (presumably the single-precision fraction LSB)
  34     constant SP_GBIT  : natural := SP_LSB - 1;     -- = 32 (presumably the single-precision guard bit)
  35     constant SP_RBIT  : natural := SP_LSB - 2;     -- = 31 (presumably the single-precision round bit)
  36     constant DP_LSB   : natural := UNIT_BIT - 52;  -- = 4, i.e. 52 bits below UNIT_BIT (presumably the double-precision fraction LSB)
  37     constant DP_GBIT  : natural := DP_LSB - 1;     -- = 3 (presumably the double-precision guard bit)
  38     constant DP_RBIT  : natural := DP_LSB - 2;     -- = 2 (presumably the double-precision round bit)
  39
  40     type fpu_reg_type is record                            -- a register value split into components; this is what decode_dp returns
  41         class    : fp_number_class;                        -- one of ZERO, FINITE, INFINITY, NAN
  42         negative : std_ulogic;                             -- decode_dp loads this from bit 63 of the register value
  43         denorm   : std_ulogic;                             -- decode_dp starts this at '0'; presumably set later for denormalized inputs
  44         exponent : signed(EXP_BITS-1 downto 0);         -- unbiased, EXP_BITS (13) bits wide
  45         mantissa : std_ulogic_vector(63 downto 0);      -- 8.56 format: 8 integer bits above 56 fraction bits
  46     end record;
  47
  48     type state_t is (IDLE, DO_ILLEGAL,
  49                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
  50                      DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  51                      DO_FCFID, DO_FCTI,
  52                      DO_FRSP, DO_FRI,
  53                      DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  54                      DO_FRE, DO_FRSQRTE,
  55                      DO_FSEL,
  56                      DO_IDIVMOD,
  57                      FRI_1,
  58                      ADD_1, ADD_SHIFT, ADD_2, ADD_3,
  59                      CMP_1, CMP_2,
  60                      MULT_1,
  61                      FMADD_0, FMADD_1, FMADD_2, FMADD_3,
  62                      FMADD_4, FMADD_5, FMADD_6,
  63                      LOOKUP,
  64                      DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  65                      FRE_1,
  66                      RSQRT_1,
  67                      FTDIV_1,
  68                      SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  69                      SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  70                      SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  71                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
  72                      INT_FINAL, INT_CHECK, INT_OFLOW,
  73                      FINISH, NORMALIZE,
  74                      ROUND_UFLOW, ROUND_OFLOW,
  75                      ROUNDING, ROUNDING_2, ROUNDING_3,
  76                      DENORM,
  77                      RENORM_A, RENORM_A2,
  78                      RENORM_B, RENORM_B2,
  79                      RENORM_C, RENORM_C2,
  80                      NAN_RESULT, EXC_RESULT,
  81                      IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3,
  82                      IDIV_CLZA, IDIV_CLZA2, IDIV_CLZA3,
  83                      IDIV_NR0, IDIV_NR1, IDIV_NR2, IDIV_USE0_5,
  84                      IDIV_DODIV, IDIV_SH32,
  85                      IDIV_DIV, IDIV_DIV2, IDIV_DIV3, IDIV_DIV4, IDIV_DIV5,
  86                      IDIV_DIV6, IDIV_DIV7, IDIV_DIV8, IDIV_DIV9,
  87                      IDIV_EXT_TBH, IDIV_EXT_TBH2, IDIV_EXT_TBH3,
  88                      IDIV_EXT_TBH4, IDIV_EXT_TBH5,
  89                      IDIV_EXTDIV, IDIV_EXTDIV1, IDIV_EXTDIV2, IDIV_EXTDIV3,
  90                      IDIV_EXTDIV4, IDIV_EXTDIV5, IDIV_EXTDIV6,
  91                      IDIV_MODADJ, IDIV_MODSUB, IDIV_DIVADJ, IDIV_OVFCHK, IDIV_DONE, IDIV_ZERO);
  92
  93     type decode32 is array(0 to 31) of state_t;
  94     type decode8 is array(0 to 7) of state_t;
  95
  96     type reg_type is record
  97         state        : state_t;
  98         busy         : std_ulogic;
  99         f2stall      : std_ulogic;
 100         instr_done   : std_ulogic;
 101         complete     : std_ulogic;
 102         do_intr      : std_ulogic;
 103         illegal      : std_ulogic;
 104         op           : insn_type_t;
 105         insn         : std_ulogic_vector(31 downto 0);
 106         instr_tag    : instr_tag_t;
 107         dest_fpr     : gspr_index_t;
 108         fe_mode      : std_ulogic;
 109         rc           : std_ulogic;
 110         fp_rc        : std_ulogic;
 111         is_cmp       : std_ulogic;
 112         single_prec  : std_ulogic;
 113         sp_result    : std_ulogic;
 114         fpscr        : std_ulogic_vector(31 downto 0);
 115         comm_fpscr   : std_ulogic_vector(31 downto 0);  -- committed FPSCR value
 116         a            : fpu_reg_type;
 117         b            : fpu_reg_type;
 118         c            : fpu_reg_type;
 119         r            : std_ulogic_vector(63 downto 0);  -- 8.56 format
 120         s            : std_ulogic_vector(55 downto 0);  -- extended fraction
 121         x            : std_ulogic;
 122         p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
 123         y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
 124         result_sign  : std_ulogic;
 125         result_class : fp_number_class;
 126         result_exp   : signed(EXP_BITS-1 downto 0);
 127         shift        : signed(EXP_BITS-1 downto 0);
 128         writing_fpr  : std_ulogic;
 129         write_reg    : gspr_index_t;
 130         complete_tag : instr_tag_t;
 131         writing_cr   : std_ulogic;
 132         writing_xer  : std_ulogic;
 133         int_result   : std_ulogic;
 134         cr_result    : std_ulogic_vector(3 downto 0);
 135         cr_mask      : std_ulogic_vector(7 downto 0);
 136         old_exc      : std_ulogic_vector(4 downto 0);
 137         update_fprf  : std_ulogic;
 138         quieten_nan  : std_ulogic;
 139         nsnan_result : std_ulogic;
 140         tiny         : std_ulogic;
 141         denorm       : std_ulogic;
 142         round_mode   : std_ulogic_vector(2 downto 0);
 143         is_subtract  : std_ulogic;
 144         exp_cmp      : std_ulogic;
 145         madd_cmp     : std_ulogic;
 146         add_bsmall   : std_ulogic;
 147         is_multiply  : std_ulogic;
 148         is_sqrt      : std_ulogic;
 149         first        : std_ulogic;
 150         count        : unsigned(1 downto 0);
 151         doing_ftdiv  : std_ulogic_vector(1 downto 0);
 152         opsel_a      : std_ulogic_vector(1 downto 0);
 153         use_a        : std_ulogic;
 154         use_b        : std_ulogic;
 155         use_c        : std_ulogic;
 156         invalid      : std_ulogic;
 157         negate       : std_ulogic;
 158         longmask     : std_ulogic;
 159         integer_op   : std_ulogic;
 160         divext       : std_ulogic;
 161         divmod       : std_ulogic;
 162         is_signed    : std_ulogic;
 163         int_ovf      : std_ulogic;
 164         div_close    : std_ulogic;
 165         inc_quot     : std_ulogic;
 166         a_hi         : std_ulogic_vector(7 downto 0);
 167         a_lo         : std_ulogic_vector(55 downto 0);
 168         m32b         : std_ulogic;
 169         oe           : std_ulogic;
 170         xerc         : xer_common_t;
 171         xerc_result  : xer_common_t;
 172         res_negate   : std_ulogic;
 173         res_subtract : std_ulogic;
 174         res_rmode    : std_ulogic_vector(2 downto 0);
 175     end record;
 176
 177     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
 178
 179     signal r, rin : reg_type;
 180
 181     signal fp_result     : std_ulogic_vector(63 downto 0);
 182     signal opsel_b       : std_ulogic_vector(1 downto 0);
 183     signal opsel_r       : std_ulogic_vector(1 downto 0);
 184     signal opsel_s       : std_ulogic_vector(1 downto 0);
 185     signal opsel_ainv    : std_ulogic;
 186     signal opsel_mask    : std_ulogic;
 187     signal opsel_binv    : std_ulogic;
 188     signal in_a          : std_ulogic_vector(63 downto 0);
 189     signal in_b          : std_ulogic_vector(63 downto 0);
 190     signal result        : std_ulogic_vector(63 downto 0);
 191     signal carry_in      : std_ulogic;
 192     signal lost_bits     : std_ulogic;
 193     signal r_hi_nz       : std_ulogic;
 194     signal r_lo_nz       : std_ulogic;
 195     signal r_gt_1        : std_ulogic;
 196     signal s_nz          : std_ulogic;
 197     signal misc_sel      : std_ulogic_vector(3 downto 0);
 198     signal f_to_multiply : MultiplyInputType;
 199     signal multiply_to_f : MultiplyOutputType;
 200     signal msel_1        : std_ulogic_vector(1 downto 0);
 201     signal msel_2        : std_ulogic_vector(1 downto 0);
 202     signal msel_add      : std_ulogic_vector(1 downto 0);
 203     signal msel_inv      : std_ulogic;
 204     signal inverse_est   : std_ulogic_vector(18 downto 0);
 205
 206     -- opsel values
 207     constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";
 208     constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
 209     constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
 210     constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";
 211
 212     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
 213     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
 214     constant BIN_RND  : std_ulogic_vector(1 downto 0) := "10";
 215     constant BIN_PS8  : std_ulogic_vector(1 downto 0) := "11";
 216
 217     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";
 218     constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
 219     constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
 220     constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";
 221
 222     constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 223     constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
 224     constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
 225     constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";
 226
 227     -- msel values
 228     constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
 229     constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
 230     constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
 231     constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
 232
 233     constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";
 234     constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
 235     constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
 236     constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";
 237
 238     constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 239     constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
 240     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
 241     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 242
 243     -- control signals and values for exponent data path
 244     constant REXP1_ZERO  : std_ulogic_vector(1 downto 0) := "00";
 245     constant REXP1_R     : std_ulogic_vector(1 downto 0) := "01";
 246     constant REXP1_A     : std_ulogic_vector(1 downto 0) := "10";
 247     constant REXP1_BHALF : std_ulogic_vector(1 downto 0) := "11";
 248
 249     constant REXP2_CON   : std_ulogic_vector(1 downto 0) := "00";
 250     constant REXP2_NE    : std_ulogic_vector(1 downto 0) := "01";
 251     constant REXP2_C     : std_ulogic_vector(1 downto 0) := "10";
 252     constant REXP2_B     : std_ulogic_vector(1 downto 0) := "11";
 253
 254     constant RECON2_ZERO : std_ulogic_vector(1 downto 0) := "00";
 255     constant RECON2_UNIT : std_ulogic_vector(1 downto 0) := "01";
 256     constant RECON2_BIAS : std_ulogic_vector(1 downto 0) := "10";
 257     constant RECON2_MAX  : std_ulogic_vector(1 downto 0) := "11";
 258
 259     signal re_sel1       : std_ulogic_vector(1 downto 0);
 260     signal re_sel2       : std_ulogic_vector(1 downto 0);
 261     signal re_con2       : std_ulogic_vector(1 downto 0);
 262     signal re_neg1       : std_ulogic;
 263     signal re_neg2       : std_ulogic;
 264     signal re_set_result : std_ulogic;
 265
 266     constant RSH1_ZERO   : std_ulogic_vector(1 downto 0) := "00";
 267     constant RSH1_B      : std_ulogic_vector(1 downto 0) := "01";
 268     constant RSH1_NE     : std_ulogic_vector(1 downto 0) := "10";
 269     constant RSH1_S      : std_ulogic_vector(1 downto 0) := "11";
 270
 271     constant RSH2_CON    : std_ulogic := '0';
 272     constant RSH2_A      : std_ulogic := '1';
 273
 274     constant RSCON2_ZERO    : std_ulogic_vector(3 downto 0) := "0000";
 275     constant RSCON2_1       : std_ulogic_vector(3 downto 0) := "0001";
 276     constant RSCON2_UNIT_52 : std_ulogic_vector(3 downto 0) := "0010";
 277     constant RSCON2_64_UNIT : std_ulogic_vector(3 downto 0) := "0011";
 278     constant RSCON2_32      : std_ulogic_vector(3 downto 0) := "0100";
 279     constant RSCON2_52      : std_ulogic_vector(3 downto 0) := "0101";
 280     constant RSCON2_UNIT    : std_ulogic_vector(3 downto 0) := "0110";
 281     constant RSCON2_63      : std_ulogic_vector(3 downto 0) := "0111";
 282     constant RSCON2_64      : std_ulogic_vector(3 downto 0) := "1000";
 283     constant RSCON2_MINEXP  : std_ulogic_vector(3 downto 0) := "1001";
 284
 285     signal rs_sel1       : std_ulogic_vector(1 downto 0);
 286     signal rs_sel2       : std_ulogic;
 287     signal rs_con2       : std_ulogic_vector(3 downto 0);
 288     signal rs_neg1       : std_ulogic;
 289     signal rs_neg2       : std_ulogic;
 290     signal rs_norm       : std_ulogic;
 291
 292     constant arith_decode : decode32 := (
 293         -- indexed by bits 5..1 of opcode
 294         2#01000# => DO_FRI,
 295         2#01100# => DO_FRSP,
 296         2#01110# => DO_FCTI,
 297         2#01111# => DO_FCTI,
 298         2#10010# => DO_FDIV,
 299         2#10100# => DO_FADD,
 300         2#10101# => DO_FADD,
 301         2#10110# => DO_FSQRT,
 302         2#11000# => DO_FRE,
 303         2#11001# => DO_FMUL,
 304         2#11010# => DO_FRSQRTE,
 305         2#11100# => DO_FMADD,
 306         2#11101# => DO_FMADD,
 307         2#11110# => DO_FMADD,
 308         2#11111# => DO_FMADD,
 309         others   => DO_ILLEGAL
 310         );
 311
 312     constant cmp_decode : decode8 := (
 313         2#000# => DO_FCMP,
 314         2#001# => DO_FCMP,
 315         2#010# => DO_MCRFS,
 316         2#100# => DO_FTDIV,
 317         2#101# => DO_FTSQRT,
 318         others => DO_ILLEGAL
 319         );
 320
 321     constant misc_decode : decode32 := (
 322         -- indexed by bits 10, 8, 4, 2, 1 of opcode
 323         2#00010# => DO_MTFSB,
 324         2#01010# => DO_MTFSFI,
 325         2#10010# => DO_FMRG,
 326         2#11010# => DO_FMRG,
 327         2#10011# => DO_MFFS,
 328         2#11011# => DO_MTFSF,
 329         2#10110# => DO_FCFID,
 330         2#11110# => DO_FCFID,
 331         others   => DO_ILLEGAL
 332         );
 333
 334     -- Inverse lookup table, indexed by the top 8 fraction bits
 335     -- The first 256 entries are the reciprocal (1/x) lookup table,
 336     -- and the remaining 768 entries are the reciprocal square root table.
 337     -- Output range is [0.5, 1) in 0.19 format, though the top
 338     -- bit isn't stored since it is always 1.
 339     -- Each output value is the inverse of the center of the input
 340     -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
 341     -- entry 1 is 1 / (1 + 3/512), etc.
 342     constant inverse_table : lookup_table := (
 343         -- 1/x lookup table
 344         -- Unit bit is assumed to be 1, so input range is [1, 2)
 345         18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
 346         18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
 347         18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
 348         18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
 349         18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
 350         18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
 351         18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
 352         18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
 353         18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
 354         18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
 355         18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
 356         18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
 357         18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
 358         18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
 359         18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
 360         18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
 361         18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
 362         18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
 363         18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
 364         18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
 365         18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
 366         18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
 367         18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
 368         18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
 369         18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
 370         18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
 371         18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
 372         18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
 373         18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
 374         18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
 375         18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
 376         18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
 377         -- 1/sqrt(x) lookup table
 378         -- Input is in the range [1, 4), i.e. two bits to the left of the
 379         -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
 380         -- 1.0 ... 1.9999
 381         18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
 382         18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
 383         18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
 384         18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
 385         18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
 386         18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
 387         18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
 388         18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
 389         18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
 390         18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
 391         18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
 392         18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
 393         18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
 394         18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
 395         18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
 396         18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
 397         18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
 398         18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
 399         18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
 400         18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
 401         18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
 402         18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
 403         18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
 404         18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
 405         18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
 406         18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
 407         18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
 408         18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
 409         18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
 410         18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
 411         18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
 412         18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
 413         -- 2.0 ... 2.9999
 414         18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
 415         18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
 416         18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
 417         18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
 418         18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
 419         18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
 420         18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
 421         18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
 422         18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
 423         18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
 424         18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
 425         18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
 426         18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
 427         18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
 428         18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
 429         18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
 430         18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
 431         18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
 432         18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
 433         18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
 434         18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
 435         18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
 436         18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
 437         18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
 438         18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
 439         18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
 440         18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
 441         18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
 442         18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
 443         18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
 444         18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
 445         18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
 446         -- 3.0 ... 3.9999
 447         18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
 448         18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
 449         18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
 450         18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
 451         18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
 452         18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
 453         18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
 454         18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
 455         18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
 456         18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
 457         18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
 458         18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
 459         18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
 460         18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
 461         18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
 462         18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
 463         18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
 464         18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
 465         18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
 466         18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
 467         18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
 468         18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
 469         18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
 470         18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
 471         18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
 472         18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
 473         18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
 474         18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
 475         18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
 476         18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
 477         18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
 478         18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
 479         );
 480
 481     -- Left and right shifter with 120 bit input and 64 bit output.
 482     -- Shifts inp left by shift bits and returns the upper 64 bits of
 483     -- the result.  The shift parameter is interpreted as a signed
 484     -- number in the range -64..63, with negative values indicating
 485     -- right shifts.
 486     function shifter_64(inp: std_ulogic_vector(119 downto 0);
 487                         shift: std_ulogic_vector(6 downto 0))
 488         return std_ulogic_vector is
 489         constant ZEROS  : std_ulogic_vector(63 downto 0) := (others => '0');
 490         variable coarse : std_ulogic_vector(94 downto 0);   -- after the 32-position stage
 491         variable medium : std_ulogic_vector(70 downto 0);   -- after the 8-position stage
 492         variable fine   : std_ulogic_vector(63 downto 0);   -- after the 1-position stage
 493     begin
 494         -- Three cascaded selections; each one picks a window of the previous
 495         -- value, and a window taken further down means a larger left shift.
 496         if shift(6 downto 5) = "00" then            -- shift(6:5): 0, +32, -64 or -32
 497             coarse := inp(119 downto 25);
 498         elsif shift(6 downto 5) = "01" then
 499             coarse := inp(87 downto 0) & ZEROS(6 downto 0);
 500         elsif shift(6 downto 5) = "10" then
 501             coarse := ZEROS & inp(119 downto 89);
 502         else
 503             coarse := ZEROS(31 downto 0) & inp(119 downto 57);
 504         end if;
 505         if shift(4 downto 3) = "00" then            -- shift(4:3): a further 0, 8, 16 or 24
 506             medium := coarse(94 downto 24);
 507         elsif shift(4 downto 3) = "01" then
 508             medium := coarse(86 downto 16);
 509         elsif shift(4 downto 3) = "10" then
 510             medium := coarse(78 downto 8);
 511         else
 512             medium := coarse(70 downto 0);
 513         end if;
 514         if shift(2 downto 0) = "000" then           -- shift(2:0): a final 0 to 7
 515             fine := medium(70 downto 7);
 516         elsif shift(2 downto 0) = "001" then
 517             fine := medium(69 downto 6);
 518         elsif shift(2 downto 0) = "010" then
 519             fine := medium(68 downto 5);
 520         elsif shift(2 downto 0) = "011" then
 521             fine := medium(67 downto 4);
 522         elsif shift(2 downto 0) = "100" then
 523             fine := medium(66 downto 3);
 524         elsif shift(2 downto 0) = "101" then
 525             fine := medium(65 downto 2);
 526         elsif shift(2 downto 0) = "110" then
 527             fine := medium(64 downto 1);
 528         else
 529             fine := medium(63 downto 0);
 530         end if;
 531         return fine;
 532     end;
 533
 534     -- Generate a mask with 0-bits on the left and 1-bits on the right which
 535     -- selects the bits will be lost in doing a right shift.  The shift
 536     -- parameter is the bottom 6 bits of a negative shift count,
 537     -- indicating a right shift.
 538     function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
 539         variable mask_result: std_ulogic_vector(63 downto 0);
 540     begin
 541         mask_result := (others => '0');
 542         if is_X(shift) then
 543             mask_result := (others => 'X');
 544             return mask_result;
 545         end if;
 546         for i in 0 to 63 loop
 547             if i >= shift then
 548                 mask_result(63 - i) := '1';
 549             end if;
 550         end loop;
 551         return mask_result;
 552     end;
 553
 554     -- Split a DP floating-point number into components and work out its class.
 555     -- If is_int = 1, the input is considered an integer
 556     function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_fp: std_ulogic;
 557                        is_32bint: std_ulogic; is_signed: std_ulogic) return fpu_reg_type is
 558         variable reg     : fpu_reg_type;
 559         variable exp_nz  : std_ulogic;
 560         variable exp_ao  : std_ulogic;
 561         variable frac_nz : std_ulogic;
 562         variable low_nz  : std_ulogic;
 563         variable cls     : std_ulogic_vector(2 downto 0);
 564     begin
 565         reg.negative := fpr(63);
 566         reg.denorm := '0';
 567         exp_nz := or (fpr(62 downto 52));
 568         exp_ao := and (fpr(62 downto 52));
 569         frac_nz := or (fpr(51 downto 0));
 570         low_nz := or (fpr(31 downto 0));
 571         if is_fp = '1' then
 572             reg.denorm := frac_nz and not exp_nz;
 573             reg.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
 574             if exp_nz = '0' then
 575                 reg.exponent := to_signed(-1022, EXP_BITS);
 576             end if;
 577             reg.mantissa := std_ulogic_vector(shift_left(resize(unsigned(exp_nz & fpr(51 downto 0)), 64),
 578                                                          UNIT_BIT - 52));
 579             cls := exp_ao & exp_nz & frac_nz;
 580             case cls is
 581                 when "000"  => reg.class := ZERO;
 582                 when "001"  => reg.class := FINITE;    -- denormalized
 583                 when "010"  => reg.class := FINITE;
 584                 when "011"  => reg.class := FINITE;
 585                 when "110"  => reg.class := INFINITY;
 586                 when others => reg.class := NAN;
 587             end case;
 588         elsif is_32bint = '1' then
 589             reg.negative := fpr(31);
 590             reg.mantissa(31 downto 0) := fpr(31 downto 0);
 591             reg.mantissa(63 downto 32) := (others => (is_signed and fpr(31)));
 592             reg.exponent := (others => '0');
 593             if low_nz = '1' then
 594                 reg.class := FINITE;
 595             else
 596                 reg.class := ZERO;
 597             end if;
 598         else
 599             reg.mantissa := fpr;
 600             reg.exponent := (others => '0');
 601             if (fpr(63) or exp_nz or frac_nz) = '1' then
 602                 reg.class := FINITE;
 603             else
 604                 reg.class := ZERO;
 605             end if;
 606         end if;
 607         return reg;
 608     end;
 609
 610     -- Construct a DP floating-point result from components
 611     function pack_dp(negative: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
 612                      mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic;
 613                      negate: std_ulogic; is_subtract: std_ulogic; round_mode: std_ulogic_vector)
 614         return std_ulogic_vector is
 615         variable dp_result : std_ulogic_vector(63 downto 0);
 616         variable sign : std_ulogic;
 617     begin
 618         dp_result := (others => '0');
 619         sign := negative;
 620         case class is
 621             when ZERO =>
 622                 if is_subtract = '1' then
 623                     -- set result sign depending on rounding mode
 624                     sign := round_mode(0) and round_mode(1);
 625                 end if;
 626             when FINITE =>
 627                 if mantissa(UNIT_BIT) = '1' then
 628                     -- normalized number
 629                     dp_result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
 630                 end if;
 631                 dp_result(51 downto 29) := mantissa(UNIT_BIT - 1 downto SP_LSB);
 632                 if single_prec = '0' then
 633                     dp_result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB);
 634                 end if;
 635             when INFINITY =>
 636                 dp_result(62 downto 52) := "11111111111";
 637             when NAN =>
 638                 dp_result(62 downto 52) := "11111111111";
 639                 dp_result(51) := quieten_nan or mantissa(QNAN_BIT);
 640                 dp_result(50 downto 29) := mantissa(QNAN_BIT - 1 downto SP_LSB);
 641                 if single_prec = '0' then
 642                     dp_result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB);
 643                 end if;
 644         end case;
 645         dp_result(63) := sign xor negate;
 646         return dp_result;
 647     end;
 648
 649     -- Determine whether to increment when rounding
 650     -- Returns rounding_inc & inexact
 651     -- If single_prec = 1, assumes x includes the bottom 31 (== SP_LSB - 2)
 652     -- bits of the mantissa already (usually arranged by setting set_x = 1 earlier).
 653     function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
 654                          single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
 655                          sign: std_ulogic)
 656         return std_ulogic_vector is
 657         variable grx : std_ulogic_vector(2 downto 0);
 658         variable ret : std_ulogic_vector(1 downto 0);
 659         variable lsb : std_ulogic;
 660     begin
 661         if single_prec = '0' then
 662             grx := mantissa(DP_GBIT downto DP_RBIT) & (x or (or mantissa(DP_RBIT - 1 downto 0)));
 663             lsb := mantissa(DP_LSB);
 664         else
 665             grx := mantissa(SP_GBIT downto SP_RBIT) & x;
 666             lsb := mantissa(SP_LSB);
 667         end if;
 668         ret(1) := '0';
 669         ret(0) := or (grx);
 670         case rn(1 downto 0) is
 671             when "00" =>        -- round to nearest
 672                 if grx = "100" and rn(2) = '0' then
 673                     ret(1) := lsb; -- tie, round to even
 674                 else
 675                     ret(1) := grx(2);
 676                 end if;
 677             when "01" =>        -- round towards zero
 678             when others =>      -- round towards +/- inf
 679                 if rn(0) = sign then
 680                     -- round towards greater magnitude
 681                     ret(1) := ret(0);
 682                 end if;
 683         end case;
 684         return ret;
 685     end;
 686
 687     -- Determine result flags to write into the FPSCR
 688     function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
 689         return std_ulogic_vector is
 690     begin
 691         case class is
 692             when ZERO =>
 693                 return sign & "0010";
 694             when FINITE =>
 695                 return (not unitbit) & sign & (not sign) & "00";
 696             when INFINITY =>
 697                 return '0' & sign & (not sign) & "01";
 698             when NAN =>
 699                 return "10001";
 700         end case;
 701     end;
 702
 703 begin
 704     fpu_multiply_0: entity work.multiply
 705         port map (
 706             clk => clk,
 707             m_in => f_to_multiply,
 708             m_out => multiply_to_f
 709             );
 710
 711     fpu_0: process(clk)
 712     begin
 713         if rising_edge(clk) then
 714             if rst = '1' or flush_in = '1' then
 715                 r.state <= IDLE;
 716                 r.busy <= '0';
 717                 r.f2stall <= '0';
 718                 r.instr_done <= '0';
 719                 r.complete <= '0';
 720                 r.illegal <= '0';
 721                 r.do_intr <= '0';
 722                 r.writing_fpr <= '0';
 723                 r.writing_cr <= '0';
 724                 r.writing_xer <= '0';
 725                 r.fpscr <= (others => '0');
 726                 r.write_reg <= (others =>'0');
 727                 r.complete_tag.valid <= '0';
 728                 r.cr_mask <= (others =>'0');
 729                 r.cr_result <= (others =>'0');
 730                 r.instr_tag.valid <= '0';
 731                 if rst = '1' then
 732                     r.fpscr <= (others => '0');
 733                     r.comm_fpscr <= (others => '0');
 734                 elsif r.do_intr = '0' then
 735                     -- flush_in = 1 and not due to us generating an interrupt,
 736                     -- roll back to committed fpscr
 737                     r.fpscr <= r.comm_fpscr;
 738                 end if;
 739             else
 740                 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
 741                 r <= rin;
 742             end if;
 743         end if;
 744     end process;
 745
 746     -- synchronous reads from lookup table
 747     lut_access: process(clk)
 748         variable addrhi : std_ulogic_vector(1 downto 0);
 749         variable addr   : std_ulogic_vector(9 downto 0);
 750     begin
 751         if rising_edge(clk) then
 752             if r.is_sqrt = '1' then
 753                 addrhi := r.b.mantissa(UNIT_BIT + 1 downto UNIT_BIT);
 754             else
 755                 addrhi := "00";
 756             end if;
 757             addr := addrhi & r.b.mantissa(UNIT_BIT - 1 downto UNIT_BIT - 8);
 758             if is_X(addr) then
 759                 inverse_est <= (others => 'X');
 760             else
 761                 inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
 762             end if;
 763         end if;
 764     end process;
 765
 766     e_out.busy <= r.busy;
 767     e_out.f2stall <= r.f2stall;
 768     e_out.exception <= r.fpscr(FPSCR_FEX);
 769
 770     -- Note that the cycle where r.complete = 1 for an instruction can be as
 771     -- late as the second cycle of the following instruction (i.e. in the state
 772     -- following IDLE state).  Hence it is important that none of the fields of
 773     -- r that are used below are modified in IDLE state.
 774     w_out.valid <= r.complete;
 775     w_out.instr_tag <= r.complete_tag;
 776     w_out.write_enable <= r.writing_fpr and r.complete;
 777     w_out.write_reg <= r.write_reg;
 778     w_out.write_data <= fp_result;
 779     w_out.write_cr_enable <= r.writing_cr and r.complete;
 780     w_out.write_cr_mask <= r.cr_mask;
 781     w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
 782                            r.cr_result & r.cr_result & r.cr_result & r.cr_result;
 783     w_out.write_xerc <= r.writing_xer and r.complete;
 784     w_out.xerc <= r.xerc_result;
 785     w_out.interrupt <= r.do_intr;
 786     w_out.intr_vec <= 16#700#;
 787     w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0');
 788
 789     fpu_1: process(all)
 790         variable v           : reg_type;
 791         variable adec        : fpu_reg_type;
 792         variable bdec        : fpu_reg_type;
 793         variable cdec        : fpu_reg_type;
 794         variable fpscr_mask  : std_ulogic_vector(31 downto 0);
 795         variable j, k        : integer;
 796         variable flm         : std_ulogic_vector(7 downto 0);
 797         variable fpin_a      : std_ulogic;
 798         variable fpin_b      : std_ulogic;
 799         variable fpin_c      : std_ulogic;
 800         variable is_32bint   : std_ulogic;
 801         variable mask        : std_ulogic_vector(63 downto 0);
 802         variable in_a0       : std_ulogic_vector(63 downto 0);
 803         variable in_b0       : std_ulogic_vector(63 downto 0);
 804         variable misc        : std_ulogic_vector(63 downto 0);
 805         variable shift_res   : std_ulogic_vector(63 downto 0);
 806         variable round       : std_ulogic_vector(1 downto 0);
 807         variable update_fx   : std_ulogic;
 808         variable arith_done  : std_ulogic;
 809         variable invalid     : std_ulogic;
 810         variable zero_divide : std_ulogic;
 811         variable mant_nz     : std_ulogic;
 812         variable min_exp     : signed(EXP_BITS-1 downto 0);
 813         variable max_exp     : signed(EXP_BITS-1 downto 0);
 814         variable bias_exp    : signed(EXP_BITS-1 downto 0);
 815         variable new_exp     : signed(EXP_BITS-1 downto 0);
 816         variable exp_tiny    : std_ulogic;
 817         variable exp_huge    : std_ulogic;
 818         variable clz         : std_ulogic_vector(5 downto 0);
 819         variable set_x       : std_ulogic;
 820         variable mshift      : signed(EXP_BITS-1 downto 0);
 821         variable need_check  : std_ulogic;
 822         variable msb         : std_ulogic;
 823         variable is_add      : std_ulogic;
 824         variable set_a       : std_ulogic;
 825         variable set_a_exp   : std_ulogic;
 826         variable set_a_mant  : std_ulogic;
 827         variable set_a_hi    : std_ulogic;
 828         variable set_a_lo    : std_ulogic;
 829         variable set_b       : std_ulogic;
 830         variable set_b_mant  : std_ulogic;
 831         variable set_c       : std_ulogic;
 832         variable set_y       : std_ulogic;
 833         variable set_s       : std_ulogic;
 834         variable qnan_result : std_ulogic;
 835         variable px_nz       : std_ulogic;
 836         variable pcmpb_eq    : std_ulogic;
 837         variable pcmpb_lt    : std_ulogic;
 838         variable pcmpc_eq    : std_ulogic;
 839         variable pcmpc_lt    : std_ulogic;
 840         variable pshift      : std_ulogic;
 841         variable renorm_sqrt : std_ulogic;
 842         variable sqrt_exp    : signed(EXP_BITS-1 downto 0);
 843         variable shiftin     : std_ulogic;
 844         variable shiftin0    : std_ulogic;
 845         variable mulexp      : signed(EXP_BITS-1 downto 0);
 846         variable maddend     : std_ulogic_vector(127 downto 0);
 847         variable sum         : std_ulogic_vector(63 downto 0);
 848         variable round_inc   : std_ulogic_vector(63 downto 0);
 849         variable rbit_inc    : std_ulogic;
 850         variable mult_mask   : std_ulogic;
 851         variable sign_bit    : std_ulogic;
 852         variable rnd_b32     : std_ulogic;
 853         variable rexp_in1    : signed(EXP_BITS-1 downto 0);
 854         variable rexp_in2    : signed(EXP_BITS-1 downto 0);
 855         variable rexp_cin    : std_ulogic;
 856         variable rexp_sum    : signed(EXP_BITS-1 downto 0);
 857         variable rsh_in1     : signed(EXP_BITS-1 downto 0);
 858         variable rsh_in2     : signed(EXP_BITS-1 downto 0);
 859         variable exec_state  : state_t;
 860         variable opcbits     : std_ulogic_vector(4 downto 0);
 861         variable int_result  : std_ulogic;
 862         variable illegal     : std_ulogic;
 863     begin
 864         v := r;
 865         v.complete := '0';
 866         v.do_intr := '0';
 867         is_32bint := '0';
 868         exec_state := IDLE;
 869
 870         if r.complete = '1' or r.do_intr = '1' then
 871             v.instr_done := '0';
 872             v.writing_fpr := '0';
 873             v.writing_cr := '0';
 874             v.writing_xer := '0';
 875             v.comm_fpscr := r.fpscr;
 876             v.illegal := '0';
 877         end if;
 878
 879         -- capture incoming instruction
 880         if e_in.valid = '1' then
 881             v.insn := e_in.insn;
 882             v.op := e_in.op;
 883             v.instr_tag := e_in.itag;
 884             v.fe_mode := or (e_in.fe_mode);
 885             v.dest_fpr := e_in.frt;
 886             v.single_prec := e_in.single;
 887             v.is_signed := e_in.is_signed;
 888             v.rc := e_in.rc;
 889             v.fp_rc := '0';
 890             v.is_cmp := e_in.out_cr;
 891             v.oe := e_in.oe;
 892             v.m32b := e_in.m32b;
 893             v.xerc := e_in.xerc;
 894             v.longmask := '0';
 895             v.integer_op := '0';
 896             v.divext := '0';
 897             v.divmod := '0';
 898             v.is_sqrt := '0';
 899             v.is_multiply := '0';
 900             fpin_a := '0';
 901             fpin_b := '0';
 902             fpin_c := '0';
 903             v.use_a := e_in.valid_a;
 904             v.use_b := e_in.valid_b;
 905             v.use_c := e_in.valid_c;
 906             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
 907             case e_in.op is
 908                 when OP_FP_ARITH =>
 909                     fpin_a := e_in.valid_a;
 910                     fpin_b := e_in.valid_b;
 911                     fpin_c := e_in.valid_c;
 912                     v.longmask := e_in.single;
 913                     v.fp_rc := e_in.rc;
 914                     exec_state := arith_decode(to_integer(unsigned(e_in.insn(5 downto 1))));
 915                     if e_in.insn(5 downto 1) = "11001" or e_in.insn(5 downto 3) = "111" then
 916                         v.is_multiply := '1';
 917                     end if;
 918                     if e_in.insn(5 downto 1) = "10110" or e_in.insn(5 downto 1) = "11010" then
 919                         v.is_sqrt := '1';
 920                     end if;
 921                     if e_in.insn(5 downto 1) = "01111" then
 922                         v.round_mode := "001";
 923                     end if;
 924                 when OP_FP_CMP =>
 925                     fpin_a := e_in.valid_a;
 926                     fpin_b := e_in.valid_b;
 927                     exec_state := cmp_decode(to_integer(unsigned(e_in.insn(8 downto 6))));
 928                 when OP_FP_MISC =>
 929                     v.fp_rc := e_in.rc;
 930                     opcbits := e_in.insn(10) & e_in.insn(8) & e_in.insn(4) & e_in.insn(2) & e_in.insn(1);
 931                     exec_state := misc_decode(to_integer(unsigned(opcbits)));
 932                 when OP_FP_MOVE =>
 933                     v.fp_rc := e_in.rc;
 934                     fpin_a := e_in.valid_a;
 935                     fpin_b := e_in.valid_b;
 936                     fpin_c := e_in.valid_c;
 937                     if e_in.insn(5) = '0' then
 938                         exec_state := DO_FMR;
 939                     else
 940                         exec_state := DO_FSEL;
 941                     end if;
 942                 when OP_DIV =>
 943                     v.integer_op := '1';
 944                     is_32bint := e_in.single;
 945                     exec_state := DO_IDIVMOD;
 946                 when OP_DIVE =>
 947                     v.integer_op := '1';
 948                     v.divext := '1';
 949                     is_32bint := e_in.single;
 950                     exec_state := DO_IDIVMOD;
 951                 when OP_MOD =>
 952                     v.integer_op := '1';
 953                     v.divmod := '1';
 954                     is_32bint := e_in.single;
 955                     exec_state := DO_IDIVMOD;
 956                 when others =>
 957                     exec_state := DO_ILLEGAL;
 958             end case;
 959             v.quieten_nan := '1';
 960             v.tiny := '0';
 961             v.denorm := '0';
 962             v.is_subtract := '0';
 963             v.add_bsmall := '0';
 964             v.doing_ftdiv := "00";
 965             v.int_ovf := '0';
 966             v.div_close := '0';
 967
 968             adec := decode_dp(e_in.fra, fpin_a, is_32bint, e_in.is_signed);
 969             bdec := decode_dp(e_in.frb, fpin_b, is_32bint, e_in.is_signed);
 970             cdec := decode_dp(e_in.frc, fpin_c, '0', '0');
 971             v.a := adec;
 972             v.b := bdec;
 973             v.c := cdec;
 974
 975             v.exp_cmp := '0';
 976             if adec.exponent > bdec.exponent then
 977                 v.exp_cmp := '1';
 978             end if;
 979             v.madd_cmp := '0';
 980             if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
 981                 v.madd_cmp := '1';
 982             end if;
 983
 984             v.a_hi := 8x"0";
 985             v.a_lo := 56x"0";
 986         end if;
 987
 988         r_hi_nz <= or (r.r(UNIT_BIT + 1 downto SP_LSB));
 989         r_lo_nz <= or (r.r(SP_LSB - 1 downto DP_LSB));
 990         r_gt_1 <= or (r.r(63 downto 1));
 991         s_nz <= or (r.s);
 992
 993         if r.single_prec = '0' then
 994             if r.doing_ftdiv(1) = '0' then
 995                 max_exp := to_signed(1023, EXP_BITS);
 996             else
 997                 max_exp := to_signed(1020, EXP_BITS);
 998             end if;
 999             if r.doing_ftdiv(0) = '0' then
1000                 min_exp := to_signed(-1022, EXP_BITS);
1001             else
1002                 min_exp := to_signed(-1021, EXP_BITS);
1003             end if;
1004             bias_exp := to_signed(1536, EXP_BITS);
1005         else
1006             max_exp := to_signed(127, EXP_BITS);
1007             min_exp := to_signed(-126, EXP_BITS);
1008             bias_exp := to_signed(192, EXP_BITS);
1009         end if;
1010         new_exp := r.result_exp - r.shift;
1011         exp_tiny := '0';
1012         exp_huge := '0';
1013         if is_X(new_exp) or is_X(min_exp) then
1014             exp_tiny := 'X';
1015         elsif new_exp < min_exp then
1016             exp_tiny := '1';
1017         end if;
1018         if is_X(new_exp) or is_X(min_exp) then
1019             exp_huge := 'X';
1020         elsif new_exp > max_exp then
1021             exp_huge := '1';
1022         end if;
1023
1024         -- Compare P with zero and with B
1025         px_nz := or (r.p(UNIT_BIT + 1 downto 4));
1026         pcmpb_eq := '0';
1027         if r.p(59 downto 4) = r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT) then
1028             pcmpb_eq := '1';
1029         end if;
1030         pcmpb_lt := '0';
1031         if is_X(r.p(59 downto 4)) or is_X(r.b.mantissa(55 downto 0)) then
1032             pcmpb_lt := 'X';
1033         elsif unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT)) then
1034             pcmpb_lt := '1';
1035         end if;
1036         pcmpc_eq := '0';
1037         if r.p = r.c.mantissa then
1038             pcmpc_eq := '1';
1039         end if;
1040         pcmpc_lt := '0';
1041         if is_X(r.p) or is_X(r.c.mantissa) then
1042             pcmpc_lt := 'X';
1043         elsif unsigned(r.p) < unsigned(r.c.mantissa) then
1044             pcmpc_lt := '1';
1045         end if;
1046
1047         v.update_fprf := '0';
1048         v.first := '0';
1049         v.opsel_a := AIN_R;
1050         opsel_ainv <= '0';
1051         opsel_mask <= '0';
1052         opsel_b <= BIN_ZERO;
1053         opsel_binv <= '0';
1054         opsel_r <= RES_SUM;
1055         opsel_s <= S_ZERO;
1056         carry_in <= '0';
1057         misc_sel <= "0000";
1058         fpscr_mask := (others => '1');
1059         update_fx := '0';
1060         arith_done := '0';
1061         invalid := '0';
1062         zero_divide := '0';
1063         set_x := '0';
1064         qnan_result := '0';
1065         set_a := '0';
1066         set_a_exp := '0';
1067         set_a_mant := '0';
1068         set_a_hi := '0';
1069         set_a_lo := '0';
1070         set_b := '0';
1071         set_b_mant := '0';
1072         set_c := '0';
1073         set_s := '0';
1074         f_to_multiply.is_signed <= '0';
1075         f_to_multiply.valid <= '0';
1076         msel_1 <= MUL1_A;
1077         msel_2 <= MUL2_C;
1078         msel_add <= MULADD_ZERO;
1079         msel_inv <= '0';
1080         set_y := '0';
1081         pshift := '0';
1082         renorm_sqrt := '0';
1083         shiftin := '0';
1084         shiftin0 := '0';
1085         rbit_inc := '0';
1086         mult_mask := '0';
1087         rnd_b32 := '0';
1088         int_result := '0';
1089         illegal := '0';
1090
1091         re_sel1 <= REXP1_ZERO;
1092         re_sel2 <= REXP2_CON;
1093         re_con2 <= RECON2_ZERO;
1094         re_neg1 <= '0';
1095         re_neg2 <= '0';
1096         re_set_result <= '0';
1097         rs_sel1 <= RSH1_ZERO;
1098         rs_sel2 <= RSH2_CON;
1099         rs_con2 <= RSCON2_ZERO;
1100         rs_neg1 <= '0';
1101         rs_neg2 <= '0';
1102         rs_norm <= '0';
1103
1104         case r.state is
1105             when IDLE =>
1106                 v.invalid := '0';
1107                 v.negate := '0';
1108                 if e_in.valid = '1' then
1109                     v.opsel_a := AIN_B;
1110                     v.busy := '1';
1111                     if e_in.op = OP_FP_ARITH and e_in.valid_a = '1' and
1112                         (e_in.valid_b = '0' or e_in.valid_c = '0') then
1113                         v.opsel_a := AIN_A;
1114                     end if;
1115                     if e_in.op = OP_FP_ARITH then
1116                         -- input selection for denorm cases
1117                         case e_in.insn(5 downto 1) is
1118                             when "10010" =>         -- fdiv
1119                                 if v.b.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then
1120                                     v.opsel_a := AIN_B;
1121                                 end if;
1122                             when "11001" =>         -- fmul
1123                                 if v.c.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then
1124                                     v.opsel_a := AIN_C;
1125                                 end if;
1126                             when "11100" | "11101" | "11110" | "11111" =>   -- fmadd etc.
1127                                 if v.a.mantissa(UNIT_BIT) = '0' then
1128                                     v.opsel_a := AIN_A;
1129                                 elsif v.c.mantissa(UNIT_BIT) = '0' then
1130                                     v.opsel_a := AIN_C;
1131                                 end if;
1132                             when others =>
1133                         end case;
1134                     end if;
1135                     v.state := exec_state;
1136                 end if;
1137                 v.x := '0';
1138                 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
1139                 set_s := '1';
1140
1141             when DO_ILLEGAL =>
1142                 illegal := '1';
1143                 v.instr_done := '1';
1144
1145             when DO_MCRFS =>
1146                 j := to_integer(unsigned(insn_bfa(r.insn)));
1147                 for i in 0 to 7 loop
1148                     if i = j then
1149                         k := (7 - i) * 4;
1150                         v.cr_result := r.fpscr(k + 3 downto k);
1151                         fpscr_mask(k + 3 downto k) := "0000";
1152                     end if;
1153                 end loop;
1154                 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
1155                 v.instr_done := '1';
1156
1157             when DO_FTDIV =>
1158                 v.instr_done := '1';
1159                 v.cr_result := "0000";
1160                 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
1161                     (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then
1162                     v.cr_result(2) := '1';
1163                 end if;
1164                 if r.a.class = NAN or r.a.class = INFINITY or
1165                     r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
1166                     (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
1167                     v.cr_result(1) := '1';
1168                 else
1169                     v.doing_ftdiv := "11";
1170                     v.first := '1';
1171                     v.state := FTDIV_1;
1172                     v.instr_done := '0';
1173                 end if;
1174
1175             when DO_FTSQRT =>
1176                 v.instr_done := '1';
1177                 v.cr_result := "0000";
1178                 if r.b.class = ZERO or r.b.class = INFINITY or
1179                     (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then
1180                     v.cr_result(2) := '1';
1181                 end if;
1182                 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
1183                     or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
1184                     v.cr_result(1) := '0';
1185                 end if;
1186
1187             when DO_FCMP =>
1188                 -- fcmp[uo]
1189                 -- r.opsel_a = AIN_B
1190                 v.instr_done := '1';
1191                 update_fx := '1';
1192                 re_sel2 <= REXP2_B;
1193                 re_set_result <= '1';
1194                 if (r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
1195                     (r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') then
1196                     -- Signalling NAN
1197                     v.fpscr(FPSCR_VXSNAN) := '1';
1198                     if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
1199                         v.fpscr(FPSCR_VXVC) := '1';
1200                     end if;
1201                     invalid := '1';
1202                     v.cr_result := "0001";          -- unordered
1203                 elsif r.a.class = NAN or r.b.class = NAN then
1204                     if r.insn(6) = '1' then
1205                         -- fcmpo
1206                         v.fpscr(FPSCR_VXVC) := '1';
1207                         invalid := '1';
1208                     end if;
1209                     v.cr_result := "0001";          -- unordered
1210                 elsif r.a.class = ZERO and r.b.class = ZERO then
1211                     v.cr_result := "0010";          -- equal
1212                 elsif r.a.negative /= r.b.negative then
1213                     v.cr_result := r.a.negative & r.b.negative & "00";
1214                 elsif r.a.class = ZERO then
1215                     -- A and B are the same sign from here down
1216                     v.cr_result := not r.b.negative & r.b.negative & "00";
1217                 elsif r.a.class = INFINITY then
1218                     if r.b.class = INFINITY then
1219                         v.cr_result := "0010";
1220                     else
1221                         v.cr_result := r.a.negative & not r.a.negative & "00";
1222                     end if;
1223                 elsif r.b.class = ZERO then
1224                     -- A is finite from here down
1225                     v.cr_result := r.a.negative & not r.a.negative & "00";
1226                 elsif r.b.class = INFINITY then
1227                     v.cr_result := not r.b.negative & r.b.negative & "00";
1228                 elsif r.exp_cmp = '1' then
1229                     -- A and B are both finite from here down
1230                     v.cr_result := r.a.negative & not r.a.negative & "00";
1231                 elsif r.a.exponent /= r.b.exponent then
1232                     -- A exponent is smaller than B
1233                     v.cr_result := not r.a.negative & r.a.negative & "00";
1234                 else
1235                     -- Prepare to subtract mantissas, put B in R
1236                     v.cr_result := "0000";
1237                     v.instr_done := '0';
1238                     v.opsel_a := AIN_A;
1239                     v.state := CMP_1;
1240                 end if;
1241                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1242
1243             when DO_MTFSB =>
                     -- Copies instruction bit 6 into the single FPSCR bit selected by
                     -- the BT field of the instruction, then completes.
1244                 -- mtfsb{0,1}
1245                 j := to_integer(unsigned(insn_bt(r.insn)));
                     -- Only the iteration with i = j has any effect; the loop index
                     -- is used instead of indexing v.fpscr directly with j.
1246                 for i in 0 to 31 loop
1247                     if i = j then
                             -- BT value i addresses FPSCR bit 31 - i
1248                         v.fpscr(31 - i) := r.insn(6);
1249                     end if;
1250                 end loop;
1251                 v.instr_done := '1';
1252
1253             when DO_MTFSFI =>
                     -- Writes the 4-bit value returned by insn_u into the FPSCR
                     -- nibble selected by the BF field, but only when insn(16) = '0'.
                     -- The instruction completes in either case.
1254                 -- mtfsfi
1255                 j := to_integer(unsigned(insn_bf(r.insn)));
                     -- NOTE(review): insn(16) is presumably the W bit selecting the
                     -- upper FPSCR word, which is not written here -- confirm.
1256                 if r.insn(16) = '0' then
1257                     for i in 0 to 7 loop
1258                         if i = j then
                                 -- BF = 0 maps to bits 31:28, BF = 7 to bits 3:0
1259                             k := (7 - i) * 4;
1260                             v.fpscr(k + 3 downto k) := insn_u(r.insn);
1261                         end if;
1262                     end loop;
1263                 end if;
1264                 v.instr_done := '1';
1265
1266             when DO_FMRG =>
                     -- Routes the result through the RES_MISC path, sets int_result,
                     -- requests an FPR write and completes the instruction.
1267                 -- fmrgew, fmrgow
1268                 opsel_r <= RES_MISC;
                     -- NOTE(review): insn(8) presumably distinguishes fmrgew from
                     -- fmrgow -- confirm against the misc_sel decoding.
1269                 misc_sel <= "01" & r.insn(8) & '0';
1270                 int_result := '1';
1271                 v.writing_fpr := '1';
1272                 v.instr_done := '1';
1273
1274             when DO_MFFS =>
                     -- mffs family: requests an FPR write through the RES_MISC path
                     -- and decodes instruction bits 20:16 to select the variant.
                     -- Some variants also update FPSCR fields as a side effect.
                     -- NOTE(review): fpscr_mask presumably limits which FPSCR bits
                     -- are returned in the result -- confirm where it is consumed.
1275                 v.writing_fpr := '1';
1276                 opsel_r <= RES_MISC;
1277                 case r.insn(20 downto 16) is
1278                     when "00000" =>
1279                         -- mffs
1280                     when "00001" =>
1281                         -- mffsce
                             -- clears the FPSCR bits from FPSCR_VE down to FPSCR_XE
1282                         v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1283                     when "10100" | "10101" =>
1284                         -- mffscdrn[i] (but we don't implement DRN)
1285                         fpscr_mask := x"000000FF";
1286                     when "10110" =>
1287                         -- mffscrn
1288                         fpscr_mask := x"000000FF";
                             -- new RN comes from the same bit positions of B
1289                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1290                             r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1291                     when "10111" =>
1292                         -- mffscrni
1293                         fpscr_mask := x"000000FF";
                             -- new RN comes from instruction bits 12:11
1294                         v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1295                     when "11000" =>
1296                         -- mffsl
1297                         fpscr_mask := x"0007F0FF";
1298                     when others =>
                             -- unrecognised encoding: flag illegal, cancel FPR write
1299                         v.illegal := '1';
1300                         v.writing_fpr := '0';
1301                 end case;
1302                 int_result := '1';
1303                 v.instr_done := '1';
1304
1305             when DO_MTFSF =>
                     -- Copies selected 4-bit fields of B's mantissa into the same bit
                     -- positions of the FPSCR. flm holds one enable bit per field;
                     -- flm(i) controls FPSCR bits 4*i+3 downto 4*i.
1306                 if r.insn(25) = '1' then
                         -- insn(25) set: every field is written
1307                     flm := x"FF";
1308                 elsif r.insn(16) = '1' then
                         -- insn(16) set: no field is written
                         -- NOTE(review): presumably the W bit (upper FPSCR word
                         -- not implemented) -- confirm.
1309                     flm := x"00";
1310                 else
                         -- otherwise the mask is instruction bits 24:17
1311                     flm := r.insn(24 downto 17);
1312                 end if;
1313                 for i in 0 to 7 loop
1314                     k := i * 4;
1315                     if flm(i) = '1' then
1316                         v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1317                     end if;
1318                 end loop;
1319                 v.instr_done := '1';
1320
1321             when DO_FMR =>
                     -- Sign-manipulating moves (fabs, fnabs, fmr, fneg, fcpsgn).
                     -- The result takes B's class and the REXP2_B exponent selection;
                     -- only the sign differs, chosen from instruction bits 9:6 tested
                     -- in priority order. Completes in this state with an FPR write.
1322                 -- r.opsel_a = AIN_B
1323                 v.result_class := r.b.class;
1324                 re_sel2 <= REXP2_B;
1325                 re_set_result <= '1';
                     -- NOTE(review): presumably so that a signalling NaN is moved
                     -- unchanged rather than converted to a quiet NaN -- confirm.
1326                 v.quieten_nan := '0';
1327                 if r.insn(9) = '1' then
1328                     v.result_sign := '0';              -- fabs
1329                 elsif r.insn(8) = '1' then
1330                     v.result_sign := '1';              -- fnabs
1331                 elsif r.insn(7) = '1' then
1332                     v.result_sign := r.b.negative;     -- fmr
1333                 elsif r.insn(6) = '1' then
1334                     v.result_sign := not r.b.negative; -- fneg
1335                 else
1336                     v.result_sign := r.a.negative;     -- fcpsgn
1337                 end if;
1338                 v.writing_fpr := '1';
1339                 v.instr_done := '1';
1340
1341             when DO_FRI =>    -- fri[nzpm]
                     -- First cycle of fri*: result class and sign default to B's.
                     -- Non-finite operands and finite operands with exponent >= 52
                     -- complete here; other finite operands continue in FRI_1.
1342                 -- r.opsel_a = AIN_B
1343                 v.result_class := r.b.class;
1344                 v.result_sign := r.b.negative;
1345                 re_sel2 <= REXP2_B;
1346                 re_set_result <= '1';
1347                 -- set shift to exponent - 52
1348                 rs_sel1 <= RSH1_B;
1349                 rs_con2 <= RSCON2_52;
1350                 rs_neg2 <= '1';
1351                 v.fpscr(FPSCR_FR) := '0';
1352                 v.fpscr(FPSCR_FI) := '0';
                     -- A NaN whose quiet bit (mantissa bit QNAN_BIT) is clear is a
                     -- signalling NaN and raises VXSNAN
1353                 if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then
1354                     -- Signalling NAN
1355                     v.fpscr(FPSCR_VXSNAN) := '1';
1356                     invalid := '1';
1357                 end if;
1358                 if r.b.class = FINITE then
1359                     if r.b.exponent >= to_signed(52, EXP_BITS) then
1360                         -- integer already, no rounding required
1361                         arith_done := '1';
1362                     else
1363                         v.state := FRI_1;
                             -- top bit forced to '1', low bits from instruction bits
                             -- 7:6 (the n/z/p/m variant)
1364                         v.round_mode := '1' & r.insn(7 downto 6);
1365                     end if;
1366                 else
                         -- zero, infinity and NaN need no rounding
1367                     arith_done := '1';
1368                 end if;
1369
1370             when DO_FRSP =>
                     -- First cycle of frsp (round to single precision).
                     -- Result class and sign default to B's. A finite operand is
                     -- sent to ROUND_UFLOW, ROUND_OFLOW or ROUNDING depending on
                     -- whether its exponent is below -126, above 127 or in range;
                     -- any other class completes here.
1371                 -- r.opsel_a = AIN_B, r.shift = 0
1372                 v.result_class := r.b.class;
1373                 v.result_sign := r.b.negative;
1374                 re_sel2 <= REXP2_B;
1375                 re_set_result <= '1';
1376                 -- set shift to exponent - -126
1377                 rs_sel1 <= RSH1_B;
1378                 rs_con2 <= RSCON2_MINEXP;
1379                 rs_neg2 <= '1';
1380                 v.fpscr(FPSCR_FR) := '0';
1381                 v.fpscr(FPSCR_FI) := '0';
                     -- The quiet bit of a NaN sits just below the unit bit
                     -- (QNAN_BIT = UNIT_BIT - 1). Use the named constant, as DO_FRI
                     -- does, rather than a literal index: with UNIT_BIT = 56 a
                     -- literal 53 tests an ordinary fraction bit and misclassifies
                     -- signalling NaNs.
1382                 if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then
1383                     -- Signalling NAN
1384                     v.fpscr(FPSCR_VXSNAN) := '1';
1385                     invalid := '1';
1386                 end if;
1387                 set_x := '1';
1388                 if r.b.class = FINITE then
1389                     if r.b.exponent < to_signed(-126, EXP_BITS) then
1390                         v.state := ROUND_UFLOW;
1391                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
1392                         v.state := ROUND_OFLOW;
1393                     else
1394                         v.state := ROUNDING;
1395                     end if;
1396                 else
1397                     arith_done := '1';
1398                 end if;
1399
1400             when DO_FCTI =>
                     -- First cycle of the float-to-integer conversions.
                     -- Zero completes immediately. Infinity, NaN and out-of-range
                     -- finite values go to INT_OFLOW. Finite values with exponent
                     -- >= 52 go to INT_ISHIFT (or INT_OFLOW when an unsigned
                     -- conversion is given a negative value). All other finite
                     -- values go to INT_SHIFT.
1401                 -- instr bit 9: 1=dword 0=word
1402                 -- instr bit 8: 1=unsigned 0=signed
1403                 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1404                 -- r.opsel_a = AIN_B
1405                 v.result_class := r.b.class;
1406                 v.result_sign := r.b.negative;
1407                 re_sel2 <= REXP2_B;
1408                 re_set_result <= '1';
1409                 rs_sel1 <= RSH1_B;
1410                 rs_neg2 <= '1';
1411                 v.fpscr(FPSCR_FR) := '0';
1412                 v.fpscr(FPSCR_FI) := '0';
                     -- The quiet bit of a NaN sits just below the unit bit
                     -- (QNAN_BIT = UNIT_BIT - 1). Use the named constant, as DO_FRI
                     -- does, rather than a literal index: with UNIT_BIT = 56 a
                     -- literal 53 tests an ordinary fraction bit and misclassifies
                     -- signalling NaNs.
1413                 if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then
1414                     -- Signalling NAN
1415                     v.fpscr(FPSCR_VXSNAN) := '1';
1416                     invalid := '1';
1417                 end if;
1418
1419                 int_result := '1';
1420
1421                 case r.b.class is
1422                     when ZERO =>
1423                         arith_done := '1';
1424                     when FINITE =>
                             -- magnitude cannot fit: exponent >= 64 for any form,
                             -- or >= 32 for the word forms (insn(9) = '0')
1425                         if r.b.exponent >= to_signed(64, EXP_BITS) or
1426                             (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1427                             v.state := INT_OFLOW;
1428                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1429                             -- integer already, no rounding required,
1430                             -- shift into final position
1431                             -- set shift to exponent - 56
1432                             rs_con2 <= RSCON2_UNIT;
1433                             if r.insn(8) = '1' and r.b.negative = '1' then
                                     -- unsigned conversion of a negative value
1434                                 v.state := INT_OFLOW;
1435                             else
1436                                 v.state := INT_ISHIFT;
1437                             end if;
1438                         else
1439                             -- set shift to exponent - 52
1440                             rs_con2 <= RSCON2_52;
1441                             v.state := INT_SHIFT;
1442                         end if;
1443                     when INFINITY | NAN =>
1444                         v.state := INT_OFLOW;
1445                 end case;
1446
1447             when DO_FCFID =>
                     -- First cycle of the integer-to-float conversions.
                     -- The result sign is positive unless this is the signed form
                     -- (insn(8) = '0') with a negative operand, in which case R is
                     -- negated and the sign set. Zero completes here; anything else
                     -- continues in FINISH.
1448                 -- r.opsel_a = AIN_B
1449                 v.result_sign := '0';
1450                 if r.insn(8) = '0' and r.b.negative = '1' then
1451                     -- fcfid[s] with negative operand, set R = -B
                         -- (invert the A input and add one via carry-in)
1452                     opsel_ainv <= '1';
1453                     carry_in <= '1';
1454                     v.result_sign := '1';
1455                 end if;
1456                 v.result_class := r.b.class;
                     -- NOTE(review): RECON2_UNIT presumably seeds the result exponent
                     -- with the unit-bit position -- confirm.
1457                 re_con2 <= RECON2_UNIT;
1458                 re_set_result <= '1';
1459                 v.fpscr(FPSCR_FR) := '0';
1460                 v.fpscr(FPSCR_FI) := '0';
1461                 if r.b.class = ZERO then
1462                     arith_done := '1';
1463                 else
1464                     v.state := FINISH;
1465                 end if;
1466
1467             when DO_FADD =>
                     -- First cycle of fadd[s]/fsub[s].
                     -- Result sign, class and exponent default to A's. When both
                     -- operands are finite the state machine continues in ADD_1,
                     -- ADD_SHIFT or ADD_2 depending on how the exponents compare.
                     -- Otherwise the special cases (NaN, infinity, zero) are
                     -- resolved here.
1468                 -- fadd[s] and fsub[s]
1469                 -- r.opsel_a = AIN_A
1470                 v.result_sign := r.a.negative;
1471                 v.result_class := r.a.class;
1472                 re_sel1 <= REXP1_A;
1473                 re_set_result <= '1';
1474                 -- set shift to a.exp - b.exp
1475                 rs_sel1 <= RSH1_B;
1476                 rs_neg1 <= '1';
1477                 rs_sel2 <= RSH2_A;
1478                 v.fpscr(FPSCR_FR) := '0';
1479                 v.fpscr(FPSCR_FI) := '0';
                     -- is_add = '1' when the magnitudes are effectively added, i.e.
                     -- the operand signs combined with the add/subtract bit agree
1480                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1481                 v.is_subtract := not is_add;
1482                 if r.a.class = FINITE and r.b.class = FINITE then
1483                     v.add_bsmall := r.exp_cmp;
1484                     v.opsel_a := AIN_B;
1485                     if r.exp_cmp = '0' then
                             -- A's exponent is not the larger one: take the
                             -- provisional sign from the (possibly negated) B operand
1486                         v.result_sign := r.b.negative xnor r.insn(1);
1487                         if r.a.exponent = r.b.exponent then
1488                             v.state := ADD_2;
1489                         else
1490                             v.longmask := '0';
1491                             v.state := ADD_SHIFT;
1492                         end if;
1493                     else
1494                         v.state := ADD_1;
1495                     end if;
1496                 else
1497                     if r.a.class = NAN or r.b.class = NAN then
1498                         v.state := NAN_RESULT;
1499                     elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1500                         -- invalid operation, construct QNaN
1501                         v.fpscr(FPSCR_VXISI) := '1';
1502                         qnan_result := '1';
1503                         arith_done := '1';
                         elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
                             -- Zeros of opposite effective sign: the exact sum is
                             -- +0 in every rounding mode except round toward
                             -- -infinity (RN = 0b11), where it is -0. Without this
                             -- case the result would wrongly inherit A's sign.
                             v.result_sign := r.fpscr(FPSCR_RN+1) and r.fpscr(FPSCR_RN);
                             arith_done := '1';
1504                     elsif r.a.class = INFINITY or r.b.class = ZERO then
1505                         -- result is A; we're already set up to put A into R
1506                         arith_done := '1';
1507                     else
1508                         -- result is +/- B
1509                         v.opsel_a := AIN_B;
1510                         v.result_sign := r.b.negative xnor r.insn(1);
1511                         v.state := EXC_RESULT;
1512                     end if;
1513                 end if;
1514
1515             when DO_FMUL =>
                     -- First cycle of fmul[s] (operands A and C).
                     -- The result sign is the XOR of the operand signs. Finite
                     -- operands are renormalized if their unit bit is clear,
                     -- otherwise the multiplier is started and the state moves to
                     -- MULT_1. Non-finite and zero operands are resolved here.
1516                 -- fmul[s]
1517                 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1518                 v.result_sign := r.a.negative xor r.c.negative;
1519                 v.result_class := r.a.class;
1520                 v.fpscr(FPSCR_FR) := '0';
1521                 v.fpscr(FPSCR_FI) := '0';
                     -- both exponent selectors are enabled; the FMADD state
                     -- describes this same setup as "a.exp + c.exp"
1522                 re_sel1 <= REXP1_A;
1523                 re_sel2 <= REXP2_C;
1524                 re_set_result <= '1';
1525                 if r.a.class = FINITE and r.c.class = FINITE then
1526                     -- Renormalize denorm operands
1527                     if r.a.mantissa(UNIT_BIT) = '0' then
1528                         v.state := RENORM_A;
1529                     elsif r.c.mantissa(UNIT_BIT) = '0' then
1530                         v.state := RENORM_C;
1531                     else
1532                         f_to_multiply.valid <= '1';
1533                         v.state := MULT_1;
1534                     end if;
1535                 else
1536                     if r.a.class = NAN or r.c.class = NAN then
1537                         v.state := NAN_RESULT;
1538                     elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1539                         (r.a.class = ZERO and r.c.class = INFINITY) then
1540                         -- invalid operation, construct QNaN
                             -- NOTE(review): arith_done is not set in this branch;
                             -- presumably qnan_result completes the instruction in
                             -- code outside this state -- confirm.
1541                         v.fpscr(FPSCR_VXIMZ) := '1';
1542                         qnan_result := '1';
1543                     elsif r.a.class = ZERO or r.a.class = INFINITY then
1544                         -- result is +/- A
1545                         arith_done := '1';
1546                     else
1547                         -- r.c.class is ZERO or INFINITY
1548                         v.opsel_a := AIN_C;
1549                         v.state := EXC_RESULT;
1550                     end if;
1551                 end if;
1552
1553             when DO_FDIV =>
                     -- First cycle of division (A / B).
                     -- The result sign is the XOR of the operand signs. Finite
                     -- operands are renormalized if their unit bit is clear,
                     -- otherwise the state moves to DIV_2. The remaining
                     -- combinations of NaN, infinity and zero are resolved here.
1554                 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1555                 v.result_class := r.a.class;
1556                 v.fpscr(FPSCR_FR) := '0';
1557                 v.fpscr(FPSCR_FI) := '0';
1558                 v.result_sign := r.a.negative xor r.b.negative;
                     -- NOTE(review): with re_neg2 set this presumably yields
                     -- a.exp - b.exp as the result exponent -- confirm.
1559                 re_sel1 <= REXP1_A;
1560                 re_sel2 <= REXP2_B;
1561                 re_neg2 <= '1';
1562                 re_set_result <= '1';
1563                 v.count := "00";
1564                 if r.a.class = FINITE and r.b.class = FINITE then
1565                     -- Renormalize denorm operands
1566                     if r.a.mantissa(UNIT_BIT) = '0' then
1567                         v.state := RENORM_A;
1568                     elsif r.b.mantissa(UNIT_BIT) = '0' then
1569                         v.state := RENORM_B;
1570                     else
1571                         v.first := '1';
1572                         v.state := DIV_2;
1573                     end if;
1574                 else
1575                     if r.a.class = NAN or r.b.class = NAN then
1576                         v.state := NAN_RESULT;
1577                     elsif r.b.class = INFINITY then
1578                         if r.a.class = INFINITY then
                                 -- infinity / infinity is invalid
1579                             v.fpscr(FPSCR_VXIDI) := '1';
1580                             qnan_result := '1';
1581                         else
                                 -- zero or finite / infinity gives zero
1582                             v.result_class := ZERO;
1583                         end if;
1584                         arith_done := '1';
1585                     elsif r.b.class = ZERO then
1586                         if r.a.class = ZERO then
                                 -- zero / zero is invalid
1587                             v.fpscr(FPSCR_VXZDZ) := '1';
1588                             qnan_result := '1';
1589                         else
                                 -- only finite / zero raises zero_divide;
                                 -- infinity / zero is infinity without it
1590                             if r.a.class = FINITE then
1591                                 zero_divide := '1';
1592                             end if;
1593                             v.result_class := INFINITY;
1594                         end if;
1595                         arith_done := '1';
1596                     else -- r.b.class = FINITE, result_class = r.a.class
1597                         arith_done := '1';
1598                     end if;
1599                 end if;
1600
1601             when DO_FSEL =>
                     -- Select between C and B based on A: C is chosen when A is zero
                     -- (either sign) or is non-negative and not a NaN; otherwise B.
                     -- The chosen operand is delivered through EXC_RESULT.
1602                 v.fpscr(FPSCR_FR) := '0';
1603                 v.fpscr(FPSCR_FI) := '0';
1604                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1605                     v.opsel_a := AIN_C;
1606                     v.result_sign := r.c.negative;
1607                 else
1608                     v.opsel_a := AIN_B;
1609                     v.result_sign := r.b.negative;
1610                 end if;
                     -- NOTE(review): presumably so that a selected signalling NaN is
                     -- passed through unmodified -- confirm.
1611                 v.quieten_nan := '0';
1612                 v.state := EXC_RESULT;
1613
1614             when DO_FSQRT =>
                     -- First cycle of square root of B.
                     -- Negative finite values and negative infinity raise VXSQRT
                     -- and produce a QNaN. Zero and positive infinity return B.
                     -- NaN goes to NAN_RESULT. Positive finite values go to
                     -- RENORM_B, SQRT_1 or RENORM_B2 depending on the unit bit
                     -- and the low exponent bit.
1615                 -- r.opsel_a = AIN_B
1616                 v.result_class := r.b.class;
1617                 v.result_sign := r.b.negative;
1618                 v.fpscr(FPSCR_FR) := '0';
1619                 v.fpscr(FPSCR_FI) := '0';
1620                 re_sel2 <= REXP2_B;
1621                 re_set_result <= '1';
1622                 case r.b.class is
1623                     when FINITE =>
1624                         if r.b.negative = '1' then
1625                             v.fpscr(FPSCR_VXSQRT) := '1';
1626                             qnan_result := '1';
1627                         elsif r.b.mantissa(UNIT_BIT) = '0' then
                                 -- unit bit clear: operand needs normalizing first
1628                             v.state := RENORM_B;
1629                         elsif r.b.exponent(0) = '0' then
                                 -- even exponent
1630                             v.state := SQRT_1;
1631                         else
                                 -- odd exponent
1632                             -- set shift to 1
1633                             rs_con2 <= RSCON2_1;
1634                             v.state := RENORM_B2;
1635                         end if;
1636                     when NAN =>
1637                         v.state := NAN_RESULT;
1638                     when ZERO =>
1639                         -- result is B
1640                         arith_done := '1';
1641                     when INFINITY =>
1642                         if r.b.negative = '1' then
1643                             v.fpscr(FPSCR_VXSQRT) := '1';
1644                             qnan_result := '1';
1645                         -- else result is B
1646                         end if;
1647                         arith_done := '1';
1648                 end case;
1649
1650             when DO_FRE =>
                     -- First cycle of the reciprocal estimate of B.
                     -- Infinity gives zero and zero gives infinity with zero_divide
                     -- raised, both keeping B's sign. NaN goes to NAN_RESULT.
                     -- Finite values go to FRE_1, via RENORM_B if the unit bit is
                     -- clear.
1651                 -- r.opsel_a = AIN_B
1652                 v.result_class := r.b.class;
1653                 v.result_sign := r.b.negative;
1654                 v.fpscr(FPSCR_FR) := '0';
1655                 v.fpscr(FPSCR_FI) := '0';
1656                 re_sel2 <= REXP2_B;
1657                 re_set_result <= '1';
1658                 case r.b.class is
1659                     when FINITE =>
1660                         if r.b.mantissa(UNIT_BIT) = '0' then
1661                             v.state := RENORM_B;
1662                         else
1663                             v.state := FRE_1;
1664                         end if;
1665                     when NAN =>
1666                         v.state := NAN_RESULT;
1667                     when INFINITY =>
1668                         v.result_class := ZERO;
1669                         arith_done := '1';
1670                     when ZERO =>
1671                         v.result_class := INFINITY;
1672                         zero_divide := '1';
1673                         arith_done := '1';
1674                 end case;
1675
1676             when DO_FRSQRTE =>
                     -- First cycle of the reciprocal square root estimate of B.
                     -- Negative finite values and negative infinity raise VXSQRT
                     -- and produce a QNaN. Positive infinity gives zero. Zero gives
                     -- infinity with zero_divide raised. NaN goes to NAN_RESULT.
                     -- Positive finite values go to RENORM_B, RSQRT_1 or RENORM_B2
                     -- depending on the unit bit and the low exponent bit.
1677                 -- r.opsel_a = AIN_B
1678                 v.result_class := r.b.class;
1679                 v.result_sign := r.b.negative;
1680                 v.fpscr(FPSCR_FR) := '0';
1681                 v.fpscr(FPSCR_FI) := '0';
1682                 re_sel2 <= REXP2_B;
1683                 re_set_result <= '1';
1684                 -- set shift to 1
1685                 rs_con2 <= RSCON2_1;
1686                 case r.b.class is
1687                     when FINITE =>
1688                         if r.b.negative = '1' then
1689                             v.fpscr(FPSCR_VXSQRT) := '1';
1690                             qnan_result := '1';
1691                         elsif r.b.mantissa(UNIT_BIT) = '0' then
                                 -- unit bit clear: operand needs normalizing first
1692                             v.state := RENORM_B;
1693                         elsif r.b.exponent(0) = '0' then
                                 -- even exponent
1694                             v.state := RSQRT_1;
1695                         else
                                 -- odd exponent
1696                             v.state := RENORM_B2;
1697                         end if;
1698                     when NAN =>
1699                         v.state := NAN_RESULT;
1700                     when INFINITY =>
1701                         if r.b.negative = '1' then
1702                             v.fpscr(FPSCR_VXSQRT) := '1';
1703                             qnan_result := '1';
1704                         else
1705                             v.result_class := ZERO;
1706                         end if;
1707                         arith_done := '1';
1708                     when ZERO =>
1709                         v.result_class := INFINITY;
1710                         zero_divide := '1';
1711                         arith_done := '1';
1712                 end case;
1713
1714             when DO_FMADD =>
                     -- First cycle of the fused multiply-add family (A * C +/- B).
                     -- With A and C finite and B finite or zero, the operands are
                     -- renormalized if needed and the state moves to MULT_1 (B is
                     -- zero), FMADD_0 (addend bigger) or FMADD_1 (product bigger).
                     -- All other operand classes are resolved here.
1715                 -- fmadd, fmsub, fnmadd, fnmsub
1716                 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1717                 -- else AIN_B
1718                 v.result_sign := r.a.negative;
1719                 v.result_class := r.a.class;
1720                 -- put a.exp + c.exp into result_exp
1721                 re_sel1 <= REXP1_A;
1722                 re_sel2 <= REXP2_C;
1723                 re_set_result <= '1';
1724                 -- put b.exp into shift
1725                 rs_sel1 <= RSH1_B;
1726                 v.fpscr(FPSCR_FR) := '0';
1727                 v.fpscr(FPSCR_FI) := '0';
                     -- is_add = '1' when product and addend magnitudes are
                     -- effectively added: all three operand signs and instruction
                     -- bit 1 are combined
1728                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
                     -- instruction bit 2 is latched as the negate flag
                     -- NOTE(review): presumably the fnmadd/fnmsub forms -- confirm
1729                 v.negate := r.insn(2);
1730                 v.is_subtract := not is_add;
1731                 if r.a.class = FINITE and r.c.class = FINITE and
1732                     (r.b.class = FINITE or r.b.class = ZERO) then
1733                     -- Make sure A and C are normalized
1734                     if r.a.mantissa(UNIT_BIT) = '0' then
1735                         v.state := RENORM_A;
1736                     elsif r.c.mantissa(UNIT_BIT) = '0' then
1737                         v.state := RENORM_C;
1738                     elsif r.b.class = ZERO then
1739                         -- no addend, degenerates to multiply
1740                         v.result_sign := r.a.negative xor r.c.negative;
1741                         f_to_multiply.valid <= '1';
1742                         v.is_multiply := '1';
1743                         v.state := MULT_1;
1744                     elsif r.madd_cmp = '0' then
1745                         -- addend is bigger, do multiply first
1746                         v.result_sign := r.b.negative xnor r.insn(1);
1747                         f_to_multiply.valid <= '1';
1748                         v.first := '1';
1749                         v.state := FMADD_0;
1750                     else
1751                         -- product is bigger, shift B first
1752                         v.state := FMADD_1;
1753                     end if;
1754                 else
1755                     if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1756                         v.state := NAN_RESULT;
1757                     elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1758                         (r.a.class = INFINITY and r.c.class = ZERO) then
1759                         -- invalid operation, construct QNaN
1760                         v.fpscr(FPSCR_VXIMZ) := '1';
1761                         qnan_result := '1';
1762                     elsif r.a.class = INFINITY or r.c.class = INFINITY then
                             -- the product is infinite
1763                         if r.b.class = INFINITY and is_add = '0' then
1764                             -- invalid operation, construct QNaN
1765                             v.fpscr(FPSCR_VXISI) := '1';
1766                             qnan_result := '1';
1767                         else
1768                             -- result is infinity
1769                             v.result_class := INFINITY;
1770                             v.result_sign := r.a.negative xor r.c.negative;
1771                             arith_done := '1';
1772                         end if;
1773                     else
1774                         -- Here A is zero, C is zero, or B is infinity
1775                         -- Result is +/-B in all of those cases
1776                         v.opsel_a := AIN_B;
1777                         v.result_sign := r.b.negative xnor r.insn(1);
1778                         v.state := EXC_RESULT;
1779                     end if;
1780                 end if;
1781
1782             when RENORM_A =>
                     -- First step of renormalizing operand A: asserts rs_norm and
                     -- chooses which operand RENORM_A2 will see on the A input
                     -- (C when C is in use and denormalized, otherwise B).
1783                 rs_norm <= '1';
1784                 v.state := RENORM_A2;
1785                 if r.use_c = '1' and r.c.denorm = '1' then
1786                     v.opsel_a := AIN_C;
1787                 else
1788                     v.opsel_a := AIN_B;
1789                 end if;
1790
1791             when RENORM_A2 =>
                     -- Second step of renormalizing A: asserts set_a and updates the
                     -- result exponent, then decides what to do next.
                     -- NOTE(review): insn(4) = '1' appears to identify fmul/fmadd
                     -- (C operand used) and '0' fdiv, per the comment below --
                     -- confirm against the decode.
1792                 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1793                 set_a := '1';
1794                 re_sel2 <= REXP2_NE;
1795                 re_set_result <= '1';
1796                 if r.insn(4) = '1' then
1797                     if r.c.mantissa(UNIT_BIT) = '1' then
                             -- C is already normalized
1798                         if r.insn(3) = '0' or r.b.class = ZERO then
                                 -- no addend to consider: start the multiply
1799                             v.first := '1';
1800                             v.state := MULT_1;
1801                         else
                                 -- recompute madd_cmp with the updated exponent,
                                 -- then re-enter DO_FMADD
1802                             v.madd_cmp := '0';
1803                             if new_exp + 1 >= r.b.exponent then
1804                                 v.madd_cmp := '1';
1805                             end if;
1806                             v.opsel_a := AIN_B;
1807                             v.state := DO_FMADD;
1808                         end if;
1809                     else
1810                         v.state := RENORM_C;
1811                     end if;
1812                 else
1813                     if r.b.mantissa(UNIT_BIT) = '1' then
                             -- B is already normalized: start the division
1814                         v.first := '1';
1815                         v.state := DIV_2;
1816                     else
1817                         v.state := RENORM_B;
1818                     end if;
1819                 end if;
1820
1821             when RENORM_B =>
                     -- First step of renormalizing operand B: asserts rs_norm,
                     -- copies r.is_sqrt into renorm_sqrt and moves to RENORM_B2.
1822                 rs_norm <= '1';
1823                 renorm_sqrt := r.is_sqrt;
1824                 v.state := RENORM_B2;
1825
1826             when RENORM_B2 =>
                     -- Second step of renormalizing B: asserts set_b, updates the
                     -- result exponent, selects B on the A input and moves to LOOKUP.
1827                 set_b := '1';
1828                 re_sel2 <= REXP2_NE;
1829                 re_set_result <= '1';
1830                 v.opsel_a := AIN_B;
1831                 v.state := LOOKUP;
1832
1833             when RENORM_C =>
                     -- First step of renormalizing operand C: asserts rs_norm and
                     -- moves to RENORM_C2.
1834                 rs_norm <= '1';
1835                 v.state := RENORM_C2;
1836
1837             when RENORM_C2 =>
                     -- Second step of renormalizing C: asserts set_c and updates the
                     -- result exponent, then either starts the multiply or returns
                     -- to DO_FMADD with madd_cmp recomputed.
1838                 set_c := '1';
1839                 re_sel2 <= REXP2_NE;
1840                 re_set_result <= '1';
1841                 if r.insn(3) = '0' or r.b.class = ZERO then
                         -- no addend to consider: start the multiply
1842                     v.first := '1';
1843                     v.state := MULT_1;
1844                 else
                         -- madd_cmp = '1' when new_exp + 1 >= b.exp
1845                     v.madd_cmp := '0';
1846                     if new_exp + 1 >= r.b.exponent then
1847                         v.madd_cmp := '1';
1848                     end if;
1849                     v.opsel_a := AIN_B;
1850                     v.state := DO_FMADD;
1851                 end if;
1852
1853             when ADD_1 =>
                     -- Entered from DO_FADD when r.exp_cmp = '1'. Sets the result
                     -- exponent from B, sets the shift count to b.exp - a.exp and
                     -- moves to ADD_SHIFT with longmask cleared.
1854                 -- transferring B to R
1855                 re_sel2 <= REXP2_B;
1856                 re_set_result <= '1';
1857                 -- set shift to b.exp - a.exp
1858                 rs_sel1 <= RSH1_B;
1859                 rs_sel2 <= RSH2_A;
1860                 rs_neg2 <= '1';
1861                 v.longmask := '0';
1862                 v.state := ADD_SHIFT;
1863
1864             when ADD_SHIFT =>
                     -- Takes the shifter output as the result, records s_nz in the
                     -- X flag, and selects the other addend on the A input ready
                     -- for ADD_2.
1865                 -- r.shift = - exponent difference, r.longmask = 0
1866                 opsel_r <= RES_SHIFT;
1867                 re_sel2 <= REXP2_NE;
1868                 re_set_result <= '1';
                     -- NOTE(review): s_nz presumably indicates nonzero bits shifted
                     -- out (sticky) -- confirm where s_nz is computed.
1869                 v.x := s_nz;
1870                 set_x := '1';
1871                 v.longmask := r.single_prec;
1872                 if r.add_bsmall = '1' then
1873                     v.opsel_a := AIN_A;
1874                 else
1875                     v.opsel_a := AIN_B;
1876                 end if;
1877                 v.state := ADD_2;
1878
1879             when ADD_2 =>
                     -- Performs the add or subtract: the B input of the adder is R,
                     -- inverted for a subtract. The carry-in that would complete
                     -- the two's complement is withheld when r.x is set.
1880                 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1881                 opsel_b <= BIN_R;
1882                 opsel_binv <= r.is_subtract;
1883                 carry_in <= r.is_subtract and not r.x;
1884                 -- set shift to -1
1885                 rs_con2 <= RSCON2_1;
1886                 rs_neg2 <= '1';
1887                 v.state := ADD_3;
1888
1889             when ADD_3 =>
                     -- Classifies the raw sum/difference in R and picks the next
                     -- step: negate a negative result, shift right on overflow,
                     -- round directly when already normalized, finish on an exact
                     -- zero, or normalize otherwise.
1890                 -- check for overflow or negative result (can't get both)
1891                 -- r.shift = -1
1892                 re_sel2 <= REXP2_NE;
1893                 if r.r(63) = '1' then
1894                     -- result is opposite sign to expected
                         -- (negate R: invert the A input and add one)
1895                     v.result_sign := not r.result_sign;
1896                     opsel_ainv <= '1';
1897                     carry_in <= '1';
1898                     v.state := FINISH;
1899                 elsif r.r(UNIT_BIT + 1) = '1' then
1900                     -- sum overflowed, shift right
1901                     opsel_r <= RES_SHIFT;
1902                     re_set_result <= '1';
1903                     set_x := '1';
1904                     if exp_huge = '1' then
1905                         v.state := ROUND_OFLOW;
1906                     else
1907                         v.state := ROUNDING;
1908                     end if;
1909                 elsif r.r(UNIT_BIT) = '1' then
                         -- already normalized
1910                     set_x := '1';
1911                     v.state := ROUNDING;
1912                 elsif (r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
1913                     -- r.x must be zero at this point
1914                     v.result_class := ZERO;
                         if r.is_subtract = '1' then
                             -- Exact cancellation: the result is +0 in every
                             -- rounding mode except round toward -infinity
                             -- (RN = 0b11), where it is -0. Without this the zero
                             -- would keep the sign of the larger-magnitude operand.
                             v.result_sign := r.fpscr(FPSCR_RN+1) and r.fpscr(FPSCR_RN);
                         end if;
1915                     arith_done := '1';
1916                 else
                         -- leading bit is below the unit bit: normalize
1917                     rs_norm <= '1';
1918                     v.state := NORMALIZE;
1919                 end if;
1920
1921             when CMP_1 =>
                     -- Compare step 1: subtracts R from the A input (A + not R + 1)
                     -- so that CMP_2 can examine the difference.
1922                 -- r.opsel_a = AIN_A
1923                 opsel_b <= BIN_R;
1924                 opsel_binv <= '1';
1925                 carry_in <= '1';
1926                 v.state := CMP_2;
1927
1928             when CMP_2 =>  -- convert the sign/zero status of r.r into the 4-bit compare result and finish
1929                 if r.r(63) = '1' then
1930                     -- A is smaller in magnitude
1931                     v.cr_result := not r.a.negative & r.a.negative & "00";
1932                 elsif (r_hi_nz or r_lo_nz) = '0' then  -- neither non-zero flag set: presumably the difference is zero, i.e. equal magnitudes
1933                     v.cr_result := "0010";
1934                 else  -- remaining case (not negative, not zero): mirror image of the first branch
1935                     v.cr_result := r.a.negative & not r.a.negative & "00";
1936                 end if;
1937                 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;  -- same four bits are copied into the FPSCR FL..FU field
1938                 v.instr_done := '1';
1939
1940             when MULT_1 =>  -- hand the operands to the multiplier and wait for the product
1941                 f_to_multiply.valid <= r.first;  -- request valid follows r.first (r.first is cleared outside this excerpt; presumably a one-cycle start pulse)
1942                 opsel_r <= RES_MULT;
1943                 if multiply_to_f.valid = '1' then  -- multiplier signals a valid result: move on to FINISH
1944                     v.state := FINISH;
1945                 end if;
1946
1947             when FMADD_0 =>  -- fused multiply-add, addend-is-bigger path: wait for the product, then align via ADD_SHIFT
1948                 -- r.shift is b.exp, so new_exp is a.exp + c.exp - b.exp
1949                 -- (first time through; subsequent times we preserve v.shift)
1950                 -- Addend is bigger here
1951                 -- set shift to a.exp + c.exp - b.exp
1952                 -- note v.shift is at most -2 here
1953                 if r.first = '1' then
1954                     rs_sel1 <= RSH1_NE;  -- first cycle: take the shift count from new_exp
1955                 else
1956                     rs_sel1 <= RSH1_S;  -- later cycles: keep the shift count already held
1957                 end if;
1958                 opsel_r <= RES_MULT;
1959                 opsel_s <= S_MULT;
1960                 set_s := '1';
1961                 if multiply_to_f.valid = '1' then  -- product has arrived
1962                     v.longmask := '0';
1963                     v.state := ADD_SHIFT;
1964                 end if;
1965
1966             when FMADD_1 =>  -- fused multiply-add, product-is-bigger path: fix the result sign and start aligning B
1967                 -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp
1968                 -- product is bigger here
1969                 -- shift B right and use it as the addend to the multiplier
1970                 -- for subtract, multiplier does B - A * C
1971                 v.result_sign := r.a.negative xor r.c.negative xor r.is_subtract;
1972                 re_sel2 <= REXP2_B;
1973                 re_set_result <= '1';
1974                 -- set shift to b.exp - result_exp + 64
1975                 rs_sel1 <= RSH1_NE;
1976                 rs_neg1 <= '1';
1977                 rs_con2 <= RSCON2_64;
1978                 v.state := FMADD_2;
1979
1980             when FMADD_2 =>  -- capture the shifter output in S, then remove the +64 bias from the shift count
1981                 -- Product is potentially bigger here
1982                 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1983                 set_s := '1';
1984                 opsel_s <= S_SHIFT;
1985                 -- set shift to r.shift - 64
1986                 rs_sel1 <= RSH1_S;
1987                 rs_con2 <= RSCON2_64;
1988                 rs_neg2 <= '1';
1989                 v.state := FMADD_3;
1990
1991             when FMADD_3 =>  -- load R from the shifter and arm r.first for the multiply issued in FMADD_4
1992                 -- r.shift = addend exp - product exp
1993                 opsel_r <= RES_SHIFT;
1994                 re_sel2 <= REXP2_NE;
1995                 re_set_result <= '1';
1996                 v.first := '1';
1997                 v.state := FMADD_4;
1998
1999             when FMADD_4 =>  -- issue the multiply-add (addend selected by MULADD_RS) and wait for the result
2000                 msel_add <= MULADD_RS;
2001                 f_to_multiply.valid <= r.first;
2002                 msel_inv <= r.is_subtract;  -- inversion requested only for subtraction (FMADD_1 notes the multiplier then does B - A * C)
2003                 opsel_r <= RES_MULT;
2004                 opsel_s <= S_MULT;
2005                 set_s := '1';
2006                 if multiply_to_f.valid = '1' then
2007                     v.state := FMADD_5;
2008                 end if;
2009
2010             when FMADD_5 =>  -- make the multiply-add result non-negative and preload the shift count for FMADD_6
2011                 -- negate R:S:X if negative
2012                 if r.r(63) = '1' then
2013                     v.result_sign := not r.result_sign;
2014                     opsel_ainv <= '1';
2015                     carry_in <= not (s_nz or r.x);  -- presumably the +1 of the negation only carries into R when S and X are both zero - confirm
2016                     opsel_s <= S_NEG;
2017                     set_s := '1';
2018                 end if;
2019                 -- set shift to UNIT_BIT
2020                 rs_con2 <= RSCON2_UNIT;
2021                 v.state := FMADD_6;
2022
2023             when FMADD_6 =>  -- decide whether the multiply-add result is zero, lives only in S, is already normalized, or needs NORMALIZE
2024                 -- r.shift = UNIT_BIT (or 0, but only if r is now nonzero)
2025                 re_sel2 <= REXP2_NE;
2026                 rs_norm <= '1';
2027                 if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then  -- every tested part of R is zero
2028                     if s_nz = '0' then
2029                         -- must be a subtraction, and r.x must be zero
2030                         v.result_class := ZERO;
2031                         arith_done := '1';
2032                     else
2033                         -- R is all zeroes but there are non-zero bits in S
2034                         -- so shift them into R and set S to 0
2035                         opsel_r <= RES_SHIFT;
2036                         re_set_result <= '1';
2037                         set_s := '1';
2038                         v.state := FINISH;
2039                     end if;
2040                 elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then  -- unit bit set with the two bits above it clear: no normalization shift needed
2041                     v.state := FINISH;
2042                 else
2043                     v.state := NORMALIZE;
2044                 end if;
2045
2046             when LOOKUP =>  -- one-cycle wait for the table lookup, then dispatch on instruction bits 4, 3 and 2
2047                 -- r.opsel_a = AIN_B
2048                 -- wait one cycle for inverse_table[B] lookup
2049                 v.first := '1';
2050                 if r.insn(4) = '0' then
2051                     if r.insn(3) = '0' then  -- insn(4..3) = "00": divide iteration
2052                         v.state := DIV_2;
2053                     else  -- insn(4..3) = "01": square-root iteration
2054                         v.state := SQRT_1;
2055                     end if;
2056                 elsif r.insn(2) = '0' then  -- insn(4) = '1', insn(2) = '0': reciprocal-estimate state
2057                     v.state := FRE_1;
2058                 else  -- insn(4) = '1', insn(2) = '1': reciprocal-square-root-estimate state
2059                     v.state := RSQRT_1;
2060                 end if;
2061
2062             when DIV_2 =>  -- first half of a reciprocal refinement step for the divide
2063                 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
2064                 msel_1 <= MUL1_B;
2065                 msel_add <= MULADD_CONST;
2066                 msel_inv <= '1';
2067                 if r.count = 0 then
2068                     msel_2 <= MUL2_LUT;  -- first pass: second multiplier operand comes from the lookup table
2069                 else
2070                     msel_2 <= MUL2_P;  -- later passes: second operand is the previous product P
2071                 end if;
2072                 set_y := r.first;
2073                 pshift := '1';
2074                 f_to_multiply.valid <= r.first;
2075                 if multiply_to_f.valid = '1' then
2076                     v.first := '1';
2077                     v.count := r.count + 1;  -- DIV_3 leaves the loop once this counter reads 3
2078                     v.state := DIV_3;
2079                 end if;
2080
2081             when DIV_3 =>  -- second half of the refinement step; loops back to DIV_2 until r.count = 3
2082                 -- compute Y = P = P * Y
2083                 msel_1 <= MUL1_Y;
2084                 msel_2 <= MUL2_P;
2085                 f_to_multiply.valid <= r.first;
2086                 pshift := '1';
2087                 if multiply_to_f.valid = '1' then
2088                     v.first := '1';
2089                     if r.count = 3 then  -- r.count is bumped in DIV_2, so this is true after the third DIV_2/DIV_3 pass
2090                         v.state := DIV_4;
2091                     else
2092                         v.state := DIV_2;
2093                     end if;
2094                 end if;
2095
2096             when DIV_4 =>  -- multiply the dividend by the refined reciprocal to obtain the quotient estimate
2097                 -- compute R = P = A * Y (quotient)
2098                 msel_1 <= MUL1_A;
2099                 msel_2 <= MUL2_P;
2100                 set_y := r.first;
2101                 f_to_multiply.valid <= r.first;
2102                 pshift := '1';
2103                 mult_mask := '1';
2104                 if multiply_to_f.valid = '1' then
2105                     opsel_r <= RES_MULT;  -- capture the product in R only once it is valid
2106                     v.first := '1';
2107                     v.state := DIV_5;
2108                 end if;
2109
2110             when DIV_5 =>  -- form the remainder for the quotient estimate in R
2111                 -- compute P = A - B * R (remainder)
2112                 msel_1 <= MUL1_B;
2113                 msel_2 <= MUL2_R;
2114                 msel_add <= MULADD_A;
2115                 msel_inv <= '1';  -- inverted product, matching the "A - B * R" form stated above
2116                 f_to_multiply.valid <= r.first;
2117                 if multiply_to_f.valid = '1' then
2118                     v.state := DIV_6;
2119                 end if;
2120
2121             when DIV_6 =>  -- final quotient correction based on comparing the remainder P with B
2122                 -- r.opsel_a = AIN_R
2123                 -- test if remainder is 0 or >= B
2124                 if pcmpb_lt = '1' then
2125                     -- quotient is correct, set X if remainder non-zero
2126                     v.x := r.p(UNIT_BIT + 2) or px_nz;
2127                 else
2128                     -- quotient needs to be incremented by 1 in R-bit position
2129                     rbit_inc := '1';
2130                     opsel_b <= BIN_RND;
2131                     v.x := not pcmpb_eq;  -- sticky bit stays clear only when pcmpb_eq is set (presumably remainder exactly equal to B)
2132                 end if;
2133                 v.state := FINISH;
2134
2135             when FRE_1 =>  -- reciprocal estimate: negate the exponent, load R via misc_sel "0111", then normalize
2136                 re_sel1 <= REXP1_R;
2137                 re_neg1 <= '1';  -- exponent input 1 is negated (result exponent = -(selected exponent), before any other term)
2138                 re_set_result <= '1';
2139                 opsel_r <= RES_MISC;
2140                 misc_sel <= "0111";  -- same selector SQRT_1 uses to put the table value into R
2141                 -- set shift to 1
2142                 rs_con2 <= RSCON2_1;
2143                 v.state := NORMALIZE;
2144
2145             when FTDIV_1 =>  -- exponent range check for the ftdiv-style test; finishes early or continues via r.doing_ftdiv
2146                 v.cr_result(1) := exp_tiny or exp_huge;  -- CR bit 1 records an out-of-range exponent
2147                 -- set shift to a.exp
2148                 rs_sel2 <= RSH2_A;
2149                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
2150                     v.instr_done := '1';
2151                 else
2152                     v.doing_ftdiv := "10";  -- consumed outside this excerpt; presumably requests a second pass - confirm
2153                 end if;
2154
2155             when RSQRT_1 =>  -- reciprocal square-root estimate: load R via misc_sel "0111", use the negated REXP1_BHALF exponent, then normalize
2156                 opsel_r <= RES_MISC;
2157                 misc_sel <= "0111";
2158                 re_sel1 <= REXP1_BHALF;  -- presumably half of B's exponent, going by the name - confirm
2159                 re_neg1 <= '1';
2160                 re_set_result <= '1';
2161                 -- set shift to 1
2162                 rs_con2 <= RSCON2_1;
2163                 v.state := NORMALIZE;
2164
2165             when SQRT_1 =>  -- seed the square-root iteration from the lookup table and start the first multiply
2166                 -- put invsqr[B] in R and compute P = invsqr[B] * B
2167                 -- also transfer B (in R) to A
2168                 set_a := '1';
2169                 opsel_r <= RES_MISC;
2170                 misc_sel <= "0111";
2171                 msel_1 <= MUL1_B;
2172                 msel_2 <= MUL2_LUT;
2173                 f_to_multiply.valid <= '1';  -- unconditional start (not gated by r.first as in most other states)
2174                 -- set shift to -1
2175                 rs_con2 <= RSCON2_1;
2176                 rs_neg2 <= '1';
2177                 v.count := "00";  -- iteration counter for the SQRT_4..SQRT_7 loop
2178                 v.state := SQRT_2;
2179
2180             when SQRT_2 =>  -- halve the seed in R while the multiplier is still busy
2181                 -- shift R right one place
2182                 -- not expecting multiplier result yet
2183                 -- r.shift = -1
2184                 opsel_r <= RES_SHIFT;
2185                 re_sel2 <= REXP2_NE;
2186                 re_set_result <= '1';
2187                 v.first := '1';
2188                 v.state := SQRT_3;
2189
2190             when SQRT_3 =>  -- save R in Y and collect the product started in SQRT_1
2191                 -- put R into Y, wait for product from multiplier
2192                 msel_2 <= MUL2_R;
2193                 set_y := r.first;
2194                 pshift := '1';
2195                 mult_mask := '1';
2196                 if multiply_to_f.valid = '1' then
2197                     -- put result into R
2198                     opsel_r <= RES_MULT;
2199                     v.first := '1';
2200                     v.state := SQRT_4;
2201                 end if;
2202
2203             when SQRT_4 =>  -- start of one refinement iteration (loop body is SQRT_4..SQRT_7)
2204                 -- compute 1.5 - Y * P
2205                 msel_1 <= MUL1_Y;
2206                 msel_2 <= MUL2_P;
2207                 msel_add <= MULADD_CONST;
2208                 msel_inv <= '1';
2209                 f_to_multiply.valid <= r.first;
2210                 pshift := '1';
2211                 if multiply_to_f.valid = '1' then
2212                     v.state := SQRT_5;
2213                 end if;
2214
2215             when SQRT_5 =>  -- launch Y * P without waiting; SQRT_6 queues the next multiply behind it
2216                 -- compute Y = Y * P
2217                 msel_1 <= MUL1_Y;
2218                 msel_2 <= MUL2_P;
2219                 f_to_multiply.valid <= '1';  -- single-cycle state, so the request is asserted unconditionally
2220                 v.first := '1';
2221                 v.state := SQRT_6;
2222
2223             when SQRT_6 =>  -- queue the second multiply and wait for the first (Y * P) to complete
2224                 -- pipeline in R = R * P
2225                 msel_1 <= MUL1_R;
2226                 msel_2 <= MUL2_P;
2227                 f_to_multiply.valid <= r.first;
2228                 pshift := '1';
2229                 if multiply_to_f.valid = '1' then
2230                     v.first := '1';
2231                     v.state := SQRT_7;
2232                 end if;
2233
2234             when SQRT_7 =>  -- end of one refinement iteration: update Y and R, then loop or move on to the remainder check
2235                 -- first multiply is done, put result in Y
2236                 msel_2 <= MUL2_P;
2237                 set_y := r.first;
2238                 -- wait for second multiply (should be here already)
2239                 pshift := '1';
2240                 mult_mask := '1';
2241                 if multiply_to_f.valid = '1' then
2242                     -- put result into R
2243                     opsel_r <= RES_MULT;
2244                     v.first := '1';
2245                     v.count := r.count + 1;
2246                     if r.count < 2 then  -- r.count is 0, 1, 2 on successive passes, giving three iterations in total
2247                         v.state := SQRT_4;
2248                     else
2249                         v.first := '1';  -- redundant: v.first was already set to '1' a few lines above
2250                         v.state := SQRT_8;
2251                     end if;
2252                 end if;
2253
2254             when SQRT_8 =>  -- compute the signed error of the current root estimate
2255                 -- compute P = A - R * R, which can be +ve or -ve
2256                 -- we arranged for B to be put into A earlier
2257                 msel_1 <= MUL1_R;
2258                 msel_2 <= MUL2_R;
2259                 msel_add <= MULADD_A;
2260                 msel_inv <= '1';
2261                 pshift := '1';
2262                 f_to_multiply.valid <= r.first;
2263                 if multiply_to_f.valid = '1' then
2264                     v.first := '1';
2265                     v.state := SQRT_9;
2266                 end if;
2267
2268             when SQRT_9 =>  -- scale the error by Y to get the correction that SQRT_10 adds to R
2269                 -- compute P = P * Y
2270                 -- since Y is an estimate of 1/sqrt(B), this makes P an
2271                 -- estimate of the adjustment needed to R.  Since the error
2272                 -- could be negative and we have an unsigned multiplier, the
2273                 -- upper bits can be wrong, but it turns out the lowest 8 bits
2274                 -- are correct and are all we need (given 3 iterations through
2275                 -- SQRT_4 to SQRT_7).
2276                 msel_1 <= MUL1_Y;
2277                 msel_2 <= MUL2_P;
2278                 pshift := '1';
2279                 f_to_multiply.valid <= r.first;
2280                 if multiply_to_f.valid = '1' then
2281                     v.state := SQRT_10;
2282                 end if;
2283
2284             when SQRT_10 =>  -- apply the small signed correction to R and set the result exponent
2285                 -- Add the bottom 8 bits of P, sign-extended, onto R.
2286                 opsel_b <= BIN_PS8;
2287                 re_sel1 <= REXP1_BHALF;  -- presumably half of B's exponent, going by the name - confirm
2288                 re_set_result <= '1';
2289                 -- set shift to 1
2290                 rs_con2 <= RSCON2_1;
2291                 v.first := '1';
2292                 v.state := SQRT_11;
2293
2294             when SQRT_11 =>  -- compute the final remainder and prepare the comparison value used by SQRT_12
2295                 -- compute P = A - R * R (remainder)
2296                 -- also put 2 * R + 1 into B for comparison with P
2297                 msel_1 <= MUL1_R;
2298                 msel_2 <= MUL2_R;
2299                 msel_add <= MULADD_A;
2300                 msel_inv <= '1';
2301                 f_to_multiply.valid <= r.first;
2302                 shiftin := '1';  -- presumably supplies the "+ 1" by shifting a 1 into the low bit (r.shift was set to 1 in SQRT_10) - confirm
2303                 set_b := r.first;  -- B is written only while r.first is set
2304                 if multiply_to_f.valid = '1' then
2305                     v.state := SQRT_12;
2306                 end if;
2307
2308             when SQRT_12 =>  -- final square-root correction based on comparing the remainder P with B
2309                 -- test if remainder is 0 or >= B = 2*R + 1
2310                 if pcmpb_lt = '1' then
2311                     -- square root is correct, set X if remainder non-zero
2312                     v.x := r.p(UNIT_BIT + 2) or px_nz;
2313                 else
2314                     -- square root needs to be incremented by 1
2315                     carry_in <= '1';
2316                     v.x := not pcmpb_eq;  -- sticky bit stays clear only when pcmpb_eq is set (presumably remainder exactly equal to B)
2317                 end if;
2318                 v.state := FINISH;
2319
2320             when INT_SHIFT =>  -- float-to-integer conversion: first alignment shift, collecting shifted-out bits in X
2321                 -- r.shift = b.exponent - 52
2322                 opsel_r <= RES_SHIFT;
2323                 re_sel2 <= REXP2_NE;
2324                 re_set_result <= '1';
2325                 set_x := '1';
2326                 v.state := INT_ROUND;
2327                 -- set shift to -4 (== 52 - UNIT_BIT)
2328                 rs_con2 <= RSCON2_UNIT_52;
2329                 rs_neg2 <= '1';
2330
2331             when INT_ROUND =>  -- second alignment shift, rounding decision, and early detection of negative inputs to unsigned conversions
2332                 -- r.shift = -4 (== 52 - UNIT_BIT)
2333                 opsel_r <= RES_SHIFT;
2334                 re_sel2 <= REXP2_NE;
2335                 re_set_result <= '1';
2336                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);  -- third argument is '0' here; ROUNDING passes r.single_prec in that position
2337                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;  -- both round bits are stored in the FPSCR FR..FI field
2338                 -- Check for negative values that don't round to 0 for fcti*u*
2339                 if r.insn(8) = '1' and r.result_sign = '1' and
2340                     (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2341                     v.state := INT_OFLOW;
2342                 else
2343                     v.state := INT_FINAL;
2344                 end if;
2345
2346             when INT_ISHIFT =>  -- single alignment shift with no rounding step, going straight to INT_FINAL
2347                 -- r.shift = b.exponent - UNIT_BIT;
2348                 opsel_r <= RES_SHIFT;
2349                 re_sel2 <= REXP2_NE;
2350                 re_set_result <= '1';
2351                 v.state := INT_FINAL;
2352
2353             when INT_FINAL =>  -- produce the integer result and decide whether a separate overflow check is needed
2354                 -- Negate if necessary, and increment for rounding if needed
2355                 opsel_ainv <= r.result_sign;
2356                 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;  -- one carry serves both the negation's +1 and the rounding increment
2357                 -- Check for possible overflows
2358                 case r.insn(9 downto 8) is  -- insn(9) selects 32- vs 64-bit result, insn(8) selects signed vs unsigned
2359                     when "00" =>        -- fctiw[z]
2360                         need_check := r.r(31) or (r.r(30) and not r.result_sign);
2361                     when "01" =>        -- fctiwu[z]
2362                         need_check := r.r(31);
2363                     when "10" =>        -- fctid[z]
2364                         need_check := r.r(63) or (r.r(62) and not r.result_sign);
2365                     when others =>      -- fctidu[z]
2366                         need_check := r.r(63);
2367                 end case;
2368                 int_result := '1';
2369                 if need_check = '1' then  -- magnitude is close to the limit: let INT_CHECK look at the final value
2370                     v.state := INT_CHECK;
2371                 else
2372                     if r.fpscr(FPSCR_FI) = '1' then  -- inexact conversion also raises XX
2373                         v.fpscr(FPSCR_XX) := '1';
2374                     end if;
2375                     arith_done := '1';
2376                 end if;
2377
2378             when INT_CHECK =>  -- verify the converted integer did not overflow; substitute the RES_MISC value if it did
2379                 if r.insn(9) = '0' then  -- 32-bit conversions: the sign/top bit is bit 31
2380                     msb := r.r(31);
2381                 else  -- 64-bit conversions: bit 63
2382                     msb := r.r(63);
2383                 end if;
2384                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;  -- same selector encoding as INT_OFLOW; only takes effect if opsel_r picks RES_MISC below
2385                 if (r.insn(8) = '0' and msb /= r.result_sign) or
2386                     (r.insn(8) = '1' and msb /= '1') then  -- signed: MSB must equal the expected sign; unsigned: MSB must be 1
2387                     opsel_r <= RES_MISC;
2388                     v.fpscr(FPSCR_VXCVI) := '1';
2389                     invalid := '1';
2390                 else
2391                     if r.fpscr(FPSCR_FI) = '1' then  -- in range but inexact: raise XX
2392                         v.fpscr(FPSCR_XX) := '1';
2393                     end if;
2394                 end if;
2395                 int_result := '1';
2396                 arith_done := '1';
2397
2398             when INT_OFLOW =>  -- conversion is invalid: return the RES_MISC value and flag VXCVI
2399                 opsel_r <= RES_MISC;
2400                 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;  -- selector built from the conversion type and the sign
2401                 if r.b.class = NAN then
2402                     misc_sel(0) <= '1';  -- NaN input: override the sign bit of the selector with '1' (later signal assignment wins)
2403                 end if;
2404                 v.fpscr(FPSCR_VXCVI) := '1';
2405                 invalid := '1';
2406                 int_result := '1';
2407                 arith_done := '1';
2408
2409             when FRI_1 =>  -- align the operand with the shifter, collect shifted-out bits in X, then round
2410                 -- r.shift = b.exponent - 52
2411                 opsel_r <= RES_SHIFT;
2412                 re_sel2 <= REXP2_NE;
2413                 re_set_result <= '1';
2414                 set_x := '1';
2415                 v.state := ROUNDING;
2416
2417             when FINISH =>  -- common exit of the arithmetic paths: normalize if needed, otherwise pick the rounding state
2418                 if r.is_multiply = '1' and px_nz = '1' then  -- multiply with px_nz set: record inexactness in the sticky bit
2419                     v.x := '1';
2420                 end if;
2421                 -- set shift to new_exp - min_exp (N.B. rs_norm overrides this)
2422                 rs_sel1 <= RSH1_NE;
2423                 rs_con2 <= RSCON2_MINEXP;
2424                 rs_neg2 <= '1';
2425                 if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then  -- normalized means bits 63..UNIT_BIT hold exactly 1 (only the unit bit set)
2426                     rs_norm <= '1';
2427                     v.state := NORMALIZE;
2428                 else
2429                     set_x := '1';
2430                     if exp_tiny = '1' then
2431                         v.state := ROUND_UFLOW;
2432                     elsif exp_huge = '1' then
2433                         v.state := ROUND_OFLOW;
2434                     else
2435                         v.state := ROUNDING;
2436                     end if;
2437                 end if;
2438
2439             when NORMALIZE =>  -- shift R so the leading 1 sits at UNIT_BIT, then pick the rounding state
2440                 -- Shift so we have 7 leading zeroes, i.e. 63 - UNIT_BIT (we know R is non-zero)
2441                 -- r.shift = clz(r.r) - 7
2442                 opsel_r <= RES_SHIFT;
2443                 re_sel2 <= REXP2_NE;
2444                 re_set_result <= '1';
2445                 -- set shift to new_exp - min_exp
2446                 rs_sel1 <= RSH1_NE;
2447                 rs_con2 <= RSCON2_MINEXP;
2448                 rs_neg2 <= '1';
2449                 set_x := '1';
2450                 if exp_tiny = '1' then
2451                     v.state := ROUND_UFLOW;
2452                 elsif exp_huge = '1' then
2453                     v.state := ROUND_OFLOW;
2454                 else
2455                     v.state := ROUNDING;
2456                 end if;
2457
2458             when ROUND_UFLOW =>  -- exponent underflow handling; behaviour depends on the FPSCR UE enable bit
2459                 -- r.shift = - amount by which exponent underflows
2460                 v.tiny := '1';  -- remembered so ROUNDING can raise UX if the result turns out inexact
2461                 if r.fpscr(FPSCR_UE) = '0' then
2462                     -- disabled underflow exception case
2463                     -- have to denormalize before rounding
2464                     opsel_r <= RES_SHIFT;
2465                     re_sel2 <= REXP2_NE;
2466                     re_set_result <= '1';
2467                     set_x := '1';
2468                     v.state := ROUNDING;
2469                 else
2470                     -- enabled underflow exception case
2471                     -- if denormalized, have to normalize before rounding
2472                     v.fpscr(FPSCR_UX) := '1';
2473                     re_sel1 <= REXP1_R;
2474                     re_con2 <= RECON2_BIAS;  -- exponent is adjusted by the BIAS constant (not negated here, unlike ROUND_OFLOW)
2475                     re_set_result <= '1';
2476                     if r.r(UNIT_BIT) = '0' then
2477                         rs_norm <= '1';
2478                         v.state := NORMALIZE;
2479                     else
2480                         v.state := ROUNDING;
2481                     end if;
2482                 end if;
2483
2484             when ROUND_OFLOW =>  -- exponent overflow handling; behaviour depends on the FPSCR OE enable bit
2485                 v.fpscr(FPSCR_OX) := '1';
2486                 if r.fpscr(FPSCR_OE) = '0' then
2487                     -- disabled overflow exception
2488                     -- result depends on rounding mode
2489                     v.fpscr(FPSCR_XX) := '1';
2490                     v.fpscr(FPSCR_FI) := '1';
2491                     if r.round_mode(1 downto 0) = "00" or
2492                         (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then  -- mode "00", or mode bit 1 set with bit 0 equal to the result sign
2493                         v.result_class := INFINITY;
2494                         v.fpscr(FPSCR_FR) := '1';
2495                     else  -- other modes keep the finite class and return the largest number built below
2496                         v.fpscr(FPSCR_FR) := '0';
2497                     end if;
2498                     -- construct largest representable number
2499                     re_con2 <= RECON2_MAX;
2500                     re_set_result <= '1';
2501                     opsel_r <= RES_MISC;
2502                     misc_sel <= "001" & r.single_prec;  -- low selector bit distinguishes single from double precision
2503                     arith_done := '1';
2504                 else
2505                     -- enabled overflow exception
2506                     re_sel1 <= REXP1_R;
2507                     re_con2 <= RECON2_BIAS;
2508                     re_neg2 <= '1';  -- BIAS constant is negated, i.e. subtracted from the exponent
2509                     re_set_result <= '1';
2510                     v.state := ROUNDING;
2511                 end if;
2512
2513             when ROUNDING =>  -- compute the rounding decision, record FR/FI, and either finish or go apply the increment
2514                 opsel_mask <= '1';
2515                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2516                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;  -- round(1) lands in FR, round(0) in FI
2517                 if round(1) = '1' then
2518                     -- increment the LSB for the precision
2519                     opsel_b <= BIN_RND;
2520                     -- set shift to -1
2521                     rs_con2 <= RSCON2_1;
2522                     rs_neg2 <= '1';
2523                     v.state := ROUNDING_2;
2524                 else
2525                     if r.r(UNIT_BIT) = '0' then
2526                         -- result after masking could be zero, or could be a
2527                         -- denormalized result that needs to be renormalized
2528                         rs_norm <= '1';
2529                         v.state := ROUNDING_3;
2530                     else
2531                         arith_done := '1';
2532                     end if;
2533                 end if;
2534                 if round(0) = '1' then  -- inexact result: raise XX, and UX as well if ROUND_UFLOW marked the value tiny
2535                     v.fpscr(FPSCR_XX) := '1';
2536                     if r.tiny = '1' then
2537                         v.fpscr(FPSCR_UX) := '1';
2538                     end if;
2539                 end if;
2540
2541             when ROUNDING_2 =>  -- after the rounding increment: handle carry-out of the mantissa or a still-denormal value
2542                 -- Check for overflow during rounding
2543                 -- r.shift = -1
2544                 v.x := '0';
2545                 re_sel2 <= REXP2_NE;
2546                 if r.r(UNIT_BIT + 1) = '1' then  -- increment carried into the bit above the unit bit: shift right by one
2547                     opsel_r <= RES_SHIFT;
2548                     re_set_result <= '1';
2549                     if exp_huge = '1' then
2550                         v.state := ROUND_OFLOW;
2551                     else
2552                         arith_done := '1';
2553                     end if;
2554                 elsif r.r(UNIT_BIT) = '0' then
2555                     -- Do CLZ so we can renormalize the result
2556                     rs_norm <= '1';
2557                     v.state := ROUNDING_3;
2558                 else
2559                     arith_done := '1';
2560                 end if;
2561
2562             when ROUNDING_3 =>  -- post-rounding cleanup: detect a zero result, otherwise renormalize (and denormalize again if too small)
2563                 -- r.shift = clz(r.r) - 9  (NOTE(review): NORMALIZE documents the rs_norm result as clz(r.r) - 7; one of the two looks stale - confirm)
2564                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);  -- the low part is ignored for single precision
2565                 re_sel2 <= REXP2_NE;
2566                 -- set shift to new_exp - min_exp (== -1022)
2567                 rs_sel1 <= RSH1_NE;
2568                 rs_con2 <= RSCON2_MINEXP;
2569                 rs_neg2 <= '1';
2570                 if mant_nz = '0' then
2571                     v.result_class := ZERO;
2572                     arith_done := '1';
2573                 else
2574                     -- Renormalize result after rounding
2575                     opsel_r <= RES_SHIFT;
2576                     re_set_result <= '1';
2577                     v.denorm := exp_tiny;
2578                     if new_exp < to_signed(-1022, EXP_BITS) then  -- exponent below -1022: DENORM shifts the value back
2579                         v.state := DENORM;
2580                     else
2581                         arith_done := '1';
2582                     end if;
2583                 end if;
2584
2585             when DENORM =>  -- final shift for a result whose exponent is below -1022, then done
2586                 -- r.shift = result_exp - -1022
2587                 opsel_r <= RES_SHIFT;
2588                 re_sel2 <= REXP2_NE;
2589                 re_set_result <= '1';
2590                 arith_done := '1';
2591
2592             when NAN_RESULT =>  -- a used operand is a NaN: flag signalling NaNs and choose which operand becomes the result
2593                 v.negate := '0';
2594                 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
2595                     (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or
2596                     (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then  -- any used NaN operand with its QNAN_BIT clear
2597                     -- Signalling NAN
2598                     v.fpscr(FPSCR_VXSNAN) := '1';
2599                     invalid := '1';
2600                 end if;
2601                 if r.use_a = '1' and r.a.class = NAN then  -- selection priority is A, then B, then C
2602                     v.opsel_a := AIN_A;
2603                     v.result_sign := r.a.negative;
2604                 elsif r.use_b = '1' and r.b.class = NAN then
2605                     v.opsel_a := AIN_B;
2606                     v.result_sign := r.b.negative;
2607                 elsif r.use_c = '1' and r.c.class = NAN then
2608                     v.opsel_a := AIN_C;
2609                     v.result_sign := r.c.negative;
2610                 end if;
2611                 v.state := EXC_RESULT;
2612
2613             when EXC_RESULT =>  -- return one of the input operands as the result: copy its exponent and class, then finish
2614                 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2615                 case r.opsel_a is
2616                     when AIN_B =>
2617                         re_sel2 <= REXP2_B;
2618                         v.result_class := r.b.class;
2619                     when AIN_C =>
2620                         re_sel2 <= REXP2_C;
2621                         v.result_class := r.c.class;
2622                     when others =>  -- AIN_A and any other selector value fall back to operand A
2623                         re_sel1 <= REXP1_A;
2624                         v.result_class := r.a.class;
2625                 end case;
2626                 re_set_result <= '1';
2627                 arith_done := '1';
2628
2629             when DO_IDIVMOD =>  -- entry point for integer divide/modulo: handle zero operands, otherwise start normalizing B
2630                 -- r.opsel_a = AIN_B
2631                 v.result_sign := r.is_signed and (r.a.negative xor (r.b.negative and not r.divmod));  -- signed only: sign(A) xor sign(B), or just sign(A) when r.divmod is set
2632                 if r.b.class = ZERO then
2633                     -- B is zero, signal overflow
2634                     v.int_ovf := '1';
2635                     v.state := IDIV_ZERO;
2636                 elsif r.a.class = ZERO then
2637                     -- A is zero, result is zero (both for div and for mod)
2638                     v.state := IDIV_ZERO;
2639                 else
2640                     -- take absolute value for signed division, and
2641                     -- normalize and round up B to 8.56 format, like fcfid[u]
2642                     if r.is_signed = '1' and r.b.negative = '1' then
2643                         opsel_ainv <= '1';
2644                         carry_in <= '1';
2645                     end if;
2646                     v.result_class := FINITE;
2647                     re_con2 <= RECON2_UNIT;
2648                     re_set_result <= '1';
2649                     v.state := IDIV_NORMB;
2650                 end if;
2651             when IDIV_NORMB =>  -- request the normalization shift count for the divisor and keep a copy of it in C
2652                 -- do count-leading-zeroes on B (now in R)
2653                 rs_norm <= '1';
2654                 -- save the original value of B or |B| in C
2655                 set_c := '1';
2656                 v.state := IDIV_NORMB2;
2657             when IDIV_NORMB2 =>  -- apply the normalization shift to the divisor held in R
2658                 -- get B into the range [1, 2) in 8.56 format
2659                 set_x := '1';           -- record if any 1 bits shifted out
2660                 opsel_r <= RES_SHIFT;
2661                 re_sel2 <= REXP2_NE;
2662                 re_set_result <= '1';
2663                 v.state := IDIV_NORMB3;
2664             when IDIV_NORMB3 =>
2665                 -- add the X bit onto R to round up B
2666                 carry_in <= r.x;
2667                 -- prepare to do count-leading-zeroes on A
2668                 v.opsel_a := AIN_A;
2669                 v.state := IDIV_CLZA;
2670             when IDIV_CLZA =>
                     -- Stores the normalized, rounded-up divisor and passes the
                     -- dividend (negated if needed) through the adder.
2671                 set_b := '1';           -- put R back into B
2672                 -- r.opsel_a = AIN_A
                     -- take the absolute value of A for signed operations
2673                 if r.is_signed = '1' and r.a.negative = '1' then
2674                     opsel_ainv <= '1';
2675                     carry_in <= '1';
2676                 end if;
2677                 re_con2 <= RECON2_UNIT;
2678                 re_set_result <= '1';
2679                 v.opsel_a := AIN_C;
2680                 v.state := IDIV_CLZA2;
2681             when IDIV_CLZA2 =>
2682                 -- r.opsel_a = AIN_C
2683                 rs_norm <= '1';
2684                 -- write the dividend back into A in case we negated it
2685                 set_a_mant := '1';
2686                 -- while doing the count-leading-zeroes on A,
2687                 -- also compute A - B to tell us whether A >= B
2688                 -- (using the original value of B, which is now in C)
                     -- i.e. the adder forms R + (not C) + 1
2689                 opsel_b <= BIN_R;
2690                 opsel_ainv <= '1';
2691                 carry_in <= '1';
2692                 v.state := IDIV_CLZA3;
2693             when IDIV_CLZA3 =>
                     -- Compares the magnitudes of A and B and picks the next step:
                     -- overflow, an immediate zero result, or the Newton-Raphson
                     -- reciprocal computation.
2694                 -- save the exponent of A (but don't overwrite the mantissa)
2695                 set_a_exp := '1';
2696                 re_sel2 <= REXP2_NE;
2697                 re_set_result <= '1';
                     -- div_close flags that A and B have the same exponent
2698                 v.div_close := '0';
2699                 if new_exp = r.b.exponent then
2700                     v.div_close := '1';
2701                 end if;
                     -- default next state; overridden below for the early exits
2702                 v.state := IDIV_NR0;
                     -- With equal exponents the comparison falls back to bit 63 of
                     -- R, which is tested as the sign of the A - B difference.
2703                 if new_exp > r.b.exponent or (v.div_close = '1' and r.r(63) = '0') then
2704                     -- A >= B, overflow if extended division
2705                     if r.divext = '1' then
2706                         v.int_ovf := '1';
2707                         -- return 0 in overflow cases
2708                         v.state := IDIV_ZERO;
2709                     end if;
2710                 else
2711                     -- A < B, result is zero for normal division
2712                     if r.divmod = '0' and r.divext = '0' then
2713                         v.state := IDIV_ZERO;
2714                     end if;
2715                 end if;
2716             when IDIV_NR0 =>
                     -- Starts the Newton-Raphson computation of 1/B: chooses the
                     -- initial iteration count and launches the first multiply
                     -- using the lookup-table estimate.
2717                 -- reduce number of Newton-Raphson iterations for small A
                     -- (IDIV_NR1 increments the count and IDIV_NR2 leaves the loop
                     -- once it reads "11", so a higher start means fewer passes)
2718                 if r.divext = '1' or r.result_exp >= to_signed(32, EXP_BITS) then
2719                     v.count := "00";
2720                 elsif r.result_exp >= to_signed(16, EXP_BITS) then
2721                     v.count := "01";
2722                 else
2723                     v.count := "10";
2724                 end if;
2725                 -- first NR iteration does Y = LUT; P = 2 - B * LUT
2726                 msel_1 <= MUL1_B;
2727                 msel_add <= MULADD_CONST;
2728                 msel_inv <= '1';
2729                 msel_2 <= MUL2_LUT;
2730                 set_y := '1';
2731                 if r.b.mantissa(UNIT_BIT + 1) = '1' then
2732                     -- rounding up of the mantissa caused overflow, meaning the
2733                     -- normalized B is 2.0.  Since this is outside the range
2734                     -- of the LUT, just use 0.5 as the estimated inverse.
2735                     v.state := IDIV_USE0_5;
2736                 else
2737                     -- start the first multiply now
2738                     f_to_multiply.valid <= '1';
2739                     -- note we don't set v.first, thus the following IDIV_NR1
2740                     -- state doesn't start a multiply (we already did that)
2741                     v.state := IDIV_NR1;
2742                 end if;
2743             when IDIV_NR1 =>
                     -- First half of a Newton-Raphson pass.  The state is held
                     -- until the multiplier reports a valid result.
2744                 -- subsequent NR iterations do Y = P; P = 2 - B * P
2745                 msel_1 <= MUL1_B;
2746                 msel_add <= MULADD_CONST;
2747                 msel_inv <= '1';
2748                 msel_2 <= MUL2_P;
                     -- Y is captured and a multiply is started only when r.first
                     -- is set
2749                 set_y := r.first;
2750                 pshift := '1';
2751                 f_to_multiply.valid <= r.first;
2752                 if multiply_to_f.valid = '1' then
2753                     v.first := '1';
2754                     v.count := r.count + 1;
2755                     v.state := IDIV_NR2;
2756                 end if;
2757             when IDIV_NR2 =>
                     -- Second half of a Newton-Raphson pass; loops back to
                     -- IDIV_NR1 until the count reaches "11".
2758                 -- compute P = Y * P
2759                 msel_1 <= MUL1_Y;
2760                 msel_2 <= MUL2_P;
2761                 f_to_multiply.valid <= r.first;
2762                 pshift := '1';
2763                 v.opsel_a := AIN_A;
2764                 -- set shift to 64
2765                 rs_con2 <= RSCON2_64;
2766                 -- Get 0.5 into R in case the inverse estimate turns out to be
2767                 -- less than 0.5, in which case we want to use 0.5, to avoid
2768                 -- infinite loops in some cases.
2769                 opsel_r <= RES_MISC;
2770                 misc_sel <= "0001";
2771                 if multiply_to_f.valid = '1' then
2772                     v.first := '1';
2773                     if r.count = "11" then
2774                         v.state := IDIV_DODIV;
2775                     else
2776                         v.state := IDIV_NR1;
2777                     end if;
2778                 end if;
2779             when IDIV_USE0_5 =>
                     -- Reached from IDIV_NR0 when the normalized divisor is 2.0;
                     -- skips the Newton-Raphson loop and uses 0.5 as the inverse.
2780                 -- Get 0.5 into R; it turns out the generated
2781                 -- QNaN mantissa is actually what we want
                     -- (misc_sel "0001" sets only bit QNAN_BIT = UNIT_BIT - 1)
2782                 opsel_r <= RES_MISC;
2783                 misc_sel <= "0001";
2784                 v.opsel_a := AIN_A;
2785                 -- set shift to 64
2786                 rs_con2 <= RSCON2_64;
2787                 v.state := IDIV_DODIV;
2788             when IDIV_DODIV =>
                     -- Latches the final inverse estimate into Y, clears the
                     -- quotient held in B, and dispatches to the division sequence
                     -- matching the instruction variant.
2789                 -- r.opsel_a = AIN_A
2790                 -- r.shift = 64
2791                 -- inverse estimate is in P or in R; copy it to Y
                     -- R (holding 0.5) is used when the normalized B overflowed to
                     -- 2.0 or when the top two tested bits of P are both zero,
                     -- i.e. the estimate in P is below 0.5.
2792                 if r.b.mantissa(UNIT_BIT + 1) = '1' or
2793                     (r.p(UNIT_BIT) = '0' and r.p(UNIT_BIT - 1) = '0') then
2794                     msel_2 <= MUL2_R;
2795                 else
2796                     msel_2 <= MUL2_P;
2797                 end if;
2798                 set_y := '1';
2799                 -- shift_res is 0 because r.shift = 64;
2800                 -- put that into B, which now holds the quotient
2801                 set_b_mant := '1';
2802                 if r.divext = '0' then
                         -- non-extended divide or modulo
2803                     -- set shift to -56
2804                     rs_con2 <= RSCON2_UNIT;
2805                     rs_neg2 <= '1';
2806                     v.first := '1';
2807                     v.state := IDIV_DIV;
2808                 elsif r.single_prec = '1' then
2809                     -- divwe[u][o], shift A left 32 bits
2810                     -- set shift to 32
2811                     rs_con2 <= RSCON2_32;
2812                     v.state := IDIV_SH32;
2813                 elsif r.div_close = '0' then
                         -- extended divide where A and B exponents differ
2814                     -- set shift to 64 - UNIT_BIT (== 8)
2815                     rs_con2 <= RSCON2_64_UNIT;
2816                     v.state := IDIV_EXTDIV;
2817                 else
2818                     -- handle top bit of quotient specially
2819                     -- for this we need the divisor left-justified in B
2820                     v.opsel_a := AIN_C;
2821                     v.state := IDIV_EXT_TBH;
2822                 end if;
2823             when IDIV_SH32 =>
                     -- Pre-shifts the dividend for the 32-bit extended divide, then
                     -- joins the ordinary division sequence at IDIV_DIV.
2824                 -- r.shift = 32, R contains the dividend
2825                 opsel_r <= RES_SHIFT;
2826                 -- set shift to -UNIT_BIT (== -56)
2827                 rs_con2 <= RSCON2_UNIT;
2828                 rs_neg2 <= '1';
2829                 v.first := '1';
2830                 v.state := IDIV_DIV;
2831             when IDIV_DIV =>
                     -- First quotient estimate for non-extended division; the
                     -- state is held until the multiplier result is valid.
2832                 -- Dividing A by C, r.shift = -56; A is in R
2833                 -- Put A into the bottom 64 bits of Ahi/A/Alo
                     -- (gated by r.first; NOTE(review): r.first is presumably
                     -- cleared elsewhere after the first cycle, confirm)
2834                 set_a_mant := r.first;
2835                 set_a_lo := r.first;
2836                 -- compute R = R * Y (quotient estimate)
2837                 msel_1 <= MUL1_Y;
2838                 msel_2 <= MUL2_R;
2839                 f_to_multiply.valid <= r.first;
2840                 pshift := '1';
2841                 opsel_r <= RES_MULT;
2842                 -- set shift to - b.exp
2843                 rs_sel1 <= RSH1_B;
2844                 rs_neg1 <= '1';
2845                 if multiply_to_f.valid = '1' then
2846                     v.state := IDIV_DIV2;
2847                 end if;
2848             when IDIV_DIV2 =>
2849                 -- r.shift = - b.exponent
2850                 -- shift the quotient estimate right by b.exponent bits
2851                 opsel_r <= RES_SHIFT;
2852                 v.first := '1';
2853                 v.state := IDIV_DIV3;
2854             when IDIV_DIV3 =>
                     -- Forms the remainder for the current quotient estimate; the
                     -- state is held until the multiplier result is valid.
2855                 -- quotient (so far) is in R; multiply by C and subtract from A
2856                 msel_1 <= MUL1_R;
2857                 msel_2 <= MUL2_C;
2858                 msel_add <= MULADD_A;
2859                 msel_inv <= '1';
2860                 f_to_multiply.valid <= r.first;
2861                 -- store the current quotient estimate in B
2862                 set_b_mant := r.first;
                     -- capture the multiplier result in R (upper part) and S
                     -- (low 56 bits)
2863                 opsel_r <= RES_MULT;
2864                 opsel_s <= S_MULT;
2865                 set_s := '1';
2866                 if multiply_to_f.valid = '1' then
2867                     v.state := IDIV_DIV4;
2868                 end if;
2869             when IDIV_DIV4 =>
                     -- Decides whether the first estimate is good enough or a
                     -- second refinement pass (IDIV_DIV5 onward) is needed.
2870                 -- remainder is in R/S and P
2871                 msel_1 <= MUL1_Y;
2872                 msel_2 <= MUL2_P;
                     -- for division, the quotient needs incrementing when the
                     -- pcmpc_lt comparison flag is not set
2873                 v.inc_quot := not pcmpc_lt and not r.divmod;
2874                 if r.divmod = '0' then
2875                     v.opsel_a := AIN_B;
2876                 end if;
2877                 -- set shift to UNIT_BIT (== 56)
2878                 rs_con2 <= RSCON2_UNIT;
2879                 if pcmpc_lt = '1' or pcmpc_eq = '1' then
                         -- remainder is small enough: finish up
2880                     if r.divmod = '0' then
2881                         v.state := IDIV_DIVADJ;
2882                     elsif pcmpc_eq = '1' then
                             -- modulo result is zero when pcmpc_eq is set
2883                         v.state := IDIV_ZERO;
2884                     else
2885                         v.state := IDIV_MODADJ;
2886                     end if;
2887                 else
2888                     -- need to do another iteration, compute P * Y
2889                     f_to_multiply.valid <= '1';
2890                     v.state := IDIV_DIV5;
2891                 end if;
2892             when IDIV_DIV5 =>
                     -- Waits for the P * Y multiply started in IDIV_DIV4 and takes
                     -- its result into R.
2893                 pshift := '1';
2894                 opsel_r <= RES_MULT;
2895                 -- set shift to - b.exp
2896                 rs_sel1 <= RSH1_B;
2897                 rs_neg1 <= '1';
2898                 if multiply_to_f.valid = '1' then
2899                     v.state := IDIV_DIV6;
2900                 end if;
2901             when IDIV_DIV6 =>
2902                 -- r.shift = - b.exponent
2903                 -- shift the quotient estimate right by b.exponent bits
2904                 opsel_r <= RES_SHIFT;
2905                 v.opsel_a := AIN_B;
2906                 v.first := '1';
2907                 v.state := IDIV_DIV7;
2908             when IDIV_DIV7 =>
2909                 -- r.opsel_a = AIN_B
2910                 -- add shifted quotient delta onto the total quotient
                     -- (adder computes B + R)
2911                 opsel_b <= BIN_R;
2912                 v.first := '1';
2913                 v.state := IDIV_DIV8;
2914             when IDIV_DIV8 =>
                     -- Same operation as IDIV_DIV3, applied to the refined
                     -- quotient; held until the multiplier result is valid.
2915                 -- quotient (so far) is in R; multiply by C and subtract from A
2916                 msel_1 <= MUL1_R;
2917                 msel_2 <= MUL2_C;
2918                 msel_add <= MULADD_A;
2919                 msel_inv <= '1';
2920                 f_to_multiply.valid <= r.first;
2921                 -- store the current quotient estimate in B
2922                 set_b_mant := r.first;
2923                 opsel_r <= RES_MULT;
2924                 opsel_s <= S_MULT;
2925                 set_s := '1';
2926                 if multiply_to_f.valid = '1' then
2927                     v.state := IDIV_DIV9;
2928                 end if;
2929             when IDIV_DIV9 =>
                     -- Like IDIV_DIV4 but with no further refinement pass: always
                     -- proceeds to one of the final adjustment states.
2930                 -- remainder is in R/S and P
2931                 msel_1 <= MUL1_Y;
2932                 msel_2 <= MUL2_P;
2933                 v.inc_quot := not pcmpc_lt and not r.divmod;
2934                 if r.divmod = '0' then
2935                     v.opsel_a := AIN_B;
2936                 end if;
2937                 -- set shift to UNIT_BIT (== 56)
2938                 rs_con2 <= RSCON2_UNIT;
2939                 if r.divmod = '0' then
2940                     v.state := IDIV_DIVADJ;
2941                 elsif pcmpc_eq = '1' then
2942                     v.state := IDIV_ZERO;
2943                 else
2944                     v.state := IDIV_MODADJ;
2945                 end if;
2946             when IDIV_EXT_TBH =>
                     -- Start of the sequence entered from IDIV_DODIV for extended
                     -- division when r.div_close is set; it handles the top bit
                     -- of the quotient separately.
2947                 -- r.opsel_a = AIN_C; get divisor into R and prepare to shift left
2948                 -- set shift to 63 - b.exp
2949                 rs_sel1 <= RSH1_B;
2950                 rs_neg1 <= '1';
2951                 rs_con2 <= RSCON2_63;
2952                 v.opsel_a := AIN_A;
2953                 v.state := IDIV_EXT_TBH2;
2954             when IDIV_EXT_TBH2 =>
2955                 -- r.opsel_a = AIN_A; divisor is in R
2956                 -- r.shift = 63 - b.exponent; shift and put into B
2957                 set_b_mant := '1';
2958                 -- set shift to 64 - UNIT_BIT (== 8)
2959                 rs_con2 <= RSCON2_64_UNIT;
2960                 v.state := IDIV_EXT_TBH3;
2961             when IDIV_EXT_TBH3 =>
2962                 -- Dividing (A << 64) by C
2963                 -- r.shift = 8
2964                 -- Put A in the top 64 bits of Ahi/A/Alo
2965                 set_a_hi := '1';
2966                 set_a_mant := '1';
2967                 -- set shift to 64 - b.exp
2968                 rs_sel1 <= RSH1_B;
2969                 rs_neg1 <= '1';
2970                 rs_con2 <= RSCON2_64;
2971                 v.state := IDIV_EXT_TBH4;
2972             when IDIV_EXT_TBH4 =>
2973                 -- dividend (A) is in R
2974                 -- r.shift = 64 - B.exponent, so is at least 1
2975                 opsel_r <= RES_SHIFT;
2976                 -- top bit of A gets lost in the shift, so handle it specially
2977                 v.opsel_a := AIN_B;
2978                 -- set shift to 63
2979                 rs_con2 <= RSCON2_63;
2980                 v.state := IDIV_EXT_TBH5;
2981             when IDIV_EXT_TBH5 =>
2982                 -- r.opsel_a = AIN_B, r.shift = 63
2983                 -- shifted dividend is in R, subtract left-justified divisor
                     -- (adder computes R + (not B) + 1)
2984                 opsel_b <= BIN_R;
2985                 opsel_ainv <= '1';
2986                 carry_in <= '1';
2987                 -- and put 1<<63 into B as the divisor (S is still 0)
                     -- NOTE(review): IDIV_DODIV describes B as holding the
                     -- quotient, so "divisor" above may be meant as "quotient";
                     -- confirm.
                     -- shiftin0 forces a 1 into bit 0 of the shifter input
2988                 shiftin0 := '1';
2989                 set_b_mant := '1';
2990                 v.first := '1';
2991                 v.state := IDIV_EXTDIV2;
2992             when IDIV_EXTDIV =>
                     -- Entered from IDIV_DODIV for extended division when
                     -- r.div_close is clear (and r.single_prec is clear).
2993                 -- Dividing (A << 64) by C
2994                 -- r.shift = 8
2995                 -- Put A in the top 64 bits of Ahi/A/Alo
2996                 set_a_hi := '1';
2997                 set_a_mant := '1';
2998                 -- set shift to 64 - b.exp
2999                 rs_sel1 <= RSH1_B;
3000                 rs_neg1 <= '1';
3001                 rs_con2 <= RSCON2_64;
3002                 v.state := IDIV_EXTDIV1;
3003             when IDIV_EXTDIV1 =>
3004                 -- dividend is in R
3005                 -- r.shift = 64 - B.exponent
3006                 opsel_r <= RES_SHIFT;
3007                 v.first := '1';
3008                 v.state := IDIV_EXTDIV2;
3009             when IDIV_EXTDIV2 =>
                     -- Top of the extended-division refinement loop (re-entered
                     -- from IDIV_EXTDIV6); held until the multiplier result is
                     -- valid.
3010                 -- shifted remainder is in R; compute R = R * Y (quotient estimate)
3011                 msel_1 <= MUL1_Y;
3012                 msel_2 <= MUL2_R;
3013                 f_to_multiply.valid <= r.first;
3014                 pshift := '1';
3015                 v.opsel_a := AIN_B;
3016                 opsel_r <= RES_MULT;
3017                 if multiply_to_f.valid = '1' then
3018                     v.first := '1';
3019                     v.state := IDIV_EXTDIV3;
3020                 end if;
3021             when IDIV_EXTDIV3 =>
3022                 -- r.opsel_a = AIN_B
3023                 -- delta quotient is in R; add it to B
3024                 opsel_b <= BIN_R;
3025                 v.first := '1';
3026                 v.state := IDIV_EXTDIV4;
3027             when IDIV_EXTDIV4 =>
                     -- Held until the multiplier result is valid.
3028                 -- quotient is in R; put it in B and compute remainder
3029                 set_b_mant := r.first;
                     -- remainder = A - R * C, captured in R (upper part) and S
                     -- (low 56 bits)
3030                 msel_1 <= MUL1_R;
3031                 msel_2 <= MUL2_C;
3032                 msel_add <= MULADD_A;
3033                 msel_inv <= '1';
3034                 f_to_multiply.valid <= r.first;
3035                 opsel_r <= RES_MULT;
3036                 opsel_s <= S_MULT;
3037                 set_s := '1';
3038                 -- set shift to UNIT_BIT - b.exp
3039                 rs_sel1 <= RSH1_B;
3040                 rs_neg1 <= '1';
3041                 rs_con2 <= RSCON2_UNIT;
3042                 if multiply_to_f.valid = '1' then
3043                     v.state := IDIV_EXTDIV5;
3044                 end if;
3045             when IDIV_EXTDIV5 =>
3046                 -- r.shift = r.b.exponent - 56
                     -- NOTE(review): IDIV_EXTDIV4 documents the shift it programs
                     -- as "UNIT_BIT - b.exp", the negative of the value stated
                     -- above; the shift-count logic is outside this excerpt, so
                     -- confirm which sign is correct.
3047                 -- remainder is in R/S; shift it right r.b.exponent bits
3048                 opsel_r <= RES_SHIFT;
3049                 -- test LS 64b of remainder in P against divisor in C
3050                 v.inc_quot := not pcmpc_lt;
3051                 v.opsel_a := AIN_B;
3052                 v.state := IDIV_EXTDIV6;
3053             when IDIV_EXTDIV6 =>
                     -- Loops back to IDIV_EXTDIV2 for another refinement pass
                     -- while r_gt_1 is set; otherwise finishes via IDIV_DIVADJ.
3054                 -- r.opsel_a = AIN_B
3055                 -- shifted remainder is in R, see if it is > 1
3056                 -- and compute R = R * Y if so
3057                 msel_1 <= MUL1_Y;
3058                 msel_2 <= MUL2_R;
3059                 pshift := '1';
3060                 if r_gt_1 = '1' then
3061                     f_to_multiply.valid <= '1';
3062                     v.state := IDIV_EXTDIV2;
3063                 else
3064                     v.state := IDIV_DIVADJ;
3065                 end if;
3066             when IDIV_MODADJ =>
                     -- Final adjustment for modulo operations: brings the
                     -- remainder into R and decides whether one more subtraction
                     -- of the divisor and/or a negation is needed.
3067                 -- r.shift = 56
3068                 -- result is in R/S
3069                 opsel_r <= RES_SHIFT;
3070                 if pcmpc_lt = '0' then
                         -- pcmpc_lt clear: subtract the divisor once more
3071                     v.opsel_a := AIN_C;
3072                     v.state := IDIV_MODSUB;
3073                 elsif r.result_sign = '0' then
3074                     v.state := IDIV_DONE;
3075                 else
                         -- negative result: IDIV_DIVADJ performs the negation
3076                     v.state := IDIV_DIVADJ;
3077                 end if;
3078             when IDIV_MODSUB =>
3079                 -- r.opsel_a = AIN_C
3080                 -- Subtract divisor from remainder
                     -- (adder computes R + (not C) + 1)
3081                 opsel_ainv <= '1';
3082                 carry_in <= '1';
3083                 opsel_b <= BIN_R;
3084                 if r.result_sign = '0' then
3085                     v.state := IDIV_DONE;
3086                 else
3087                     v.state := IDIV_DIVADJ;
3088                 end if;
3089             when IDIV_DIVADJ =>
                     -- Final adjustment of the result: optional +1 and optional
                     -- negation in a single pass through the adder.
3090                 -- result (so far) is on the A input of the adder
3091                 -- set carry to increment quotient if needed
3092                 -- and also negate R if the answer is negative
                     -- (when negating, not(x) + 1 = -x and not(x) + 0 = -(x + 1),
                     -- hence the XOR on the carry)
3093                 opsel_ainv <= r.result_sign;
3094                 carry_in <= r.inc_quot xor r.result_sign;
                     -- rnd_b32 makes BIN_RND supply only bit 32, set when the
                     -- result is negative and r.single_prec is set
3095                 rnd_b32 := '1';
3096                 if r.divmod = '0' then
3097                     opsel_b <= BIN_RND;
3098                 end if;
                     -- only signed operations get the overflow check
3099                 if r.is_signed = '0' then
3100                     v.state := IDIV_DONE;
3101                 else
3102                     v.state := IDIV_OVFCHK;
3103                 end if;
3104             when IDIV_OVFCHK =>
                     -- Signed overflow check: the sign bit of the result (bit 63,
                     -- or bit 31 when r.single_prec is set) must match the
                     -- expected result sign.
3105                 if r.single_prec = '0' then
3106                     sign_bit := r.r(63);
3107                 else
3108                     sign_bit := r.r(31);
3109                 end if;
3110                 v.int_ovf := sign_bit xor r.result_sign;
3111                 if v.int_ovf = '1' then
                         -- overflow: IDIV_ZERO delivers a zero result
3112                     v.state := IDIV_ZERO;
3113                 else
3114                     v.state := IDIV_DONE;
3115                 end if;
3116             when IDIV_DONE =>
                     -- Normal completion of an integer divide/modulo: builds the
                     -- XER and CR results from the value in R and marks the
                     -- instruction done.
3117                 v.xerc_result := v.xerc;
                     -- with r.oe set, OV and OV32 are cleared and XER is written
3118                 if r.oe = '1' then
3119                     v.xerc_result.ov := '0';
3120                     v.xerc_result.ov32 := '0';
3121                     v.writing_xer := '1';
3122                 end if;
                     -- cr_result(3) = sign bit, (2) = nonzero and sign bit clear,
                     -- (1) = value is zero; computed on 64 bits, or on the low
                     -- 32 bits when r.m32b is set
3123                 if r.m32b = '0' then
3124                     v.cr_result(3) := r.r(63);
3125                     v.cr_result(2 downto 1) := "00";
3126                     if r.r = 64x"0" then
3127                         v.cr_result(1) := '1';
3128                     else
3129                         v.cr_result(2) := not r.r(63);
3130                     end if;
3131                 else
3132                     v.cr_result(3) := r.r(31);
3133                     v.cr_result(2 downto 1) := "00";
3134                     if r.r(31 downto 0) = 32x"0" then
3135                         v.cr_result(1) := '1';
3136                     else
3137                         v.cr_result(2) := not r.r(31);
3138                     end if;
3139                 end if;
                     -- cr_result(0) copies the SO bit
3140                 v.cr_result(0) := v.xerc.so;
3141                 int_result := '1';
3142                 v.writing_fpr := '1';
3143                 v.instr_done := '1';
3144             when IDIV_ZERO =>
                     -- Completion path for zero results, including the overflow
                     -- cases (r.int_ovf set).
                     -- misc_sel "0101" has no explicit arm in the result mux's
                     -- inner case, so the all-zero default is selected.
3145                 opsel_r <= RES_MISC;
3146                 misc_sel <= "0101";
3147                 v.xerc_result := v.xerc;
                     -- with r.oe set, OV and OV32 take the overflow flag and SO
                     -- accumulates it
3148                 if r.oe = '1' then
3149                     v.xerc_result.ov := r.int_ovf;
3150                     v.xerc_result.ov32 := r.int_ovf;
3151                     v.xerc_result.so := r.xerc.so or r.int_ovf;
3152                     v.writing_xer := '1';
3153                 end if;
                     -- CR: only the "zero" bit set, plus the (possibly updated) SO
3154                 v.cr_result := "001" & v.xerc_result.so;
3155                 int_result := '1';
3156                 v.writing_fpr := '1';
3157                 v.instr_done := '1';
3158
3159         end case;
3160
3161         if zero_divide = '1' then
                 -- Post-state-machine handling of the flags raised by the state
                 -- code above: zero-divide, generated-QNaN result, invalid
                 -- operation, and completion of an arithmetic instruction.
3162             v.fpscr(FPSCR_ZX) := '1';
3163         end if;
3164         if qnan_result = '1' then
                 -- Force a positive generated QNaN as the result: this raises
                 -- invalid, overrides the result mux selection and completes
                 -- the instruction.
3165             invalid := '1';
3166             v.result_class := NAN;
3167             v.result_sign := '0';
3168             v.negate := '0';
3169             misc_sel <= "0001";
3170             opsel_r <= RES_MISC;
3171             arith_done := '1';
3172         end if;
3173         if invalid = '1' then
3174             v.invalid := '1';
3175         end if;
3176         if arith_done = '1' then
3177             -- Enabled invalid exception doesn't write result or FPRF
3178             -- Neither does enabled zero-divide exception
3179             if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
3180                 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
3181                 v.writing_fpr := '1';
3182                 v.update_fprf := '1';
3183             end if;
                 -- the instruction completes either way
3184             v.instr_done := '1';
3185             update_fx := '1';
3186         end if;
3187
3188         -- Multiplier and divide/square root data path
             -- Drives the multiplier request (two 64-bit operands, a 128-bit
             -- addend and a subtract flag) from the msel_* selections made by
             -- the state machine, and captures Y and P.
             -- First operand: A, B, Y, or (default) R.
3189         case msel_1 is
3190             when MUL1_A =>
3191                 f_to_multiply.data1 <= r.a.mantissa;
3192             when MUL1_B =>
3193                 f_to_multiply.data1 <= r.b.mantissa;
3194             when MUL1_Y =>
3195                 f_to_multiply.data1 <= r.y;
3196             when others =>
3197                 f_to_multiply.data1 <= r.r;
3198         end case;
             -- Second operand: C, the lookup-table estimate, P, or (default) R.
3199         case msel_2 is
3200             when MUL2_C =>
3201                 f_to_multiply.data2 <= r.c.mantissa;
3202             when MUL2_LUT =>
                     -- inverse_est widened to 64 bits and shifted left by
                     -- UNIT_BIT - 19
3203                 f_to_multiply.data2 <= std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64),
3204                                                                     UNIT_BIT - 19));
3205             when MUL2_P =>
3206                 f_to_multiply.data2 <= r.p;
3207             when others =>
3208                 f_to_multiply.data2 <= r.r;
3209         end case;
             -- Addend: zero unless one of the arms below fills it in.
3210         maddend := (others => '0');
3211         case msel_add is
3212             when MULADD_CONST =>
3213                 -- addend is 2.0 or 1.5 in 16.112 format
3214                 if r.is_sqrt = '0' then
3215                     maddend(2*UNIT_BIT + 1) := '1';                       -- 2.0
3216                 else
3217                     maddend(2*UNIT_BIT downto 2*UNIT_BIT - 1) := "11";    -- 1.5
3218                 end if;
3219             when MULADD_A =>
3220                 -- addend is A in 16.112 format
                     -- (a_hi above the mantissa, a_lo below it)
3221                 maddend(127 downto UNIT_BIT + 64) := r.a_hi;
3222                 maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.a.mantissa;
3223                 maddend(UNIT_BIT - 1 downto 0) := r.a_lo;
3224             when MULADD_RS =>
3225                 -- addend is concatenation of R and S in 16.112 format
3226                 maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.r;
3227                 maddend(UNIT_BIT - 1 downto 0) := r.s;
3228             when others =>
3229         end case;
3230         f_to_multiply.addend <= maddend;
3231         f_to_multiply.subtract <= msel_inv;
             -- Y captures whatever is currently selected as the second operand.
3232         if set_y = '1' then
3233             v.y := f_to_multiply.data2;
3234         end if;
             -- P captures the multiplier result: the low 64 bits, or with
             -- pshift set, the 64 bits starting at UNIT_BIT.
3235         if multiply_to_f.valid = '1' then
3236             if pshift = '0' then
3237                 v.p := multiply_to_f.result(63 downto 0);
3238             else
3239                 v.p := multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT);
3240             end if;
3241         end if;
3242
3243         -- Data path.
3244         -- This has A and B input multiplexers, an adder, a shifter,
3245         -- count-leading-zeroes logic, and a result mux.
             -- First, derive the mask used for the sticky (X) bit: it marks the
             -- A-input bits that are tested when set_x is asserted.
             -- With r.longmask set, the shift count used for the mask is
             -- reduced by 29.
3246         if r.longmask = '1' then
3247             mshift := r.shift + to_signed(-29, EXP_BITS);
3248         else
3249             mshift := r.shift;
3250         end if;
             -- Below -64 the mask is all ones; for zero or positive counts it
             -- is empty.  An unknown shift count yields an unknown mask.
3251         if is_X(mshift) then
3252             mask := (others => 'X');
3253         elsif mshift < to_signed(-64, EXP_BITS) then
3254             mask := (others => '1');
3255         elsif mshift >= to_signed(0, EXP_BITS) then
3256             mask := (others => '0');
3257         else
                 -- NOTE(review): right_mask is defined outside this excerpt;
                 -- presumably it builds a mask of low-order ones from the 6-bit
                 -- count, confirm.
3258             mask := right_mask(unsigned(mshift(5 downto 0)));
3259         end if;
3260         case r.opsel_a is
                 -- Adder A-input mux.  The selection comes from the registered
                 -- r.opsel_a, so a state sets v.opsel_a one cycle ahead of use.
3261             when AIN_R =>
3262                 in_a0 := r.r;
3263             when AIN_A =>
3264                 in_a0 := r.a.mantissa;
3265             when AIN_B =>
3266                 in_a0 := r.b.mantissa;
3267             when others =>
3268                 in_a0 := r.c.mantissa;
3269         end case;
             -- Sticky bit: set X if any masked bit of the selected operand is 1.
             -- This is evaluated before the optional inversion below.
3270         if (or (mask and in_a0)) = '1' and set_x = '1' then
3271             v.x := '1';
3272         end if;
             -- optional bitwise inversion, used for subtraction and negation
3273         if opsel_ainv = '1' then
3274             in_a0 := not in_a0;
3275         end if;
3276         in_a <= in_a0;
3277         case opsel_b is
                 -- Adder B-input mux: zero, R, a rounding increment, or the
                 -- sign-extended low byte of P.
3278             when BIN_ZERO =>
3279                 in_b0 := (others => '0');
3280             when BIN_R =>
3281                 in_b0 := r.r;
3282             when BIN_RND =>
                     -- Rounding increment: a single 1 bit at a position chosen by
                     -- rnd_b32, rbit_inc and r.single_prec.
3283                 if rnd_b32 = '1' then
                         -- bit 32, only for a negative single-precision result
3284                     round_inc := (32 => r.result_sign and r.single_prec, others => '0');
3285                 elsif rbit_inc = '0' then
                         -- the LSB position for the current precision
3286                     round_inc := (SP_LSB => r.single_prec, DP_LSB => not r.single_prec, others => '0');
3287                 else
3288                     round_inc := (DP_RBIT => '1', others => '0');
3289                 end if;
3290                 in_b0 := round_inc;
3291             when others =>
3292                 -- BIN_PS8, 8 LSBs of P sign-extended to 64
3293                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 0)), 64));
3294         end case;
             -- optional bitwise inversion of the B input
3295         if opsel_binv = '1' then
3296             in_b0 := not in_b0;
3297         end if;
3298         in_b <= in_b0;
3299         if is_X(r.shift) then
                 -- Shifter: operates on the 120-bit concatenation of R and S and
                 -- yields 64 bits.  An unknown shift count gives an unknown
                 -- result; counts outside -64 .. 63 give zero.
3300             shift_res := (others => 'X');
3301         elsif r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
                 -- shiftin0 can force a 1 into bit 0 of R, and shiftin a 1 into
                 -- the top bit of S, on their way into the shifter
3302             shift_res := shifter_64(r.r(63 downto 1) & (shiftin0 or r.r(0)) &
3303                                     (shiftin or r.s(55)) & r.s(54 downto 0),
3304                                     std_ulogic_vector(r.shift(6 downto 0)));
3305         else
3306             shift_res := (others => '0');
3307         end if;
             -- Adder: A input + B input + carry in.
3308         sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
             -- With opsel_mask set, clear the sum bits below the LSB of the
             -- current precision (4 bits for double, since DP_LSB = 4, and
             -- everything below SP_LSB for single).
3309         if opsel_mask = '1' then
3310             sum(DP_LSB - 1 downto 0) := "0000";
3311             if r.single_prec = '1' then
3312                 sum(SP_LSB - 1 downto DP_LSB) := (others => '0');
3313             end if;
3314         end if;
3315         case opsel_r is
                 -- Result mux: selects what is loaded into R this cycle: the
                 -- adder sum, the shifter output, the multiplier result, or a
                 -- miscellaneous constant/value chosen by misc_sel.
3316             when RES_SUM =>
3317                 result <= sum;
3318             when RES_SHIFT =>
3319                 result <= shift_res;
3320             when RES_MULT =>
                     -- the 64 bits of the product starting at UNIT_BIT
3321                 result <= multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT);
3322                 if mult_mask = '1' then
3323                     -- trim to 54 fraction bits if mult_mask = 1, for quotient when dividing
                         -- (UNIT_BIT - 55 = 1, so bits 1 downto 0 are cleared)
3324                     result(UNIT_BIT - 55 downto 0) <= (others => '0');
3325                 end if;
3326             when others =>
                     -- RES_MISC.  Any misc_sel code without an explicit arm below
                     -- (e.g. "0101") leaves misc all zero.
3327                 misc := (others => '0');
3328                 case misc_sel is
3329                     when "0000" =>
                             -- FPSCR (masked by fpscr_mask) in the low 32 bits
3330                         misc := x"00000000" & (r.fpscr and fpscr_mask);
3331                     when "0001" =>
3332                         -- generated QNaN mantissa
                             -- (a single 1 at bit UNIT_BIT - 1; also used as the
                             -- value 0.5 by the integer divide states)
3333                         misc(QNAN_BIT) := '1';
3334                     when "0010" =>
3335                         -- mantissa of max representable DP number
3336                         misc(UNIT_BIT downto DP_LSB) := (others => '1');
3337                     when "0011" =>
3338                         -- mantissa of max representable SP number
3339                         misc(UNIT_BIT downto SP_LSB) := (others => '1');
3340                     when "0100" =>
3341                         -- fmrgow result
                             -- (low words of A and B)
3342                         misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
3343                     when "0110" =>
3344                         -- fmrgew result
                             -- (high words of A and B)
3345                         misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
3346                     when "0111" =>
                             -- lookup-table estimate, positioned as for MUL2_LUT
3347                         misc := std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64),
3348                                                              UNIT_BIT - 19));
3349                     when "1000" =>
3350                         -- max positive result for fctiw[z]
3351                         misc := x"000000007fffffff";
3352                     when "1001" =>
3353                         -- max negative result for fctiw[z]
3354                         misc := x"ffffffff80000000";
3355                     when "1010" =>
3356                         -- max positive result for fctiwu[z]
3357                         misc := x"00000000ffffffff";
3358                     when "1011" =>
3359                         -- max negative result for fctiwu[z]
3360                         misc := x"0000000000000000";
3361                     when "1100" =>
3362                         -- max positive result for fctid[z]
3363                         misc := x"7fffffffffffffff";
3364                     when "1101" =>
3365                         -- max negative result for fctid[z]
3366                         misc := x"8000000000000000";
3367                     when "1110" =>
3368                         -- max positive result for fctidu[z]
3369                         misc := x"ffffffffffffffff";
3370                     when "1111" =>
3371                         -- max negative result for fctidu[z]
3372                         misc := x"0000000000000000";
3373                     when others =>
3374                 end case;
3375                 result <= misc;
3376         end case;
             -- R is loaded from the result mux output.
3377         v.r := result;
3378         if set_s = '1' then
3379             case opsel_s is
3380                 when S_NEG =>
3381                     v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
3382                 when S_MULT =>
3383                     v.s := multiply_to_f.result(55 downto 0);
3384                 when S_SHIFT =>
3385                     v.s := shift_res(63 downto 8);
3386                     if shift_res(7 downto 0) /= x"00" then
3387                         v.x := '1';
3388                     end if;
3389                 when others =>
3390                     v.s := (others => '0');
3391             end case;
3392         end if;
3393
3394         if set_a = '1' or set_a_exp = '1' then
3395             v.a.exponent := new_exp;
3396         end if;
3397         if set_a = '1' or set_a_mant = '1' then
3398             v.a.mantissa := shift_res;
3399         end if;
3400         if e_in.valid = '1' then
3401             v.a_hi := (others => '0');
3402             v.a_lo := (others => '0');
3403         else
3404             if set_a_hi = '1' then
3405                 v.a_hi := r.r(63 downto 56);
3406             end if;
3407             if set_a_lo = '1' then
3408                 v.a_lo := r.r(55 downto 0);
3409             end if;
3410         end if;
3411         if set_b = '1' then
3412             v.b.exponent := new_exp;
3413         end if;
3414         if set_b = '1' or set_b_mant = '1' then
3415             v.b.mantissa := shift_res;
3416         end if;
3417         if set_c = '1' then
3418             v.c.exponent := new_exp;
3419             v.c.mantissa := shift_res;
3420         end if;
3421
3422         -- exponent data path
3423         case re_sel1 is
3424             when REXP1_R =>
3425                 rexp_in1 := r.result_exp;
3426             when REXP1_A =>
3427                 rexp_in1 := r.a.exponent;
3428             when REXP1_BHALF =>
3429                 rexp_in1 := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
3430             when others =>
3431                 rexp_in1 := to_signed(0, EXP_BITS);
3432         end case;
3433         if re_neg1 = '1' then
3434             rexp_in1 := not rexp_in1;
3435         end if;
3436         case re_sel2 is
3437             when REXP2_NE =>
3438                 rexp_in2 := new_exp;
3439             when REXP2_C =>
3440                 rexp_in2 := r.c.exponent;
3441             when REXP2_B =>
3442                 rexp_in2 := r.b.exponent;
3443             when others =>
3444                 case re_con2 is
3445                     when RECON2_UNIT =>
3446                         rexp_in2 := to_signed(UNIT_BIT, EXP_BITS);
3447                     when RECON2_MAX =>
3448                         rexp_in2 := max_exp;
3449                     when RECON2_BIAS =>
3450                         rexp_in2 := bias_exp;
3451                     when others =>
3452                         rexp_in2 := to_signed(0, EXP_BITS);
3453                 end case;
3454         end case;
3455         if re_neg2 = '1' then
3456             rexp_in2 := not rexp_in2;
3457         end if;
3458         rexp_cin := re_neg1 or re_neg2;
3459         rexp_sum := rexp_in1 + rexp_in2 + rexp_cin;
3460         if re_set_result = '1' then
3461             v.result_exp := rexp_sum;
3462         end if;
3463         case rs_sel1 is
3464             when RSH1_B =>
3465                 rsh_in1 := r.b.exponent;
3466             when RSH1_NE =>
3467                 rsh_in1 := new_exp;
3468             when RSH1_S =>
3469                 rsh_in1 := r.shift;
3470             when others =>
3471                 rsh_in1 := to_signed(0, EXP_BITS);
3472         end case;
3473         if rs_neg1 = '1' then
3474             rsh_in1 := not rsh_in1;
3475         end if;
3476         case rs_sel2 is
3477             when RSH2_A =>
3478                 rsh_in2 := r.a.exponent;
3479             when others =>
3480                 case rs_con2 is
3481                     when RSCON2_1 =>
3482                         rsh_in2 := to_signed(1, EXP_BITS);
3483                     when RSCON2_UNIT_52 =>
3484                         rsh_in2 := to_signed(UNIT_BIT - 52, EXP_BITS);
3485                     when RSCON2_64_UNIT =>
3486                         rsh_in2 := to_signed(64 - UNIT_BIT, EXP_BITS);
3487                     when RSCON2_32 =>
3488                         rsh_in2 := to_signed(32, EXP_BITS);
3489                     when RSCON2_52 =>
3490                         rsh_in2 := to_signed(52, EXP_BITS);
3491                     when RSCON2_UNIT =>
3492                         rsh_in2 := to_signed(UNIT_BIT, EXP_BITS);
3493                     when RSCON2_63 =>
3494                         rsh_in2 := to_signed(63, EXP_BITS);
3495                     when RSCON2_64 =>
3496                         rsh_in2 := to_signed(64, EXP_BITS);
3497                     when RSCON2_MINEXP =>
3498                         rsh_in2 := min_exp;
3499                     when others =>
3500                         rsh_in2 := to_signed(0, EXP_BITS);
3501                 end case;
3502         end case;
3503         if rs_neg2 = '1' then
3504             rsh_in2 := not rsh_in2;
3505         end if;
3506         if rs_norm = '1' then
3507             clz := count_left_zeroes(r.r);
3508             if renorm_sqrt = '1' then
3509                 -- make denormalized value end up with even exponent
3510                 clz(0) := '1';
3511             end if;
3512             -- do this as a separate dedicated 7-bit adder for timing reasons
3513             v.shift := resize(signed('0' & clz) - (63 - UNIT_BIT), EXP_BITS);
3514         else
3515             v.shift := rsh_in1 + rsh_in2 + (rs_neg1 or rs_neg2);
3516         end if;
3517
3518         if r.update_fprf = '1' then
3519             v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
3520                                                              r.r(UNIT_BIT) and not r.denorm);
3521         end if;
3522
3523         v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
3524                              (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
3525         v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
3526                                   v.fpscr(FPSCR_VE downto FPSCR_XE));
3527         if update_fx = '1' and
3528             (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
3529             v.fpscr(FPSCR_FX) := '1';
3530         end if;
3531
3532         if v.instr_done = '1' then
3533             if r.state /= IDLE then
3534                 v.state := IDLE;
3535                 v.busy := '0';
3536                 v.f2stall := '0';
3537                 if r.fp_rc = '1' then
3538                     v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
3539                 end if;
3540                 v.sp_result := r.single_prec;
3541                 v.int_result := int_result;
3542                 v.illegal := illegal;
3543                 v.nsnan_result := v.quieten_nan;
3544                 v.res_negate := v.negate;
3545                 v.res_subtract := v.is_subtract;
3546                 v.res_rmode := r.round_mode;
3547                 if r.integer_op = '1' then
3548                     v.cr_mask := num_to_fxm(0);
3549                 elsif r.is_cmp = '0' then
3550                     v.cr_mask := num_to_fxm(1);
3551                 elsif is_X(insn_bf(r.insn)) then
3552                     v.cr_mask := (others => 'X');
3553                 else
3554                     v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(r.insn))));
3555                 end if;
3556                 v.writing_cr := r.is_cmp or r.rc;
3557                 v.write_reg := r.dest_fpr;
3558                 v.complete_tag := r.instr_tag;
3559             end if;
3560             if e_in.stall = '0' then
3561                 v.complete := not v.illegal;
3562                 v.do_intr := (v.fpscr(FPSCR_FEX) and r.fe_mode) or v.illegal;
3563             end if;
3564             -- N.B. We rely on execute1 to prevent any new instruction
3565             -- coming in while e_in.stall = 1, without us needing to
3566             -- have busy asserted.
3567         else
3568             if r.state /= IDLE and e_in.stall = '0' then
3569                 v.f2stall := '1';
3570             end if;
3571         end if;
3572
3573         -- This mustn't depend on any fields of r that are modified in IDLE state.
3574         if r.int_result = '1' then
3575             fp_result <= r.r;
3576         else
3577             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
3578                                  r.sp_result, r.nsnan_result,
3579                                  r.res_negate, r.res_subtract, r.res_rmode);
3580         end if;
3581
3582         rin <= v;
3583     end process;
3584
3585 end architecture behaviour;