1 -- Floating-point unit for Microwatt
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
18 flush_in : in std_ulogic;
20 e_in : in Execute1ToFPUType;
21 e_out : out FPUToExecute1Type;
23 w_out : out FPUToWritebackType
27 architecture behaviour of fpu is
28 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
-- Width of the signed exponent fields (see the "exponent", "result_exp"
-- and "shift" record members, all declared signed(EXP_BITS-1 downto 0)).
30 constant EXP_BITS : natural := 13;
-- Bit positions inside the 64-bit mantissa words, which this file
-- annotates as "8.56 format": bit 56 is the units bit and bits 55..0
-- are the fraction.
31 constant UNIT_BIT : natural := 56;
-- Most significant fraction bit; pack_dp treats it as the quiet-NaN bit.
32 constant QNAN_BIT : natural := UNIT_BIT - 1;
-- Single precision: lowest mantissa bit kept for a 23-bit fraction, and
-- the two bits immediately below it, which fp_rounding reads as the top
-- two bits of its "grx" vector (presumably guard and round bits).
33 constant SP_LSB : natural := UNIT_BIT - 23;
34 constant SP_GBIT : natural := SP_LSB - 1;
35 constant SP_RBIT : natural := SP_LSB - 2;
-- Double precision: same layout for a 52-bit fraction.
36 constant DP_LSB : natural := UNIT_BIT - 52;
37 constant DP_GBIT : natural := DP_LSB - 1;
38 constant DP_RBIT : natural := DP_LSB - 2;
40 type fpu_reg_type is record
41 class : fp_number_class;
42 negative : std_ulogic;
44 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
45 mantissa : std_ulogic_vector(63 downto 0); -- 8.56 format
48 type state_t is (IDLE, DO_ILLEGAL,
49 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
50 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
53 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
58 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
61 FMADD_0, FMADD_1, FMADD_2, FMADD_3,
62 FMADD_4, FMADD_5, FMADD_6,
64 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
68 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
69 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
70 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
71 INT_SHIFT, INT_ROUND, INT_ISHIFT,
72 INT_FINAL, INT_CHECK, INT_OFLOW,
74 ROUND_UFLOW, ROUND_OFLOW,
75 ROUNDING, ROUNDING_2, ROUNDING_3,
80 NAN_RESULT, EXC_RESULT,
81 IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3,
82 IDIV_CLZA, IDIV_CLZA2, IDIV_CLZA3,
83 IDIV_NR0, IDIV_NR1, IDIV_NR2, IDIV_USE0_5,
84 IDIV_DODIV, IDIV_SH32,
85 IDIV_DIV, IDIV_DIV2, IDIV_DIV3, IDIV_DIV4, IDIV_DIV5,
86 IDIV_DIV6, IDIV_DIV7, IDIV_DIV8, IDIV_DIV9,
87 IDIV_EXT_TBH, IDIV_EXT_TBH2, IDIV_EXT_TBH3,
88 IDIV_EXT_TBH4, IDIV_EXT_TBH5,
89 IDIV_EXTDIV, IDIV_EXTDIV1, IDIV_EXTDIV2, IDIV_EXTDIV3,
90 IDIV_EXTDIV4, IDIV_EXTDIV5, IDIV_EXTDIV6,
91 IDIV_MODADJ, IDIV_MODSUB, IDIV_DIVADJ, IDIV_OVFCHK, IDIV_DONE, IDIV_ZERO);
93 type decode32 is array(0 to 31) of state_t;
94 type decode8 is array(0 to 7) of state_t;
96 type reg_type is record
100 instr_done : std_ulogic;
101 complete : std_ulogic;
102 do_intr : std_ulogic;
103 illegal : std_ulogic;
105 insn : std_ulogic_vector(31 downto 0);
106 instr_tag : instr_tag_t;
107 dest_fpr : gspr_index_t;
108 fe_mode : std_ulogic;
112 single_prec : std_ulogic;
113 sp_result : std_ulogic;
114 fpscr : std_ulogic_vector(31 downto 0);
115 comm_fpscr : std_ulogic_vector(31 downto 0); -- committed FPSCR value
119 r : std_ulogic_vector(63 downto 0); -- 8.56 format
120 s : std_ulogic_vector(55 downto 0); -- extended fraction
122 p : std_ulogic_vector(63 downto 0); -- 8.56 format
123 y : std_ulogic_vector(63 downto 0); -- 8.56 format
124 result_sign : std_ulogic;
125 result_class : fp_number_class;
126 result_exp : signed(EXP_BITS-1 downto 0);
127 shift : signed(EXP_BITS-1 downto 0);
128 writing_fpr : std_ulogic;
129 write_reg : gspr_index_t;
130 complete_tag : instr_tag_t;
131 writing_cr : std_ulogic;
132 writing_xer : std_ulogic;
133 int_result : std_ulogic;
134 cr_result : std_ulogic_vector(3 downto 0);
135 cr_mask : std_ulogic_vector(7 downto 0);
136 old_exc : std_ulogic_vector(4 downto 0);
137 update_fprf : std_ulogic;
138 quieten_nan : std_ulogic;
139 nsnan_result : std_ulogic;
142 round_mode : std_ulogic_vector(2 downto 0);
143 is_subtract : std_ulogic;
144 exp_cmp : std_ulogic;
145 madd_cmp : std_ulogic;
146 add_bsmall : std_ulogic;
147 is_multiply : std_ulogic;
148 is_sqrt : std_ulogic;
150 count : unsigned(1 downto 0);
151 doing_ftdiv : std_ulogic_vector(1 downto 0);
152 opsel_a : std_ulogic_vector(1 downto 0);
156 invalid : std_ulogic;
158 longmask : std_ulogic;
159 integer_op : std_ulogic;
162 is_signed : std_ulogic;
163 int_ovf : std_ulogic;
164 div_close : std_ulogic;
165 inc_quot : std_ulogic;
166 a_hi : std_ulogic_vector(7 downto 0);
167 a_lo : std_ulogic_vector(55 downto 0);
171 xerc_result : xer_common_t;
172 res_negate : std_ulogic;
173 res_subtract : std_ulogic;
174 res_rmode : std_ulogic_vector(2 downto 0);
177 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
179 signal r, rin : reg_type;
181 signal fp_result : std_ulogic_vector(63 downto 0);
182 signal opsel_b : std_ulogic_vector(1 downto 0);
183 signal opsel_r : std_ulogic_vector(1 downto 0);
184 signal opsel_s : std_ulogic_vector(1 downto 0);
185 signal opsel_ainv : std_ulogic;
186 signal opsel_mask : std_ulogic;
187 signal opsel_binv : std_ulogic;
188 signal in_a : std_ulogic_vector(63 downto 0);
189 signal in_b : std_ulogic_vector(63 downto 0);
190 signal result : std_ulogic_vector(63 downto 0);
191 signal carry_in : std_ulogic;
192 signal lost_bits : std_ulogic;
193 signal r_hi_nz : std_ulogic;
194 signal r_lo_nz : std_ulogic;
195 signal r_gt_1 : std_ulogic;
196 signal s_nz : std_ulogic;
197 signal misc_sel : std_ulogic_vector(3 downto 0);
198 signal f_to_multiply : MultiplyInputType;
199 signal multiply_to_f : MultiplyOutputType;
200 signal msel_1 : std_ulogic_vector(1 downto 0);
201 signal msel_2 : std_ulogic_vector(1 downto 0);
202 signal msel_add : std_ulogic_vector(1 downto 0);
203 signal msel_inv : std_ulogic;
204 signal inverse_est : std_ulogic_vector(18 downto 0);
207 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
208 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
209 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
210 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
212 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
213 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
214 constant BIN_RND : std_ulogic_vector(1 downto 0) := "10";
215 constant BIN_PS8 : std_ulogic_vector(1 downto 0) := "11";
217 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
218 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
219 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
220 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
222 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
223 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
224 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
225 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
228 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
229 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
230 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
231 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
233 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
234 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
235 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
236 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
238 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
239 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
240 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
241 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
243 -- control signals and values for exponent data path
244 constant REXP1_ZERO : std_ulogic_vector(1 downto 0) := "00";
245 constant REXP1_R : std_ulogic_vector(1 downto 0) := "01";
246 constant REXP1_A : std_ulogic_vector(1 downto 0) := "10";
247 constant REXP1_BHALF : std_ulogic_vector(1 downto 0) := "11";
249 constant REXP2_CON : std_ulogic_vector(1 downto 0) := "00";
250 constant REXP2_NE : std_ulogic_vector(1 downto 0) := "01";
251 constant REXP2_C : std_ulogic_vector(1 downto 0) := "10";
252 constant REXP2_B : std_ulogic_vector(1 downto 0) := "11";
254 constant RECON2_ZERO : std_ulogic_vector(1 downto 0) := "00";
255 constant RECON2_UNIT : std_ulogic_vector(1 downto 0) := "01";
256 constant RECON2_BIAS : std_ulogic_vector(1 downto 0) := "10";
257 constant RECON2_MAX : std_ulogic_vector(1 downto 0) := "11";
259 signal re_sel1 : std_ulogic_vector(1 downto 0);
260 signal re_sel2 : std_ulogic_vector(1 downto 0);
261 signal re_con2 : std_ulogic_vector(1 downto 0);
262 signal re_neg1 : std_ulogic;
263 signal re_neg2 : std_ulogic;
264 signal re_set_result : std_ulogic;
266 constant RSH1_ZERO : std_ulogic_vector(1 downto 0) := "00";
267 constant RSH1_B : std_ulogic_vector(1 downto 0) := "01";
268 constant RSH1_NE : std_ulogic_vector(1 downto 0) := "10";
269 constant RSH1_S : std_ulogic_vector(1 downto 0) := "11";
271 constant RSH2_CON : std_ulogic := '0';
272 constant RSH2_A : std_ulogic := '1';
274 constant RSCON2_ZERO : std_ulogic_vector(3 downto 0) := "0000";
275 constant RSCON2_1 : std_ulogic_vector(3 downto 0) := "0001";
276 constant RSCON2_UNIT_52 : std_ulogic_vector(3 downto 0) := "0010";
277 constant RSCON2_64_UNIT : std_ulogic_vector(3 downto 0) := "0011";
278 constant RSCON2_32 : std_ulogic_vector(3 downto 0) := "0100";
279 constant RSCON2_52 : std_ulogic_vector(3 downto 0) := "0101";
280 constant RSCON2_UNIT : std_ulogic_vector(3 downto 0) := "0110";
281 constant RSCON2_63 : std_ulogic_vector(3 downto 0) := "0111";
282 constant RSCON2_64 : std_ulogic_vector(3 downto 0) := "1000";
283 constant RSCON2_MINEXP : std_ulogic_vector(3 downto 0) := "1001";
285 signal rs_sel1 : std_ulogic_vector(1 downto 0);
286 signal rs_sel2 : std_ulogic;
287 signal rs_con2 : std_ulogic_vector(3 downto 0);
288 signal rs_neg1 : std_ulogic;
289 signal rs_neg2 : std_ulogic;
290 signal rs_norm : std_ulogic;
292 constant arith_decode : decode32 := (
293 -- indexed by bits 5..1 of opcode
301 2#10110# => DO_FSQRT,
304 2#11010# => DO_FRSQRTE,
305 2#11100# => DO_FMADD,
306 2#11101# => DO_FMADD,
307 2#11110# => DO_FMADD,
308 2#11111# => DO_FMADD,
312 constant cmp_decode : decode8 := (
321 constant misc_decode : decode32 := (
322 -- indexed by bits 10, 8, 4, 2, 1 of opcode
323 2#00010# => DO_MTFSB,
324 2#01010# => DO_MTFSFI,
328 2#11011# => DO_MTFSF,
329 2#10110# => DO_FCFID,
330 2#11110# => DO_FCFID,
334 -- Inverse lookup table, indexed by the top 8 fraction bits
335 -- The first 256 entries are the reciprocal (1/x) lookup table,
336 -- and the remaining 768 entries are the reciprocal square root table.
337 -- Output range is [0.5, 1) in 0.19 format, though the top
338 -- bit isn't stored since it is always 1.
339 -- Each output value is the inverse of the center of the input
340 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
341 -- entry 1 is 1 / (1 + 3/512), etc.
342 constant inverse_table : lookup_table := (
344 -- Unit bit is assumed to be 1, so input range is [1, 2)
345 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
346 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
347 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
348 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
349 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
350 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
351 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
352 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
353 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
354 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
355 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
356 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
357 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
358 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
359 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
360 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
361 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
362 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
363 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
364 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
365 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
366 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
367 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
368 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
369 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
370 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
371 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
372 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
373 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
374 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
375 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
376 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
377 -- 1/sqrt(x) lookup table
378 -- Input is in the range [1, 4), i.e. two bits to the left of the
379 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
381 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
382 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
383 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
384 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
385 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
386 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
387 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
388 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
389 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
390 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
391 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
392 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
393 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
394 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
395 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
396 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
397 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
398 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
399 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
400 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
401 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
402 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
403 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
404 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
405 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
406 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
407 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
408 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
409 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
410 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
411 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
412 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
414 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
415 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
416 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
417 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
418 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
419 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
420 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
421 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
422 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
423 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
424 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
425 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
426 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
427 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
428 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
429 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
430 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
431 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
432 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
433 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
434 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
435 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
436 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
437 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
438 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
439 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
440 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
441 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
442 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
443 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
444 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
445 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
447 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
448 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
449 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
450 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
451 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
452 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
453 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
454 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
455 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
456 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
457 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
458 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
459 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
460 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
461 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
462 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
463 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
464 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
465 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
466 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
467 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
468 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
469 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
470 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
471 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
472 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
473 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
474 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
475 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
476 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
477 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
478 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
481 -- Left and right shifter with 120 bit input and 64 bit output.
482 -- Shifts inp left by shift bits and returns the upper 64 bits of
483 -- the result. The shift parameter is interpreted as a signed
484 -- number in the range -64..63, with negative values indicating
-- shifter_64: three-stage multiplexer shifter.
--   inp   : 120-bit value to be shifted
--   shift : 7-bit shift count, consumed two/two/three bits at a time
--   returns the selected 64-bit window of the shifted value
-- Each stage narrows the candidate window: 120 -> 95 -> 71 -> 64 bits.
-- NOTE(review): the selector value attached to each alternative is not
-- shown next to the assignments below; confirm the mapping against the
-- "when" labels before relying on the per-line remarks.
486 function shifter_64(inp: std_ulogic_vector(119 downto 0);
487 shift: std_ulogic_vector(6 downto 0))
488 return std_ulogic_vector is
-- s1: 95-bit output of the coarse (32-bit step) stage
489 variable s1 : std_ulogic_vector(94 downto 0);
-- s2: 71-bit output of the middle (8-bit step) stage
490 variable s2 : std_ulogic_vector(70 downto 0);
491 variable shift_result : std_ulogic_vector(63 downto 0);
-- Stage 1: the four alternatives are the same 95-bit window placed at
-- offsets that differ by 32 bits, zero-filled where the window extends
-- past either end of inp.
493 case shift(6 downto 5) is
-- window covers inp bits 119..25
495 s1 := inp(119 downto 25);
-- window moved 32 bits lower: inp bits 87..0 plus 7 zero bits of fill
497 s1 := inp(87 downto 0) & "0000000";
-- window moved 64 bits higher: 64 zero bits above inp bits 119..89
499 s1 := x"0000000000000000" & inp(119 downto 89);
-- window moved 32 bits higher: 32 zero bits above inp bits 119..57
501 s1 := x"00000000" & inp(119 downto 57);
-- Stage 2: pick a 71-bit slice of s1, moving down in steps of 8 bits.
503 case shift(4 downto 3) is
505 s2 := s1(94 downto 24);
507 s2 := s1(86 downto 16);
509 s2 := s1(78 downto 8);
511 s2 := s1(70 downto 0);
-- Stage 3: pick the final 64-bit slice of s2, moving down one bit at a
-- time (8 alternatives for the low 3 bits of the count).
513 case shift(2 downto 0) is
515 shift_result := s2(70 downto 7);
517 shift_result := s2(69 downto 6);
519 shift_result := s2(68 downto 5);
521 shift_result := s2(67 downto 4);
523 shift_result := s2(66 downto 3);
525 shift_result := s2(65 downto 2);
527 shift_result := s2(64 downto 1);
529 shift_result := s2(63 downto 0);
534 -- Generate a mask with 0-bits on the left and 1-bits on the right which
535 -- selects the bits that will be lost in doing a right shift. The shift
536 -- parameter is the bottom 6 bits of a negative shift count,
537 -- indicating a right shift.
-- right_mask: build a 64-bit mask from a 6-bit shift count.
--   shift   : 6-bit unsigned count
--   returns : 64-bit std_ulogic_vector mask
538 function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
539 variable mask_result: std_ulogic_vector(63 downto 0);
-- Start from an all-zero mask; bits are then set selectively.
541 mask_result := (others => '0');
-- All-'X' result.
-- NOTE(review): presumably only taken when shift contains metavalues,
-- so that unknowns propagate in simulation; the guarding condition is
-- not shown here, confirm.
543 mask_result := (others => 'X');
-- Walk all 64 positions; the index is mirrored (63 - i), so loop step 0
-- addresses the most significant mask bit.
-- NOTE(review): the condition deciding which steps set a bit is not
-- shown here; confirm against the comparison with shift.
546 for i in 0 to 63 loop
548 mask_result(63 - i) := '1';
554 -- Split a DP floating-point number into components and work out its class.
555 -- If is_fp = 0, the input is considered an integer
-- decode_dp: unpack a 64-bit register value into an fpu_reg_type record.
--   fpr       : raw 64-bit operand
--   is_fp     : interpretation selector for the floating-point path
--   is_32bint : selects the 32-bit integer path (tested after the FP path)
--   is_signed : for 32-bit integers, enables sign extension from bit 31
--   returns   : record with sign, exponent, mantissa and class filled in
556 function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_fp: std_ulogic;
557 is_32bint: std_ulogic; is_signed: std_ulogic) return fpu_reg_type is
558 variable reg : fpu_reg_type;
-- exp_nz / exp_ao: exponent field has any bit set / all bits set
559 variable exp_nz : std_ulogic;
560 variable exp_ao : std_ulogic;
-- frac_nz: any of the 52 fraction bits set; low_nz: any of bits 31..0 set
561 variable frac_nz : std_ulogic;
562 variable low_nz : std_ulogic;
-- cls: the three flags above packed as exp_ao & exp_nz & frac_nz
563 variable cls : std_ulogic_vector(2 downto 0);
-- Sign bit of the 64-bit encoding (overridden below for 32-bit integers).
565 reg.negative := fpr(63);
-- Reduce the 11-bit exponent field (62..52) and the fraction (51..0)
-- to single-bit summaries.
567 exp_nz := or (fpr(62 downto 52));
568 exp_ao := and (fpr(62 downto 52));
569 frac_nz := or (fpr(51 downto 0));
570 low_nz := or (fpr(31 downto 0));
-- Floating-point interpretation.
-- Denormal: zero exponent field with a nonzero fraction.
572 reg.denorm := frac_nz and not exp_nz;
-- Remove the bias of 1023 from the zero-extended exponent field.
573 reg.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
-- Override with the fixed value -1022.
-- NOTE(review): presumably applied when the exponent field is zero
-- (denormals/zero); the guarding condition is not shown here, confirm.
575 reg.exponent := to_signed(-1022, EXP_BITS);
-- Mantissa: exp_nz supplies the implicit leading bit in front of the
-- 52 stored fraction bits, then the value is shifted left into place.
577 reg.mantissa := std_ulogic_vector(shift_left(resize(unsigned(exp_nz & fpr(51 downto 0)), 64),
-- Classify from (exponent all ones, exponent nonzero, fraction nonzero).
579 cls := exp_ao & exp_nz & frac_nz;
581 when "000" => reg.class := ZERO;
582 when "001" => reg.class := FINITE; -- denormalized
583 when "010" => reg.class := FINITE;
584 when "011" => reg.class := FINITE;
-- exponent all ones with a zero fraction
585 when "110" => reg.class := INFINITY;
-- remaining encodings, including exponent all ones with nonzero fraction
586 when others => reg.class := NAN;
-- 32-bit integer interpretation: sign comes from bit 31, the low word is
-- copied verbatim, and the high word is filled with the sign bit only
-- when is_signed is set (zero otherwise). Exponent is forced to zero.
588 elsif is_32bint = '1' then
589 reg.negative := fpr(31);
590 reg.mantissa(31 downto 0) := fpr(31 downto 0);
591 reg.mantissa(63 downto 32) := (others => (is_signed and fpr(31)));
592 reg.exponent := (others => '0');
-- Remaining (64-bit integer) interpretation: exponent is zero.
600 reg.exponent := (others => '0');
-- True when any of the 64 bits of fpr is set: bit 63, bits 62..52 and
-- bits 51..0 together cover the whole word.
601 if (fpr(63) or exp_nz or frac_nz) = '1' then
610 -- Construct a DP floating-point result from components
-- pack_dp: assemble a 64-bit double-precision encoding.
--   negative    : sign of the value
--   class       : ZERO / FINITE / INFINITY / NAN
--   exp         : unbiased exponent
--   mantissa    : mantissa word, indexed with UNIT_BIT / SP_LSB / DP_LSB
--   single_prec : when '1', the low 29 result fraction bits are left zero
--   quieten_nan : forces the quiet bit on in a NaN result
--   negate      : inverts the final sign
--   is_subtract, round_mode : used to pick the sign in one special case
--   returns     : 64-bit result, sign in bit 63, exponent in 62..52
611 function pack_dp(negative: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
612 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic;
613 negate: std_ulogic; is_subtract: std_ulogic; round_mode: std_ulogic_vector)
614 return std_ulogic_vector is
615 variable dp_result : std_ulogic_vector(63 downto 0);
616 variable sign : std_ulogic;
-- Default every field to zero; only the fields relevant to the class
-- are filled in below.
618 dp_result := (others => '0');
-- NOTE(review): presumably reached only for a zero result, where the
-- sign of an exact-zero difference depends on the rounding mode; the
-- enclosing condition is not shown here, confirm.
622 if is_subtract = '1' then
623 -- set result sign depending on rounding mode
-- sign is 1 only when both low rounding-mode bits are set
624 sign := round_mode(0) and round_mode(1);
-- Only a mantissa with the unit bit set gets a biased exponent;
-- otherwise the exponent field keeps its zero default.
627 if mantissa(UNIT_BIT) = '1' then
-- Re-apply the bias of 1023 to the low 11 bits of the exponent.
629 dp_result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
-- Upper 23 fraction bits are always copied (mantissa bits 55..33).
631 dp_result(51 downto 29) := mantissa(UNIT_BIT - 1 downto SP_LSB);
632 if single_prec = '0' then
-- Lower 29 fraction bits (mantissa bits 32..4), double precision only.
633 dp_result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB);
-- Exponent field of all ones, assigned for two different classes
-- (presumably INFINITY here and NAN below -- confirm).
636 dp_result(62 downto 52) := "11111111111";
638 dp_result(62 downto 52) := "11111111111";
-- Top fraction bit is the quiet bit: set if requested or already set.
639 dp_result(51) := quieten_nan or mantissa(QNAN_BIT);
-- The rest of the fraction is carried over as payload.
640 dp_result(50 downto 29) := mantissa(QNAN_BIT - 1 downto SP_LSB);
641 if single_prec = '0' then
642 dp_result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB);
-- Final sign, optionally inverted by negate.
645 dp_result(63) := sign xor negate;
649 -- Determine whether to increment when rounding
650 -- Returns rounding_inc & inexact
651 -- If single_prec = 1, assumes x includes the bottom 31 (== SP_LSB - 2)
652 -- bits of the mantissa already (usually arranged by setting set_x = 1 earlier).
-- fp_rounding: rounding decision for a mantissa.
--   mantissa    : 64-bit mantissa word
--   x           : extra sticky input supplied by the caller
--   single_prec : selects the single- or double-precision bit positions
--   rn          : 3-bit rounding control; bits 1..0 select the case
--                 below, bit 2 is tested in the round-to-nearest tie check
--   returns     : 2-bit vector (see the "Returns" comment above the
--                 function for the field meaning)
653 function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
654 single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
656 return std_ulogic_vector is
-- grx: the two bits just below the result LSB, followed by a sticky bit
657 variable grx : std_ulogic_vector(2 downto 0);
658 variable ret : std_ulogic_vector(1 downto 0);
-- lsb: least significant bit that survives in the result
659 variable lsb : std_ulogic;
661 if single_prec = '0' then
-- Double precision: sticky is x ORed with every mantissa bit below
-- DP_RBIT.
662 grx := mantissa(DP_GBIT downto DP_RBIT) & (x or (or mantissa(DP_RBIT - 1 downto 0)));
663 lsb := mantissa(DP_LSB);
-- Single precision: the lower mantissa bits are not examined here, so x
-- must already summarise them (as the function header comment states).
665 grx := mantissa(SP_GBIT downto SP_RBIT) & x;
666 lsb := mantissa(SP_LSB);
670 case rn(1 downto 0) is
671 when "00" => -- round to nearest
-- "100" is exactly half an LSB with nothing below it: a tie.
672 if grx = "100" and rn(2) = '0' then
673 ret(1) := lsb; -- tie, round to even
677 when "01" => -- round towards zero
678 when others => -- round towards +/- inf
680 -- round towards greater magnitude
687 -- Determine result flags to write into the FPSCR
-- result_flags: build a 5-bit flag value from a result's properties.
--   sign    : sign of the result
--   class   : number class of the result
--   unitbit : unit bit of the result mantissa
--   returns : 5-bit std_ulogic_vector
-- NOTE(review): the class that selects each return below is not shown
-- here; the per-line remarks describe only the bit patterns. Presumably
-- the value feeds the FPSCR result-flags field -- confirm.
688 function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
689 return std_ulogic_vector is
-- sign in the top bit, fixed pattern "0010" below it
693 return sign & "0010";
-- top bit set when the unit bit is clear, then sign, not sign, "00"
695 return (not unitbit) & sign & (not sign) & "00";
-- top bit clear, then sign, not sign, "01"
697 return '0' & sign & (not sign) & "01";
704 fpu_multiply_0: entity work.multiply
707 m_in => f_to_multiply,
708 m_out => multiply_to_f
713 if rising_edge(clk) then
714 if rst = '1' or flush_in = '1' then
722 r.writing_fpr <= '0';
724 r.writing_xer <= '0';
725 r.fpscr <= (others => '0');
726 r.write_reg <= (others =>'0');
727 r.complete_tag.valid <= '0';
728 r.cr_mask <= (others =>'0');
729 r.cr_result <= (others =>'0');
730 r.instr_tag.valid <= '0';
732 r.fpscr <= (others => '0');
733 r.comm_fpscr <= (others => '0');
734 elsif r.do_intr = '0' then
735 -- flush_in = 1 and not due to us generating an interrupt,
736 -- roll back to committed fpscr
737 r.fpscr <= r.comm_fpscr;
740 assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
746 -- synchronous reads from lookup table
-- Clocked read of inverse_table into the inverse_est signal.
747 lut_access: process(clk)
-- addrhi: top two address bits, selecting one of the four 256-entry
-- blocks of the 1024-entry table
748 variable addrhi : std_ulogic_vector(1 downto 0);
-- addr: full 10-bit table index
749 variable addr : std_ulogic_vector(9 downto 0);
751 if rising_edge(clk) then
752 if r.is_sqrt = '1' then
-- Square-root case: the block is chosen by the two mantissa bits at
-- UNIT_BIT+1 and UNIT_BIT of operand B.
-- NOTE(review): the non-sqrt value of addrhi is not shown here;
-- presumably it selects the first (reciprocal) block -- confirm.
753 addrhi := r.b.mantissa(UNIT_BIT + 1 downto UNIT_BIT);
-- Low 8 address bits are the top 8 fraction bits of operand B.
757 addr := addrhi & r.b.mantissa(UNIT_BIT - 1 downto UNIT_BIT - 8);
-- All-'X' estimate.
-- NOTE(review): presumably only when addr contains metavalues, to
-- avoid an invalid to_integer conversion in simulation -- confirm.
759 inverse_est <= (others => 'X');
-- Table entries are 18 bits wide; prepend the always-one top bit that
-- the table does not store, giving the 19-bit estimate.
761 inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
766 e_out.busy <= r.busy;
767 e_out.f2stall <= r.f2stall;
768 e_out.exception <= r.fpscr(FPSCR_FEX);
770 -- Note that the cycle where r.complete = 1 for an instruction can be as
771 -- late as the second cycle of the following instruction (i.e. in the state
772 -- following IDLE state). Hence it is important that none of the fields of
773 -- r that are used below are modified in IDLE state.
774 w_out.valid <= r.complete;
775 w_out.instr_tag <= r.complete_tag;
776 w_out.write_enable <= r.writing_fpr and r.complete;
777 w_out.write_reg <= r.write_reg;
778 w_out.write_data <= fp_result;
779 w_out.write_cr_enable <= r.writing_cr and r.complete;
780 w_out.write_cr_mask <= r.cr_mask;
781 w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
782 r.cr_result & r.cr_result & r.cr_result & r.cr_result;
783 w_out.write_xerc <= r.writing_xer and r.complete;
784 w_out.xerc <= r.xerc_result;
785 w_out.interrupt <= r.do_intr;
786 w_out.intr_vec <= 16#700#;
787 w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0');
790 variable v : reg_type;
791 variable adec : fpu_reg_type;
792 variable bdec : fpu_reg_type;
793 variable cdec : fpu_reg_type;
794 variable fpscr_mask : std_ulogic_vector(31 downto 0);
795 variable j, k : integer;
796 variable flm : std_ulogic_vector(7 downto 0);
797 variable fpin_a : std_ulogic;
798 variable fpin_b : std_ulogic;
799 variable fpin_c : std_ulogic;
800 variable is_32bint : std_ulogic;
801 variable mask : std_ulogic_vector(63 downto 0);
802 variable in_a0 : std_ulogic_vector(63 downto 0);
803 variable in_b0 : std_ulogic_vector(63 downto 0);
804 variable misc : std_ulogic_vector(63 downto 0);
805 variable shift_res : std_ulogic_vector(63 downto 0);
806 variable round : std_ulogic_vector(1 downto 0);
807 variable update_fx : std_ulogic;
808 variable arith_done : std_ulogic;
809 variable invalid : std_ulogic;
810 variable zero_divide : std_ulogic;
811 variable mant_nz : std_ulogic;
812 variable min_exp : signed(EXP_BITS-1 downto 0);
813 variable max_exp : signed(EXP_BITS-1 downto 0);
814 variable bias_exp : signed(EXP_BITS-1 downto 0);
815 variable new_exp : signed(EXP_BITS-1 downto 0);
816 variable exp_tiny : std_ulogic;
817 variable exp_huge : std_ulogic;
818 variable clz : std_ulogic_vector(5 downto 0);
819 variable set_x : std_ulogic;
820 variable mshift : signed(EXP_BITS-1 downto 0);
821 variable need_check : std_ulogic;
822 variable msb : std_ulogic;
823 variable is_add : std_ulogic;
824 variable set_a : std_ulogic;
825 variable set_a_exp : std_ulogic;
826 variable set_a_mant : std_ulogic;
827 variable set_a_hi : std_ulogic;
828 variable set_a_lo : std_ulogic;
829 variable set_b : std_ulogic;
830 variable set_b_mant : std_ulogic;
831 variable set_c : std_ulogic;
832 variable set_y : std_ulogic;
833 variable set_s : std_ulogic;
834 variable qnan_result : std_ulogic;
835 variable px_nz : std_ulogic;
836 variable pcmpb_eq : std_ulogic;
837 variable pcmpb_lt : std_ulogic;
838 variable pcmpc_eq : std_ulogic;
839 variable pcmpc_lt : std_ulogic;
840 variable pshift : std_ulogic;
841 variable renorm_sqrt : std_ulogic;
842 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
843 variable shiftin : std_ulogic;
844 variable shiftin0 : std_ulogic;
845 variable mulexp : signed(EXP_BITS-1 downto 0);
846 variable maddend : std_ulogic_vector(127 downto 0);
847 variable sum : std_ulogic_vector(63 downto 0);
848 variable round_inc : std_ulogic_vector(63 downto 0);
849 variable rbit_inc : std_ulogic;
850 variable mult_mask : std_ulogic;
851 variable sign_bit : std_ulogic;
852 variable rnd_b32 : std_ulogic;
853 variable rexp_in1 : signed(EXP_BITS-1 downto 0);
854 variable rexp_in2 : signed(EXP_BITS-1 downto 0);
855 variable rexp_cin : std_ulogic;
856 variable rexp_sum : signed(EXP_BITS-1 downto 0);
857 variable rsh_in1 : signed(EXP_BITS-1 downto 0);
858 variable rsh_in2 : signed(EXP_BITS-1 downto 0);
859 variable exec_state : state_t;
860 variable opcbits : std_ulogic_vector(4 downto 0);
861 variable int_result : std_ulogic;
862 variable illegal : std_ulogic;
870 if r.complete = '1' or r.do_intr = '1' then
872 v.writing_fpr := '0';
874 v.writing_xer := '0';
875 v.comm_fpscr := r.fpscr;
879 -- capture incoming instruction
880 if e_in.valid = '1' then
883 v.instr_tag := e_in.itag;
884 v.fe_mode := or (e_in.fe_mode);
885 v.dest_fpr := e_in.frt;
886 v.single_prec := e_in.single;
887 v.is_signed := e_in.is_signed;
890 v.is_cmp := e_in.out_cr;
899 v.is_multiply := '0';
903 v.use_a := e_in.valid_a;
904 v.use_b := e_in.valid_b;
905 v.use_c := e_in.valid_c;
906 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
909 fpin_a := e_in.valid_a;
910 fpin_b := e_in.valid_b;
911 fpin_c := e_in.valid_c;
912 v.longmask := e_in.single;
914 exec_state := arith_decode(to_integer(unsigned(e_in.insn(5 downto 1))));
915 if e_in.insn(5 downto 1) = "11001" or e_in.insn(5 downto 3) = "111" then
916 v.is_multiply := '1';
918 if e_in.insn(5 downto 1) = "10110" or e_in.insn(5 downto 1) = "11010" then
921 if e_in.insn(5 downto 1) = "01111" then
922 v.round_mode := "001";
925 fpin_a := e_in.valid_a;
926 fpin_b := e_in.valid_b;
927 exec_state := cmp_decode(to_integer(unsigned(e_in.insn(8 downto 6))));
930 opcbits := e_in.insn(10) & e_in.insn(8) & e_in.insn(4) & e_in.insn(2) & e_in.insn(1);
931 exec_state := misc_decode(to_integer(unsigned(opcbits)));
934 fpin_a := e_in.valid_a;
935 fpin_b := e_in.valid_b;
936 fpin_c := e_in.valid_c;
937 if e_in.insn(5) = '0' then
938 exec_state := DO_FMR;
940 exec_state := DO_FSEL;
944 is_32bint := e_in.single;
945 exec_state := DO_IDIVMOD;
949 is_32bint := e_in.single;
950 exec_state := DO_IDIVMOD;
954 is_32bint := e_in.single;
955 exec_state := DO_IDIVMOD;
957 exec_state := DO_ILLEGAL;
959 v.quieten_nan := '1';
962 v.is_subtract := '0';
964 v.doing_ftdiv := "00";
968 adec := decode_dp(e_in.fra, fpin_a, is_32bint, e_in.is_signed);
969 bdec := decode_dp(e_in.frb, fpin_b, is_32bint, e_in.is_signed);
970 cdec := decode_dp(e_in.frc, fpin_c, '0', '0');
976 if adec.exponent > bdec.exponent then
980 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
988 r_hi_nz <= or (r.r(UNIT_BIT + 1 downto SP_LSB));
989 r_lo_nz <= or (r.r(SP_LSB - 1 downto DP_LSB));
990 r_gt_1 <= or (r.r(63 downto 1));
993 if r.single_prec = '0' then
994 if r.doing_ftdiv(1) = '0' then
995 max_exp := to_signed(1023, EXP_BITS);
997 max_exp := to_signed(1020, EXP_BITS);
999 if r.doing_ftdiv(0) = '0' then
1000 min_exp := to_signed(-1022, EXP_BITS);
1002 min_exp := to_signed(-1021, EXP_BITS);
1004 bias_exp := to_signed(1536, EXP_BITS);
1006 max_exp := to_signed(127, EXP_BITS);
1007 min_exp := to_signed(-126, EXP_BITS);
1008 bias_exp := to_signed(192, EXP_BITS);
1010 new_exp := r.result_exp - r.shift;
1013 if is_X(new_exp) or is_X(min_exp) then
1015 elsif new_exp < min_exp then
1018 if is_X(new_exp) or is_X(min_exp) then
1020 elsif new_exp > max_exp then
1024 -- Compare P with zero and with B
1025 px_nz := or (r.p(UNIT_BIT + 1 downto 4));
1027 if r.p(59 downto 4) = r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT) then
1031 if is_X(r.p(59 downto 4)) or is_X(r.b.mantissa(55 downto 0)) then
1033 elsif unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT)) then
1037 if r.p = r.c.mantissa then
1041 if is_X(r.p) or is_X(r.c.mantissa) then
1043 elsif unsigned(r.p) < unsigned(r.c.mantissa) then
1047 v.update_fprf := '0';
1052 opsel_b <= BIN_ZERO;
1058 fpscr_mask := (others => '1');
1074 f_to_multiply.is_signed <= '0';
1075 f_to_multiply.valid <= '0';
1078 msel_add <= MULADD_ZERO;
1091 re_sel1 <= REXP1_ZERO;
1092 re_sel2 <= REXP2_CON;
1093 re_con2 <= RECON2_ZERO;
1096 re_set_result <= '0';
1097 rs_sel1 <= RSH1_ZERO;
1098 rs_sel2 <= RSH2_CON;
1099 rs_con2 <= RSCON2_ZERO;
1108 if e_in.valid = '1' then
1111 if e_in.op = OP_FP_ARITH and e_in.valid_a = '1' and
1112 (e_in.valid_b = '0' or e_in.valid_c = '0') then
1115 if e_in.op = OP_FP_ARITH then
1116 -- input selection for denorm cases
1117 case e_in.insn(5 downto 1) is
1118 when "10010" => -- fdiv
1119 if v.b.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then
1122 when "11001" => -- fmul
1123 if v.c.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then
1126 when "11100" | "11101" | "11110" | "11111" => -- fmadd etc.
1127 if v.a.mantissa(UNIT_BIT) = '0' then
1129 elsif v.c.mantissa(UNIT_BIT) = '0' then
1135 v.state := exec_state;
1138 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
1143 v.instr_done := '1';
1146 j := to_integer(unsigned(insn_bfa(r.insn)));
1147 for i in 0 to 7 loop
1150 v.cr_result := r.fpscr(k + 3 downto k);
1151 fpscr_mask(k + 3 downto k) := "0000";
1154 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
1155 v.instr_done := '1';
1158 v.instr_done := '1';
1159 v.cr_result := "0000";
1160 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
1161 (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then
1162 v.cr_result(2) := '1';
1164 if r.a.class = NAN or r.a.class = INFINITY or
1165 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
1166 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
1167 v.cr_result(1) := '1';
1169 v.doing_ftdiv := "11";
1172 v.instr_done := '0';
1176 v.instr_done := '1';
1177 v.cr_result := "0000";
1178 if r.b.class = ZERO or r.b.class = INFINITY or
1179 (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then
1180 v.cr_result(2) := '1';
1182 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
1183 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
1184 v.cr_result(1) := '0';
1189 -- r.opsel_a = AIN_B
1190 v.instr_done := '1';
1193 re_set_result <= '1';
1194 if (r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
1195 (r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') then
1197 v.fpscr(FPSCR_VXSNAN) := '1';
1198 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
1199 v.fpscr(FPSCR_VXVC) := '1';
1202 v.cr_result := "0001"; -- unordered
1203 elsif r.a.class = NAN or r.b.class = NAN then
1204 if r.insn(6) = '1' then
1206 v.fpscr(FPSCR_VXVC) := '1';
1209 v.cr_result := "0001"; -- unordered
1210 elsif r.a.class = ZERO and r.b.class = ZERO then
1211 v.cr_result := "0010"; -- equal
1212 elsif r.a.negative /= r.b.negative then
1213 v.cr_result := r.a.negative & r.b.negative & "00";
1214 elsif r.a.class = ZERO then
1215 -- A and B are the same sign from here down
1216 v.cr_result := not r.b.negative & r.b.negative & "00";
1217 elsif r.a.class = INFINITY then
1218 if r.b.class = INFINITY then
1219 v.cr_result := "0010";
1221 v.cr_result := r.a.negative & not r.a.negative & "00";
1223 elsif r.b.class = ZERO then
1224 -- A is finite from here down
1225 v.cr_result := r.a.negative & not r.a.negative & "00";
1226 elsif r.b.class = INFINITY then
1227 v.cr_result := not r.b.negative & r.b.negative & "00";
1228 elsif r.exp_cmp = '1' then
1229 -- A and B are both finite from here down
1230 v.cr_result := r.a.negative & not r.a.negative & "00";
1231 elsif r.a.exponent /= r.b.exponent then
1232 -- A exponent is smaller than B
1233 v.cr_result := not r.a.negative & r.a.negative & "00";
1235 -- Prepare to subtract mantissas, put B in R
1236 v.cr_result := "0000";
1237 v.instr_done := '0';
1241 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1245 j := to_integer(unsigned(insn_bt(r.insn)));
1246 for i in 0 to 31 loop
1248 v.fpscr(31 - i) := r.insn(6);
1251 v.instr_done := '1';
1255 j := to_integer(unsigned(insn_bf(r.insn)));
1256 if r.insn(16) = '0' then
1257 for i in 0 to 7 loop
1260 v.fpscr(k + 3 downto k) := insn_u(r.insn);
1264 v.instr_done := '1';
1268 opsel_r <= RES_MISC;
1269 misc_sel <= "01" & r.insn(8) & '0';
1271 v.writing_fpr := '1';
1272 v.instr_done := '1';
1275 v.writing_fpr := '1';
1276 opsel_r <= RES_MISC;
1277 case r.insn(20 downto 16) is
1282 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1283 when "10100" | "10101" =>
1284 -- mffscdrn[i] (but we don't implement DRN)
1285 fpscr_mask := x"000000FF";
1288 fpscr_mask := x"000000FF";
1289 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1290 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1293 fpscr_mask := x"000000FF";
1294 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1297 fpscr_mask := x"0007F0FF";
1300 v.writing_fpr := '0';
1303 v.instr_done := '1';
1306 if r.insn(25) = '1' then
1308 elsif r.insn(16) = '1' then
1311 flm := r.insn(24 downto 17);
1313 for i in 0 to 7 loop
1315 if flm(i) = '1' then
1316 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1319 v.instr_done := '1';
1322 -- r.opsel_a = AIN_B
1323 v.result_class := r.b.class;
1325 re_set_result <= '1';
1326 v.quieten_nan := '0';
1327 if r.insn(9) = '1' then
1328 v.result_sign := '0'; -- fabs
1329 elsif r.insn(8) = '1' then
1330 v.result_sign := '1'; -- fnabs
1331 elsif r.insn(7) = '1' then
1332 v.result_sign := r.b.negative; -- fmr
1333 elsif r.insn(6) = '1' then
1334 v.result_sign := not r.b.negative; -- fneg
1336 v.result_sign := r.a.negative; -- fcpsgn
1338 v.writing_fpr := '1';
1339 v.instr_done := '1';
1341 when DO_FRI => -- fri[nzpm]
1342 -- r.opsel_a = AIN_B
1343 v.result_class := r.b.class;
1344 v.result_sign := r.b.negative;
1346 re_set_result <= '1';
1347 -- set shift to exponent - 52
1349 rs_con2 <= RSCON2_52;
1351 v.fpscr(FPSCR_FR) := '0';
1352 v.fpscr(FPSCR_FI) := '0';
1353 if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then
1355 v.fpscr(FPSCR_VXSNAN) := '1';
1358 if r.b.class = FINITE then
1359 if r.b.exponent >= to_signed(52, EXP_BITS) then
1360 -- integer already, no rounding required
1364 v.round_mode := '1' & r.insn(7 downto 6);
1371 -- r.opsel_a = AIN_B, r.shift = 0
1372 v.result_class := r.b.class;
1373 v.result_sign := r.b.negative;
1375 re_set_result <= '1';
1376 -- set shift to exponent - -126
1378 rs_con2 <= RSCON2_MINEXP;
1380 v.fpscr(FPSCR_FR) := '0';
1381 v.fpscr(FPSCR_FI) := '0';
1382 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1384 v.fpscr(FPSCR_VXSNAN) := '1';
1388 if r.b.class = FINITE then
1389 if r.b.exponent < to_signed(-126, EXP_BITS) then
1390 v.state := ROUND_UFLOW;
1391 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1392 v.state := ROUND_OFLOW;
1394 v.state := ROUNDING;
1401 -- instr bit 9: 1=dword 0=word
1402 -- instr bit 8: 1=unsigned 0=signed
1403 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1404 -- r.opsel_a = AIN_B
1405 v.result_class := r.b.class;
1406 v.result_sign := r.b.negative;
1408 re_set_result <= '1';
1411 v.fpscr(FPSCR_FR) := '0';
1412 v.fpscr(FPSCR_FI) := '0';
1413 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1415 v.fpscr(FPSCR_VXSNAN) := '1';
1425 if r.b.exponent >= to_signed(64, EXP_BITS) or
1426 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1427 v.state := INT_OFLOW;
1428 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1429 -- integer already, no rounding required,
1430 -- shift into final position
1431 -- set shift to exponent - 56
1432 rs_con2 <= RSCON2_UNIT;
1433 if r.insn(8) = '1' and r.b.negative = '1' then
1434 v.state := INT_OFLOW;
1436 v.state := INT_ISHIFT;
1439 -- set shift to exponent - 52
1440 rs_con2 <= RSCON2_52;
1441 v.state := INT_SHIFT;
1443 when INFINITY | NAN =>
1444 v.state := INT_OFLOW;
1448 -- r.opsel_a = AIN_B
1449 v.result_sign := '0';
1450 if r.insn(8) = '0' and r.b.negative = '1' then
1451 -- fcfid[s] with negative operand, set R = -B
1454 v.result_sign := '1';
1456 v.result_class := r.b.class;
1457 re_con2 <= RECON2_UNIT;
1458 re_set_result <= '1';
1459 v.fpscr(FPSCR_FR) := '0';
1460 v.fpscr(FPSCR_FI) := '0';
1461 if r.b.class = ZERO then
1468 -- fadd[s] and fsub[s]
1469 -- r.opsel_a = AIN_A
1470 v.result_sign := r.a.negative;
1471 v.result_class := r.a.class;
1473 re_set_result <= '1';
1474 -- set shift to a.exp - b.exp
1478 v.fpscr(FPSCR_FR) := '0';
1479 v.fpscr(FPSCR_FI) := '0';
1480 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1481 v.is_subtract := not is_add;
1482 if r.a.class = FINITE and r.b.class = FINITE then
1483 v.add_bsmall := r.exp_cmp;
1485 if r.exp_cmp = '0' then
1486 v.result_sign := r.b.negative xnor r.insn(1);
1487 if r.a.exponent = r.b.exponent then
1491 v.state := ADD_SHIFT;
1497 if r.a.class = NAN or r.b.class = NAN then
1498 v.state := NAN_RESULT;
1499 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1500 -- invalid operation, construct QNaN
1501 v.fpscr(FPSCR_VXISI) := '1';
1504 elsif r.a.class = INFINITY or r.b.class = ZERO then
1505 -- result is A; we're already set up to put A into R
1510 v.result_sign := r.b.negative xnor r.insn(1);
1511 v.state := EXC_RESULT;
1517 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1518 v.result_sign := r.a.negative xor r.c.negative;
1519 v.result_class := r.a.class;
1520 v.fpscr(FPSCR_FR) := '0';
1521 v.fpscr(FPSCR_FI) := '0';
1524 re_set_result <= '1';
1525 if r.a.class = FINITE and r.c.class = FINITE then
1526 -- Renormalize denorm operands
1527 if r.a.mantissa(UNIT_BIT) = '0' then
1528 v.state := RENORM_A;
1529 elsif r.c.mantissa(UNIT_BIT) = '0' then
1530 v.state := RENORM_C;
1532 f_to_multiply.valid <= '1';
1536 if r.a.class = NAN or r.c.class = NAN then
1537 v.state := NAN_RESULT;
1538 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1539 (r.a.class = ZERO and r.c.class = INFINITY) then
1540 -- invalid operation, construct QNaN
1541 v.fpscr(FPSCR_VXIMZ) := '1';
1543 elsif r.a.class = ZERO or r.a.class = INFINITY then
1547 -- r.c.class is ZERO or INFINITY
1549 v.state := EXC_RESULT;
1554 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1555 v.result_class := r.a.class;
1556 v.fpscr(FPSCR_FR) := '0';
1557 v.fpscr(FPSCR_FI) := '0';
1558 v.result_sign := r.a.negative xor r.b.negative;
1562 re_set_result <= '1';
1564 if r.a.class = FINITE and r.b.class = FINITE then
1565 -- Renormalize denorm operands
1566 if r.a.mantissa(UNIT_BIT) = '0' then
1567 v.state := RENORM_A;
1568 elsif r.b.mantissa(UNIT_BIT) = '0' then
1569 v.state := RENORM_B;
1575 if r.a.class = NAN or r.b.class = NAN then
1576 v.state := NAN_RESULT;
1577 elsif r.b.class = INFINITY then
1578 if r.a.class = INFINITY then
1579 v.fpscr(FPSCR_VXIDI) := '1';
1582 v.result_class := ZERO;
1585 elsif r.b.class = ZERO then
1586 if r.a.class = ZERO then
1587 v.fpscr(FPSCR_VXZDZ) := '1';
1590 if r.a.class = FINITE then
1593 v.result_class := INFINITY;
1596 else -- r.b.class = FINITE, result_class = r.a.class
1602 v.fpscr(FPSCR_FR) := '0';
1603 v.fpscr(FPSCR_FI) := '0';
1604 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1606 v.result_sign := r.c.negative;
1609 v.result_sign := r.b.negative;
1611 v.quieten_nan := '0';
1612 v.state := EXC_RESULT;
1615 -- r.opsel_a = AIN_B
1616 v.result_class := r.b.class;
1617 v.result_sign := r.b.negative;
1618 v.fpscr(FPSCR_FR) := '0';
1619 v.fpscr(FPSCR_FI) := '0';
1621 re_set_result <= '1';
1624 if r.b.negative = '1' then
1625 v.fpscr(FPSCR_VXSQRT) := '1';
1627 elsif r.b.mantissa(UNIT_BIT) = '0' then
1628 v.state := RENORM_B;
1629 elsif r.b.exponent(0) = '0' then
1633 rs_con2 <= RSCON2_1;
1634 v.state := RENORM_B2;
1637 v.state := NAN_RESULT;
1642 if r.b.negative = '1' then
1643 v.fpscr(FPSCR_VXSQRT) := '1';
1651 -- r.opsel_a = AIN_B
1652 v.result_class := r.b.class;
1653 v.result_sign := r.b.negative;
1654 v.fpscr(FPSCR_FR) := '0';
1655 v.fpscr(FPSCR_FI) := '0';
1657 re_set_result <= '1';
1660 if r.b.mantissa(UNIT_BIT) = '0' then
1661 v.state := RENORM_B;
1666 v.state := NAN_RESULT;
1668 v.result_class := ZERO;
1671 v.result_class := INFINITY;
1677 -- r.opsel_a = AIN_B
1678 v.result_class := r.b.class;
1679 v.result_sign := r.b.negative;
1680 v.fpscr(FPSCR_FR) := '0';
1681 v.fpscr(FPSCR_FI) := '0';
1683 re_set_result <= '1';
1685 rs_con2 <= RSCON2_1;
1688 if r.b.negative = '1' then
1689 v.fpscr(FPSCR_VXSQRT) := '1';
1691 elsif r.b.mantissa(UNIT_BIT) = '0' then
1692 v.state := RENORM_B;
1693 elsif r.b.exponent(0) = '0' then
1696 v.state := RENORM_B2;
1699 v.state := NAN_RESULT;
1701 if r.b.negative = '1' then
1702 v.fpscr(FPSCR_VXSQRT) := '1';
1705 v.result_class := ZERO;
1709 v.result_class := INFINITY;
1715 -- fmadd, fmsub, fnmadd, fnmsub
1716 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1718 v.result_sign := r.a.negative;
1719 v.result_class := r.a.class;
1720 -- put a.exp + c.exp into result_exp
1723 re_set_result <= '1';
1724 -- put b.exp into shift
1726 v.fpscr(FPSCR_FR) := '0';
1727 v.fpscr(FPSCR_FI) := '0';
1728 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1729 v.negate := r.insn(2);
1730 v.is_subtract := not is_add;
1731 if r.a.class = FINITE and r.c.class = FINITE and
1732 (r.b.class = FINITE or r.b.class = ZERO) then
1733 -- Make sure A and C are normalized
1734 if r.a.mantissa(UNIT_BIT) = '0' then
1735 v.state := RENORM_A;
1736 elsif r.c.mantissa(UNIT_BIT) = '0' then
1737 v.state := RENORM_C;
1738 elsif r.b.class = ZERO then
1739 -- no addend, degenerates to multiply
1740 v.result_sign := r.a.negative xor r.c.negative;
1741 f_to_multiply.valid <= '1';
1742 v.is_multiply := '1';
1744 elsif r.madd_cmp = '0' then
1745 -- addend is bigger, do multiply first
1746 v.result_sign := r.b.negative xnor r.insn(1);
1747 f_to_multiply.valid <= '1';
1751 -- product is bigger, shift B first
1755 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1756 v.state := NAN_RESULT;
1757 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1758 (r.a.class = INFINITY and r.c.class = ZERO) then
1759 -- invalid operation, construct QNaN
1760 v.fpscr(FPSCR_VXIMZ) := '1';
1762 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1763 if r.b.class = INFINITY and is_add = '0' then
1764 -- invalid operation, construct QNaN
1765 v.fpscr(FPSCR_VXISI) := '1';
1768 -- result is infinity
1769 v.result_class := INFINITY;
1770 v.result_sign := r.a.negative xor r.c.negative;
1774 -- Here A is zero, C is zero, or B is infinity
1775 -- Result is +/-B in all of those cases
1777 v.result_sign := r.b.negative xnor r.insn(1);
1778 v.state := EXC_RESULT;
1784 v.state := RENORM_A2;
1785 if r.use_c = '1' and r.c.denorm = '1' then
1792 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1794 re_sel2 <= REXP2_NE;
1795 re_set_result <= '1';
1796 if r.insn(4) = '1' then
1797 if r.c.mantissa(UNIT_BIT) = '1' then
1798 if r.insn(3) = '0' or r.b.class = ZERO then
1803 if new_exp + 1 >= r.b.exponent then
1807 v.state := DO_FMADD;
1810 v.state := RENORM_C;
1813 if r.b.mantissa(UNIT_BIT) = '1' then
1817 v.state := RENORM_B;
1823 renorm_sqrt := r.is_sqrt;
1824 v.state := RENORM_B2;
1828 re_sel2 <= REXP2_NE;
1829 re_set_result <= '1';
1835 v.state := RENORM_C2;
1839 re_sel2 <= REXP2_NE;
1840 re_set_result <= '1';
1841 if r.insn(3) = '0' or r.b.class = ZERO then
1846 if new_exp + 1 >= r.b.exponent then
1850 v.state := DO_FMADD;
1854 -- transferring B to R
1856 re_set_result <= '1';
1857 -- set shift to b.exp - a.exp
1862 v.state := ADD_SHIFT;
1865 -- r.shift = - exponent difference, r.longmask = 0
1866 opsel_r <= RES_SHIFT;
1867 re_sel2 <= REXP2_NE;
1868 re_set_result <= '1';
1871 v.longmask := r.single_prec;
1872 if r.add_bsmall = '1' then
1880 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1882 opsel_binv <= r.is_subtract;
1883 carry_in <= r.is_subtract and not r.x;
1885 rs_con2 <= RSCON2_1;
1890 -- check for overflow or negative result (can't get both)
1892 re_sel2 <= REXP2_NE;
1893 if r.r(63) = '1' then
1894 -- result is opposite sign to expected
1895 v.result_sign := not r.result_sign;
1899 elsif r.r(UNIT_BIT + 1) = '1' then
1900 -- sum overflowed, shift right
1901 opsel_r <= RES_SHIFT;
1902 re_set_result <= '1';
1904 if exp_huge = '1' then
1905 v.state := ROUND_OFLOW;
1907 v.state := ROUNDING;
1909 elsif r.r(UNIT_BIT) = '1' then
1911 v.state := ROUNDING;
1912 elsif (r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
1913 -- r.x must be zero at this point
1914 v.result_class := ZERO;
1918 v.state := NORMALIZE;
1922 -- r.opsel_a = AIN_A
1929 if r.r(63) = '1' then
1930 -- A is smaller in magnitude
1931 v.cr_result := not r.a.negative & r.a.negative & "00";
1932 elsif (r_hi_nz or r_lo_nz) = '0' then
1933 v.cr_result := "0010";
1935 v.cr_result := r.a.negative & not r.a.negative & "00";
1937 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1938 v.instr_done := '1';
1941 f_to_multiply.valid <= r.first;
1942 opsel_r <= RES_MULT;
1943 if multiply_to_f.valid = '1' then
1948 -- r.shift is b.exp, so new_exp is a.exp + c.exp - b.exp
1949 -- (first time through; subsequent times we preserve v.shift)
1950 -- Addend is bigger here
1951 -- set shift to a.exp + c.exp - b.exp
1952 -- note v.shift is at most -2 here
1953 if r.first = '1' then
1958 opsel_r <= RES_MULT;
1961 if multiply_to_f.valid = '1' then
1963 v.state := ADD_SHIFT;
1967 -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp
1968 -- product is bigger here
1969 -- shift B right and use it as the addend to the multiplier
1970 -- for subtract, multiplier does B - A * C
1971 v.result_sign := r.a.negative xor r.c.negative xor r.is_subtract;
1973 re_set_result <= '1';
1974 -- set shift to b.exp - result_exp + 64
1977 rs_con2 <= RSCON2_64;
1981 -- Product is potentially bigger here
1982 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1985 -- set shift to r.shift - 64
1987 rs_con2 <= RSCON2_64;
1992 -- r.shift = addend exp - product exp
1993 opsel_r <= RES_SHIFT;
1994 re_sel2 <= REXP2_NE;
1995 re_set_result <= '1';
2000 msel_add <= MULADD_RS;
2001 f_to_multiply.valid <= r.first;
2002 msel_inv <= r.is_subtract;
2003 opsel_r <= RES_MULT;
2006 if multiply_to_f.valid = '1' then
2011 -- negate R:S:X if negative
2012 if r.r(63) = '1' then
2013 v.result_sign := not r.result_sign;
2015 carry_in <= not (s_nz or r.x);
2019 -- set shift to UNIT_BIT
2020 rs_con2 <= RSCON2_UNIT;
2024 -- r.shift = UNIT_BIT (or 0, but only if r is now nonzero)
2025 re_sel2 <= REXP2_NE;
2027 if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
2029 -- must be a subtraction, and r.x must be zero
2030 v.result_class := ZERO;
2033 -- R is all zeroes but there are non-zero bits in S
2034 -- so shift them into R and set S to 0
2035 opsel_r <= RES_SHIFT;
2036 re_set_result <= '1';
2040 elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then
2043 v.state := NORMALIZE;
2047 -- r.opsel_a = AIN_B
2048 -- wait one cycle for inverse_table[B] lookup
2050 if r.insn(4) = '0' then
2051 if r.insn(3) = '0' then
2056 elsif r.insn(2) = '0' then
2063 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
2065 msel_add <= MULADD_CONST;
2074 f_to_multiply.valid <= r.first;
2075 if multiply_to_f.valid = '1' then
2077 v.count := r.count + 1;
2082 -- compute Y = P = P * Y
2085 f_to_multiply.valid <= r.first;
2087 if multiply_to_f.valid = '1' then
2097 -- compute R = P = A * Y (quotient)
2101 f_to_multiply.valid <= r.first;
2104 if multiply_to_f.valid = '1' then
2105 opsel_r <= RES_MULT;
2111 -- compute P = A - B * R (remainder)
2114 msel_add <= MULADD_A;
2116 f_to_multiply.valid <= r.first;
2117 if multiply_to_f.valid = '1' then
2122 -- r.opsel_a = AIN_R
2123 -- test if remainder is 0 or >= B
2124 if pcmpb_lt = '1' then
2125 -- quotient is correct, set X if remainder non-zero
2126 v.x := r.p(UNIT_BIT + 2) or px_nz;
2128 -- quotient needs to be incremented by 1 in R-bit position
2131 v.x := not pcmpb_eq;
2138 re_set_result <= '1';
2139 opsel_r <= RES_MISC;
2142 rs_con2 <= RSCON2_1;
2143 v.state := NORMALIZE;
2146 v.cr_result(1) := exp_tiny or exp_huge;
2147 -- set shift to a.exp
2149 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
2150 v.instr_done := '1';
2152 v.doing_ftdiv := "10";
2156 opsel_r <= RES_MISC;
2158 re_sel1 <= REXP1_BHALF;
2160 re_set_result <= '1';
2162 rs_con2 <= RSCON2_1;
2163 v.state := NORMALIZE;
2166 -- put invsqr[B] in R and compute P = invsqr[B] * B
2167 -- also transfer B (in R) to A
2169 opsel_r <= RES_MISC;
2173 f_to_multiply.valid <= '1';
2175 rs_con2 <= RSCON2_1;
2181 -- shift R right one place
2182 -- not expecting multiplier result yet
2184 opsel_r <= RES_SHIFT;
2185 re_sel2 <= REXP2_NE;
2186 re_set_result <= '1';
2191 -- put R into Y, wait for product from multiplier
2196 if multiply_to_f.valid = '1' then
2197 -- put result into R
2198 opsel_r <= RES_MULT;
2204 -- compute 1.5 - Y * P
2207 msel_add <= MULADD_CONST;
2209 f_to_multiply.valid <= r.first;
2211 if multiply_to_f.valid = '1' then
2216 -- compute Y = Y * P
2219 f_to_multiply.valid <= '1';
2224 -- pipeline in R = R * P
2227 f_to_multiply.valid <= r.first;
2229 if multiply_to_f.valid = '1' then
2235 -- first multiply is done, put result in Y
2238 -- wait for second multiply (should be here already)
2241 if multiply_to_f.valid = '1' then
2242 -- put result into R
2243 opsel_r <= RES_MULT;
2245 v.count := r.count + 1;
2255 -- compute P = A - R * R, which can be +ve or -ve
2256 -- we arranged for B to be put into A earlier
2259 msel_add <= MULADD_A;
2262 f_to_multiply.valid <= r.first;
2263 if multiply_to_f.valid = '1' then
2269 -- compute P = P * Y
2270 -- since Y is an estimate of 1/sqrt(B), this makes P an
2271 -- estimate of the adjustment needed to R. Since the error
2272 -- could be negative and we have an unsigned multiplier, the
2273 -- upper bits can be wrong, but it turns out the lowest 8 bits
2274 -- are correct and are all we need (given 3 iterations through
2275 -- SQRT_4 to SQRT_7).
2279 f_to_multiply.valid <= r.first;
2280 if multiply_to_f.valid = '1' then
2285 -- Add the bottom 8 bits of P, sign-extended, onto R.
2287 re_sel1 <= REXP1_BHALF;
2288 re_set_result <= '1';
2290 rs_con2 <= RSCON2_1;
2295 -- compute P = A - R * R (remainder)
2296 -- also put 2 * R + 1 into B for comparison with P
2299 msel_add <= MULADD_A;
2301 f_to_multiply.valid <= r.first;
2304 if multiply_to_f.valid = '1' then
2309 -- test if remainder is 0 or >= B = 2*R + 1
2310 if pcmpb_lt = '1' then
2311 -- square root is correct, set X if remainder non-zero
2312 v.x := r.p(UNIT_BIT + 2) or px_nz;
2314 -- square root needs to be incremented by 1
2316 v.x := not pcmpb_eq;
2321 -- r.shift = b.exponent - 52
2322 opsel_r <= RES_SHIFT;
2323 re_sel2 <= REXP2_NE;
2324 re_set_result <= '1';
2326 v.state := INT_ROUND;
2327 -- set shift to -4 (== 52 - UNIT_BIT)
2328 rs_con2 <= RSCON2_UNIT_52;
2332 -- r.shift = -4 (== 52 - UNIT_BIT)
2333 opsel_r <= RES_SHIFT;
2334 re_sel2 <= REXP2_NE;
2335 re_set_result <= '1';
2336 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2337 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2338 -- Check for negative values that don't round to 0 for fcti*u*
2339 if r.insn(8) = '1' and r.result_sign = '1' and
2340 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2341 v.state := INT_OFLOW;
2343 v.state := INT_FINAL;
2347 -- r.shift = b.exponent - UNIT_BIT;
2348 opsel_r <= RES_SHIFT;
2349 re_sel2 <= REXP2_NE;
2350 re_set_result <= '1';
2351 v.state := INT_FINAL;
2354 -- Negate if necessary, and increment for rounding if needed
2355 opsel_ainv <= r.result_sign;
2356 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2357 -- Check for possible overflows
2358 case r.insn(9 downto 8) is
2359 when "00" => -- fctiw[z]
2360 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2361 when "01" => -- fctiwu[z]
2362 need_check := r.r(31);
2363 when "10" => -- fctid[z]
2364 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2365 when others => -- fctidu[z]
2366 need_check := r.r(63);
2369 if need_check = '1' then
2370 v.state := INT_CHECK;
2372 if r.fpscr(FPSCR_FI) = '1' then
2373 v.fpscr(FPSCR_XX) := '1';
2379 if r.insn(9) = '0' then
2384 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2385 if (r.insn(8) = '0' and msb /= r.result_sign) or
2386 (r.insn(8) = '1' and msb /= '1') then
2387 opsel_r <= RES_MISC;
2388 v.fpscr(FPSCR_VXCVI) := '1';
2391 if r.fpscr(FPSCR_FI) = '1' then
2392 v.fpscr(FPSCR_XX) := '1';
2399 opsel_r <= RES_MISC;
2400 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2401 if r.b.class = NAN then
2404 v.fpscr(FPSCR_VXCVI) := '1';
2410 -- r.shift = b.exponent - 52
2411 opsel_r <= RES_SHIFT;
2412 re_sel2 <= REXP2_NE;
2413 re_set_result <= '1';
2415 v.state := ROUNDING;
2418 if r.is_multiply = '1' and px_nz = '1' then
2421 -- set shift to new_exp - min_exp (N.B. rs_norm overrides this)
2423 rs_con2 <= RSCON2_MINEXP;
2425 if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then
2427 v.state := NORMALIZE;
2430 if exp_tiny = '1' then
2431 v.state := ROUND_UFLOW;
2432 elsif exp_huge = '1' then
2433 v.state := ROUND_OFLOW;
2435 v.state := ROUNDING;
2440 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2441 -- r.shift = clz(r.r) - 7
2442 opsel_r <= RES_SHIFT;
2443 re_sel2 <= REXP2_NE;
2444 re_set_result <= '1';
2445 -- set shift to new_exp - min_exp
2447 rs_con2 <= RSCON2_MINEXP;
2450 if exp_tiny = '1' then
2451 v.state := ROUND_UFLOW;
2452 elsif exp_huge = '1' then
2453 v.state := ROUND_OFLOW;
2455 v.state := ROUNDING;
2459 -- r.shift = - amount by which exponent underflows
2461 if r.fpscr(FPSCR_UE) = '0' then
2462 -- disabled underflow exception case
2463 -- have to denormalize before rounding
2464 opsel_r <= RES_SHIFT;
2465 re_sel2 <= REXP2_NE;
2466 re_set_result <= '1';
2468 v.state := ROUNDING;
2470 -- enabled underflow exception case
2471 -- if denormalized, have to normalize before rounding
2472 v.fpscr(FPSCR_UX) := '1';
2474 re_con2 <= RECON2_BIAS;
2475 re_set_result <= '1';
2476 if r.r(UNIT_BIT) = '0' then
2478 v.state := NORMALIZE;
2480 v.state := ROUNDING;
2485 v.fpscr(FPSCR_OX) := '1';
2486 if r.fpscr(FPSCR_OE) = '0' then
2487 -- disabled overflow exception
2488 -- result depends on rounding mode
2489 v.fpscr(FPSCR_XX) := '1';
2490 v.fpscr(FPSCR_FI) := '1';
2491 if r.round_mode(1 downto 0) = "00" or
2492 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2493 v.result_class := INFINITY;
2494 v.fpscr(FPSCR_FR) := '1';
2496 v.fpscr(FPSCR_FR) := '0';
2498 -- construct largest representable number
2499 re_con2 <= RECON2_MAX;
2500 re_set_result <= '1';
2501 opsel_r <= RES_MISC;
2502 misc_sel <= "001" & r.single_prec;
2505 -- enabled overflow exception
2507 re_con2 <= RECON2_BIAS;
2509 re_set_result <= '1';
2510 v.state := ROUNDING;
2515 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2516 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2517 if round(1) = '1' then
2518 -- increment the LSB for the precision
2521 rs_con2 <= RSCON2_1;
2523 v.state := ROUNDING_2;
2525 if r.r(UNIT_BIT) = '0' then
2526 -- result after masking could be zero, or could be a
2527 -- denormalized result that needs to be renormalized
2529 v.state := ROUNDING_3;
2534 if round(0) = '1' then
2535 v.fpscr(FPSCR_XX) := '1';
2536 if r.tiny = '1' then
2537 v.fpscr(FPSCR_UX) := '1';
2542 -- Check for overflow during rounding
2545 re_sel2 <= REXP2_NE;
2546 if r.r(UNIT_BIT + 1) = '1' then
2547 opsel_r <= RES_SHIFT;
2548 re_set_result <= '1';
2549 if exp_huge = '1' then
2550 v.state := ROUND_OFLOW;
2554 elsif r.r(UNIT_BIT) = '0' then
2555 -- Do CLZ so we can renormalize the result
2557 v.state := ROUNDING_3;
2563 -- r.shift = clz(r.r) - 7
2564 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2565 re_sel2 <= REXP2_NE;
2566 -- set shift to new_exp - min_exp (== -1022)
2568 rs_con2 <= RSCON2_MINEXP;
2570 if mant_nz = '0' then
2571 v.result_class := ZERO;
2574 -- Renormalize result after rounding
2575 opsel_r <= RES_SHIFT;
2576 re_set_result <= '1';
2577 v.denorm := exp_tiny;
2578 if new_exp < to_signed(-1022, EXP_BITS) then
2586 -- r.shift = result_exp - -1022
2587 opsel_r <= RES_SHIFT;
2588 re_sel2 <= REXP2_NE;
2589 re_set_result <= '1';
2594 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
2595 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or
2596 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then
2598 v.fpscr(FPSCR_VXSNAN) := '1';
2601 if r.use_a = '1' and r.a.class = NAN then
2603 v.result_sign := r.a.negative;
2604 elsif r.use_b = '1' and r.b.class = NAN then
2606 v.result_sign := r.b.negative;
2607 elsif r.use_c = '1' and r.c.class = NAN then
2609 v.result_sign := r.c.negative;
2611 v.state := EXC_RESULT;
2614 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2618 v.result_class := r.b.class;
2621 v.result_class := r.c.class;
2624 v.result_class := r.a.class;
2626 re_set_result <= '1';
2630 -- r.opsel_a = AIN_B
2631 v.result_sign := r.is_signed and (r.a.negative xor (r.b.negative and not r.divmod));
2632 if r.b.class = ZERO then
2633 -- B is zero, signal overflow
2635 v.state := IDIV_ZERO;
2636 elsif r.a.class = ZERO then
2637 -- A is zero, result is zero (both for div and for mod)
2638 v.state := IDIV_ZERO;
2640 -- take absolute value for signed division, and
2641 -- normalize and round up B to 8.56 format, like fcfid[u]
2642 if r.is_signed = '1' and r.b.negative = '1' then
2646 v.result_class := FINITE;
2647 re_con2 <= RECON2_UNIT;
2648 re_set_result <= '1';
2649 v.state := IDIV_NORMB;
2652 -- do count-leading-zeroes on B (now in R)
2654 -- save the original value of B or |B| in C
2656 v.state := IDIV_NORMB2;
2658 -- get B into the range [1, 2) in 8.56 format
2659 set_x := '1'; -- record if any 1 bits shifted out
2660 opsel_r <= RES_SHIFT;
2661 re_sel2 <= REXP2_NE;
2662 re_set_result <= '1';
2663 v.state := IDIV_NORMB3;
2665 -- add the X bit onto R to round up B
2667 -- prepare to do count-leading-zeroes on A
2669 v.state := IDIV_CLZA;
2671 set_b := '1'; -- put R back into B
2672 -- r.opsel_a = AIN_A
2673 if r.is_signed = '1' and r.a.negative = '1' then
2677 re_con2 <= RECON2_UNIT;
2678 re_set_result <= '1';
2680 v.state := IDIV_CLZA2;
2682 -- r.opsel_a = AIN_C
2684 -- write the dividend back into A in case we negated it
2686 -- while doing the count-leading-zeroes on A,
2687 -- also compute A - B to tell us whether A >= B
2688 -- (using the original value of B, which is now in C)
2692 v.state := IDIV_CLZA3;
2694 -- save the exponent of A (but don't overwrite the mantissa)
2696 re_sel2 <= REXP2_NE;
2697 re_set_result <= '1';
2699 if new_exp = r.b.exponent then
2702 v.state := IDIV_NR0;
2703 if new_exp > r.b.exponent or (v.div_close = '1' and r.r(63) = '0') then
2704 -- A >= B, overflow if extended division
2705 if r.divext = '1' then
2707 -- return 0 in overflow cases
2708 v.state := IDIV_ZERO;
2711 -- A < B, result is zero for normal division
2712 if r.divmod = '0' and r.divext = '0' then
2713 v.state := IDIV_ZERO;
2717 -- reduce number of Newton-Raphson iterations for small A
2718 if r.divext = '1' or r.result_exp >= to_signed(32, EXP_BITS) then
2720 elsif r.result_exp >= to_signed(16, EXP_BITS) then
2725 -- first NR iteration does Y = LUT; P = 2 - B * LUT
2727 msel_add <= MULADD_CONST;
2731 if r.b.mantissa(UNIT_BIT + 1) = '1' then
2732 -- rounding up of the mantissa caused overflow, meaning the
2733 -- normalized B is 2.0. Since this is outside the range
2734 -- of the LUT, just use 0.5 as the estimated inverse.
2735 v.state := IDIV_USE0_5;
2737 -- start the first multiply now
2738 f_to_multiply.valid <= '1';
2739 -- note we don't set v.first, thus the following IDIV_NR1
2740 -- state doesn't start a multiply (we already did that)
2741 v.state := IDIV_NR1;
2744 -- subsequent NR iterations do Y = P; P = 2 - B * P
2746 msel_add <= MULADD_CONST;
2751 f_to_multiply.valid <= r.first;
2752 if multiply_to_f.valid = '1' then
2754 v.count := r.count + 1;
2755 v.state := IDIV_NR2;
2758 -- compute P = Y * P
2761 f_to_multiply.valid <= r.first;
2765 rs_con2 <= RSCON2_64;
2766 -- Get 0.5 into R in case the inverse estimate turns out to be
2767 -- less than 0.5, in which case we want to use 0.5, to avoid
2768 -- infinite loops in some cases.
2769 opsel_r <= RES_MISC;
2771 if multiply_to_f.valid = '1' then
2773 if r.count = "11" then
2774 v.state := IDIV_DODIV;
2776 v.state := IDIV_NR1;
2780 -- Get 0.5 into R; it turns out the generated
2781 -- QNaN mantissa is actually what we want
2782 opsel_r <= RES_MISC;
2786 rs_con2 <= RSCON2_64;
2787 v.state := IDIV_DODIV;
2789 -- r.opsel_a = AIN_A
2791 -- inverse estimate is in P or in R; copy it to Y
2792 if r.b.mantissa(UNIT_BIT + 1) = '1' or
2793 (r.p(UNIT_BIT) = '0' and r.p(UNIT_BIT - 1) = '0') then
2799 -- shift_res is 0 because r.shift = 64;
2800 -- put that into B, which now holds the quotient
2802 if r.divext = '0' then
2804 rs_con2 <= RSCON2_UNIT;
2807 v.state := IDIV_DIV;
2808 elsif r.single_prec = '1' then
2809 -- divwe[u][o], shift A left 32 bits
2811 rs_con2 <= RSCON2_32;
2812 v.state := IDIV_SH32;
2813 elsif r.div_close = '0' then
2814 -- set shift to 64 - UNIT_BIT (== 8)
2815 rs_con2 <= RSCON2_64_UNIT;
2816 v.state := IDIV_EXTDIV;
2818 -- handle top bit of quotient specially
2819 -- for this we need the divisor left-justified in B
2821 v.state := IDIV_EXT_TBH;
2824 -- r.shift = 32, R contains the dividend
2825 opsel_r <= RES_SHIFT;
2826 -- set shift to -UNIT_BIT (== -56)
2827 rs_con2 <= RSCON2_UNIT;
2830 v.state := IDIV_DIV;
2832 -- Dividing A by C, r.shift = -56; A is in R
2833 -- Put A into the bottom 64 bits of Ahi/A/Alo
2834 set_a_mant := r.first;
2835 set_a_lo := r.first;
2836 -- compute R = R * Y (quotient estimate)
2839 f_to_multiply.valid <= r.first;
2841 opsel_r <= RES_MULT;
2842 -- set shift to - b.exp
2845 if multiply_to_f.valid = '1' then
2846 v.state := IDIV_DIV2;
2849 -- r.shift = - b.exponent
2850 -- shift the quotient estimate right by b.exponent bits
2851 opsel_r <= RES_SHIFT;
2853 v.state := IDIV_DIV3;
2855 -- quotient (so far) is in R; multiply by C and subtract from A
2858 msel_add <= MULADD_A;
2860 f_to_multiply.valid <= r.first;
2861 -- store the current quotient estimate in B
2862 set_b_mant := r.first;
2863 opsel_r <= RES_MULT;
2866 if multiply_to_f.valid = '1' then
2867 v.state := IDIV_DIV4;
2870 -- remainder is in R/S and P
2873 v.inc_quot := not pcmpc_lt and not r.divmod;
2874 if r.divmod = '0' then
2877 -- set shift to UNIT_BIT (== 56)
2878 rs_con2 <= RSCON2_UNIT;
2879 if pcmpc_lt = '1' or pcmpc_eq = '1' then
2880 if r.divmod = '0' then
2881 v.state := IDIV_DIVADJ;
2882 elsif pcmpc_eq = '1' then
2883 v.state := IDIV_ZERO;
2885 v.state := IDIV_MODADJ;
2888 -- need to do another iteration, compute P * Y
2889 f_to_multiply.valid <= '1';
2890 v.state := IDIV_DIV5;
2894 opsel_r <= RES_MULT;
2895 -- set shift to - b.exp
2898 if multiply_to_f.valid = '1' then
2899 v.state := IDIV_DIV6;
2902 -- r.shift = - b.exponent
2903 -- shift the quotient estimate right by b.exponent bits
2904 opsel_r <= RES_SHIFT;
2907 v.state := IDIV_DIV7;
2909 -- r.opsel_a = AIN_B
2910 -- add shifted quotient delta onto the total quotient
2913 v.state := IDIV_DIV8;
2915 -- quotient (so far) is in R; multiply by C and subtract from A
2918 msel_add <= MULADD_A;
2920 f_to_multiply.valid <= r.first;
2921 -- store the current quotient estimate in B
2922 set_b_mant := r.first;
2923 opsel_r <= RES_MULT;
2926 if multiply_to_f.valid = '1' then
2927 v.state := IDIV_DIV9;
2930 -- remainder is in R/S and P
2933 v.inc_quot := not pcmpc_lt and not r.divmod;
2934 if r.divmod = '0' then
2937 -- set shift to UNIT_BIT (== 56)
2938 rs_con2 <= RSCON2_UNIT;
2939 if r.divmod = '0' then
2940 v.state := IDIV_DIVADJ;
2941 elsif pcmpc_eq = '1' then
2942 v.state := IDIV_ZERO;
2944 v.state := IDIV_MODADJ;
2946 when IDIV_EXT_TBH =>
2947 -- r.opsel_a = AIN_C; get divisor into R and prepare to shift left
2948 -- set shift to 63 - b.exp
2951 rs_con2 <= RSCON2_63;
2953 v.state := IDIV_EXT_TBH2;
2954 when IDIV_EXT_TBH2 =>
2955 -- r.opsel_a = AIN_A; divisor is in R
2956 -- r.shift = 63 - b.exponent; shift and put into B
2958 -- set shift to 64 - UNIT_BIT (== 8)
2959 rs_con2 <= RSCON2_64_UNIT;
2960 v.state := IDIV_EXT_TBH3;
2961 when IDIV_EXT_TBH3 =>
2962 -- Dividing (A << 64) by C
2964 -- Put A in the top 64 bits of Ahi/A/Alo
2967 -- set shift to 64 - b.exp
2970 rs_con2 <= RSCON2_64;
2971 v.state := IDIV_EXT_TBH4;
2972 when IDIV_EXT_TBH4 =>
2973 -- dividend (A) is in R
2974 -- r.shift = 64 - B.exponent, so is at least 1
2975 opsel_r <= RES_SHIFT;
2976 -- top bit of A gets lost in the shift, so handle it specially
2979 rs_con2 <= RSCON2_63;
2980 v.state := IDIV_EXT_TBH5;
2981 when IDIV_EXT_TBH5 =>
2982 -- r.opsel_a = AIN_B, r.shift = 63
2983 -- shifted dividend is in R, subtract left-justified divisor
2987 -- and put 1<<63 into B as the divisor (S is still 0)
2991 v.state := IDIV_EXTDIV2;
2993 -- Dividing (A << 64) by C
2995 -- Put A in the top 64 bits of Ahi/A/Alo
2998 -- set shift to 64 - b.exp
3001 rs_con2 <= RSCON2_64;
3002 v.state := IDIV_EXTDIV1;
3003 when IDIV_EXTDIV1 =>
3005 -- r.shift = 64 - B.exponent
3006 opsel_r <= RES_SHIFT;
3008 v.state := IDIV_EXTDIV2;
3009 when IDIV_EXTDIV2 =>
3010 -- shifted remainder is in R; compute R = R * Y (quotient estimate)
3013 f_to_multiply.valid <= r.first;
3016 opsel_r <= RES_MULT;
3017 if multiply_to_f.valid = '1' then
3019 v.state := IDIV_EXTDIV3;
3021 when IDIV_EXTDIV3 =>
3022 -- r.opsel_a = AIN_B
3023 -- delta quotient is in R; add it to B
3026 v.state := IDIV_EXTDIV4;
3027 when IDIV_EXTDIV4 =>
3028 -- quotient is in R; put it in B and compute remainder
3029 set_b_mant := r.first;
3032 msel_add <= MULADD_A;
3034 f_to_multiply.valid <= r.first;
3035 opsel_r <= RES_MULT;
3038 -- set shift to UNIT_BIT - b.exp
3041 rs_con2 <= RSCON2_UNIT;
3042 if multiply_to_f.valid = '1' then
3043 v.state := IDIV_EXTDIV5;
3045 when IDIV_EXTDIV5 =>
3046 -- r.shift = r.b.exponent - 56
3047 -- remainder is in R/S; shift it right r.b.exponent bits
3048 opsel_r <= RES_SHIFT;
3049 -- test LS 64b of remainder in P against divisor in C
3050 v.inc_quot := not pcmpc_lt;
3052 v.state := IDIV_EXTDIV6;
3053 when IDIV_EXTDIV6 =>
3054 -- r.opsel_a = AIN_B
3055 -- shifted remainder is in R, see if it is > 1
3056 -- and compute R = R * Y if so
3060 if r_gt_1 = '1' then
3061 f_to_multiply.valid <= '1';
3062 v.state := IDIV_EXTDIV2;
3064 v.state := IDIV_DIVADJ;
3069 opsel_r <= RES_SHIFT;
3070 if pcmpc_lt = '0' then
3072 v.state := IDIV_MODSUB;
3073 elsif r.result_sign = '0' then
3074 v.state := IDIV_DONE;
3076 v.state := IDIV_DIVADJ;
3079 -- r.opsel_a = AIN_C
3080 -- Subtract divisor from remainder
3084 if r.result_sign = '0' then
3085 v.state := IDIV_DONE;
3087 v.state := IDIV_DIVADJ;
3090 -- result (so far) is on the A input of the adder
3091 -- set carry to increment quotient if needed
3092 -- and also negate R if the answer is negative
3093 opsel_ainv <= r.result_sign;
3094 carry_in <= r.inc_quot xor r.result_sign;
3096 if r.divmod = '0' then
3099 if r.is_signed = '0' then
3100 v.state := IDIV_DONE;
3102 v.state := IDIV_OVFCHK;
3105 if r.single_prec = '0' then
3106 sign_bit := r.r(63);
3108 sign_bit := r.r(31);
3110 v.int_ovf := sign_bit xor r.result_sign;
3111 if v.int_ovf = '1' then
3112 v.state := IDIV_ZERO;
3114 v.state := IDIV_DONE;
3117 v.xerc_result := v.xerc;
3119 v.xerc_result.ov := '0';
3120 v.xerc_result.ov32 := '0';
3121 v.writing_xer := '1';
3123 if r.m32b = '0' then
3124 v.cr_result(3) := r.r(63);
3125 v.cr_result(2 downto 1) := "00";
3126 if r.r = 64x"0" then
3127 v.cr_result(1) := '1';
3129 v.cr_result(2) := not r.r(63);
3132 v.cr_result(3) := r.r(31);
3133 v.cr_result(2 downto 1) := "00";
3134 if r.r(31 downto 0) = 32x"0" then
3135 v.cr_result(1) := '1';
3137 v.cr_result(2) := not r.r(31);
3140 v.cr_result(0) := v.xerc.so;
3142 v.writing_fpr := '1';
3143 v.instr_done := '1';
3145 opsel_r <= RES_MISC;
3147 v.xerc_result := v.xerc;
3149 v.xerc_result.ov := r.int_ovf;
3150 v.xerc_result.ov32 := r.int_ovf;
3151 v.xerc_result.so := r.xerc.so or r.int_ovf;
3152 v.writing_xer := '1';
3154 v.cr_result := "001" & v.xerc_result.so;
3156 v.writing_fpr := '1';
3157 v.instr_done := '1';
3161 if zero_divide = '1' then
3162 v.fpscr(FPSCR_ZX) := '1';
3164 if qnan_result = '1' then
3166 v.result_class := NAN;
3167 v.result_sign := '0';
3170 opsel_r <= RES_MISC;
3173 if invalid = '1' then
3176 if arith_done = '1' then
3177 -- Enabled invalid exception doesn't write result or FPRF
3178 -- Neither does enabled zero-divide exception
3179 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
3180 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
3181 v.writing_fpr := '1';
3182 v.update_fprf := '1';
3184 v.instr_done := '1';
3188 -- Multiplier and divide/square root data path
3191 f_to_multiply.data1 <= r.a.mantissa;
3193 f_to_multiply.data1 <= r.b.mantissa;
3195 f_to_multiply.data1 <= r.y;
3197 f_to_multiply.data1 <= r.r;
3201 f_to_multiply.data2 <= r.c.mantissa;
3203 f_to_multiply.data2 <= std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64),
3206 f_to_multiply.data2 <= r.p;
3208 f_to_multiply.data2 <= r.r;
3210 maddend := (others => '0');
3212 when MULADD_CONST =>
3213 -- addend is 2.0 or 1.5 in 16.112 format
3214 if r.is_sqrt = '0' then
3215 maddend(2*UNIT_BIT + 1) := '1'; -- 2.0
3217 maddend(2*UNIT_BIT downto 2*UNIT_BIT - 1) := "11"; -- 1.5
3220 -- addend is A in 16.112 format
3221 maddend(127 downto UNIT_BIT + 64) := r.a_hi;
3222 maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.a.mantissa;
3223 maddend(UNIT_BIT - 1 downto 0) := r.a_lo;
3225 -- addend is concatenation of R and S in 16.112 format
3226 maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.r;
3227 maddend(UNIT_BIT - 1 downto 0) := r.s;
3230 f_to_multiply.addend <= maddend;
3231 f_to_multiply.subtract <= msel_inv;
3233 v.y := f_to_multiply.data2;
3235 if multiply_to_f.valid = '1' then
3236 if pshift = '0' then
3237 v.p := multiply_to_f.result(63 downto 0);
3239 v.p := multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT);
3244 -- This has A and B input multiplexers, an adder, a shifter,
3245 -- count-leading-zeroes logic, and a result mux.
3246 if r.longmask = '1' then
3247 mshift := r.shift + to_signed(-29, EXP_BITS);
3251 if is_X(mshift) then
3252 mask := (others => 'X');
3253 elsif mshift < to_signed(-64, EXP_BITS) then
3254 mask := (others => '1');
3255 elsif mshift >= to_signed(0, EXP_BITS) then
3256 mask := (others => '0');
3258 mask := right_mask(unsigned(mshift(5 downto 0)));
3264 in_a0 := r.a.mantissa;
3266 in_a0 := r.b.mantissa;
3268 in_a0 := r.c.mantissa;
3270 if (or (mask and in_a0)) = '1' and set_x = '1' then
3273 if opsel_ainv = '1' then
3279 in_b0 := (others => '0');
3283 if rnd_b32 = '1' then
3284 round_inc := (32 => r.result_sign and r.single_prec, others => '0');
3285 elsif rbit_inc = '0' then
3286 round_inc := (SP_LSB => r.single_prec, DP_LSB => not r.single_prec, others => '0');
3288 round_inc := (DP_RBIT => '1', others => '0');
3292 -- BIN_PS8, 8 LSBs of P sign-extended to 64
3293 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 0)), 64));
3295 if opsel_binv = '1' then
3299 if is_X(r.shift) then
3300 shift_res := (others => 'X');
3301 elsif r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
3302 shift_res := shifter_64(r.r(63 downto 1) & (shiftin0 or r.r(0)) &
3303 (shiftin or r.s(55)) & r.s(54 downto 0),
3304 std_ulogic_vector(r.shift(6 downto 0)));
3306 shift_res := (others => '0');
3308 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
3309 if opsel_mask = '1' then
3310 sum(DP_LSB - 1 downto 0) := "0000";
3311 if r.single_prec = '1' then
3312 sum(SP_LSB - 1 downto DP_LSB) := (others => '0');
3319 result <= shift_res;
3321 result <= multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT);
3322 if mult_mask = '1' then
3323 -- trim to 54 fraction bits if mult_mask = 1, for quotient when dividing
3324 result(UNIT_BIT - 55 downto 0) <= (others => '0');
3327 misc := (others => '0');
3330 misc := x"00000000" & (r.fpscr and fpscr_mask);
3332 -- generated QNaN mantissa
3333 misc(QNAN_BIT) := '1';
3335 -- mantissa of max representable DP number
3336 misc(UNIT_BIT downto DP_LSB) := (others => '1');
3338 -- mantissa of max representable SP number
3339 misc(UNIT_BIT downto SP_LSB) := (others => '1');
3342 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
3345 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
3347 misc := std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64),
3350 -- max positive result for fctiw[z]
3351 misc := x"000000007fffffff";
3353 -- max negative result for fctiw[z]
3354 misc := x"ffffffff80000000";
3356 -- max positive result for fctiwu[z]
3357 misc := x"00000000ffffffff";
3359 -- max negative result for fctiwu[z]
3360 misc := x"0000000000000000";
3362 -- max positive result for fctid[z]
3363 misc := x"7fffffffffffffff";
3365 -- max negative result for fctid[z]
3366 misc := x"8000000000000000";
3368 -- max positive result for fctidu[z]
3369 misc := x"ffffffffffffffff";
3371 -- max negative result for fctidu[z]
3372 misc := x"0000000000000000";
3381 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
3383 v.s := multiply_to_f.result(55 downto 0);
3385 v.s := shift_res(63 downto 8);
3386 if shift_res(7 downto 0) /= x"00" then
3390 v.s := (others => '0');
3394 if set_a = '1' or set_a_exp = '1' then
3395 v.a.exponent := new_exp;
3397 if set_a = '1' or set_a_mant = '1' then
3398 v.a.mantissa := shift_res;
3400 if e_in.valid = '1' then
3401 v.a_hi := (others => '0');
3402 v.a_lo := (others => '0');
3404 if set_a_hi = '1' then
3405 v.a_hi := r.r(63 downto 56);
3407 if set_a_lo = '1' then
3408 v.a_lo := r.r(55 downto 0);
3412 v.b.exponent := new_exp;
3414 if set_b = '1' or set_b_mant = '1' then
3415 v.b.mantissa := shift_res;
3418 v.c.exponent := new_exp;
3419 v.c.mantissa := shift_res;
3422 -- exponent data path
3425 rexp_in1 := r.result_exp;
3427 rexp_in1 := r.a.exponent;
3429 rexp_in1 := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
3431 rexp_in1 := to_signed(0, EXP_BITS);
3433 if re_neg1 = '1' then
3434 rexp_in1 := not rexp_in1;
3438 rexp_in2 := new_exp;
3440 rexp_in2 := r.c.exponent;
3442 rexp_in2 := r.b.exponent;
3446 rexp_in2 := to_signed(UNIT_BIT, EXP_BITS);
3448 rexp_in2 := max_exp;
3450 rexp_in2 := bias_exp;
3452 rexp_in2 := to_signed(0, EXP_BITS);
3455 if re_neg2 = '1' then
3456 rexp_in2 := not rexp_in2;
3458 rexp_cin := re_neg1 or re_neg2;
3459 rexp_sum := rexp_in1 + rexp_in2 + rexp_cin;
3460 if re_set_result = '1' then
3461 v.result_exp := rexp_sum;
3465 rsh_in1 := r.b.exponent;
3471 rsh_in1 := to_signed(0, EXP_BITS);
3473 if rs_neg1 = '1' then
3474 rsh_in1 := not rsh_in1;
3478 rsh_in2 := r.a.exponent;
3482 rsh_in2 := to_signed(1, EXP_BITS);
3483 when RSCON2_UNIT_52 =>
3484 rsh_in2 := to_signed(UNIT_BIT - 52, EXP_BITS);
3485 when RSCON2_64_UNIT =>
3486 rsh_in2 := to_signed(64 - UNIT_BIT, EXP_BITS);
3488 rsh_in2 := to_signed(32, EXP_BITS);
3490 rsh_in2 := to_signed(52, EXP_BITS);
3492 rsh_in2 := to_signed(UNIT_BIT, EXP_BITS);
3494 rsh_in2 := to_signed(63, EXP_BITS);
3496 rsh_in2 := to_signed(64, EXP_BITS);
3497 when RSCON2_MINEXP =>
3500 rsh_in2 := to_signed(0, EXP_BITS);
3503 if rs_neg2 = '1' then
3504 rsh_in2 := not rsh_in2;
3506 if rs_norm = '1' then
3507 clz := count_left_zeroes(r.r);
3508 if renorm_sqrt = '1' then
3509 -- make denormalized value end up with even exponent
3512 -- do this as a separate dedicated 7-bit adder for timing reasons
3513 v.shift := resize(signed('0' & clz) - (63 - UNIT_BIT), EXP_BITS);
3515 v.shift := rsh_in1 + rsh_in2 + (rs_neg1 or rs_neg2);
3518 if r.update_fprf = '1' then
3519 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
3520 r.r(UNIT_BIT) and not r.denorm);
3523 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
3524 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
3525 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
3526 v.fpscr(FPSCR_VE downto FPSCR_XE));
3527 if update_fx = '1' and
3528 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
3529 v.fpscr(FPSCR_FX) := '1';
3532 if v.instr_done = '1' then
3533 if r.state /= IDLE then
3537 if r.fp_rc = '1' then
3538 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
3540 v.sp_result := r.single_prec;
3541 v.int_result := int_result;
3542 v.illegal := illegal;
3543 v.nsnan_result := v.quieten_nan;
3544 v.res_negate := v.negate;
3545 v.res_subtract := v.is_subtract;
3546 v.res_rmode := r.round_mode;
3547 if r.integer_op = '1' then
3548 v.cr_mask := num_to_fxm(0);
3549 elsif r.is_cmp = '0' then
3550 v.cr_mask := num_to_fxm(1);
3551 elsif is_X(insn_bf(r.insn)) then
3552 v.cr_mask := (others => 'X');
3554 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(r.insn))));
3556 v.writing_cr := r.is_cmp or r.rc;
3557 v.write_reg := r.dest_fpr;
3558 v.complete_tag := r.instr_tag;
3560 if e_in.stall = '0' then
3561 v.complete := not v.illegal;
3562 v.do_intr := (v.fpscr(FPSCR_FEX) and r.fe_mode) or v.illegal;
3564 -- N.B. We rely on execute1 to prevent any new instruction
3565 -- coming in while e_in.stall = 1, without us needing to
3566 -- have busy asserted.
3568 if r.state /= IDLE and e_in.stall = '0' then
3573 -- This mustn't depend on any fields of r that are modified in IDLE state.
3574 if r.int_result = '1' then
3577 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
3578 r.sp_result, r.nsnan_result,
3579 r.res_negate, r.res_subtract, r.res_rmode);
3585 end architecture behaviour;