ignore /abc.history
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Top-level interface of the floating-point unit.
entity fpu is
    port (
        -- Clock; the registered state is updated on its rising edge.
        clk      : in  std_ulogic;
        -- Synchronous reset: returns the state machine to IDLE and clears
        -- both the working and the committed FPSCR.
        rst      : in  std_ulogic;
        -- Synchronous flush: returns the state machine to IDLE.  When the
        -- flush was not caused by an interrupt raised by this unit, the
        -- working FPSCR is rolled back to the committed copy.
        flush_in : in  std_ulogic;

        -- Request input; it must not be valid while the unit is not IDLE
        -- (checked by an assertion in the register process).
        e_in     : in  Execute1ToFPUType;
        -- Output record of type FPUToExecute1Type.
        e_out    : out FPUToExecute1Type;

        -- Output record of type FPUToWritebackType.
        w_out    : out FPUToWritebackType
    );
end entity fpu;
26
27 architecture behaviour of fpu is
28 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
29
30 constant EXP_BITS : natural := 13;
31 constant UNIT_BIT : natural := 56;
32 constant QNAN_BIT : natural := UNIT_BIT - 1;
33 constant SP_LSB : natural := UNIT_BIT - 23;
34 constant SP_GBIT : natural := SP_LSB - 1;
35 constant SP_RBIT : natural := SP_LSB - 2;
36 constant DP_LSB : natural := UNIT_BIT - 52;
37 constant DP_GBIT : natural := DP_LSB - 1;
38 constant DP_RBIT : natural := DP_LSB - 2;
39
40 type fpu_reg_type is record
41 class : fp_number_class;
42 negative : std_ulogic;
43 denorm : std_ulogic;
44 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
45 mantissa : std_ulogic_vector(63 downto 0); -- 8.56 format
46 end record;
47
48 type state_t is (IDLE, DO_ILLEGAL,
49 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
50 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
51 DO_FCFID, DO_FCTI,
52 DO_FRSP, DO_FRI,
53 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
54 DO_FRE, DO_FRSQRTE,
55 DO_FSEL,
56 DO_IDIVMOD,
57 FRI_1,
58 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
59 CMP_1, CMP_2,
60 MULT_1,
61 FMADD_0, FMADD_1, FMADD_2, FMADD_3,
62 FMADD_4, FMADD_5, FMADD_6,
63 LOOKUP,
64 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
65 FRE_1,
66 RSQRT_1,
67 FTDIV_1,
68 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
69 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
70 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
71 INT_SHIFT, INT_ROUND, INT_ISHIFT,
72 INT_FINAL, INT_CHECK, INT_OFLOW,
73 FINISH, NORMALIZE,
74 ROUND_UFLOW, ROUND_OFLOW,
75 ROUNDING, ROUNDING_2, ROUNDING_3,
76 DENORM,
77 RENORM_A, RENORM_A2,
78 RENORM_B, RENORM_B2,
79 RENORM_C, RENORM_C2,
80 NAN_RESULT, EXC_RESULT,
81 IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3,
82 IDIV_CLZA, IDIV_CLZA2, IDIV_CLZA3,
83 IDIV_NR0, IDIV_NR1, IDIV_NR2, IDIV_USE0_5,
84 IDIV_DODIV, IDIV_SH32,
85 IDIV_DIV, IDIV_DIV2, IDIV_DIV3, IDIV_DIV4, IDIV_DIV5,
86 IDIV_DIV6, IDIV_DIV7, IDIV_DIV8, IDIV_DIV9,
87 IDIV_EXT_TBH, IDIV_EXT_TBH2, IDIV_EXT_TBH3,
88 IDIV_EXT_TBH4, IDIV_EXT_TBH5,
89 IDIV_EXTDIV, IDIV_EXTDIV1, IDIV_EXTDIV2, IDIV_EXTDIV3,
90 IDIV_EXTDIV4, IDIV_EXTDIV5, IDIV_EXTDIV6,
91 IDIV_MODADJ, IDIV_MODSUB, IDIV_DIVADJ, IDIV_OVFCHK, IDIV_DONE, IDIV_ZERO);
92
93 type decode32 is array(0 to 31) of state_t;
94 type decode8 is array(0 to 7) of state_t;
95
96 type reg_type is record
97 state : state_t;
98 busy : std_ulogic;
99 f2stall : std_ulogic;
100 instr_done : std_ulogic;
101 complete : std_ulogic;
102 do_intr : std_ulogic;
103 illegal : std_ulogic;
104 op : insn_type_t;
105 insn : std_ulogic_vector(31 downto 0);
106 instr_tag : instr_tag_t;
107 dest_fpr : gspr_index_t;
108 fe_mode : std_ulogic;
109 rc : std_ulogic;
110 fp_rc : std_ulogic;
111 is_cmp : std_ulogic;
112 single_prec : std_ulogic;
113 sp_result : std_ulogic;
114 fpscr : std_ulogic_vector(31 downto 0);
115 comm_fpscr : std_ulogic_vector(31 downto 0); -- committed FPSCR value
116 a : fpu_reg_type;
117 b : fpu_reg_type;
118 c : fpu_reg_type;
119 r : std_ulogic_vector(63 downto 0); -- 8.56 format
120 s : std_ulogic_vector(55 downto 0); -- extended fraction
121 x : std_ulogic;
122 p : std_ulogic_vector(63 downto 0); -- 8.56 format
123 y : std_ulogic_vector(63 downto 0); -- 8.56 format
124 result_sign : std_ulogic;
125 result_class : fp_number_class;
126 result_exp : signed(EXP_BITS-1 downto 0);
127 shift : signed(EXP_BITS-1 downto 0);
128 writing_fpr : std_ulogic;
129 write_reg : gspr_index_t;
130 complete_tag : instr_tag_t;
131 writing_cr : std_ulogic;
132 writing_xer : std_ulogic;
133 int_result : std_ulogic;
134 cr_result : std_ulogic_vector(3 downto 0);
135 cr_mask : std_ulogic_vector(7 downto 0);
136 old_exc : std_ulogic_vector(4 downto 0);
137 update_fprf : std_ulogic;
138 quieten_nan : std_ulogic;
139 nsnan_result : std_ulogic;
140 tiny : std_ulogic;
141 denorm : std_ulogic;
142 round_mode : std_ulogic_vector(2 downto 0);
143 is_subtract : std_ulogic;
144 exp_cmp : std_ulogic;
145 madd_cmp : std_ulogic;
146 add_bsmall : std_ulogic;
147 is_multiply : std_ulogic;
148 is_sqrt : std_ulogic;
149 first : std_ulogic;
150 count : unsigned(1 downto 0);
151 doing_ftdiv : std_ulogic_vector(1 downto 0);
152 opsel_a : std_ulogic_vector(1 downto 0);
153 use_a : std_ulogic;
154 use_b : std_ulogic;
155 use_c : std_ulogic;
156 invalid : std_ulogic;
157 negate : std_ulogic;
158 longmask : std_ulogic;
159 integer_op : std_ulogic;
160 divext : std_ulogic;
161 divmod : std_ulogic;
162 is_signed : std_ulogic;
163 int_ovf : std_ulogic;
164 div_close : std_ulogic;
165 inc_quot : std_ulogic;
166 a_hi : std_ulogic_vector(7 downto 0);
167 a_lo : std_ulogic_vector(55 downto 0);
168 m32b : std_ulogic;
169 oe : std_ulogic;
170 xerc : xer_common_t;
171 xerc_result : xer_common_t;
172 res_negate : std_ulogic;
173 res_subtract : std_ulogic;
174 res_rmode : std_ulogic_vector(2 downto 0);
175 end record;
176
177 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
178
179 signal r, rin : reg_type;
180
181 signal fp_result : std_ulogic_vector(63 downto 0);
182 signal opsel_b : std_ulogic_vector(1 downto 0);
183 signal opsel_r : std_ulogic_vector(1 downto 0);
184 signal opsel_s : std_ulogic_vector(1 downto 0);
185 signal opsel_ainv : std_ulogic;
186 signal opsel_mask : std_ulogic;
187 signal opsel_binv : std_ulogic;
188 signal in_a : std_ulogic_vector(63 downto 0);
189 signal in_b : std_ulogic_vector(63 downto 0);
190 signal result : std_ulogic_vector(63 downto 0);
191 signal carry_in : std_ulogic;
192 signal lost_bits : std_ulogic;
193 signal r_hi_nz : std_ulogic;
194 signal r_lo_nz : std_ulogic;
195 signal r_gt_1 : std_ulogic;
196 signal s_nz : std_ulogic;
197 signal misc_sel : std_ulogic_vector(3 downto 0);
198 signal f_to_multiply : MultiplyInputType;
199 signal multiply_to_f : MultiplyOutputType;
200 signal msel_1 : std_ulogic_vector(1 downto 0);
201 signal msel_2 : std_ulogic_vector(1 downto 0);
202 signal msel_add : std_ulogic_vector(1 downto 0);
203 signal msel_inv : std_ulogic;
204 signal inverse_est : std_ulogic_vector(18 downto 0);
205
206 -- opsel values
207 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
208 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
209 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
210 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
211
212 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
213 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
214 constant BIN_RND : std_ulogic_vector(1 downto 0) := "10";
215 constant BIN_PS8 : std_ulogic_vector(1 downto 0) := "11";
216
217 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
218 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
219 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
220 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
221
222 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
223 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
224 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
225 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
226
227 -- msel values
228 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
229 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
230 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
231 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
232
233 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
234 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
235 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
236 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
237
238 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
239 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
240 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
241 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
242
243 -- control signals and values for exponent data path
244 constant REXP1_ZERO : std_ulogic_vector(1 downto 0) := "00";
245 constant REXP1_R : std_ulogic_vector(1 downto 0) := "01";
246 constant REXP1_A : std_ulogic_vector(1 downto 0) := "10";
247 constant REXP1_BHALF : std_ulogic_vector(1 downto 0) := "11";
248
249 constant REXP2_CON : std_ulogic_vector(1 downto 0) := "00";
250 constant REXP2_NE : std_ulogic_vector(1 downto 0) := "01";
251 constant REXP2_C : std_ulogic_vector(1 downto 0) := "10";
252 constant REXP2_B : std_ulogic_vector(1 downto 0) := "11";
253
254 constant RECON2_ZERO : std_ulogic_vector(1 downto 0) := "00";
255 constant RECON2_UNIT : std_ulogic_vector(1 downto 0) := "01";
256 constant RECON2_BIAS : std_ulogic_vector(1 downto 0) := "10";
257 constant RECON2_MAX : std_ulogic_vector(1 downto 0) := "11";
258
259 signal re_sel1 : std_ulogic_vector(1 downto 0);
260 signal re_sel2 : std_ulogic_vector(1 downto 0);
261 signal re_con2 : std_ulogic_vector(1 downto 0);
262 signal re_neg1 : std_ulogic;
263 signal re_neg2 : std_ulogic;
264 signal re_set_result : std_ulogic;
265
266 constant RSH1_ZERO : std_ulogic_vector(1 downto 0) := "00";
267 constant RSH1_B : std_ulogic_vector(1 downto 0) := "01";
268 constant RSH1_NE : std_ulogic_vector(1 downto 0) := "10";
269 constant RSH1_S : std_ulogic_vector(1 downto 0) := "11";
270
271 constant RSH2_CON : std_ulogic := '0';
272 constant RSH2_A : std_ulogic := '1';
273
274 constant RSCON2_ZERO : std_ulogic_vector(3 downto 0) := "0000";
275 constant RSCON2_1 : std_ulogic_vector(3 downto 0) := "0001";
276 constant RSCON2_UNIT_52 : std_ulogic_vector(3 downto 0) := "0010";
277 constant RSCON2_64_UNIT : std_ulogic_vector(3 downto 0) := "0011";
278 constant RSCON2_32 : std_ulogic_vector(3 downto 0) := "0100";
279 constant RSCON2_52 : std_ulogic_vector(3 downto 0) := "0101";
280 constant RSCON2_UNIT : std_ulogic_vector(3 downto 0) := "0110";
281 constant RSCON2_63 : std_ulogic_vector(3 downto 0) := "0111";
282 constant RSCON2_64 : std_ulogic_vector(3 downto 0) := "1000";
283 constant RSCON2_MINEXP : std_ulogic_vector(3 downto 0) := "1001";
284
285 signal rs_sel1 : std_ulogic_vector(1 downto 0);
286 signal rs_sel2 : std_ulogic;
287 signal rs_con2 : std_ulogic_vector(3 downto 0);
288 signal rs_neg1 : std_ulogic;
289 signal rs_neg2 : std_ulogic;
290 signal rs_norm : std_ulogic;
291
292 constant arith_decode : decode32 := (
293 -- indexed by bits 5..1 of opcode
294 2#01000# => DO_FRI,
295 2#01100# => DO_FRSP,
296 2#01110# => DO_FCTI,
297 2#01111# => DO_FCTI,
298 2#10010# => DO_FDIV,
299 2#10100# => DO_FADD,
300 2#10101# => DO_FADD,
301 2#10110# => DO_FSQRT,
302 2#11000# => DO_FRE,
303 2#11001# => DO_FMUL,
304 2#11010# => DO_FRSQRTE,
305 2#11100# => DO_FMADD,
306 2#11101# => DO_FMADD,
307 2#11110# => DO_FMADD,
308 2#11111# => DO_FMADD,
309 others => DO_ILLEGAL
310 );
311
312 constant cmp_decode : decode8 := (
313 2#000# => DO_FCMP,
314 2#001# => DO_FCMP,
315 2#010# => DO_MCRFS,
316 2#100# => DO_FTDIV,
317 2#101# => DO_FTSQRT,
318 others => DO_ILLEGAL
319 );
320
321 constant misc_decode : decode32 := (
322 -- indexed by bits 10, 8, 4, 2, 1 of opcode
323 2#00010# => DO_MTFSB,
324 2#01010# => DO_MTFSFI,
325 2#10010# => DO_FMRG,
326 2#11010# => DO_FMRG,
327 2#10011# => DO_MFFS,
328 2#11011# => DO_MTFSF,
329 2#10110# => DO_FCFID,
330 2#11110# => DO_FCFID,
331 others => DO_ILLEGAL
332 );
333
334 -- Inverse lookup table, indexed by the top 8 fraction bits
335 -- The first 256 entries are the reciprocal (1/x) lookup table,
336 -- and the remaining 768 entries are the reciprocal square root table.
337 -- Output range is [0.5, 1) in 0.19 format, though the top
338 -- bit isn't stored since it is always 1.
339 -- Each output value is the inverse of the center of the input
340 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
341 -- entry 1 is 1 / (1 + 3/512), etc.
342 constant inverse_table : lookup_table := (
343 -- 1/x lookup table
344 -- Unit bit is assumed to be 1, so input range is [1, 2)
345 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
346 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
347 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
348 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
349 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
350 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
351 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
352 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
353 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
354 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
355 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
356 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
357 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
358 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
359 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
360 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
361 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
362 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
363 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
364 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
365 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
366 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
367 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
368 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
369 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
370 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
371 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
372 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
373 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
374 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
375 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
376 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
377 -- 1/sqrt(x) lookup table
378 -- Input is in the range [1, 4), i.e. two bits to the left of the
379 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
380 -- 1.0 ... 1.9999
381 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
382 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
383 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
384 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
385 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
386 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
387 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
388 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
389 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
390 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
391 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
392 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
393 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
394 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
395 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
396 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
397 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
398 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
399 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
400 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
401 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
402 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
403 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
404 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
405 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
406 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
407 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
408 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
409 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
410 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
411 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
412 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
413 -- 2.0 ... 2.9999
414 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
415 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
416 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
417 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
418 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
419 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
420 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
421 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
422 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
423 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
424 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
425 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
426 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
427 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
428 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
429 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
430 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
431 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
432 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
433 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
434 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
435 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
436 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
437 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
438 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
439 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
440 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
441 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
442 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
443 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
444 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
445 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
446 -- 3.0 ... 3.9999
447 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
448 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
449 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
450 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
451 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
452 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
453 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
454 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
455 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
456 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
457 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
458 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
459 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
460 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
461 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
462 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
463 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
464 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
465 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
466 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
467 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
468 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
469 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
470 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
471 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
472 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
473 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
474 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
475 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
476 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
477 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
478 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
479 );
480
-- Combined left/right shifter with a 120-bit input and a 64-bit output.
-- The input is shifted left by `shift` bit positions and the upper 64 bits
-- of the shifted value are returned; vacated positions are zero-filled.
-- `shift` is a 7-bit two's-complement count in the range -64..63, where a
-- negative count means a right shift.  With shift = 0 the result is
-- inp(119 downto 56).
-- The shift is done as three cascaded multiplexer stages that handle
-- shift(6:5), shift(4:3) and shift(2:0) respectively.
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    variable stage32 : std_ulogic_vector(94 downto 0);
    variable stage8  : std_ulogic_vector(70 downto 0);
    variable stage1  : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: steps of 32, including the sign of the shift count.
    -- "00"/"01" are left shifts by 0/32, "10"/"11" are right shifts by
    -- 64/32 (i.e. counts -64 and -32 before the finer stages are applied).
    case shift(6 downto 5) is
        when "00"   => stage32 := inp(119 downto 25);
        when "01"   => stage32 := inp(87 downto 0) & "0000000";
        when "10"   => stage32 := x"0000000000000000" & inp(119 downto 89);
        when others => stage32 := x"00000000" & inp(119 downto 57);
    end case;

    -- Stage 2: steps of 8; each step moves the 71-bit window down by 8.
    case shift(4 downto 3) is
        when "00"   => stage8 := stage32(94 downto 24);
        when "01"   => stage8 := stage32(86 downto 16);
        when "10"   => stage8 := stage32(78 downto 8);
        when others => stage8 := stage32(70 downto 0);
    end case;

    -- Stage 3: steps of 1; each step moves the 64-bit window down by 1.
    case shift(2 downto 0) is
        when "000"  => stage1 := stage8(70 downto 7);
        when "001"  => stage1 := stage8(69 downto 6);
        when "010"  => stage1 := stage8(68 downto 5);
        when "011"  => stage1 := stage8(67 downto 4);
        when "100"  => stage1 := stage8(66 downto 3);
        when "101"  => stage1 := stage8(65 downto 2);
        when "110"  => stage1 := stage8(64 downto 1);
        when others => stage1 := stage8(63 downto 0);
    end case;

    return stage1;
end;
533
-- Build a 64-bit mask with 0-bits on the left and 1-bits on the right that
-- selects the bits that will be lost by a right shift.  `shift` is the
-- bottom 6 bits of a negative shift count (so 63 means "right by 1" and
-- gives a single 1 in bit 0, while 0 means "right by 64" and gives all 1s).
-- If `shift` contains metavalues the whole mask is returned as 'X'.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable lost : std_ulogic_vector(63 downto 0);
begin
    if is_X(shift) then
        lost := (others => 'X');
        return lost;
    end if;

    lost := (others => '0');
    for pos in lost'range loop
        -- Bit `pos` falls off the end once (63 - pos) >= shift.
        if (63 - pos) >= shift then
            lost(pos) := '1';
        end if;
    end loop;
    return lost;
end;
553
-- Unpack a 64-bit register value into sign, class, exponent and mantissa.
--   is_fp = '1'      : fpr is a double-precision floating-point number.
--                      The exponent is returned unbiased (-1022 when the
--                      exponent field is zero) and the fraction is placed
--                      below the unit bit, which is set for non-zero
--                      exponent fields.
--   is_32bint = '1'  : (only looked at when is_fp /= '1') the low 32 bits
--                      of fpr are an integer; the upper mantissa half is
--                      filled with fpr(31) if is_signed = '1', else zeros.
--   otherwise        : all 64 bits of fpr are copied as an integer.
-- For the integer forms the exponent is zero and the class is ZERO or
-- FINITE depending on whether the integer is zero.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_fp: std_ulogic;
                   is_32bint: std_ulogic; is_signed: std_ulogic) return fpu_reg_type is
    variable res       : fpu_reg_type;
    variable exp_any   : std_ulogic;    -- some exponent bit is 1
    variable exp_all   : std_ulogic;    -- every exponent bit is 1
    variable frac_any  : std_ulogic;    -- some fraction bit is 1
    variable low32_any : std_ulogic;    -- some bit of fpr(31 downto 0) is 1
    variable fill      : std_ulogic;    -- upper-half fill bit for 32-bit integers
    variable kind      : std_ulogic_vector(2 downto 0);
begin
    exp_any   := or (fpr(62 downto 52));
    exp_all   := and (fpr(62 downto 52));
    frac_any  := or (fpr(51 downto 0));
    low32_any := or (fpr(31 downto 0));

    res.negative := fpr(63);
    res.denorm := '0';

    if is_fp = '1' then
        res.denorm := frac_any and not exp_any;

        -- Zero and denormalized inputs use the minimum normal exponent.
        if exp_any = '0' then
            res.exponent := to_signed(-1022, EXP_BITS);
        else
            res.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;

        -- Implied unit bit at UNIT_BIT, fraction directly below it.
        res.mantissa := (others => '0');
        res.mantissa(UNIT_BIT) := exp_any;
        res.mantissa(UNIT_BIT - 1 downto DP_LSB) := fpr(51 downto 0);

        kind := exp_all & exp_any & frac_any;
        case kind is
            when "000" =>
                res.class := ZERO;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                res.class := FINITE;
            when "110" =>
                res.class := INFINITY;
            when others =>
                res.class := NAN;
        end case;

    elsif is_32bint = '1' then
        fill := is_signed and fpr(31);
        res.negative := fpr(31);
        res.mantissa(63 downto 32) := (others => fill);
        res.mantissa(31 downto 0) := fpr(31 downto 0);
        res.exponent := (others => '0');
        if low32_any = '1' then
            res.class := FINITE;
        else
            res.class := ZERO;
        end if;

    else
        res.mantissa := fpr;
        res.exponent := (others => '0');
        if (fpr(63) or exp_any or frac_any) = '1' then
            res.class := FINITE;
        else
            res.class := ZERO;
        end if;
    end if;

    return res;
end;
609
-- Assemble a 64-bit double-precision result from its components.
--   negative/negate : the result sign bit is (sign xor negate), where sign
--                     is `negative`, except for a ZERO result with
--                     is_subtract = '1', where it is
--                     round_mode(0) and round_mode(1).
--   class           : selects the encoding (zero, finite, infinity, NaN).
--   exp             : unbiased exponent; biased by 1023 and stored only when
--                     the mantissa's unit bit is set (otherwise the exponent
--                     field stays zero).
--   mantissa        : fraction source; its bits UNIT_BIT-1 downto DP_LSB
--                     supply the 52 fraction bits.
--   single_prec     : unless '0', the low 29 fraction bits are left zero.
--   quieten_nan     : for NaN results, ORed into the top fraction bit.
function pack_dp(negative: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic;
                 negate: std_ulogic; is_subtract: std_ulogic; round_mode: std_ulogic_vector)
    return std_ulogic_vector is
    constant EXP_ALL_ONES : std_ulogic_vector(10 downto 0) := (others => '1');
    variable packed   : std_ulogic_vector(63 downto 0);
    variable sign_bit : std_ulogic;
begin
    packed := (others => '0');
    sign_bit := negative;

    case class is
        when ZERO =>
            if is_subtract = '1' then
                -- sign of the zero depends on the rounding mode
                sign_bit := round_mode(1) and round_mode(0);
            end if;

        when INFINITY =>
            packed(62 downto 52) := EXP_ALL_ONES;

        when FINITE | NAN =>
            -- Fraction bits are packed the same way for both classes.
            packed(51 downto 29) := mantissa(UNIT_BIT - 1 downto SP_LSB);
            if single_prec = '0' then
                packed(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB);
            end if;

            if class = NAN then
                packed(62 downto 52) := EXP_ALL_ONES;
                packed(51) := quieten_nan or mantissa(QNAN_BIT);
            elsif mantissa(UNIT_BIT) = '1' then
                -- normalized number
                packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
            end if;
    end case;

    packed(63) := sign_bit xor negate;
    return packed;
end;
648
-- Decide whether rounding has to increment the mantissa.
-- Returns a 2-bit vector: bit 1 = increment, bit 0 = inexact.
-- The guard and round bits sit directly below the LSB of the selected
-- precision.  For double precision the sticky bit is x ORed with the
-- mantissa bits below the round bit.  For single precision the sticky bit
-- is x alone, so x must already include the bottom 31 (== SP_LSB - 2)
-- bits of the mantissa (usually arranged by setting set_x = 1 earlier).
-- rn(1 downto 0) selects the mode: "00" nearest, "01" towards zero,
-- anything else towards +/- infinity.  In nearest mode, rn(2) = '1' makes
-- an exact tie round up instead of to even.
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable grs       : std_ulogic_vector(2 downto 0);   -- guard, round, sticky
    variable lsb       : std_ulogic;
    variable inexact   : std_ulogic;
    variable increment : std_ulogic;
    variable verdict   : std_ulogic_vector(1 downto 0);
begin
    if single_prec = '0' then
        lsb := mantissa(DP_LSB);
        grs := mantissa(DP_GBIT downto DP_RBIT) & (x or (or mantissa(DP_RBIT - 1 downto 0)));
    else
        lsb := mantissa(SP_LSB);
        grs := mantissa(SP_GBIT downto SP_RBIT) & x;
    end if;

    -- Any non-zero bit below the LSB makes the result inexact.
    inexact := or (grs);
    increment := '0';

    case rn(1 downto 0) is
        when "00" =>
            -- round to nearest
            if grs = "100" and rn(2) = '0' then
                -- exact tie: round to even
                increment := lsb;
            else
                increment := grs(2);
            end if;
        when "01" =>
            -- round towards zero: never increment
            null;
        when others =>
            -- round towards +/- infinity: increment an inexact result only
            -- when that moves towards the greater magnitude
            if rn(0) = sign then
                increment := inexact;
            end if;
    end case;

    verdict := increment & inexact;
    return verdict;
end;
686
-- Build the 5-bit result-flag value to be written into the FPSCR for a
-- result with the given sign and class (presumably the FPRF field; confirm
-- against the caller).  unitbit is only consulted for FINITE results, where
-- unitbit = '0' sets the leading flag bit.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
begin
    if class = NAN then
        -- same value regardless of sign
        return "10001";
    elsif class = INFINITY then
        return '0' & sign & (not sign) & "01";
    elsif class = FINITE then
        return (not unitbit) & sign & (not sign) & "00";
    else
        -- ZERO: the sign goes in the leading bit
        return sign & "0010";
    end if;
end;
702
703 begin
-- Multiplier instance used by the FPU: operands are presented on
-- f_to_multiply and the result is returned on multiply_to_f.
fpu_multiply_0: entity work.multiply
    port map (
        m_out => multiply_to_f,
        m_in  => f_to_multiply,
        clk   => clk
        );
710
-- Clocked process holding the FPU state register r.
-- On reset or flush the control/handshake fields are forced back to their
-- idle values; otherwise r takes the next-state value rin computed by the
-- combinatorial process.
-- FPSCR handling on reset/flush:
--   * rst = '1'                      : working and committed FPSCR are cleared
--   * flush, r.do_intr = '0'         : the flush was not caused by this unit,
--                                      so the working FPSCR is rolled back to
--                                      the committed copy
--   * flush, r.do_intr = '1'         : the flush is due to the interrupt this
--                                      unit raised; the working FPSCR is kept
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' or flush_in = '1' then
            r.state <= IDLE;
            r.busy <= '0';
            r.f2stall <= '0';
            r.instr_done <= '0';
            r.complete <= '0';
            r.illegal <= '0';
            r.do_intr <= '0';
            r.writing_fpr <= '0';
            r.writing_cr <= '0';
            r.writing_xer <= '0';
            r.write_reg <= (others =>'0');
            r.complete_tag.valid <= '0';
            r.cr_mask <= (others =>'0');
            r.cr_result <= (others =>'0');
            r.instr_tag.valid <= '0';
            -- r.fpscr deliberately gets no default assignment in this branch:
            -- an unconditional clear would wipe the FPSCR whenever the flush
            -- is caused by our own interrupt (flush_in = 1, r.do_intr = 1),
            -- since neither branch below would override it in that case.
            if rst = '1' then
                r.fpscr <= (others => '0');
                r.comm_fpscr <= (others => '0');
            elsif r.do_intr = '0' then
                -- flush_in = 1 and not due to us generating an interrupt,
                -- roll back to committed fpscr
                r.fpscr <= r.comm_fpscr;
            end if;
        else
            -- a new instruction must only arrive while the FPU is idle
            assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
            r <= rin;
        end if;
    end if;
end process;
745
-- Clocked read of the lookup table: inverse_est is updated on each rising
-- clock edge from the entry selected by the B operand's mantissa.
lut_access: process(clk)
    variable index : std_ulogic_vector(9 downto 0);
begin
    if rising_edge(clk) then
        -- Low 8 index bits are the 8 mantissa bits just below the unit bit;
        -- the top two bits default to zero.
        index := "00" & r.b.mantissa(UNIT_BIT - 1 downto UNIT_BIT - 8);
        if r.is_sqrt = '1' then
            -- For square-root operations the top two index bits come from
            -- the unit bit and the bit above it.
            index(9 downto 8) := r.b.mantissa(UNIT_BIT + 1 downto UNIT_BIT);
        end if;
        if not is_X(index) then
            -- table entries are extended with a leading '1'
            inverse_est <= '1' & inverse_table(to_integer(unsigned(index)));
        else
            -- propagate unknowns instead of converting a metavalue index
            inverse_est <= (others => 'X');
        end if;
    end if;
end process;
765
-- Status outputs on e_out
e_out.exception <= r.fpscr(FPSCR_FEX);
e_out.f2stall <= r.f2stall;
e_out.busy <= r.busy;

-- Completion outputs on w_out.
-- The cycle in which r.complete = 1 for an instruction may be as late as
-- the second cycle of the next instruction, i.e. the state that follows
-- IDLE.  Consequently none of the fields of r used below may be modified
-- while in IDLE state.
w_out.valid <= r.complete;
w_out.instr_tag <= r.complete_tag;

-- FPR result write
w_out.write_enable <= r.complete and r.writing_fpr;
w_out.write_reg <= r.write_reg;
w_out.write_data <= fp_result;

-- CR result write: the 4-bit result is replicated into all eight fields
-- and cr_mask is passed alongside it
w_out.write_cr_enable <= r.complete and r.writing_cr;
w_out.write_cr_mask <= r.cr_mask;
w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
                       r.cr_result & r.cr_result & r.cr_result & r.cr_result;

-- XER carry/overflow result write
w_out.write_xerc <= r.complete and r.writing_xer;
w_out.xerc <= r.xerc_result;

-- Interrupt request: always vector 0x700; index 47-44 of srr1 is set when
-- r.illegal = 1, index 47-43 otherwise
w_out.interrupt <= r.do_intr;
w_out.intr_vec <= 16#700#;
w_out.srr1 <= (47-43 => not r.illegal, 47-44 => r.illegal, others => '0');
788
789 fpu_1: process(all)
790 variable v : reg_type;
791 variable adec : fpu_reg_type;
792 variable bdec : fpu_reg_type;
793 variable cdec : fpu_reg_type;
794 variable fpscr_mask : std_ulogic_vector(31 downto 0);
795 variable j, k : integer;
796 variable flm : std_ulogic_vector(7 downto 0);
797 variable fpin_a : std_ulogic;
798 variable fpin_b : std_ulogic;
799 variable fpin_c : std_ulogic;
800 variable is_32bint : std_ulogic;
801 variable mask : std_ulogic_vector(63 downto 0);
802 variable in_a0 : std_ulogic_vector(63 downto 0);
803 variable in_b0 : std_ulogic_vector(63 downto 0);
804 variable misc : std_ulogic_vector(63 downto 0);
805 variable shift_res : std_ulogic_vector(63 downto 0);
806 variable round : std_ulogic_vector(1 downto 0);
807 variable update_fx : std_ulogic;
808 variable arith_done : std_ulogic;
809 variable invalid : std_ulogic;
810 variable zero_divide : std_ulogic;
811 variable mant_nz : std_ulogic;
812 variable min_exp : signed(EXP_BITS-1 downto 0);
813 variable max_exp : signed(EXP_BITS-1 downto 0);
814 variable bias_exp : signed(EXP_BITS-1 downto 0);
815 variable new_exp : signed(EXP_BITS-1 downto 0);
816 variable exp_tiny : std_ulogic;
817 variable exp_huge : std_ulogic;
818 variable clz : std_ulogic_vector(5 downto 0);
819 variable set_x : std_ulogic;
820 variable mshift : signed(EXP_BITS-1 downto 0);
821 variable need_check : std_ulogic;
822 variable msb : std_ulogic;
823 variable is_add : std_ulogic;
824 variable set_a : std_ulogic;
825 variable set_a_exp : std_ulogic;
826 variable set_a_mant : std_ulogic;
827 variable set_a_hi : std_ulogic;
828 variable set_a_lo : std_ulogic;
829 variable set_b : std_ulogic;
830 variable set_b_mant : std_ulogic;
831 variable set_c : std_ulogic;
832 variable set_y : std_ulogic;
833 variable set_s : std_ulogic;
834 variable qnan_result : std_ulogic;
835 variable px_nz : std_ulogic;
836 variable pcmpb_eq : std_ulogic;
837 variable pcmpb_lt : std_ulogic;
838 variable pcmpc_eq : std_ulogic;
839 variable pcmpc_lt : std_ulogic;
840 variable pshift : std_ulogic;
841 variable renorm_sqrt : std_ulogic;
842 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
843 variable shiftin : std_ulogic;
844 variable shiftin0 : std_ulogic;
845 variable mulexp : signed(EXP_BITS-1 downto 0);
846 variable maddend : std_ulogic_vector(127 downto 0);
847 variable sum : std_ulogic_vector(63 downto 0);
848 variable round_inc : std_ulogic_vector(63 downto 0);
849 variable rbit_inc : std_ulogic;
850 variable mult_mask : std_ulogic;
851 variable sign_bit : std_ulogic;
852 variable rnd_b32 : std_ulogic;
853 variable rexp_in1 : signed(EXP_BITS-1 downto 0);
854 variable rexp_in2 : signed(EXP_BITS-1 downto 0);
855 variable rexp_cin : std_ulogic;
856 variable rexp_sum : signed(EXP_BITS-1 downto 0);
857 variable rsh_in1 : signed(EXP_BITS-1 downto 0);
858 variable rsh_in2 : signed(EXP_BITS-1 downto 0);
859 variable exec_state : state_t;
860 variable opcbits : std_ulogic_vector(4 downto 0);
861 variable int_result : std_ulogic;
862 variable illegal : std_ulogic;
863 begin
864 v := r;
865 v.complete := '0';
866 v.do_intr := '0';
867 is_32bint := '0';
868 exec_state := IDLE;
869
870 if r.complete = '1' or r.do_intr = '1' then
871 v.instr_done := '0';
872 v.writing_fpr := '0';
873 v.writing_cr := '0';
874 v.writing_xer := '0';
875 v.comm_fpscr := r.fpscr;
876 v.illegal := '0';
877 end if;
878
879 -- capture incoming instruction
880 if e_in.valid = '1' then
881 v.insn := e_in.insn;
882 v.op := e_in.op;
883 v.instr_tag := e_in.itag;
884 v.fe_mode := or (e_in.fe_mode);
885 v.dest_fpr := e_in.frt;
886 v.single_prec := e_in.single;
887 v.is_signed := e_in.is_signed;
888 v.rc := e_in.rc;
889 v.fp_rc := '0';
890 v.is_cmp := e_in.out_cr;
891 v.oe := e_in.oe;
892 v.m32b := e_in.m32b;
893 v.xerc := e_in.xerc;
894 v.longmask := '0';
895 v.integer_op := '0';
896 v.divext := '0';
897 v.divmod := '0';
898 v.is_sqrt := '0';
899 v.is_multiply := '0';
900 fpin_a := '0';
901 fpin_b := '0';
902 fpin_c := '0';
903 v.use_a := e_in.valid_a;
904 v.use_b := e_in.valid_b;
905 v.use_c := e_in.valid_c;
906 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
907 case e_in.op is
908 when OP_FP_ARITH =>
909 fpin_a := e_in.valid_a;
910 fpin_b := e_in.valid_b;
911 fpin_c := e_in.valid_c;
912 v.longmask := e_in.single;
913 v.fp_rc := e_in.rc;
914 exec_state := arith_decode(to_integer(unsigned(e_in.insn(5 downto 1))));
915 if e_in.insn(5 downto 1) = "11001" or e_in.insn(5 downto 3) = "111" then
916 v.is_multiply := '1';
917 end if;
918 if e_in.insn(5 downto 1) = "10110" or e_in.insn(5 downto 1) = "11010" then
919 v.is_sqrt := '1';
920 end if;
921 if e_in.insn(5 downto 1) = "01111" then
922 v.round_mode := "001";
923 end if;
924 when OP_FP_CMP =>
925 fpin_a := e_in.valid_a;
926 fpin_b := e_in.valid_b;
927 exec_state := cmp_decode(to_integer(unsigned(e_in.insn(8 downto 6))));
928 when OP_FP_MISC =>
929 v.fp_rc := e_in.rc;
930 opcbits := e_in.insn(10) & e_in.insn(8) & e_in.insn(4) & e_in.insn(2) & e_in.insn(1);
931 exec_state := misc_decode(to_integer(unsigned(opcbits)));
932 when OP_FP_MOVE =>
933 v.fp_rc := e_in.rc;
934 fpin_a := e_in.valid_a;
935 fpin_b := e_in.valid_b;
936 fpin_c := e_in.valid_c;
937 if e_in.insn(5) = '0' then
938 exec_state := DO_FMR;
939 else
940 exec_state := DO_FSEL;
941 end if;
942 when OP_DIV =>
943 v.integer_op := '1';
944 is_32bint := e_in.single;
945 exec_state := DO_IDIVMOD;
946 when OP_DIVE =>
947 v.integer_op := '1';
948 v.divext := '1';
949 is_32bint := e_in.single;
950 exec_state := DO_IDIVMOD;
951 when OP_MOD =>
952 v.integer_op := '1';
953 v.divmod := '1';
954 is_32bint := e_in.single;
955 exec_state := DO_IDIVMOD;
956 when others =>
957 exec_state := DO_ILLEGAL;
958 end case;
959 v.quieten_nan := '1';
960 v.tiny := '0';
961 v.denorm := '0';
962 v.is_subtract := '0';
963 v.add_bsmall := '0';
964 v.doing_ftdiv := "00";
965 v.int_ovf := '0';
966 v.div_close := '0';
967
968 adec := decode_dp(e_in.fra, fpin_a, is_32bint, e_in.is_signed);
969 bdec := decode_dp(e_in.frb, fpin_b, is_32bint, e_in.is_signed);
970 cdec := decode_dp(e_in.frc, fpin_c, '0', '0');
971 v.a := adec;
972 v.b := bdec;
973 v.c := cdec;
974
975 v.exp_cmp := '0';
976 if adec.exponent > bdec.exponent then
977 v.exp_cmp := '1';
978 end if;
979 v.madd_cmp := '0';
980 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
981 v.madd_cmp := '1';
982 end if;
983
984 v.a_hi := 8x"0";
985 v.a_lo := 56x"0";
986 end if;
987
988 r_hi_nz <= or (r.r(UNIT_BIT + 1 downto SP_LSB));
989 r_lo_nz <= or (r.r(SP_LSB - 1 downto DP_LSB));
990 r_gt_1 <= or (r.r(63 downto 1));
991 s_nz <= or (r.s);
992
993 if r.single_prec = '0' then
994 if r.doing_ftdiv(1) = '0' then
995 max_exp := to_signed(1023, EXP_BITS);
996 else
997 max_exp := to_signed(1020, EXP_BITS);
998 end if;
999 if r.doing_ftdiv(0) = '0' then
1000 min_exp := to_signed(-1022, EXP_BITS);
1001 else
1002 min_exp := to_signed(-1021, EXP_BITS);
1003 end if;
1004 bias_exp := to_signed(1536, EXP_BITS);
1005 else
1006 max_exp := to_signed(127, EXP_BITS);
1007 min_exp := to_signed(-126, EXP_BITS);
1008 bias_exp := to_signed(192, EXP_BITS);
1009 end if;
1010 new_exp := r.result_exp - r.shift;
1011 exp_tiny := '0';
1012 exp_huge := '0';
1013 if is_X(new_exp) or is_X(min_exp) then
1014 exp_tiny := 'X';
1015 elsif new_exp < min_exp then
1016 exp_tiny := '1';
1017 end if;
1018 if is_X(new_exp) or is_X(min_exp) then
1019 exp_huge := 'X';
1020 elsif new_exp > max_exp then
1021 exp_huge := '1';
1022 end if;
1023
1024 -- Compare P with zero and with B
1025 px_nz := or (r.p(UNIT_BIT + 1 downto 4));
1026 pcmpb_eq := '0';
1027 if r.p(59 downto 4) = r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT) then
1028 pcmpb_eq := '1';
1029 end if;
1030 pcmpb_lt := '0';
1031 if is_X(r.p(59 downto 4)) or is_X(r.b.mantissa(55 downto 0)) then
1032 pcmpb_lt := 'X';
1033 elsif unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT)) then
1034 pcmpb_lt := '1';
1035 end if;
1036 pcmpc_eq := '0';
1037 if r.p = r.c.mantissa then
1038 pcmpc_eq := '1';
1039 end if;
1040 pcmpc_lt := '0';
1041 if is_X(r.p) or is_X(r.c.mantissa) then
1042 pcmpc_lt := 'X';
1043 elsif unsigned(r.p) < unsigned(r.c.mantissa) then
1044 pcmpc_lt := '1';
1045 end if;
1046
1047 v.update_fprf := '0';
1048 v.first := '0';
1049 v.opsel_a := AIN_R;
1050 opsel_ainv <= '0';
1051 opsel_mask <= '0';
1052 opsel_b <= BIN_ZERO;
1053 opsel_binv <= '0';
1054 opsel_r <= RES_SUM;
1055 opsel_s <= S_ZERO;
1056 carry_in <= '0';
1057 misc_sel <= "0000";
1058 fpscr_mask := (others => '1');
1059 update_fx := '0';
1060 arith_done := '0';
1061 invalid := '0';
1062 zero_divide := '0';
1063 set_x := '0';
1064 qnan_result := '0';
1065 set_a := '0';
1066 set_a_exp := '0';
1067 set_a_mant := '0';
1068 set_a_hi := '0';
1069 set_a_lo := '0';
1070 set_b := '0';
1071 set_b_mant := '0';
1072 set_c := '0';
1073 set_s := '0';
1074 f_to_multiply.is_signed <= '0';
1075 f_to_multiply.valid <= '0';
1076 msel_1 <= MUL1_A;
1077 msel_2 <= MUL2_C;
1078 msel_add <= MULADD_ZERO;
1079 msel_inv <= '0';
1080 set_y := '0';
1081 pshift := '0';
1082 renorm_sqrt := '0';
1083 shiftin := '0';
1084 shiftin0 := '0';
1085 rbit_inc := '0';
1086 mult_mask := '0';
1087 rnd_b32 := '0';
1088 int_result := '0';
1089 illegal := '0';
1090
1091 re_sel1 <= REXP1_ZERO;
1092 re_sel2 <= REXP2_CON;
1093 re_con2 <= RECON2_ZERO;
1094 re_neg1 <= '0';
1095 re_neg2 <= '0';
1096 re_set_result <= '0';
1097 rs_sel1 <= RSH1_ZERO;
1098 rs_sel2 <= RSH2_CON;
1099 rs_con2 <= RSCON2_ZERO;
1100 rs_neg1 <= '0';
1101 rs_neg2 <= '0';
1102 rs_norm <= '0';
1103
1104 case r.state is
1105 when IDLE =>
1106 v.invalid := '0';
1107 v.negate := '0';
1108 if e_in.valid = '1' then
1109 v.opsel_a := AIN_B;
1110 v.busy := '1';
1111 if e_in.op = OP_FP_ARITH and e_in.valid_a = '1' and
1112 (e_in.valid_b = '0' or e_in.valid_c = '0') then
1113 v.opsel_a := AIN_A;
1114 end if;
1115 if e_in.op = OP_FP_ARITH then
1116 -- input selection for denorm cases
1117 case e_in.insn(5 downto 1) is
1118 when "10010" => -- fdiv
1119 if v.b.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then
1120 v.opsel_a := AIN_B;
1121 end if;
1122 when "11001" => -- fmul
1123 if v.c.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then
1124 v.opsel_a := AIN_C;
1125 end if;
1126 when "11100" | "11101" | "11110" | "11111" => -- fmadd etc.
1127 if v.a.mantissa(UNIT_BIT) = '0' then
1128 v.opsel_a := AIN_A;
1129 elsif v.c.mantissa(UNIT_BIT) = '0' then
1130 v.opsel_a := AIN_C;
1131 end if;
1132 when others =>
1133 end case;
1134 end if;
1135 v.state := exec_state;
1136 end if;
1137 v.x := '0';
1138 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
1139 set_s := '1';
1140
1141 when DO_ILLEGAL =>
1142 illegal := '1';
1143 v.instr_done := '1';
1144
1145 when DO_MCRFS =>
1146 j := to_integer(unsigned(insn_bfa(r.insn)));
1147 for i in 0 to 7 loop
1148 if i = j then
1149 k := (7 - i) * 4;
1150 v.cr_result := r.fpscr(k + 3 downto k);
1151 fpscr_mask(k + 3 downto k) := "0000";
1152 end if;
1153 end loop;
1154 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
1155 v.instr_done := '1';
1156
1157 when DO_FTDIV =>
1158 v.instr_done := '1';
1159 v.cr_result := "0000";
1160 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
1161 (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then
1162 v.cr_result(2) := '1';
1163 end if;
1164 if r.a.class = NAN or r.a.class = INFINITY or
1165 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
1166 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
1167 v.cr_result(1) := '1';
1168 else
1169 v.doing_ftdiv := "11";
1170 v.first := '1';
1171 v.state := FTDIV_1;
1172 v.instr_done := '0';
1173 end if;
1174
1175 when DO_FTSQRT =>
1176 v.instr_done := '1';
1177 v.cr_result := "0000";
1178 if r.b.class = ZERO or r.b.class = INFINITY or
1179 (r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then
1180 v.cr_result(2) := '1';
1181 end if;
1182 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
1183 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
1184 v.cr_result(1) := '0';
1185 end if;
1186
1187 when DO_FCMP =>
1188 -- fcmp[uo]
1189 -- r.opsel_a = AIN_B
1190 v.instr_done := '1';
1191 update_fx := '1';
1192 re_sel2 <= REXP2_B;
1193 re_set_result <= '1';
1194 if (r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
1195 (r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') then
1196 -- Signalling NAN
1197 v.fpscr(FPSCR_VXSNAN) := '1';
1198 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
1199 v.fpscr(FPSCR_VXVC) := '1';
1200 end if;
1201 invalid := '1';
1202 v.cr_result := "0001"; -- unordered
1203 elsif r.a.class = NAN or r.b.class = NAN then
1204 if r.insn(6) = '1' then
1205 -- fcmpo
1206 v.fpscr(FPSCR_VXVC) := '1';
1207 invalid := '1';
1208 end if;
1209 v.cr_result := "0001"; -- unordered
1210 elsif r.a.class = ZERO and r.b.class = ZERO then
1211 v.cr_result := "0010"; -- equal
1212 elsif r.a.negative /= r.b.negative then
1213 v.cr_result := r.a.negative & r.b.negative & "00";
1214 elsif r.a.class = ZERO then
1215 -- A and B are the same sign from here down
1216 v.cr_result := not r.b.negative & r.b.negative & "00";
1217 elsif r.a.class = INFINITY then
1218 if r.b.class = INFINITY then
1219 v.cr_result := "0010";
1220 else
1221 v.cr_result := r.a.negative & not r.a.negative & "00";
1222 end if;
1223 elsif r.b.class = ZERO then
1224 -- A is finite from here down
1225 v.cr_result := r.a.negative & not r.a.negative & "00";
1226 elsif r.b.class = INFINITY then
1227 v.cr_result := not r.b.negative & r.b.negative & "00";
1228 elsif r.exp_cmp = '1' then
1229 -- A and B are both finite from here down
1230 v.cr_result := r.a.negative & not r.a.negative & "00";
1231 elsif r.a.exponent /= r.b.exponent then
1232 -- A exponent is smaller than B
1233 v.cr_result := not r.a.negative & r.a.negative & "00";
1234 else
1235 -- Prepare to subtract mantissas, put B in R
1236 v.cr_result := "0000";
1237 v.instr_done := '0';
1238 v.opsel_a := AIN_A;
1239 v.state := CMP_1;
1240 end if;
1241 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1242
1243 when DO_MTFSB =>
1244 -- mtfsb{0,1}
1245 j := to_integer(unsigned(insn_bt(r.insn)));
1246 for i in 0 to 31 loop
1247 if i = j then
1248 v.fpscr(31 - i) := r.insn(6);
1249 end if;
1250 end loop;
1251 v.instr_done := '1';
1252
1253 when DO_MTFSFI =>
1254 -- mtfsfi
1255 j := to_integer(unsigned(insn_bf(r.insn)));
1256 if r.insn(16) = '0' then
1257 for i in 0 to 7 loop
1258 if i = j then
1259 k := (7 - i) * 4;
1260 v.fpscr(k + 3 downto k) := insn_u(r.insn);
1261 end if;
1262 end loop;
1263 end if;
1264 v.instr_done := '1';
1265
1266 when DO_FMRG =>
1267 -- fmrgew, fmrgow
1268 opsel_r <= RES_MISC;
1269 misc_sel <= "01" & r.insn(8) & '0';
1270 int_result := '1';
1271 v.writing_fpr := '1';
1272 v.instr_done := '1';
1273
1274 when DO_MFFS =>
1275 v.writing_fpr := '1';
1276 opsel_r <= RES_MISC;
1277 case r.insn(20 downto 16) is
1278 when "00000" =>
1279 -- mffs
1280 when "00001" =>
1281 -- mffsce
1282 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1283 when "10100" | "10101" =>
1284 -- mffscdrn[i] (but we don't implement DRN)
1285 fpscr_mask := x"000000FF";
1286 when "10110" =>
1287 -- mffscrn
1288 fpscr_mask := x"000000FF";
1289 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1290 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1291 when "10111" =>
1292 -- mffscrni
1293 fpscr_mask := x"000000FF";
1294 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1295 when "11000" =>
1296 -- mffsl
1297 fpscr_mask := x"0007F0FF";
1298 when others =>
1299 v.illegal := '1';
1300 v.writing_fpr := '0';
1301 end case;
1302 int_result := '1';
1303 v.instr_done := '1';
1304
1305 when DO_MTFSF =>
1306 if r.insn(25) = '1' then
1307 flm := x"FF";
1308 elsif r.insn(16) = '1' then
1309 flm := x"00";
1310 else
1311 flm := r.insn(24 downto 17);
1312 end if;
1313 for i in 0 to 7 loop
1314 k := i * 4;
1315 if flm(i) = '1' then
1316 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1317 end if;
1318 end loop;
1319 v.instr_done := '1';
1320
1321 when DO_FMR =>
1322 -- r.opsel_a = AIN_B
1323 v.result_class := r.b.class;
1324 re_sel2 <= REXP2_B;
1325 re_set_result <= '1';
1326 v.quieten_nan := '0';
1327 if r.insn(9) = '1' then
1328 v.result_sign := '0'; -- fabs
1329 elsif r.insn(8) = '1' then
1330 v.result_sign := '1'; -- fnabs
1331 elsif r.insn(7) = '1' then
1332 v.result_sign := r.b.negative; -- fmr
1333 elsif r.insn(6) = '1' then
1334 v.result_sign := not r.b.negative; -- fneg
1335 else
1336 v.result_sign := r.a.negative; -- fcpsgn
1337 end if;
1338 v.writing_fpr := '1';
1339 v.instr_done := '1';
1340
1341 when DO_FRI => -- fri[nzpm]
1342 -- r.opsel_a = AIN_B
1343 v.result_class := r.b.class;
1344 v.result_sign := r.b.negative;
1345 re_sel2 <= REXP2_B;
1346 re_set_result <= '1';
1347 -- set shift to exponent - 52
1348 rs_sel1 <= RSH1_B;
1349 rs_con2 <= RSCON2_52;
1350 rs_neg2 <= '1';
1351 v.fpscr(FPSCR_FR) := '0';
1352 v.fpscr(FPSCR_FI) := '0';
1353 if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then
1354 -- Signalling NAN
1355 v.fpscr(FPSCR_VXSNAN) := '1';
1356 invalid := '1';
1357 end if;
1358 if r.b.class = FINITE then
1359 if r.b.exponent >= to_signed(52, EXP_BITS) then
1360 -- integer already, no rounding required
1361 arith_done := '1';
1362 else
1363 v.state := FRI_1;
1364 v.round_mode := '1' & r.insn(7 downto 6);
1365 end if;
1366 else
1367 arith_done := '1';
1368 end if;
1369
1370 when DO_FRSP =>
1371 -- r.opsel_a = AIN_B, r.shift = 0
1372 v.result_class := r.b.class;
1373 v.result_sign := r.b.negative;
1374 re_sel2 <= REXP2_B;
1375 re_set_result <= '1';
1376 -- set shift to exponent - -126
1377 rs_sel1 <= RSH1_B;
1378 rs_con2 <= RSCON2_MINEXP;
1379 rs_neg2 <= '1';
1380 v.fpscr(FPSCR_FR) := '0';
1381 v.fpscr(FPSCR_FI) := '0';
1382 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1383 -- Signalling NAN
1384 v.fpscr(FPSCR_VXSNAN) := '1';
1385 invalid := '1';
1386 end if;
1387 set_x := '1';
1388 if r.b.class = FINITE then
1389 if r.b.exponent < to_signed(-126, EXP_BITS) then
1390 v.state := ROUND_UFLOW;
1391 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1392 v.state := ROUND_OFLOW;
1393 else
1394 v.state := ROUNDING;
1395 end if;
1396 else
1397 arith_done := '1';
1398 end if;
1399
1400 when DO_FCTI =>
1401 -- instr bit 9: 1=dword 0=word
1402 -- instr bit 8: 1=unsigned 0=signed
1403 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1404 -- r.opsel_a = AIN_B
1405 v.result_class := r.b.class;
1406 v.result_sign := r.b.negative;
1407 re_sel2 <= REXP2_B;
1408 re_set_result <= '1';
1409 rs_sel1 <= RSH1_B;
1410 rs_neg2 <= '1';
1411 v.fpscr(FPSCR_FR) := '0';
1412 v.fpscr(FPSCR_FI) := '0';
1413 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1414 -- Signalling NAN
1415 v.fpscr(FPSCR_VXSNAN) := '1';
1416 invalid := '1';
1417 end if;
1418
1419 int_result := '1';
1420
1421 case r.b.class is
1422 when ZERO =>
1423 arith_done := '1';
1424 when FINITE =>
1425 if r.b.exponent >= to_signed(64, EXP_BITS) or
1426 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1427 v.state := INT_OFLOW;
1428 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1429 -- integer already, no rounding required,
1430 -- shift into final position
1431 -- set shift to exponent - 56
1432 rs_con2 <= RSCON2_UNIT;
1433 if r.insn(8) = '1' and r.b.negative = '1' then
1434 v.state := INT_OFLOW;
1435 else
1436 v.state := INT_ISHIFT;
1437 end if;
1438 else
1439 -- set shift to exponent - 52
1440 rs_con2 <= RSCON2_52;
1441 v.state := INT_SHIFT;
1442 end if;
1443 when INFINITY | NAN =>
1444 v.state := INT_OFLOW;
1445 end case;
1446
1447 when DO_FCFID =>
1448 -- r.opsel_a = AIN_B
1449 v.result_sign := '0';
1450 if r.insn(8) = '0' and r.b.negative = '1' then
1451 -- fcfid[s] with negative operand, set R = -B
1452 opsel_ainv <= '1';
1453 carry_in <= '1';
1454 v.result_sign := '1';
1455 end if;
1456 v.result_class := r.b.class;
1457 re_con2 <= RECON2_UNIT;
1458 re_set_result <= '1';
1459 v.fpscr(FPSCR_FR) := '0';
1460 v.fpscr(FPSCR_FI) := '0';
1461 if r.b.class = ZERO then
1462 arith_done := '1';
1463 else
1464 v.state := FINISH;
1465 end if;
1466
1467 when DO_FADD =>
1468 -- fadd[s] and fsub[s]
1469 -- r.opsel_a = AIN_A
1470 v.result_sign := r.a.negative;
1471 v.result_class := r.a.class;
1472 re_sel1 <= REXP1_A;
1473 re_set_result <= '1';
1474 -- set shift to a.exp - b.exp
1475 rs_sel1 <= RSH1_B;
1476 rs_neg1 <= '1';
1477 rs_sel2 <= RSH2_A;
1478 v.fpscr(FPSCR_FR) := '0';
1479 v.fpscr(FPSCR_FI) := '0';
1480 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1481 v.is_subtract := not is_add;
1482 if r.a.class = FINITE and r.b.class = FINITE then
1483 v.add_bsmall := r.exp_cmp;
1484 v.opsel_a := AIN_B;
1485 if r.exp_cmp = '0' then
1486 v.result_sign := r.b.negative xnor r.insn(1);
1487 if r.a.exponent = r.b.exponent then
1488 v.state := ADD_2;
1489 else
1490 v.longmask := '0';
1491 v.state := ADD_SHIFT;
1492 end if;
1493 else
1494 v.state := ADD_1;
1495 end if;
1496 else
1497 if r.a.class = NAN or r.b.class = NAN then
1498 v.state := NAN_RESULT;
1499 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1500 -- invalid operation, construct QNaN
1501 v.fpscr(FPSCR_VXISI) := '1';
1502 qnan_result := '1';
1503 arith_done := '1';
1504 elsif r.a.class = INFINITY or r.b.class = ZERO then
1505 -- result is A; we're already set up to put A into R
1506 arith_done := '1';
1507 else
1508 -- result is +/- B
1509 v.opsel_a := AIN_B;
1510 v.result_sign := r.b.negative xnor r.insn(1);
1511 v.state := EXC_RESULT;
1512 end if;
1513 end if;
1514
1515 when DO_FMUL =>
1516 -- fmul[s]
1517 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1518 v.result_sign := r.a.negative xor r.c.negative;
1519 v.result_class := r.a.class;
1520 v.fpscr(FPSCR_FR) := '0';
1521 v.fpscr(FPSCR_FI) := '0';
1522 re_sel1 <= REXP1_A;
1523 re_sel2 <= REXP2_C;
1524 re_set_result <= '1';
1525 if r.a.class = FINITE and r.c.class = FINITE then
1526 -- Renormalize denorm operands
1527 if r.a.mantissa(UNIT_BIT) = '0' then
1528 v.state := RENORM_A;
1529 elsif r.c.mantissa(UNIT_BIT) = '0' then
1530 v.state := RENORM_C;
1531 else
1532 f_to_multiply.valid <= '1';
1533 v.state := MULT_1;
1534 end if;
1535 else
1536 if r.a.class = NAN or r.c.class = NAN then
1537 v.state := NAN_RESULT;
1538 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1539 (r.a.class = ZERO and r.c.class = INFINITY) then
1540 -- invalid operation, construct QNaN
1541 v.fpscr(FPSCR_VXIMZ) := '1';
1542 qnan_result := '1';
1543 elsif r.a.class = ZERO or r.a.class = INFINITY then
1544 -- result is +/- A
1545 arith_done := '1';
1546 else
1547 -- r.c.class is ZERO or INFINITY
1548 v.opsel_a := AIN_C;
1549 v.state := EXC_RESULT;
1550 end if;
1551 end if;
1552
1553 when DO_FDIV =>
1554 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1555 v.result_class := r.a.class;
1556 v.fpscr(FPSCR_FR) := '0';
1557 v.fpscr(FPSCR_FI) := '0';
1558 v.result_sign := r.a.negative xor r.b.negative;
1559 re_sel1 <= REXP1_A;
1560 re_sel2 <= REXP2_B;
1561 re_neg2 <= '1';
1562 re_set_result <= '1';
1563 v.count := "00";
1564 if r.a.class = FINITE and r.b.class = FINITE then
1565 -- Renormalize denorm operands
1566 if r.a.mantissa(UNIT_BIT) = '0' then
1567 v.state := RENORM_A;
1568 elsif r.b.mantissa(UNIT_BIT) = '0' then
1569 v.state := RENORM_B;
1570 else
1571 v.first := '1';
1572 v.state := DIV_2;
1573 end if;
1574 else
1575 if r.a.class = NAN or r.b.class = NAN then
1576 v.state := NAN_RESULT;
1577 elsif r.b.class = INFINITY then
1578 if r.a.class = INFINITY then
1579 v.fpscr(FPSCR_VXIDI) := '1';
1580 qnan_result := '1';
1581 else
1582 v.result_class := ZERO;
1583 end if;
1584 arith_done := '1';
1585 elsif r.b.class = ZERO then
1586 if r.a.class = ZERO then
1587 v.fpscr(FPSCR_VXZDZ) := '1';
1588 qnan_result := '1';
1589 else
1590 if r.a.class = FINITE then
1591 zero_divide := '1';
1592 end if;
1593 v.result_class := INFINITY;
1594 end if;
1595 arith_done := '1';
1596 else -- r.b.class = FINITE, result_class = r.a.class
1597 arith_done := '1';
1598 end if;
1599 end if;
1600
1601 when DO_FSEL =>
1602 v.fpscr(FPSCR_FR) := '0';
1603 v.fpscr(FPSCR_FI) := '0';
1604 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1605 v.opsel_a := AIN_C;
1606 v.result_sign := r.c.negative;
1607 else
1608 v.opsel_a := AIN_B;
1609 v.result_sign := r.b.negative;
1610 end if;
1611 v.quieten_nan := '0';
1612 v.state := EXC_RESULT;
1613
1614 when DO_FSQRT =>
1615 -- r.opsel_a = AIN_B
1616 v.result_class := r.b.class;
1617 v.result_sign := r.b.negative;
1618 v.fpscr(FPSCR_FR) := '0';
1619 v.fpscr(FPSCR_FI) := '0';
1620 re_sel2 <= REXP2_B;
1621 re_set_result <= '1';
1622 case r.b.class is
1623 when FINITE =>
1624 if r.b.negative = '1' then
1625 v.fpscr(FPSCR_VXSQRT) := '1';
1626 qnan_result := '1';
1627 elsif r.b.mantissa(UNIT_BIT) = '0' then
1628 v.state := RENORM_B;
1629 elsif r.b.exponent(0) = '0' then
1630 v.state := SQRT_1;
1631 else
1632 -- set shift to 1
1633 rs_con2 <= RSCON2_1;
1634 v.state := RENORM_B2;
1635 end if;
1636 when NAN =>
1637 v.state := NAN_RESULT;
1638 when ZERO =>
1639 -- result is B
1640 arith_done := '1';
1641 when INFINITY =>
1642 if r.b.negative = '1' then
1643 v.fpscr(FPSCR_VXSQRT) := '1';
1644 qnan_result := '1';
1645 -- else result is B
1646 end if;
1647 arith_done := '1';
1648 end case;
1649
1650 when DO_FRE =>
1651 -- r.opsel_a = AIN_B
1652 v.result_class := r.b.class;
1653 v.result_sign := r.b.negative;
1654 v.fpscr(FPSCR_FR) := '0';
1655 v.fpscr(FPSCR_FI) := '0';
1656 re_sel2 <= REXP2_B;
1657 re_set_result <= '1';
1658 case r.b.class is
1659 when FINITE =>
1660 if r.b.mantissa(UNIT_BIT) = '0' then
1661 v.state := RENORM_B;
1662 else
1663 v.state := FRE_1;
1664 end if;
1665 when NAN =>
1666 v.state := NAN_RESULT;
1667 when INFINITY =>
1668 v.result_class := ZERO;
1669 arith_done := '1';
1670 when ZERO =>
1671 v.result_class := INFINITY;
1672 zero_divide := '1';
1673 arith_done := '1';
1674 end case;
1675
1676 when DO_FRSQRTE =>
1677 -- r.opsel_a = AIN_B
1678 v.result_class := r.b.class;
1679 v.result_sign := r.b.negative;
1680 v.fpscr(FPSCR_FR) := '0';
1681 v.fpscr(FPSCR_FI) := '0';
1682 re_sel2 <= REXP2_B;
1683 re_set_result <= '1';
1684 -- set shift to 1
1685 rs_con2 <= RSCON2_1;
1686 case r.b.class is
1687 when FINITE =>
1688 if r.b.negative = '1' then
1689 v.fpscr(FPSCR_VXSQRT) := '1';
1690 qnan_result := '1';
1691 elsif r.b.mantissa(UNIT_BIT) = '0' then
1692 v.state := RENORM_B;
1693 elsif r.b.exponent(0) = '0' then
1694 v.state := RSQRT_1;
1695 else
1696 v.state := RENORM_B2;
1697 end if;
1698 when NAN =>
1699 v.state := NAN_RESULT;
1700 when INFINITY =>
1701 if r.b.negative = '1' then
1702 v.fpscr(FPSCR_VXSQRT) := '1';
1703 qnan_result := '1';
1704 else
1705 v.result_class := ZERO;
1706 end if;
1707 arith_done := '1';
1708 when ZERO =>
1709 v.result_class := INFINITY;
1710 zero_divide := '1';
1711 arith_done := '1';
1712 end case;
1713
1714 when DO_FMADD =>
1715 -- fmadd, fmsub, fnmadd, fnmsub
1716 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1717 -- else AIN_B
1718 v.result_sign := r.a.negative;
1719 v.result_class := r.a.class;
1720 -- put a.exp + c.exp into result_exp
1721 re_sel1 <= REXP1_A;
1722 re_sel2 <= REXP2_C;
1723 re_set_result <= '1';
1724 -- put b.exp into shift
1725 rs_sel1 <= RSH1_B;
1726 v.fpscr(FPSCR_FR) := '0';
1727 v.fpscr(FPSCR_FI) := '0';
1728 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1729 v.negate := r.insn(2);
1730 v.is_subtract := not is_add;
1731 if r.a.class = FINITE and r.c.class = FINITE and
1732 (r.b.class = FINITE or r.b.class = ZERO) then
1733 -- Make sure A and C are normalized
1734 if r.a.mantissa(UNIT_BIT) = '0' then
1735 v.state := RENORM_A;
1736 elsif r.c.mantissa(UNIT_BIT) = '0' then
1737 v.state := RENORM_C;
1738 elsif r.b.class = ZERO then
1739 -- no addend, degenerates to multiply
1740 v.result_sign := r.a.negative xor r.c.negative;
1741 f_to_multiply.valid <= '1';
1742 v.is_multiply := '1';
1743 v.state := MULT_1;
1744 elsif r.madd_cmp = '0' then
1745 -- addend is bigger, do multiply first
1746 v.result_sign := r.b.negative xnor r.insn(1);
1747 f_to_multiply.valid <= '1';
1748 v.first := '1';
1749 v.state := FMADD_0;
1750 else
1751 -- product is bigger, shift B first
1752 v.state := FMADD_1;
1753 end if;
1754 else
1755 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1756 v.state := NAN_RESULT;
1757 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1758 (r.a.class = INFINITY and r.c.class = ZERO) then
1759 -- invalid operation, construct QNaN
1760 v.fpscr(FPSCR_VXIMZ) := '1';
1761 qnan_result := '1';
1762 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1763 if r.b.class = INFINITY and is_add = '0' then
1764 -- invalid operation, construct QNaN
1765 v.fpscr(FPSCR_VXISI) := '1';
1766 qnan_result := '1';
1767 else
1768 -- result is infinity
1769 v.result_class := INFINITY;
1770 v.result_sign := r.a.negative xor r.c.negative;
1771 arith_done := '1';
1772 end if;
1773 else
1774 -- Here A is zero, C is zero, or B is infinity
1775 -- Result is +/-B in all of those cases
1776 v.opsel_a := AIN_B;
1777 v.result_sign := r.b.negative xnor r.insn(1);
1778 v.state := EXC_RESULT;
1779 end if;
1780 end if;
1781
1782 when RENORM_A =>
1783 rs_norm <= '1';
1784 v.state := RENORM_A2;
1785 if r.use_c = '1' and r.c.denorm = '1' then
1786 v.opsel_a := AIN_C;
1787 else
1788 v.opsel_a := AIN_B;
1789 end if;
1790
1791 when RENORM_A2 =>
1792 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1793 set_a := '1';
1794 re_sel2 <= REXP2_NE;
1795 re_set_result <= '1';
1796 if r.insn(4) = '1' then
1797 if r.c.mantissa(UNIT_BIT) = '1' then
1798 if r.insn(3) = '0' or r.b.class = ZERO then
1799 v.first := '1';
1800 v.state := MULT_1;
1801 else
1802 v.madd_cmp := '0';
1803 if new_exp + 1 >= r.b.exponent then
1804 v.madd_cmp := '1';
1805 end if;
1806 v.opsel_a := AIN_B;
1807 v.state := DO_FMADD;
1808 end if;
1809 else
1810 v.state := RENORM_C;
1811 end if;
1812 else
1813 if r.b.mantissa(UNIT_BIT) = '1' then
1814 v.first := '1';
1815 v.state := DIV_2;
1816 else
1817 v.state := RENORM_B;
1818 end if;
1819 end if;
1820
1821 when RENORM_B =>
1822 rs_norm <= '1';
1823 renorm_sqrt := r.is_sqrt;
1824 v.state := RENORM_B2;
1825
1826 when RENORM_B2 =>
1827 set_b := '1';
1828 re_sel2 <= REXP2_NE;
1829 re_set_result <= '1';
1830 v.opsel_a := AIN_B;
1831 v.state := LOOKUP;
1832
1833 when RENORM_C =>
1834 rs_norm <= '1';
1835 v.state := RENORM_C2;
1836
1837 when RENORM_C2 =>
1838 set_c := '1';
1839 re_sel2 <= REXP2_NE;
1840 re_set_result <= '1';
1841 if r.insn(3) = '0' or r.b.class = ZERO then
1842 v.first := '1';
1843 v.state := MULT_1;
1844 else
1845 v.madd_cmp := '0';
1846 if new_exp + 1 >= r.b.exponent then
1847 v.madd_cmp := '1';
1848 end if;
1849 v.opsel_a := AIN_B;
1850 v.state := DO_FMADD;
1851 end if;
1852
1853 when ADD_1 =>
1854 -- transferring B to R
1855 re_sel2 <= REXP2_B;
1856 re_set_result <= '1';
1857 -- set shift to b.exp - a.exp
1858 rs_sel1 <= RSH1_B;
1859 rs_sel2 <= RSH2_A;
1860 rs_neg2 <= '1';
1861 v.longmask := '0';
1862 v.state := ADD_SHIFT;
1863
1864 when ADD_SHIFT =>
1865 -- r.shift = - exponent difference, r.longmask = 0
1866 opsel_r <= RES_SHIFT;
1867 re_sel2 <= REXP2_NE;
1868 re_set_result <= '1';
1869 v.x := s_nz;
1870 set_x := '1';
1871 v.longmask := r.single_prec;
1872 if r.add_bsmall = '1' then
1873 v.opsel_a := AIN_A;
1874 else
1875 v.opsel_a := AIN_B;
1876 end if;
1877 v.state := ADD_2;
1878
1879 when ADD_2 =>
1880 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1881 opsel_b <= BIN_R;
1882 opsel_binv <= r.is_subtract;
1883 carry_in <= r.is_subtract and not r.x;
1884 -- set shift to -1
1885 rs_con2 <= RSCON2_1;
1886 rs_neg2 <= '1';
1887 v.state := ADD_3;
1888
1889 when ADD_3 =>
1890 -- check for overflow or negative result (can't get both)
1891 -- r.shift = -1
1892 re_sel2 <= REXP2_NE;
1893 if r.r(63) = '1' then
1894 -- result is opposite sign to expected
1895 v.result_sign := not r.result_sign;
1896 opsel_ainv <= '1';
1897 carry_in <= '1';
1898 v.state := FINISH;
1899 elsif r.r(UNIT_BIT + 1) = '1' then
1900 -- sum overflowed, shift right
1901 opsel_r <= RES_SHIFT;
1902 re_set_result <= '1';
1903 set_x := '1';
1904 if exp_huge = '1' then
1905 v.state := ROUND_OFLOW;
1906 else
1907 v.state := ROUNDING;
1908 end if;
1909 elsif r.r(UNIT_BIT) = '1' then
1910 set_x := '1';
1911 v.state := ROUNDING;
1912 elsif (r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
1913 -- r.x must be zero at this point
1914 v.result_class := ZERO;
1915 arith_done := '1';
1916 else
1917 rs_norm <= '1';
1918 v.state := NORMALIZE;
1919 end if;
1920
1921 when CMP_1 =>
1922 -- r.opsel_a = AIN_A
1923 opsel_b <= BIN_R;
1924 opsel_binv <= '1';
1925 carry_in <= '1';
1926 v.state := CMP_2;
1927
1928 when CMP_2 =>
1929 if r.r(63) = '1' then
1930 -- A is smaller in magnitude
1931 v.cr_result := not r.a.negative & r.a.negative & "00";
1932 elsif (r_hi_nz or r_lo_nz) = '0' then
1933 v.cr_result := "0010";
1934 else
1935 v.cr_result := r.a.negative & not r.a.negative & "00";
1936 end if;
1937 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1938 v.instr_done := '1';
1939
1940 when MULT_1 =>
1941 f_to_multiply.valid <= r.first;
1942 opsel_r <= RES_MULT;
1943 if multiply_to_f.valid = '1' then
1944 v.state := FINISH;
1945 end if;
1946
1947 when FMADD_0 =>
1948 -- r.shift is b.exp, so new_exp is a.exp + c.exp - b.exp
1949 -- (first time through; subsequent times we preserve v.shift)
1950 -- Addend is bigger here
1951 -- set shift to a.exp + c.exp - b.exp
1952 -- note v.shift is at most -2 here
1953 if r.first = '1' then
1954 rs_sel1 <= RSH1_NE;
1955 else
1956 rs_sel1 <= RSH1_S;
1957 end if;
1958 opsel_r <= RES_MULT;
1959 opsel_s <= S_MULT;
1960 set_s := '1';
1961 if multiply_to_f.valid = '1' then
1962 v.longmask := '0';
1963 v.state := ADD_SHIFT;
1964 end if;
1965
1966 when FMADD_1 =>
1967 -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp
1968 -- product is bigger here
1969 -- shift B right and use it as the addend to the multiplier
1970 -- for subtract, multiplier does B - A * C
1971 v.result_sign := r.a.negative xor r.c.negative xor r.is_subtract;
1972 re_sel2 <= REXP2_B;
1973 re_set_result <= '1';
1974 -- set shift to b.exp - result_exp + 64
1975 rs_sel1 <= RSH1_NE;
1976 rs_neg1 <= '1';
1977 rs_con2 <= RSCON2_64;
1978 v.state := FMADD_2;
1979
1980 when FMADD_2 =>
1981 -- Product is potentially bigger here
1982 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1983 set_s := '1';
1984 opsel_s <= S_SHIFT;
1985 -- set shift to r.shift - 64
1986 rs_sel1 <= RSH1_S;
1987 rs_con2 <= RSCON2_64;
1988 rs_neg2 <= '1';
1989 v.state := FMADD_3;
1990
1991 when FMADD_3 =>
1992 -- r.shift = addend exp - product exp
1993 opsel_r <= RES_SHIFT;
1994 re_sel2 <= REXP2_NE;
1995 re_set_result <= '1';
1996 v.first := '1';
1997 v.state := FMADD_4;
1998
1999 when FMADD_4 =>
2000 msel_add <= MULADD_RS;
2001 f_to_multiply.valid <= r.first;
2002 msel_inv <= r.is_subtract;
2003 opsel_r <= RES_MULT;
2004 opsel_s <= S_MULT;
2005 set_s := '1';
2006 if multiply_to_f.valid = '1' then
2007 v.state := FMADD_5;
2008 end if;
2009
2010 when FMADD_5 =>
2011 -- negate R:S:X if negative
2012 if r.r(63) = '1' then
2013 v.result_sign := not r.result_sign;
2014 opsel_ainv <= '1';
2015 carry_in <= not (s_nz or r.x);
2016 opsel_s <= S_NEG;
2017 set_s := '1';
2018 end if;
2019 -- set shift to UNIT_BIT
2020 rs_con2 <= RSCON2_UNIT;
2021 v.state := FMADD_6;
2022
2023 when FMADD_6 =>
2024 -- r.shift = UNIT_BIT (or 0, but only if r is now nonzero)
2025 re_sel2 <= REXP2_NE;
2026 rs_norm <= '1';
2027 if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
2028 if s_nz = '0' then
2029 -- must be a subtraction, and r.x must be zero
2030 v.result_class := ZERO;
2031 arith_done := '1';
2032 else
2033 -- R is all zeroes but there are non-zero bits in S
2034 -- so shift them into R and set S to 0
2035 opsel_r <= RES_SHIFT;
2036 re_set_result <= '1';
2037 set_s := '1';
2038 v.state := FINISH;
2039 end if;
2040 elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then
2041 v.state := FINISH;
2042 else
2043 v.state := NORMALIZE;
2044 end if;
2045
2046 when LOOKUP =>
2047 -- r.opsel_a = AIN_B
2048 -- wait one cycle for inverse_table[B] lookup
2049 v.first := '1';
2050 if r.insn(4) = '0' then
2051 if r.insn(3) = '0' then
2052 v.state := DIV_2;
2053 else
2054 v.state := SQRT_1;
2055 end if;
2056 elsif r.insn(2) = '0' then
2057 v.state := FRE_1;
2058 else
2059 v.state := RSQRT_1;
2060 end if;
2061
2062 when DIV_2 =>
2063 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
2064 msel_1 <= MUL1_B;
2065 msel_add <= MULADD_CONST;
2066 msel_inv <= '1';
2067 if r.count = 0 then
2068 msel_2 <= MUL2_LUT;
2069 else
2070 msel_2 <= MUL2_P;
2071 end if;
2072 set_y := r.first;
2073 pshift := '1';
2074 f_to_multiply.valid <= r.first;
2075 if multiply_to_f.valid = '1' then
2076 v.first := '1';
2077 v.count := r.count + 1;
2078 v.state := DIV_3;
2079 end if;
2080
2081 when DIV_3 =>
2082 -- compute Y = P = P * Y
2083 msel_1 <= MUL1_Y;
2084 msel_2 <= MUL2_P;
2085 f_to_multiply.valid <= r.first;
2086 pshift := '1';
2087 if multiply_to_f.valid = '1' then
2088 v.first := '1';
2089 if r.count = 3 then
2090 v.state := DIV_4;
2091 else
2092 v.state := DIV_2;
2093 end if;
2094 end if;
2095
2096 when DIV_4 =>
2097 -- compute R = P = A * Y (quotient)
2098 msel_1 <= MUL1_A;
2099 msel_2 <= MUL2_P;
2100 set_y := r.first;
2101 f_to_multiply.valid <= r.first;
2102 pshift := '1';
2103 mult_mask := '1';
2104 if multiply_to_f.valid = '1' then
2105 opsel_r <= RES_MULT;
2106 v.first := '1';
2107 v.state := DIV_5;
2108 end if;
2109
2110 when DIV_5 =>
2111 -- compute P = A - B * R (remainder)
2112 msel_1 <= MUL1_B;
2113 msel_2 <= MUL2_R;
2114 msel_add <= MULADD_A;
2115 msel_inv <= '1';
2116 f_to_multiply.valid <= r.first;
2117 if multiply_to_f.valid = '1' then
2118 v.state := DIV_6;
2119 end if;
2120
2121 when DIV_6 =>
2122 -- r.opsel_a = AIN_R
2123 -- test if remainder is 0 or >= B
2124 if pcmpb_lt = '1' then
2125 -- quotient is correct, set X if remainder non-zero
2126 v.x := r.p(UNIT_BIT + 2) or px_nz;
2127 else
2128 -- quotient needs to be incremented by 1 in R-bit position
2129 rbit_inc := '1';
2130 opsel_b <= BIN_RND;
2131 v.x := not pcmpb_eq;
2132 end if;
2133 v.state := FINISH;
2134
2135 when FRE_1 =>
2136 re_sel1 <= REXP1_R;
2137 re_neg1 <= '1';
2138 re_set_result <= '1';
2139 opsel_r <= RES_MISC;
2140 misc_sel <= "0111";
2141 -- set shift to 1
2142 rs_con2 <= RSCON2_1;
2143 v.state := NORMALIZE;
2144
2145 when FTDIV_1 =>
2146 v.cr_result(1) := exp_tiny or exp_huge;
2147 -- set shift to a.exp
2148 rs_sel2 <= RSH2_A;
2149 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
2150 v.instr_done := '1';
2151 else
2152 v.doing_ftdiv := "10";
2153 end if;
2154
2155 when RSQRT_1 =>
2156 opsel_r <= RES_MISC;
2157 misc_sel <= "0111";
2158 re_sel1 <= REXP1_BHALF;
2159 re_neg1 <= '1';
2160 re_set_result <= '1';
2161 -- set shift to 1
2162 rs_con2 <= RSCON2_1;
2163 v.state := NORMALIZE;
2164
2165 when SQRT_1 =>
2166 -- put invsqr[B] in R and compute P = invsqr[B] * B
2167 -- also transfer B (in R) to A
2168 set_a := '1';
2169 opsel_r <= RES_MISC;
2170 misc_sel <= "0111";
2171 msel_1 <= MUL1_B;
2172 msel_2 <= MUL2_LUT;
2173 f_to_multiply.valid <= '1';
2174 -- set shift to -1
2175 rs_con2 <= RSCON2_1;
2176 rs_neg2 <= '1';
2177 v.count := "00";
2178 v.state := SQRT_2;
2179
2180 when SQRT_2 =>
2181 -- shift R right one place
2182 -- not expecting multiplier result yet
2183 -- r.shift = -1
2184 opsel_r <= RES_SHIFT;
2185 re_sel2 <= REXP2_NE;
2186 re_set_result <= '1';
2187 v.first := '1';
2188 v.state := SQRT_3;
2189
2190 when SQRT_3 =>
2191 -- put R into Y, wait for product from multiplier
2192 msel_2 <= MUL2_R;
2193 set_y := r.first;
2194 pshift := '1';
2195 mult_mask := '1';
2196 if multiply_to_f.valid = '1' then
2197 -- put result into R
2198 opsel_r <= RES_MULT;
2199 v.first := '1';
2200 v.state := SQRT_4;
2201 end if;
2202
2203 when SQRT_4 =>
2204 -- compute 1.5 - Y * P
2205 msel_1 <= MUL1_Y;
2206 msel_2 <= MUL2_P;
2207 msel_add <= MULADD_CONST;
2208 msel_inv <= '1';
2209 f_to_multiply.valid <= r.first;
2210 pshift := '1';
2211 if multiply_to_f.valid = '1' then
2212 v.state := SQRT_5;
2213 end if;
2214
2215 when SQRT_5 =>
2216 -- compute Y = Y * P
2217 msel_1 <= MUL1_Y;
2218 msel_2 <= MUL2_P;
2219 f_to_multiply.valid <= '1';
2220 v.first := '1';
2221 v.state := SQRT_6;
2222
2223 when SQRT_6 =>
2224 -- pipeline in R = R * P
2225 msel_1 <= MUL1_R;
2226 msel_2 <= MUL2_P;
2227 f_to_multiply.valid <= r.first;
2228 pshift := '1';
2229 if multiply_to_f.valid = '1' then
2230 v.first := '1';
2231 v.state := SQRT_7;
2232 end if;
2233
2234 when SQRT_7 =>
2235 -- first multiply is done, put result in Y
2236 msel_2 <= MUL2_P;
2237 set_y := r.first;
2238 -- wait for second multiply (should be here already)
2239 pshift := '1';
2240 mult_mask := '1';
2241 if multiply_to_f.valid = '1' then
2242 -- put result into R
2243 opsel_r <= RES_MULT;
2244 v.first := '1';
2245 v.count := r.count + 1;
2246 if r.count < 2 then
2247 v.state := SQRT_4;
2248 else
2249 v.first := '1';
2250 v.state := SQRT_8;
2251 end if;
2252 end if;
2253
2254 when SQRT_8 =>
2255 -- compute P = A - R * R, which can be +ve or -ve
2256 -- we arranged for B to be put into A earlier
2257 msel_1 <= MUL1_R;
2258 msel_2 <= MUL2_R;
2259 msel_add <= MULADD_A;
2260 msel_inv <= '1';
2261 pshift := '1';
2262 f_to_multiply.valid <= r.first;
2263 if multiply_to_f.valid = '1' then
2264 v.first := '1';
2265 v.state := SQRT_9;
2266 end if;
2267
2268 when SQRT_9 =>
2269 -- compute P = P * Y
2270 -- since Y is an estimate of 1/sqrt(B), this makes P an
2271 -- estimate of the adjustment needed to R. Since the error
2272 -- could be negative and we have an unsigned multiplier, the
2273 -- upper bits can be wrong, but it turns out the lowest 8 bits
2274 -- are correct and are all we need (given 3 iterations through
2275 -- SQRT_4 to SQRT_7).
2276 msel_1 <= MUL1_Y;
2277 msel_2 <= MUL2_P;
2278 pshift := '1';
2279 f_to_multiply.valid <= r.first;
2280 if multiply_to_f.valid = '1' then
2281 v.state := SQRT_10;
2282 end if;
2283
2284 when SQRT_10 =>
2285 -- Add the bottom 8 bits of P, sign-extended, onto R.
2286 opsel_b <= BIN_PS8;
2287 re_sel1 <= REXP1_BHALF;
2288 re_set_result <= '1';
2289 -- set shift to 1
2290 rs_con2 <= RSCON2_1;
2291 v.first := '1';
2292 v.state := SQRT_11;
2293
2294 when SQRT_11 =>
2295 -- compute P = A - R * R (remainder)
2296 -- also put 2 * R + 1 into B for comparison with P
2297 msel_1 <= MUL1_R;
2298 msel_2 <= MUL2_R;
2299 msel_add <= MULADD_A;
2300 msel_inv <= '1';
2301 f_to_multiply.valid <= r.first;
2302 shiftin := '1';
2303 set_b := r.first;
2304 if multiply_to_f.valid = '1' then
2305 v.state := SQRT_12;
2306 end if;
2307
2308 when SQRT_12 =>
2309 -- test if remainder is 0 or >= B = 2*R + 1
2310 if pcmpb_lt = '1' then
2311 -- square root is correct, set X if remainder non-zero
2312 v.x := r.p(UNIT_BIT + 2) or px_nz;
2313 else
2314 -- square root needs to be incremented by 1
2315 carry_in <= '1';
2316 v.x := not pcmpb_eq;
2317 end if;
2318 v.state := FINISH;
2319
2320 when INT_SHIFT =>
2321 -- r.shift = b.exponent - 52
2322 opsel_r <= RES_SHIFT;
2323 re_sel2 <= REXP2_NE;
2324 re_set_result <= '1';
2325 set_x := '1';
2326 v.state := INT_ROUND;
2327 -- set shift to -4 (== 52 - UNIT_BIT)
2328 rs_con2 <= RSCON2_UNIT_52;
2329 rs_neg2 <= '1';
2330
2331 when INT_ROUND =>
2332 -- r.shift = -4 (== 52 - UNIT_BIT)
2333 opsel_r <= RES_SHIFT;
2334 re_sel2 <= REXP2_NE;
2335 re_set_result <= '1';
2336 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2337 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2338 -- Check for negative values that don't round to 0 for fcti*u*
2339 if r.insn(8) = '1' and r.result_sign = '1' and
2340 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2341 v.state := INT_OFLOW;
2342 else
2343 v.state := INT_FINAL;
2344 end if;
2345
2346 when INT_ISHIFT =>
2347 -- r.shift = b.exponent - UNIT_BIT;
2348 opsel_r <= RES_SHIFT;
2349 re_sel2 <= REXP2_NE;
2350 re_set_result <= '1';
2351 v.state := INT_FINAL;
2352
2353 when INT_FINAL =>
2354 -- Negate if necessary, and increment for rounding if needed
2355 opsel_ainv <= r.result_sign;
2356 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2357 -- Check for possible overflows
2358 case r.insn(9 downto 8) is
2359 when "00" => -- fctiw[z]
2360 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2361 when "01" => -- fctiwu[z]
2362 need_check := r.r(31);
2363 when "10" => -- fctid[z]
2364 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2365 when others => -- fctidu[z]
2366 need_check := r.r(63);
2367 end case;
2368 int_result := '1';
2369 if need_check = '1' then
2370 v.state := INT_CHECK;
2371 else
2372 if r.fpscr(FPSCR_FI) = '1' then
2373 v.fpscr(FPSCR_XX) := '1';
2374 end if;
2375 arith_done := '1';
2376 end if;
2377
2378 when INT_CHECK =>
2379 if r.insn(9) = '0' then
2380 msb := r.r(31);
2381 else
2382 msb := r.r(63);
2383 end if;
2384 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2385 if (r.insn(8) = '0' and msb /= r.result_sign) or
2386 (r.insn(8) = '1' and msb /= '1') then
2387 opsel_r <= RES_MISC;
2388 v.fpscr(FPSCR_VXCVI) := '1';
2389 invalid := '1';
2390 else
2391 if r.fpscr(FPSCR_FI) = '1' then
2392 v.fpscr(FPSCR_XX) := '1';
2393 end if;
2394 end if;
2395 int_result := '1';
2396 arith_done := '1';
2397
2398 when INT_OFLOW =>
2399 opsel_r <= RES_MISC;
2400 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2401 if r.b.class = NAN then
2402 misc_sel(0) <= '1';
2403 end if;
2404 v.fpscr(FPSCR_VXCVI) := '1';
2405 invalid := '1';
2406 int_result := '1';
2407 arith_done := '1';
2408
2409 when FRI_1 =>
2410 -- r.shift = b.exponent - 52
2411 opsel_r <= RES_SHIFT;
2412 re_sel2 <= REXP2_NE;
2413 re_set_result <= '1';
2414 set_x := '1';
2415 v.state := ROUNDING;
2416
2417 when FINISH =>
2418 if r.is_multiply = '1' and px_nz = '1' then
2419 v.x := '1';
2420 end if;
2421 -- set shift to new_exp - min_exp (N.B. rs_norm overrides this)
2422 rs_sel1 <= RSH1_NE;
2423 rs_con2 <= RSCON2_MINEXP;
2424 rs_neg2 <= '1';
2425 if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then
2426 rs_norm <= '1';
2427 v.state := NORMALIZE;
2428 else
2429 set_x := '1';
2430 if exp_tiny = '1' then
2431 v.state := ROUND_UFLOW;
2432 elsif exp_huge = '1' then
2433 v.state := ROUND_OFLOW;
2434 else
2435 v.state := ROUNDING;
2436 end if;
2437 end if;
2438
2439 when NORMALIZE =>
2440 -- Shift so we have 7 leading zeroes (we know R is non-zero)
2441 -- r.shift = clz(r.r) - 7
2442 opsel_r <= RES_SHIFT;
2443 re_sel2 <= REXP2_NE;
2444 re_set_result <= '1';
2445 -- set shift to new_exp - min_exp
2446 rs_sel1 <= RSH1_NE;
2447 rs_con2 <= RSCON2_MINEXP;
2448 rs_neg2 <= '1';
2449 set_x := '1';
2450 if exp_tiny = '1' then
2451 v.state := ROUND_UFLOW;
2452 elsif exp_huge = '1' then
2453 v.state := ROUND_OFLOW;
2454 else
2455 v.state := ROUNDING;
2456 end if;
2457
2458 when ROUND_UFLOW =>
2459 -- r.shift = - amount by which exponent underflows
2460 v.tiny := '1';
2461 if r.fpscr(FPSCR_UE) = '0' then
2462 -- disabled underflow exception case
2463 -- have to denormalize before rounding
2464 opsel_r <= RES_SHIFT;
2465 re_sel2 <= REXP2_NE;
2466 re_set_result <= '1';
2467 set_x := '1';
2468 v.state := ROUNDING;
2469 else
2470 -- enabled underflow exception case
2471 -- if denormalized, have to normalize before rounding
2472 v.fpscr(FPSCR_UX) := '1';
2473 re_sel1 <= REXP1_R;
2474 re_con2 <= RECON2_BIAS;
2475 re_set_result <= '1';
2476 if r.r(UNIT_BIT) = '0' then
2477 rs_norm <= '1';
2478 v.state := NORMALIZE;
2479 else
2480 v.state := ROUNDING;
2481 end if;
2482 end if;
2483
2484 when ROUND_OFLOW =>
2485 v.fpscr(FPSCR_OX) := '1';
2486 if r.fpscr(FPSCR_OE) = '0' then
2487 -- disabled overflow exception
2488 -- result depends on rounding mode
2489 v.fpscr(FPSCR_XX) := '1';
2490 v.fpscr(FPSCR_FI) := '1';
2491 if r.round_mode(1 downto 0) = "00" or
2492 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2493 v.result_class := INFINITY;
2494 v.fpscr(FPSCR_FR) := '1';
2495 else
2496 v.fpscr(FPSCR_FR) := '0';
2497 end if;
2498 -- construct largest representable number
2499 re_con2 <= RECON2_MAX;
2500 re_set_result <= '1';
2501 opsel_r <= RES_MISC;
2502 misc_sel <= "001" & r.single_prec;
2503 arith_done := '1';
2504 else
2505 -- enabled overflow exception
2506 re_sel1 <= REXP1_R;
2507 re_con2 <= RECON2_BIAS;
2508 re_neg2 <= '1';
2509 re_set_result <= '1';
2510 v.state := ROUNDING;
2511 end if;
2512
2513 when ROUNDING =>
2514 opsel_mask <= '1';
2515 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2516 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2517 if round(1) = '1' then
2518 -- increment the LSB for the precision
2519 opsel_b <= BIN_RND;
2520 -- set shift to -1
2521 rs_con2 <= RSCON2_1;
2522 rs_neg2 <= '1';
2523 v.state := ROUNDING_2;
2524 else
2525 if r.r(UNIT_BIT) = '0' then
2526 -- result after masking could be zero, or could be a
2527 -- denormalized result that needs to be renormalized
2528 rs_norm <= '1';
2529 v.state := ROUNDING_3;
2530 else
2531 arith_done := '1';
2532 end if;
2533 end if;
2534 if round(0) = '1' then
2535 v.fpscr(FPSCR_XX) := '1';
2536 if r.tiny = '1' then
2537 v.fpscr(FPSCR_UX) := '1';
2538 end if;
2539 end if;
2540
2541 when ROUNDING_2 =>
2542 -- Check for overflow during rounding
2543 -- r.shift = -1
2544 v.x := '0';
2545 re_sel2 <= REXP2_NE;
2546 if r.r(UNIT_BIT + 1) = '1' then
2547 opsel_r <= RES_SHIFT;
2548 re_set_result <= '1';
2549 if exp_huge = '1' then
2550 v.state := ROUND_OFLOW;
2551 else
2552 arith_done := '1';
2553 end if;
2554 elsif r.r(UNIT_BIT) = '0' then
2555 -- Do CLZ so we can renormalize the result
2556 rs_norm <= '1';
2557 v.state := ROUNDING_3;
2558 else
2559 arith_done := '1';
2560 end if;
2561
2562 when ROUNDING_3 =>
2563 -- r.shift = clz(r.r) - 7
2564 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2565 re_sel2 <= REXP2_NE;
2566 -- set shift to new_exp - min_exp (min_exp == -1022)
2567 rs_sel1 <= RSH1_NE;
2568 rs_con2 <= RSCON2_MINEXP;
2569 rs_neg2 <= '1';
2570 if mant_nz = '0' then
2571 v.result_class := ZERO;
2572 arith_done := '1';
2573 else
2574 -- Renormalize result after rounding
2575 opsel_r <= RES_SHIFT;
2576 re_set_result <= '1';
2577 v.denorm := exp_tiny;
2578 if new_exp < to_signed(-1022, EXP_BITS) then
2579 v.state := DENORM;
2580 else
2581 arith_done := '1';
2582 end if;
2583 end if;
2584
2585 when DENORM =>
2586 -- r.shift = result_exp - -1022
2587 opsel_r <= RES_SHIFT;
2588 re_sel2 <= REXP2_NE;
2589 re_set_result <= '1';
2590 arith_done := '1';
2591
2592 when NAN_RESULT =>
2593 v.negate := '0';
2594 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
2595 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or
2596 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then
2597 -- Signalling NAN
2598 v.fpscr(FPSCR_VXSNAN) := '1';
2599 invalid := '1';
2600 end if;
2601 if r.use_a = '1' and r.a.class = NAN then
2602 v.opsel_a := AIN_A;
2603 v.result_sign := r.a.negative;
2604 elsif r.use_b = '1' and r.b.class = NAN then
2605 v.opsel_a := AIN_B;
2606 v.result_sign := r.b.negative;
2607 elsif r.use_c = '1' and r.c.class = NAN then
2608 v.opsel_a := AIN_C;
2609 v.result_sign := r.c.negative;
2610 end if;
2611 v.state := EXC_RESULT;
2612
2613 when EXC_RESULT =>
2614 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2615 case r.opsel_a is
2616 when AIN_B =>
2617 re_sel2 <= REXP2_B;
2618 v.result_class := r.b.class;
2619 when AIN_C =>
2620 re_sel2 <= REXP2_C;
2621 v.result_class := r.c.class;
2622 when others =>
2623 re_sel1 <= REXP1_A;
2624 v.result_class := r.a.class;
2625 end case;
2626 re_set_result <= '1';
2627 arith_done := '1';
2628
2629 when DO_IDIVMOD =>
2630 -- r.opsel_a = AIN_B
2631 v.result_sign := r.is_signed and (r.a.negative xor (r.b.negative and not r.divmod));
2632 if r.b.class = ZERO then
2633 -- B is zero, signal overflow
2634 v.int_ovf := '1';
2635 v.state := IDIV_ZERO;
2636 elsif r.a.class = ZERO then
2637 -- A is zero, result is zero (both for div and for mod)
2638 v.state := IDIV_ZERO;
2639 else
2640 -- take absolute value for signed division, and
2641 -- normalize and round up B to 8.56 format, like fcfid[u]
2642 if r.is_signed = '1' and r.b.negative = '1' then
2643 opsel_ainv <= '1';
2644 carry_in <= '1';
2645 end if;
2646 v.result_class := FINITE;
2647 re_con2 <= RECON2_UNIT;
2648 re_set_result <= '1';
2649 v.state := IDIV_NORMB;
2650 end if;
2651 when IDIV_NORMB =>
2652 -- do count-leading-zeroes on B (now in R)
2653 rs_norm <= '1';
2654 -- save the original value of B or |B| in C
2655 set_c := '1';
2656 v.state := IDIV_NORMB2;
2657 when IDIV_NORMB2 =>
2658 -- get B into the range [1, 2) in 8.56 format
2659 set_x := '1'; -- record if any 1 bits shifted out
2660 opsel_r <= RES_SHIFT;
2661 re_sel2 <= REXP2_NE;
2662 re_set_result <= '1';
2663 v.state := IDIV_NORMB3;
2664 when IDIV_NORMB3 =>
2665 -- add the X bit onto R to round up B
2666 carry_in <= r.x;
2667 -- prepare to do count-leading-zeroes on A
2668 v.opsel_a := AIN_A;
2669 v.state := IDIV_CLZA;
2670 when IDIV_CLZA =>
2671 set_b := '1'; -- put R back into B
2672 -- r.opsel_a = AIN_A
2673 if r.is_signed = '1' and r.a.negative = '1' then
2674 opsel_ainv <= '1';
2675 carry_in <= '1';
2676 end if;
2677 re_con2 <= RECON2_UNIT;
2678 re_set_result <= '1';
2679 v.opsel_a := AIN_C;
2680 v.state := IDIV_CLZA2;
2681 when IDIV_CLZA2 =>
2682 -- r.opsel_a = AIN_C
2683 rs_norm <= '1';
2684 -- write the dividend back into A in case we negated it
2685 set_a_mant := '1';
2686 -- while doing the count-leading-zeroes on A,
2687 -- also compute A - B to tell us whether A >= B
2688 -- (using the original value of B, which is now in C)
2689 opsel_b <= BIN_R;
2690 opsel_ainv <= '1';
2691 carry_in <= '1';
2692 v.state := IDIV_CLZA3;
2693 when IDIV_CLZA3 =>
2694 -- save the exponent of A (but don't overwrite the mantissa)
2695 set_a_exp := '1';
2696 re_sel2 <= REXP2_NE;
2697 re_set_result <= '1';
2698 v.div_close := '0';
2699 if new_exp = r.b.exponent then
2700 v.div_close := '1';
2701 end if;
2702 v.state := IDIV_NR0;
2703 if new_exp > r.b.exponent or (v.div_close = '1' and r.r(63) = '0') then
2704 -- A >= B, overflow if extended division
2705 if r.divext = '1' then
2706 v.int_ovf := '1';
2707 -- return 0 in overflow cases
2708 v.state := IDIV_ZERO;
2709 end if;
2710 else
2711 -- A < B, result is zero for normal division
2712 if r.divmod = '0' and r.divext = '0' then
2713 v.state := IDIV_ZERO;
2714 end if;
2715 end if;
2716 when IDIV_NR0 =>
2717 -- reduce number of Newton-Raphson iterations for small A
2718 if r.divext = '1' or r.result_exp >= to_signed(32, EXP_BITS) then
2719 v.count := "00";
2720 elsif r.result_exp >= to_signed(16, EXP_BITS) then
2721 v.count := "01";
2722 else
2723 v.count := "10";
2724 end if;
2725 -- first NR iteration does Y = LUT; P = 2 - B * LUT
2726 msel_1 <= MUL1_B;
2727 msel_add <= MULADD_CONST;
2728 msel_inv <= '1';
2729 msel_2 <= MUL2_LUT;
2730 set_y := '1';
2731 if r.b.mantissa(UNIT_BIT + 1) = '1' then
2732 -- rounding up of the mantissa caused overflow, meaning the
2733 -- normalized B is 2.0. Since this is outside the range
2734 -- of the LUT, just use 0.5 as the estimated inverse.
2735 v.state := IDIV_USE0_5;
2736 else
2737 -- start the first multiply now
2738 f_to_multiply.valid <= '1';
2739 -- note we don't set v.first, thus the following IDIV_NR1
2740 -- state doesn't start a multiply (we already did that)
2741 v.state := IDIV_NR1;
2742 end if;
2743 when IDIV_NR1 =>
2744 -- subsequent NR iterations do Y = P; P = 2 - B * P
2745 msel_1 <= MUL1_B;
2746 msel_add <= MULADD_CONST;
2747 msel_inv <= '1';
2748 msel_2 <= MUL2_P;
2749 set_y := r.first;
2750 pshift := '1';
2751 f_to_multiply.valid <= r.first;
2752 if multiply_to_f.valid = '1' then
2753 v.first := '1';
2754 v.count := r.count + 1;
2755 v.state := IDIV_NR2;
2756 end if;
2757 when IDIV_NR2 =>
2758 -- compute P = Y * P
2759 msel_1 <= MUL1_Y;
2760 msel_2 <= MUL2_P;
2761 f_to_multiply.valid <= r.first;
2762 pshift := '1';
2763 v.opsel_a := AIN_A;
2764 -- set shift to 64
2765 rs_con2 <= RSCON2_64;
2766 -- Get 0.5 into R in case the inverse estimate turns out to be
2767 -- less than 0.5, in which case we want to use 0.5, to avoid
2768 -- infinite loops in some cases.
2769 opsel_r <= RES_MISC;
2770 misc_sel <= "0001";
2771 if multiply_to_f.valid = '1' then
2772 v.first := '1';
2773 if r.count = "11" then
2774 v.state := IDIV_DODIV;
2775 else
2776 v.state := IDIV_NR1;
2777 end if;
2778 end if;
2779 when IDIV_USE0_5 =>
2780 -- Get 0.5 into R; it turns out the generated
2781 -- QNaN mantissa is actually what we want
2782 opsel_r <= RES_MISC;
2783 misc_sel <= "0001";
2784 v.opsel_a := AIN_A;
2785 -- set shift to 64
2786 rs_con2 <= RSCON2_64;
2787 v.state := IDIV_DODIV;
2788 when IDIV_DODIV =>
2789 -- r.opsel_a = AIN_A
2790 -- r.shift = 64
2791 -- inverse estimate is in P or in R; copy it to Y
2792 if r.b.mantissa(UNIT_BIT + 1) = '1' or
2793 (r.p(UNIT_BIT) = '0' and r.p(UNIT_BIT - 1) = '0') then
2794 msel_2 <= MUL2_R;
2795 else
2796 msel_2 <= MUL2_P;
2797 end if;
2798 set_y := '1';
2799 -- shift_res is 0 because r.shift = 64;
2800 -- put that into B, which now holds the quotient
2801 set_b_mant := '1';
2802 if r.divext = '0' then
2803 -- set shift to -56
2804 rs_con2 <= RSCON2_UNIT;
2805 rs_neg2 <= '1';
2806 v.first := '1';
2807 v.state := IDIV_DIV;
2808 elsif r.single_prec = '1' then
2809 -- divwe[u][o], shift A left 32 bits
2810 -- set shift to 32
2811 rs_con2 <= RSCON2_32;
2812 v.state := IDIV_SH32;
2813 elsif r.div_close = '0' then
2814 -- set shift to 64 - UNIT_BIT (== 8)
2815 rs_con2 <= RSCON2_64_UNIT;
2816 v.state := IDIV_EXTDIV;
2817 else
2818 -- handle top bit of quotient specially
2819 -- for this we need the divisor left-justified in B
2820 v.opsel_a := AIN_C;
2821 v.state := IDIV_EXT_TBH;
2822 end if;
2823 when IDIV_SH32 =>
2824 -- r.shift = 32, R contains the dividend
2825 opsel_r <= RES_SHIFT;
2826 -- set shift to -UNIT_BIT (== -56)
2827 rs_con2 <= RSCON2_UNIT;
2828 rs_neg2 <= '1';
2829 v.first := '1';
2830 v.state := IDIV_DIV;
2831 when IDIV_DIV =>
2832 -- Dividing A by C, r.shift = -56; A is in R
2833 -- Put A into the bottom 64 bits of Ahi/A/Alo
2834 set_a_mant := r.first;
2835 set_a_lo := r.first;
2836 -- compute R = R * Y (quotient estimate)
2837 msel_1 <= MUL1_Y;
2838 msel_2 <= MUL2_R;
2839 f_to_multiply.valid <= r.first;
2840 pshift := '1';
2841 opsel_r <= RES_MULT;
2842 -- set shift to - b.exp
2843 rs_sel1 <= RSH1_B;
2844 rs_neg1 <= '1';
2845 if multiply_to_f.valid = '1' then
2846 v.state := IDIV_DIV2;
2847 end if;
2848 when IDIV_DIV2 =>
2849 -- r.shift = - b.exponent
2850 -- shift the quotient estimate right by b.exponent bits
2851 opsel_r <= RES_SHIFT;
2852 v.first := '1';
2853 v.state := IDIV_DIV3;
2854 when IDIV_DIV3 =>
2855 -- quotient (so far) is in R; multiply by C and subtract from A
2856 msel_1 <= MUL1_R;
2857 msel_2 <= MUL2_C;
2858 msel_add <= MULADD_A;
2859 msel_inv <= '1';
2860 f_to_multiply.valid <= r.first;
2861 -- store the current quotient estimate in B
2862 set_b_mant := r.first;
2863 opsel_r <= RES_MULT;
2864 opsel_s <= S_MULT;
2865 set_s := '1';
2866 if multiply_to_f.valid = '1' then
2867 v.state := IDIV_DIV4;
2868 end if;
2869 when IDIV_DIV4 =>
2870 -- remainder is in R/S and P
2871 msel_1 <= MUL1_Y;
2872 msel_2 <= MUL2_P;
2873 v.inc_quot := not pcmpc_lt and not r.divmod;
2874 if r.divmod = '0' then
2875 v.opsel_a := AIN_B;
2876 end if;
2877 -- set shift to UNIT_BIT (== 56)
2878 rs_con2 <= RSCON2_UNIT;
2879 if pcmpc_lt = '1' or pcmpc_eq = '1' then
2880 if r.divmod = '0' then
2881 v.state := IDIV_DIVADJ;
2882 elsif pcmpc_eq = '1' then
2883 v.state := IDIV_ZERO;
2884 else
2885 v.state := IDIV_MODADJ;
2886 end if;
2887 else
2888 -- need to do another iteration, compute P * Y
2889 f_to_multiply.valid <= '1';
2890 v.state := IDIV_DIV5;
2891 end if;
2892 when IDIV_DIV5 =>
2893 pshift := '1';
2894 opsel_r <= RES_MULT;
2895 -- set shift to - b.exp
2896 rs_sel1 <= RSH1_B;
2897 rs_neg1 <= '1';
2898 if multiply_to_f.valid = '1' then
2899 v.state := IDIV_DIV6;
2900 end if;
2901 when IDIV_DIV6 =>
2902 -- r.shift = - b.exponent
2903 -- shift the quotient estimate right by b.exponent bits
2904 opsel_r <= RES_SHIFT;
2905 v.opsel_a := AIN_B;
2906 v.first := '1';
2907 v.state := IDIV_DIV7;
2908 when IDIV_DIV7 =>
-- State IDIV_DIV7: adder computes B.mantissa + R (A input = B via
-- r.opsel_a, B input = R).  opsel_r is not assigned here, so the result
-- select is whatever default the process sets up -- presumably the adder
-- sum (TODO confirm against the defaults at the top of the process).
2909 -- r.opsel_a = AIN_B
2910 -- add shifted quotient delta onto the total quotient
2911 opsel_b <= BIN_R;
2912 v.first := '1';
2913 v.state := IDIV_DIV8;
2914 when IDIV_DIV8 =>
-- State IDIV_DIV8: same multiply-subtract step as IDIV_DIV3 (operands
-- R and C, addend A, subtract flag set), issued while r.first is set;
-- leaves for IDIV_DIV9 once the multiplier reports valid.
2915 -- quotient (so far) is in R; multiply by C and subtract from A
2916 msel_1 <= MUL1_R;
2917 msel_2 <= MUL2_C;
2918 msel_add <= MULADD_A;
2919 msel_inv <= '1';
2920 f_to_multiply.valid <= r.first;
2921 -- store the current quotient estimate in B
-- (set_b_mant loads b.mantissa from the shifter output, shift_res)
2922 set_b_mant := r.first;
2923 opsel_r <= RES_MULT;
2924 opsel_s <= S_MULT;
2925 set_s := '1';
2926 if multiply_to_f.valid = '1' then
2927 v.state := IDIV_DIV9;
2928 end if;
2929 when IDIV_DIV9 =>
-- State IDIV_DIV9: same bookkeeping as IDIV_DIV4, but without the option
-- of a further iteration: the next state is always one of IDIV_DIVADJ,
-- IDIV_ZERO or IDIV_MODADJ.
2930 -- remainder is in R/S and P
2931 msel_1 <= MUL1_Y;
2932 msel_2 <= MUL2_P;
2933 v.inc_quot := not pcmpc_lt and not r.divmod;
2934 if r.divmod = '0' then
2935 v.opsel_a := AIN_B;
2936 end if;
2937 -- set shift to UNIT_BIT (== 56)
2938 rs_con2 <= RSCON2_UNIT;
2939 if r.divmod = '0' then
2940 v.state := IDIV_DIVADJ;
2941 elsif pcmpc_eq = '1' then
2942 v.state := IDIV_ZERO;
2943 else
2944 v.state := IDIV_MODADJ;
2945 end if;
2946 when IDIV_EXT_TBH =>
-- IDIV_EXT_TBH .. IDIV_EXT_TBH5: a straight-line five-state sequence that
-- finishes by entering IDIV_EXTDIV2 with v.first set.  Each state programs
-- the shift count and adder input select that the *next* state relies on.
2947 -- r.opsel_a = AIN_C; get divisor into R and prepare to shift left
2948 -- set shift to 63 - b.exp
2949 rs_sel1 <= RSH1_B;
2950 rs_neg1 <= '1';
2951 rs_con2 <= RSCON2_63;
2952 v.opsel_a := AIN_A;
2953 v.state := IDIV_EXT_TBH2;
2954 when IDIV_EXT_TBH2 =>
2955 -- r.opsel_a = AIN_A; divisor is in R
2956 -- r.shift = 63 - b.exponent; shift and put into B
-- (set_b_mant loads b.mantissa from the shifter output)
2957 set_b_mant := '1';
2958 -- set shift to 64 - UNIT_BIT (== 8)
2959 rs_con2 <= RSCON2_64_UNIT;
2960 v.state := IDIV_EXT_TBH3;
2961 when IDIV_EXT_TBH3 =>
2962 -- Dividing (A << 64) by C
2963 -- r.shift = 8
2964 -- Put A in the top 64 bits of Ahi/A/Alo
-- set_a_hi latches r.r(63 downto 56) into a_hi; set_a_mant loads
-- a.mantissa from the shifter output.
2965 set_a_hi := '1';
2966 set_a_mant := '1';
2967 -- set shift to 64 - b.exp
2968 rs_sel1 <= RSH1_B;
2969 rs_neg1 <= '1';
2970 rs_con2 <= RSCON2_64;
2971 v.state := IDIV_EXT_TBH4;
2972 when IDIV_EXT_TBH4 =>
2973 -- dividend (A) is in R
2974 -- r.shift = 64 - B.exponent, so is at least 1
2975 opsel_r <= RES_SHIFT;
2976 -- top bit of A gets lost in the shift, so handle it specially
2977 v.opsel_a := AIN_B;
2978 -- set shift to 63
2979 rs_con2 <= RSCON2_63;
2980 v.state := IDIV_EXT_TBH5;
2981 when IDIV_EXT_TBH5 =>
2982 -- r.opsel_a = AIN_B, r.shift = 63
2983 -- shifted dividend is in R, subtract left-justified divisor
-- (adder inputs: inverted B.mantissa plus R plus carry-in of 1)
2984 opsel_b <= BIN_R;
2985 opsel_ainv <= '1';
2986 carry_in <= '1';
2987 -- and put 1<<63 into B as the divisor (S is still 0)
-- shiftin0 forces bit 0 of the R operand of the shifter to 1.
2988 shiftin0 := '1';
2989 set_b_mant := '1';
2990 v.first := '1';
2991 v.state := IDIV_EXTDIV2;
2992 when IDIV_EXTDIV =>
-- State IDIV_EXTDIV: latch r.r(63 downto 56) into a_hi and the shifter
-- output into a.mantissa, then program the shift count for the next state.
2993 -- Dividing (A << 64) by C
2994 -- r.shift = 8
2995 -- Put A in the top 64 bits of Ahi/A/Alo
2996 set_a_hi := '1';
2997 set_a_mant := '1';
2998 -- set shift to 64 - b.exp
2999 rs_sel1 <= RSH1_B;
3000 rs_neg1 <= '1';
3001 rs_con2 <= RSCON2_64;
3002 v.state := IDIV_EXTDIV1;
3003 when IDIV_EXTDIV1 =>
-- State IDIV_EXTDIV1: replace R with the shifter output and set v.first
-- so IDIV_EXTDIV2 issues its multiply.
3004 -- dividend is in R
3005 -- r.shift = 64 - B.exponent
3006 opsel_r <= RES_SHIFT;
3007 v.first := '1';
3008 v.state := IDIV_EXTDIV2;
3009 when IDIV_EXTDIV2 =>
-- State IDIV_EXTDIV2: multiply Y by R.  The request is marked valid while
-- r.first is set (when arriving from IDIV_EXTDIV6 the request was already
-- issued there).  With pshift = '1' both P and R take result bits
-- UNIT_BIT+63..UNIT_BIT once the multiplier reports valid.
3010 -- shifted remainder is in R; compute R = R * Y (quotient estimate)
3011 msel_1 <= MUL1_Y;
3012 msel_2 <= MUL2_R;
3013 f_to_multiply.valid <= r.first;
3014 pshift := '1';
3015 v.opsel_a := AIN_B;
3016 opsel_r <= RES_MULT;
3017 if multiply_to_f.valid = '1' then
3018 v.first := '1';
3019 v.state := IDIV_EXTDIV3;
3020 end if;
3021 when IDIV_EXTDIV3 =>
-- State IDIV_EXTDIV3: adder inputs are B.mantissa (via r.opsel_a) and R.
-- opsel_r is not assigned here, so the result select is the process
-- default -- presumably the adder sum (TODO confirm).
3022 -- r.opsel_a = AIN_B
3023 -- delta quotient is in R; add it to B
3024 opsel_b <= BIN_R;
3025 v.first := '1';
3026 v.state := IDIV_EXTDIV4;
3027 when IDIV_EXTDIV4 =>
-- State IDIV_EXTDIV4: multiply-subtract with operands R and C, addend A
-- and the subtract flag set, issued while r.first is set.  R and S are
-- loaded from the multiplier result; the state is held until the
-- multiplier reports valid.
3028 -- quotient is in R; put it in B and compute remainder
3029 set_b_mant := r.first;
3030 msel_1 <= MUL1_R;
3031 msel_2 <= MUL2_C;
3032 msel_add <= MULADD_A;
3033 msel_inv <= '1';
3034 f_to_multiply.valid <= r.first;
3035 opsel_r <= RES_MULT;
3036 opsel_s <= S_MULT;
3037 set_s := '1';
3038 -- set shift to UNIT_BIT - b.exp
3039 rs_sel1 <= RSH1_B;
3040 rs_neg1 <= '1';
3041 rs_con2 <= RSCON2_UNIT;
3042 if multiply_to_f.valid = '1' then
3043 v.state := IDIV_EXTDIV5;
3044 end if;
3045 when IDIV_EXTDIV5 =>
3046 -- r.shift = 56 - r.b.exponent (UNIT_BIT - b.exp, as set up in IDIV_EXTDIV4)
3047 -- remainder is in R/S; shift it right r.b.exponent bits
3048 opsel_r <= RES_SHIFT;
3049 -- test LS 64b of remainder in P against divisor in C
3050 v.inc_quot := not pcmpc_lt;
3051 v.opsel_a := AIN_B;
3052 v.state := IDIV_EXTDIV6;
3053 when IDIV_EXTDIV6 =>
-- State IDIV_EXTDIV6: loop control for the extended divide.  If r_gt_1
-- (computed outside this excerpt) is set, a Y * R multiply is started
-- here and control returns to IDIV_EXTDIV2 to collect it; otherwise the
-- iteration ends and the quotient is adjusted in IDIV_DIVADJ.
3054 -- r.opsel_a = AIN_B
3055 -- shifted remainder is in R, see if it is > 1
3056 -- and compute R = R * Y if so
3057 msel_1 <= MUL1_Y;
3058 msel_2 <= MUL2_R;
3059 pshift := '1';
3060 if r_gt_1 = '1' then
3061 f_to_multiply.valid <= '1';
3062 v.state := IDIV_EXTDIV2;
3063 else
3064 v.state := IDIV_DIVADJ;
3065 end if;
3066 when IDIV_MODADJ =>
-- State IDIV_MODADJ: move the R/S value through the shifter into R.  If
-- pcmpc_lt is clear, one more subtraction of C is performed in
-- IDIV_MODSUB; otherwise finish directly (IDIV_DONE) when the result sign
-- is clear, or go through IDIV_DIVADJ when it is set.
3067 -- r.shift = 56
3068 -- result is in R/S
3069 opsel_r <= RES_SHIFT;
3070 if pcmpc_lt = '0' then
3071 v.opsel_a := AIN_C;
3072 v.state := IDIV_MODSUB;
3073 elsif r.result_sign = '0' then
3074 v.state := IDIV_DONE;
3075 else
3076 v.state := IDIV_DIVADJ;
3077 end if;
3078 when IDIV_MODSUB =>
-- State IDIV_MODSUB: adder computes R + not(C.mantissa) + 1, i.e. R - C.
3079 -- r.opsel_a = AIN_C
3080 -- Subtract divisor from remainder
3081 opsel_ainv <= '1';
3082 carry_in <= '1';
3083 opsel_b <= BIN_R;
3084 if r.result_sign = '0' then
3085 v.state := IDIV_DONE;
3086 else
3087 v.state := IDIV_DIVADJ;
3088 end if;
3089 when IDIV_DIVADJ =>
-- State IDIV_DIVADJ: final +1 / negate step performed on the adder.
-- The A input is inverted when r.result_sign is set, and the carry-in is
-- r.inc_quot xor r.result_sign.
3090 -- result (so far) is on the A input of the adder
3091 -- set carry to increment quotient if needed
3092 -- and also negate R if the answer is negative
3093 opsel_ainv <= r.result_sign;
3094 carry_in <= r.inc_quot xor r.result_sign;
-- With rnd_b32 set, the BIN_RND input is a constant whose only
-- possibly-set bit is bit 32 (= r.result_sign and r.single_prec).
-- It is only selected for the divide flavour (r.divmod = '0').
3095 rnd_b32 := '1';
3096 if r.divmod = '0' then
3097 opsel_b <= BIN_RND;
3098 end if;
-- Only signed operations go through the overflow check.
3099 if r.is_signed = '0' then
3100 v.state := IDIV_DONE;
3101 else
3102 v.state := IDIV_OVFCHK;
3103 end if;
3104 when IDIV_OVFCHK =>
-- State IDIV_OVFCHK: flag overflow when the sign bit of the result in R
-- (bit 63, or bit 31 when r.single_prec is set) differs from
-- r.result_sign.  An overflow is routed to IDIV_ZERO.
3105 if r.single_prec = '0' then
3106 sign_bit := r.r(63);
3107 else
3108 sign_bit := r.r(31);
3109 end if;
3110 v.int_ovf := sign_bit xor r.result_sign;
3111 if v.int_ovf = '1' then
3112 v.state := IDIV_ZERO;
3113 else
3114 v.state := IDIV_DONE;
3115 end if;
3116 when IDIV_DONE =>
-- State IDIV_DONE: normal completion of an integer divide/modulo.
-- Builds the XER and CR results from the value in R and marks the
-- instruction done with an integer result.
3117 v.xerc_result := v.xerc;
-- With OE set, OV and OV32 are written back as 0 (no overflow).
3118 if r.oe = '1' then
3119 v.xerc_result.ov := '0';
3120 v.xerc_result.ov32 := '0';
3121 v.writing_xer := '1';
3122 end if;
-- CR field: bit 3 = sign bit of the result, bit 2 = non-zero and sign
-- clear, bit 1 = result is zero, bit 0 = SO.  The comparison uses all
-- 64 bits of R unless r.m32b is set, in which case only the low 32
-- bits are examined.  (Presumably the CR0 LT/GT/EQ/SO encoding.)
3123 if r.m32b = '0' then
3124 v.cr_result(3) := r.r(63);
3125 v.cr_result(2 downto 1) := "00";
3126 if r.r = 64x"0" then
3127 v.cr_result(1) := '1';
3128 else
3129 v.cr_result(2) := not r.r(63);
3130 end if;
3131 else
3132 v.cr_result(3) := r.r(31);
3133 v.cr_result(2 downto 1) := "00";
3134 if r.r(31 downto 0) = 32x"0" then
3135 v.cr_result(1) := '1';
3136 else
3137 v.cr_result(2) := not r.r(31);
3138 end if;
3139 end if;
3140 v.cr_result(0) := v.xerc.so;
3141 int_result := '1';
3142 v.writing_fpr := '1';
3143 v.instr_done := '1';
3144 when IDIV_ZERO =>
-- State IDIV_ZERO: completion path that writes an all-zero result.
-- misc_sel "0101" has no branch of its own in the RES_MISC decode below,
-- so the misc value keeps its all-zero default.
3145 opsel_r <= RES_MISC;
3146 misc_sel <= "0101";
3147 v.xerc_result := v.xerc;
-- With OE set, OV/OV32 reflect r.int_ovf and SO accumulates it.
3148 if r.oe = '1' then
3149 v.xerc_result.ov := r.int_ovf;
3150 v.xerc_result.ov32 := r.int_ovf;
3151 v.xerc_result.so := r.xerc.so or r.int_ovf;
3152 v.writing_xer := '1';
3153 end if;
-- CR field: only the "zero" bit (bit 1) is set, plus SO in bit 0.
3154 v.cr_result := "001" & v.xerc_result.so;
3155 int_result := '1';
3156 v.writing_fpr := '1';
3157 v.instr_done := '1';
3158
3159 end case;
3160
-- Common post-processing of the flags raised by the state machine above.
3161 if zero_divide = '1' then
3162 v.fpscr(FPSCR_ZX) := '1';
3163 end if;
-- A generated QNaN result counts as an invalid operation: the result is
-- forced to a positive NaN whose mantissa comes from misc_sel "0001"
-- (only QNAN_BIT set), and the arithmetic operation is finished.
3164 if qnan_result = '1' then
3165 invalid := '1';
3166 v.result_class := NAN;
3167 v.result_sign := '0';
3168 v.negate := '0';
3169 misc_sel <= "0001";
3170 opsel_r <= RES_MISC;
3171 arith_done := '1';
3172 end if;
3173 if invalid = '1' then
3174 v.invalid := '1';
3175 end if;
3176 if arith_done = '1' then
3177 -- Enabled invalid exception doesn't write result or FPRF
3178 -- Neither does enabled zero-divide exception
3179 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
3180 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
3181 v.writing_fpr := '1';
3182 v.update_fprf := '1';
3183 end if;
-- The instruction completes either way, and FX may be updated below.
3184 v.instr_done := '1';
3185 update_fx := '1';
3186 end if;
3187
3188 -- Multiplier and divide/square root data path
-- Drives the multiplier request (data1, data2, addend, subtract) from the
-- msel_* selects chosen by the state machine, and captures Y and P.
-- First operand: A mantissa, B mantissa, Y, or (any other select) R.
3189 case msel_1 is
3190 when MUL1_A =>
3191 f_to_multiply.data1 <= r.a.mantissa;
3192 when MUL1_B =>
3193 f_to_multiply.data1 <= r.b.mantissa;
3194 when MUL1_Y =>
3195 f_to_multiply.data1 <= r.y;
3196 when others =>
3197 f_to_multiply.data1 <= r.r;
3198 end case;
-- Second operand: C mantissa, the lookup-table estimate (inverse_est
-- zero-extended to 64 bits and shifted left by UNIT_BIT - 19), P, or
-- (any other select) R.
3199 case msel_2 is
3200 when MUL2_C =>
3201 f_to_multiply.data2 <= r.c.mantissa;
3202 when MUL2_LUT =>
3203 f_to_multiply.data2 <= std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64),
3204 UNIT_BIT - 19));
3205 when MUL2_P =>
3206 f_to_multiply.data2 <= r.p;
3207 when others =>
3208 f_to_multiply.data2 <= r.r;
3209 end case;
-- Addend defaults to zero and is filled in according to msel_add.
3210 maddend := (others => '0');
3211 case msel_add is
3212 when MULADD_CONST =>
3213 -- addend is 2.0 or 1.5 in 16.112 format
3214 if r.is_sqrt = '0' then
3215 maddend(2*UNIT_BIT + 1) := '1'; -- 2.0
3216 else
3217 maddend(2*UNIT_BIT downto 2*UNIT_BIT - 1) := "11"; -- 1.5
3218 end if;
3219 when MULADD_A =>
3220 -- addend is A in 16.112 format
-- (a_hi occupies bits 127..120, the mantissa bits 119..56, a_lo bits 55..0)
3221 maddend(127 downto UNIT_BIT + 64) := r.a_hi;
3222 maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.a.mantissa;
3223 maddend(UNIT_BIT - 1 downto 0) := r.a_lo;
3224 when MULADD_RS =>
3225 -- addend is concatenation of R and S in 16.112 format
3226 maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.r;
3227 maddend(UNIT_BIT - 1 downto 0) := r.s;
3228 when others =>
3229 end case;
3230 f_to_multiply.addend <= maddend;
3231 f_to_multiply.subtract <= msel_inv;
-- Y is loaded from whatever is currently selected as the second
-- multiplier operand.
3232 if set_y = '1' then
3233 v.y := f_to_multiply.data2;
3234 end if;
-- P captures the product when it arrives: the low 64 bits normally, or
-- bits UNIT_BIT+63..UNIT_BIT when pshift is set.
3235 if multiply_to_f.valid = '1' then
3236 if pshift = '0' then
3237 v.p := multiply_to_f.result(63 downto 0);
3238 else
3239 v.p := multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT);
3240 end if;
3241 end if;
3242
3243 -- Data path.
3244 -- This has A and B input multiplexers, an adder, a shifter,
3245 -- count-leading-zeroes logic, and a result mux.
-- Mask generation: build a 64-bit mask from the shift count.  When
-- r.longmask is set the count is first reduced by 29 (numerically
-- SP_LSB - DP_LSB; presumably for single-precision handling -- TODO confirm).
3246 if r.longmask = '1' then
3247 mshift := r.shift + to_signed(-29, EXP_BITS);
3248 else
3249 mshift := r.shift;
3250 end if;
-- Unknown shift -> unknown mask (simulation only); below -64 -> all ones;
-- zero or positive -> all zeroes; otherwise right_mask() of the low six
-- bits of the count (right_mask is defined outside this excerpt).
3251 if is_X(mshift) then
3252 mask := (others => 'X');
3253 elsif mshift < to_signed(-64, EXP_BITS) then
3254 mask := (others => '1');
3255 elsif mshift >= to_signed(0, EXP_BITS) then
3256 mask := (others => '0');
3257 else
3258 mask := right_mask(unsigned(mshift(5 downto 0)));
3259 end if;
-- Adder A input: selected by the *registered* r.opsel_a, which is why the
-- state machine sets v.opsel_a one state ahead of use.
3260 case r.opsel_a is
3261 when AIN_R =>
3262 in_a0 := r.r;
3263 when AIN_A =>
3264 in_a0 := r.a.mantissa;
3265 when AIN_B =>
3266 in_a0 := r.b.mantissa;
3267 when others =>
3268 in_a0 := r.c.mantissa;
3269 end case;
-- Sticky bit: when set_x is asserted, X is set if any bit of the
-- (not yet inverted) A input selected by the mask is non-zero.
3270 if (or (mask and in_a0)) = '1' and set_x = '1' then
3271 v.x := '1';
3272 end if;
3273 if opsel_ainv = '1' then
3274 in_a0 := not in_a0;
3275 end if;
3276 in_a <= in_a0;
-- Adder B input: zero, R, a rounding increment, or (any other select)
-- the low 8 bits of P sign-extended.
3277 case opsel_b is
3278 when BIN_ZERO =>
3279 in_b0 := (others => '0');
3280 when BIN_R =>
3281 in_b0 := r.r;
3282 when BIN_RND =>
-- rnd_b32: only bit 32 may be set (result_sign and single_prec).
-- Otherwise a 1 at the LSB position of the active precision, or at
-- DP_RBIT when rbit_inc is set.
3283 if rnd_b32 = '1' then
3284 round_inc := (32 => r.result_sign and r.single_prec, others => '0');
3285 elsif rbit_inc = '0' then
3286 round_inc := (SP_LSB => r.single_prec, DP_LSB => not r.single_prec, others => '0');
3287 else
3288 round_inc := (DP_RBIT => '1', others => '0');
3289 end if;
3290 in_b0 := round_inc;
3291 when others =>
3292 -- BIN_PS8, 8 LSBs of P sign-extended to 64
3293 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 0)), 64));
3294 end case;
3295 if opsel_binv = '1' then
3296 in_b0 := not in_b0;
3297 end if;
3298 in_b <= in_b0;
-- Shifter: operates on R concatenated with S (120 bits in total).
-- shiftin0 can force bit 0 of R to 1 and shiftin can force the top bit of
-- S to 1.  A shift count outside -64..63 yields an all-zero result; an
-- unknown count yields all 'X' (simulation only).
3299 if is_X(r.shift) then
3300 shift_res := (others => 'X');
3301 elsif r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
3302 shift_res := shifter_64(r.r(63 downto 1) & (shiftin0 or r.r(0)) &
3303 (shiftin or r.s(55)) & r.s(54 downto 0),
3304 std_ulogic_vector(r.shift(6 downto 0)));
3305 else
3306 shift_res := (others => '0');
3307 end if;
-- Adder: A input + B input + carry-in.
3308 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
-- opsel_mask clears the sum bits below DP_LSB, and additionally the bits
-- between DP_LSB and SP_LSB - 1 when r.single_prec is set.
3309 if opsel_mask = '1' then
3310 sum(DP_LSB - 1 downto 0) := "0000";
3311 if r.single_prec = '1' then
3312 sum(SP_LSB - 1 downto DP_LSB) := (others => '0');
3313 end if;
3314 end if;
-- Result mux: chooses the next value of R from the adder, the shifter,
-- the multiplier, or a table of miscellaneous constants.
3315 case opsel_r is
3316 when RES_SUM =>
3317 result <= sum;
3318 when RES_SHIFT =>
3319 result <= shift_res;
3320 when RES_MULT =>
3321 result <= multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT);
3322 if mult_mask = '1' then
3323 -- trim to 54 fraction bits if mult_mask = 1, for quotient when dividing
3324 result(UNIT_BIT - 55 downto 0) <= (others => '0');
3325 end if;
3326 when others =>
-- RES_MISC.  misc starts as all zeroes, so any misc_sel value without
-- its own branch below (e.g. "0101") produces a zero result.
3327 misc := (others => '0');
3328 case misc_sel is
3329 when "0000" =>
-- FPSCR value (masked by fpscr_mask) in the low 32 bits
3330 misc := x"00000000" & (r.fpscr and fpscr_mask);
3331 when "0001" =>
3332 -- generated QNaN mantissa
3333 misc(QNAN_BIT) := '1';
3334 when "0010" =>
3335 -- mantissa of max representable DP number
3336 misc(UNIT_BIT downto DP_LSB) := (others => '1');
3337 when "0011" =>
3338 -- mantissa of max representable SP number
3339 misc(UNIT_BIT downto SP_LSB) := (others => '1');
3340 when "0100" =>
3341 -- fmrgow result
3342 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
3343 when "0110" =>
3344 -- fmrgew result
3345 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
3346 when "0111" =>
-- lookup-table estimate, positioned the same way as for MUL2_LUT
3347 misc := std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64),
3348 UNIT_BIT - 19));
3349 when "1000" =>
3350 -- max positive result for fctiw[z]
3351 misc := x"000000007fffffff";
3352 when "1001" =>
3353 -- max negative result for fctiw[z]
3354 misc := x"ffffffff80000000";
3355 when "1010" =>
3356 -- max positive result for fctiwu[z]
3357 misc := x"00000000ffffffff";
3358 when "1011" =>
3359 -- max negative result for fctiwu[z]
3360 misc := x"0000000000000000";
3361 when "1100" =>
3362 -- max positive result for fctid[z]
3363 misc := x"7fffffffffffffff";
3364 when "1101" =>
3365 -- max negative result for fctid[z]
3366 misc := x"8000000000000000";
3367 when "1110" =>
3368 -- max positive result for fctidu[z]
3369 misc := x"ffffffffffffffff";
3370 when "1111" =>
3371 -- max negative result for fctidu[z]
3372 misc := x"0000000000000000";
3373 when others =>
3374 end case;
3375 result <= misc;
3376 end case;
-- NOTE(review): result is driven with signal assignments above, so this
-- read presumably relies on the process being re-evaluated when result
-- changes -- confirm against the process sensitivity list.
3377 v.r := result;
-- S register update (only when set_s is asserted):
--   S_NEG   : (not S) + (not X)
--   S_MULT  : low 56 bits of the multiplier result
--   S_SHIFT : top 56 bits of the shifter output; any non-zero bit in the
--             8 bits dropped below sets the sticky bit X
--   others  : clear S
3378 if set_s = '1' then
3379 case opsel_s is
3380 when S_NEG =>
3381 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
3382 when S_MULT =>
3383 v.s := multiply_to_f.result(55 downto 0);
3384 when S_SHIFT =>
3385 v.s := shift_res(63 downto 8);
3386 if shift_res(7 downto 0) /= x"00" then
3387 v.x := '1';
3388 end if;
3389 when others =>
3390 v.s := (others => '0');
3391 end case;
3392 end if;
3393
-- Operand register updates.  Exponents are loaded from new_exp and
-- mantissas from the shifter output (shift_res); the set_*_exp /
-- set_*_mant flags allow loading only one half of a register.
3394 if set_a = '1' or set_a_exp = '1' then
3395 v.a.exponent := new_exp;
3396 end if;
3397 if set_a = '1' or set_a_mant = '1' then
3398 v.a.mantissa := shift_res;
3399 end if;
-- a_hi / a_lo are cleared whenever e_in.valid is asserted; otherwise they
-- can be loaded from the top 8 and bottom 56 bits of R.
3400 if e_in.valid = '1' then
3401 v.a_hi := (others => '0');
3402 v.a_lo := (others => '0');
3403 else
3404 if set_a_hi = '1' then
3405 v.a_hi := r.r(63 downto 56);
3406 end if;
3407 if set_a_lo = '1' then
3408 v.a_lo := r.r(55 downto 0);
3409 end if;
3410 end if;
3411 if set_b = '1' then
3412 v.b.exponent := new_exp;
3413 end if;
3414 if set_b = '1' or set_b_mant = '1' then
3415 v.b.mantissa := shift_res;
3416 end if;
3417 if set_c = '1' then
3418 v.c.exponent := new_exp;
3419 v.c.mantissa := shift_res;
3420 end if;
3421
3422 -- exponent data path
-- Two-input adder producing rexp_sum.  Each input can be inverted; the
-- carry-in supplies the +1 that turns the inversion into a negation.
-- First input: result_exp, A's exponent, B's exponent arithmetically
-- halved (sign bit replicated), or zero.
3423 case re_sel1 is
3424 when REXP1_R =>
3425 rexp_in1 := r.result_exp;
3426 when REXP1_A =>
3427 rexp_in1 := r.a.exponent;
3428 when REXP1_BHALF =>
3429 rexp_in1 := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
3430 when others =>
3431 rexp_in1 := to_signed(0, EXP_BITS);
3432 end case;
3433 if re_neg1 = '1' then
3434 rexp_in1 := not rexp_in1;
3435 end if;
-- Second input: new_exp, C's or B's exponent, or one of the constants
-- selected by re_con2 (UNIT_BIT, max_exp, bias_exp, or zero).
3436 case re_sel2 is
3437 when REXP2_NE =>
3438 rexp_in2 := new_exp;
3439 when REXP2_C =>
3440 rexp_in2 := r.c.exponent;
3441 when REXP2_B =>
3442 rexp_in2 := r.b.exponent;
3443 when others =>
3444 case re_con2 is
3445 when RECON2_UNIT =>
3446 rexp_in2 := to_signed(UNIT_BIT, EXP_BITS);
3447 when RECON2_MAX =>
3448 rexp_in2 := max_exp;
3449 when RECON2_BIAS =>
3450 rexp_in2 := bias_exp;
3451 when others =>
3452 rexp_in2 := to_signed(0, EXP_BITS);
3453 end case;
3454 end case;
3455 if re_neg2 = '1' then
3456 rexp_in2 := not rexp_in2;
3457 end if;
-- NOTE(review): only a single carry is added even if both inputs are
-- inverted, so negating both would give -(in1) - (in2) - 1; presumably
-- the state machine never asserts both -- TODO confirm.
3458 rexp_cin := re_neg1 or re_neg2;
3459 rexp_sum := rexp_in1 + rexp_in2 + rexp_cin;
3460 if re_set_result = '1' then
3461 v.result_exp := rexp_sum;
3462 end if;
-- Shift-count data path: computes the value loaded into r.shift for the
-- next cycle, using the same invert-plus-carry scheme as the exponent
-- adder.  First input: B's exponent, new_exp, the current shift, or zero.
3463 case rs_sel1 is
3464 when RSH1_B =>
3465 rsh_in1 := r.b.exponent;
3466 when RSH1_NE =>
3467 rsh_in1 := new_exp;
3468 when RSH1_S =>
3469 rsh_in1 := r.shift;
3470 when others =>
3471 rsh_in1 := to_signed(0, EXP_BITS);
3472 end case;
3473 if rs_neg1 = '1' then
3474 rsh_in1 := not rsh_in1;
3475 end if;
-- Second input: A's exponent, or one of the constants selected by rs_con2.
3476 case rs_sel2 is
3477 when RSH2_A =>
3478 rsh_in2 := r.a.exponent;
3479 when others =>
3480 case rs_con2 is
3481 when RSCON2_1 =>
3482 rsh_in2 := to_signed(1, EXP_BITS);
3483 when RSCON2_UNIT_52 =>
3484 rsh_in2 := to_signed(UNIT_BIT - 52, EXP_BITS);
3485 when RSCON2_64_UNIT =>
3486 rsh_in2 := to_signed(64 - UNIT_BIT, EXP_BITS);
3487 when RSCON2_32 =>
3488 rsh_in2 := to_signed(32, EXP_BITS);
3489 when RSCON2_52 =>
3490 rsh_in2 := to_signed(52, EXP_BITS);
3491 when RSCON2_UNIT =>
3492 rsh_in2 := to_signed(UNIT_BIT, EXP_BITS);
3493 when RSCON2_63 =>
3494 rsh_in2 := to_signed(63, EXP_BITS);
3495 when RSCON2_64 =>
3496 rsh_in2 := to_signed(64, EXP_BITS);
3497 when RSCON2_MINEXP =>
3498 rsh_in2 := min_exp;
3499 when others =>
3500 rsh_in2 := to_signed(0, EXP_BITS);
3501 end case;
3502 end case;
3503 if rs_neg2 = '1' then
3504 rsh_in2 := not rsh_in2;
3505 end if;
-- rs_norm overrides the adder: the shift becomes the leading-zero count
-- of R minus (63 - UNIT_BIT), i.e. minus 7 with UNIT_BIT = 56.
3506 if rs_norm = '1' then
3507 clz := count_left_zeroes(r.r);
3508 if renorm_sqrt = '1' then
3509 -- make denormalized value end up with even exponent
3510 clz(0) := '1';
3511 end if;
3512 -- do this as a separate dedicated 7-bit adder for timing reasons
3513 v.shift := resize(signed('0' & clz) - (63 - UNIT_BIT), EXP_BITS);
3514 else
-- As in the exponent adder, a single carry covers either negation.
3515 v.shift := rsh_in1 + rsh_in2 + (rs_neg1 or rs_neg2);
3516 end if;
3517
-- FPSCR maintenance.
-- The FPRF field (FPSCR_C downto FPSCR_FU) is refreshed the cycle after
-- update_fprf was requested, from the registered result sign/class and R.
3518 if r.update_fprf = '1' then
3519 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
3520 r.r(UNIT_BIT) and not r.denorm);
3521 end if;
3522
-- VX is the OR of the individual invalid-operation bits (two bit ranges).
3523 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
3524 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
-- FEX is set when any exception bit in VX..XX has its matching enable
-- bit in VE..XE set.
3525 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
3526 v.fpscr(FPSCR_VE downto FPSCR_XE));
-- FX is set (never cleared here) when update_fx is asserted and at least
-- one exception bit is set that was not set in r.old_exc.
3527 if update_fx = '1' and
3528 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
3529 v.fpscr(FPSCR_FX) := '1';
3530 end if;
3531
-- Instruction completion.  When the state machine has flagged
-- v.instr_done, latch everything the output stage needs and return to
-- IDLE.
3532 if v.instr_done = '1' then
-- The result bookkeeping is only latched when leaving a non-IDLE state.
3533 if r.state /= IDLE then
3534 v.state := IDLE;
3535 v.busy := '0';
3536 v.f2stall := '0';
-- With r.fp_rc set, the CR result is FPSCR bits FX..OX.
3537 if r.fp_rc = '1' then
3538 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
3539 end if;
-- Snapshot of the values consumed by the fp_result packing below.
3540 v.sp_result := r.single_prec;
3541 v.int_result := int_result;
3542 v.illegal := illegal;
3543 v.nsnan_result := v.quieten_nan;
3544 v.res_negate := v.negate;
3545 v.res_subtract := v.is_subtract;
3546 v.res_rmode := r.round_mode;
-- CR field select: field 0 for integer ops, field 1 for non-compare
-- FP ops, otherwise the BF field taken from the instruction.
3547 if r.integer_op = '1' then
3548 v.cr_mask := num_to_fxm(0);
3549 elsif r.is_cmp = '0' then
3550 v.cr_mask := num_to_fxm(1);
3551 elsif is_X(insn_bf(r.insn)) then
3552 v.cr_mask := (others => 'X');
3553 else
3554 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(r.insn))));
3555 end if;
3556 v.writing_cr := r.is_cmp or r.rc;
3557 v.write_reg := r.dest_fpr;
3558 v.complete_tag := r.instr_tag;
3559 end if;
-- Completion / interrupt are only signalled when not stalled.  An
-- interrupt is raised for an illegal instruction, or when FEX is set
-- and r.fe_mode is set.
3560 if e_in.stall = '0' then
3561 v.complete := not v.illegal;
3562 v.do_intr := (v.fpscr(FPSCR_FEX) and r.fe_mode) or v.illegal;
3563 end if;
3564 -- N.B. We rely on execute1 to prevent any new instruction
3565 -- coming in while e_in.stall = 1, without us needing to
3566 -- have busy asserted.
3567 else
-- Still working on an instruction and not stalled: set f2stall.
3568 if r.state /= IDLE and e_in.stall = '0' then
3569 v.f2stall := '1';
3570 end if;
3571 end if;
3572
3573 -- This mustn't depend on any fields of r that are modified in IDLE state.
-- Result output: integer results pass R through unchanged; everything else
-- is packed by pack_dp() (defined outside this excerpt) from the latched
-- sign, class, exponent, R and the sp/nsnan/negate/subtract/rmode
-- snapshot taken at completion.
3574 if r.int_result = '1' then
3575 fp_result <= r.r;
3576 else
3577 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
3578 r.sp_result, r.nsnan_result,
3579 r.res_negate, r.res_subtract, r.res_rmode);
3580 end if;
3581
3582 rin <= v;
3583 end process;
3584
3585 end architecture behaviour;