FPU: Implement ftdiv and ftsqrt
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Floating-point unit top level.
-- All record types used on the ports are declared outside this file
-- (presumably in work.common -- confirm).
entity fpu is
    port (
        -- Clock and reset; rst is sampled on the rising edge of clk.
        clk, rst : in std_ulogic;

        -- Instruction/operand input and status output.
        e_in     : in  Execute1toFPUType;
        e_out    : out FPUToExecute1Type;

        -- Result output.
        w_out    : out FPUToWritebackType
    );
end entity fpu;
25
26 architecture behaviour of fpu is
-- Classification of a decoded floating-point operand.
type fp_number_class is (ZERO, FINITE, INFINITY, NAN);

-- Width in bits of the internal signed exponent.
constant EXP_BITS : natural := 13;

-- A floating-point operand split into its components (see decode_dp).
type fpu_reg_type is record
    class    : fp_number_class;
    negative : std_ulogic;                      -- sign bit of the operand
    exponent : signed(EXP_BITS-1 downto 0);     -- unbiased
    mantissa : std_ulogic_vector(63 downto 0);  -- 10.54 format
end record;
37
-- States of the FPU state machine.
-- The IDLE state selects one of the DO_* states from the instruction
-- encoding; the other states are presumably the later steps of the
-- individual operations (their use is further down in the architecture).
type state_t is (
    IDLE,
    -- one entry state per instruction group
    DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
    DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
    DO_FCFID, DO_FCTI,
    DO_FRSP, DO_FRI,
    DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT,
    DO_FRE, DO_FRSQRTE,
    DO_FSEL,
    -- follow-on states
    FRI_1,
    ADD_SHIFT, ADD_2, ADD_3,
    CMP_1, CMP_2,
    MULT_1,
    LOOKUP,
    DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
    FRE_1,
    RSQRT_1,
    FTDIV_1,
    SQRT_1, SQRT_2, SQRT_3, SQRT_4,
    SQRT_5, SQRT_6, SQRT_7, SQRT_8,
    SQRT_9, SQRT_10, SQRT_11, SQRT_12,
    INT_SHIFT, INT_ROUND, INT_ISHIFT,
    INT_FINAL, INT_CHECK, INT_OFLOW,
    FINISH, NORMALIZE,
    ROUND_UFLOW, ROUND_OFLOW,
    ROUNDING, ROUNDING_2, ROUNDING_3,
    DENORM,
    RENORM_A, RENORM_A2,
    RENORM_B, RENORM_B2,
    RENORM_C, RENORM_C2
);
67
-- Complete registered state of the FPU (signal r; rin is its next value).
-- Field order is significant and must not be changed.
type reg_type is record
    state        : state_t;
    -- busy drives e_out.busy, instr_done gates w_out.valid and
    -- do_intr drives e_out.interrupt
    busy, instr_done, do_intr : std_ulogic;
    -- instruction information captured from e_in when e_in.valid = '1'
    op           : insn_type_t;
    insn         : std_ulogic_vector(31 downto 0);
    dest_fpr     : gspr_index_t;
    -- fe_mode is the OR-reduction of e_in.fe_mode; is_cmp is e_in.out_cr;
    -- single_prec is e_in.single
    fe_mode, rc, is_cmp, single_prec : std_ulogic;
    fpscr        : std_ulogic_vector(31 downto 0);
    -- decoded operands (from e_in.fra, e_in.frb and e_in.frc)
    a, b, c      : fpu_reg_type;
    r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
    x            : std_ulogic;
    p, y         : std_ulogic_vector(63 downto 0);  -- 8.56 format
    result_sign  : std_ulogic;
    result_class : fp_number_class;
    result_exp, shift : signed(EXP_BITS-1 downto 0);
    -- writing_back drives w_out.write_enable
    writing_back, int_result : std_ulogic;
    cr_result    : std_ulogic_vector(3 downto 0);
    cr_mask      : std_ulogic_vector(7 downto 0);
    -- FPSCR bits VX..XX as they were when the state machine was in IDLE
    old_exc      : std_ulogic_vector(4 downto 0);
    update_fprf, quieten_nan, tiny, denorm : std_ulogic;
    -- '0' & FPSCR RN field, except where the instruction forces a mode
    round_mode   : std_ulogic_vector(2 downto 0);
    -- exp_cmp is '1' when A's exponent exceeds B's; is_sqrt selects the
    -- reciprocal square root part of the lookup table
    is_subtract, exp_cmp, add_bsmall, is_multiply, is_sqrt, first : std_ulogic;
    count        : unsigned(1 downto 0);
    -- for double precision, bit 1 lowers max_exp to 1020 and bit 0
    -- raises min_exp to -1021
    doing_ftdiv  : std_ulogic_vector(1 downto 0);
end record;
111
112 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
113
114 signal r, rin : reg_type;
115
116 signal fp_result : std_ulogic_vector(63 downto 0);
117 signal opsel_a : std_ulogic_vector(1 downto 0);
118 signal opsel_b : std_ulogic_vector(1 downto 0);
119 signal opsel_r : std_ulogic_vector(1 downto 0);
120 signal opsel_ainv : std_ulogic;
121 signal opsel_amask : std_ulogic;
122 signal opsel_binv : std_ulogic;
123 signal in_a : std_ulogic_vector(63 downto 0);
124 signal in_b : std_ulogic_vector(63 downto 0);
125 signal result : std_ulogic_vector(63 downto 0);
126 signal carry_in : std_ulogic;
127 signal lost_bits : std_ulogic;
128 signal r_hi_nz : std_ulogic;
129 signal r_lo_nz : std_ulogic;
130 signal misc_sel : std_ulogic_vector(3 downto 0);
131 signal f_to_multiply : MultiplyInputType;
132 signal multiply_to_f : MultiplyOutputType;
133 signal msel_1 : std_ulogic_vector(1 downto 0);
134 signal msel_2 : std_ulogic_vector(1 downto 0);
135 signal msel_add : std_ulogic_vector(1 downto 0);
136 signal msel_inv : std_ulogic;
137 signal inverse_est : std_ulogic_vector(18 downto 0);
138
139 -- opsel values
140 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
141 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
142 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
143 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
144
145 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
146 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
147 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
148 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
149
150 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
151 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
152 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
153 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
154
155 -- msel values
156 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
157 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
158 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
159 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
160
161 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
162 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
163 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
164 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
165
166 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
167 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
168 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
169
170 -- Inverse lookup table, indexed by the top 8 fraction bits
171 -- The first 256 entries are the reciprocal (1/x) lookup table,
172 -- and the remaining 768 entries are the reciprocal square root table.
173 -- Output range is [0.5, 1) in 0.19 format, though the top
174 -- bit isn't stored since it is always 1.
175 -- Each output value is the inverse of the center of the input
176 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
177 -- entry 1 is 1 / (1 + 3/512), etc.
178 signal inverse_table : lookup_table := (
179 -- 1/x lookup table
180 -- Unit bit is assumed to be 1, so input range is [1, 2)
181 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
182 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
183 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
184 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
185 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
186 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
187 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
188 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
189 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
190 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
191 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
192 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
193 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
194 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
195 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
196 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
197 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
198 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
199 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
200 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
201 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
202 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
203 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
204 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
205 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
206 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
207 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
208 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
209 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
210 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
211 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
212 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
213 -- 1/sqrt(x) lookup table
214 -- Input is in the range [1, 4), i.e. two bits to the left of the
215 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
216 -- 1.0 ... 1.9999
217 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
218 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
219 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
220 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
221 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
222 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
223 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
224 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
225 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
226 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
227 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
228 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
229 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
230 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
231 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
232 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
233 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
234 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
235 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
236 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
237 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
238 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
239 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
240 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
241 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
242 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
243 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
244 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
245 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
246 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
247 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
248 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
249 -- 2.0 ... 2.9999
250 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
251 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
252 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
253 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
254 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
255 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
256 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
257 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
258 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
259 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
260 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
261 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
262 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
263 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
264 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
265 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
266 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
267 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
268 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
269 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
270 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
271 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
272 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
273 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
274 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
275 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
276 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
277 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
278 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
279 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
280 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
281 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
282 -- 3.0 ... 3.9999
283 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
284 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
285 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
286 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
287 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
288 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
289 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
290 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
291 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
292 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
293 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
294 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
295 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
296 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
297 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
298 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
299 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
300 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
301 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
302 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
303 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
304 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
305 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
306 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
307 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
308 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
309 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
310 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
311 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
312 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
313 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
314 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
315 );
316
-- Combined left/right barrel shifter: 120-bit input, 64-bit output.
-- The result is the upper 64 bits of inp shifted left by 'shift' bits,
-- where shift is a 7-bit two's complement count in the range -64..63;
-- negative counts shift right. Vacated positions are filled with zeros.
-- The shift is done in three stages:
--   shift(6 downto 5): 0, +32, -64 or -32 bit positions
--   shift(4 downto 3): 0, 8, 16 or 24 positions to the left
--   shift(2 downto 0): 0 to 7 positions to the left
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    variable sel_coarse : std_ulogic_vector(1 downto 0);
    variable sel_byte   : std_ulogic_vector(1 downto 0);
    variable sel_bit    : std_ulogic_vector(2 downto 0);
    variable stage1     : std_ulogic_vector(94 downto 0);
    variable stage2     : std_ulogic_vector(70 downto 0);
    variable shifted    : std_ulogic_vector(63 downto 0);
begin
    sel_coarse := shift(6 downto 5);
    sel_byte   := shift(4 downto 3);
    sel_bit    := shift(2 downto 0);

    -- Stage 1: multiples of 32, including the two right-shift ranges
    if sel_coarse = "00" then
        stage1 := inp(119 downto 25);
    elsif sel_coarse = "01" then
        stage1 := inp(87 downto 0) & "0000000";
    elsif sel_coarse = "10" then
        stage1 := x"0000000000000000" & inp(119 downto 89);
    else
        stage1 := x"00000000" & inp(119 downto 57);
    end if;

    -- Stage 2: multiples of 8
    if sel_byte = "00" then
        stage2 := stage1(94 downto 24);
    elsif sel_byte = "01" then
        stage2 := stage1(86 downto 16);
    elsif sel_byte = "10" then
        stage2 := stage1(78 downto 8);
    else
        stage2 := stage1(70 downto 0);
    end if;

    -- Stage 3: single bit positions
    if sel_bit = "000" then
        shifted := stage2(70 downto 7);
    elsif sel_bit = "001" then
        shifted := stage2(69 downto 6);
    elsif sel_bit = "010" then
        shifted := stage2(68 downto 5);
    elsif sel_bit = "011" then
        shifted := stage2(67 downto 4);
    elsif sel_bit = "100" then
        shifted := stage2(66 downto 3);
    elsif sel_bit = "101" then
        shifted := stage2(65 downto 2);
    elsif sel_bit = "110" then
        shifted := stage2(64 downto 1);
    else
        shifted := stage2(63 downto 0);
    end if;

    return shifted;
end;
369
-- Generate a mask with 0-bits on the left and 1-bits on the right which
-- selects the bits that will be lost in doing a right shift. The shift
-- parameter is the bottom 6 bits of a negative shift count, indicating
-- a right shift. The mask has (64 - shift) one bits, e.g. shift = 63
-- (a count of -1) gives a mask with only bit 0 set.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0);
begin
    for bitpos in mask'range loop
        -- bit 'bitpos' is set when its distance from the top bit
        -- is at least 'shift'
        if 63 - bitpos >= shift then
            mask(bitpos) := '1';
        else
            mask(bitpos) := '0';
        end if;
    end loop;
    return mask;
end;
385
-- Break a 64-bit register value into sign, exponent, mantissa and class.
-- With is_int = '0' the value is treated as a double-precision number:
-- the exponent is unbiased (denormals and zero get -1022) and the
-- mantissa is placed in 10.54 format with the implicit unit bit at bit 54.
-- Otherwise the value is treated as an integer: the mantissa is the
-- raw value, the exponent is 0 and the class is ZERO or FINITE.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable dec       : fpu_reg_type;
    variable exp_field : std_ulogic_vector(10 downto 0);
    variable any_exp   : std_ulogic;    -- some exponent bit is set
    variable all_exp   : std_ulogic;    -- every exponent bit is set
    variable any_frac  : std_ulogic;    -- some fraction bit is set
    variable cls       : std_ulogic_vector(2 downto 0);
begin
    exp_field := fpr(62 downto 52);
    any_exp   := or (exp_field);
    all_exp   := and (exp_field);
    any_frac  := or (fpr(51 downto 0));

    dec.negative := fpr(63);

    if is_int /= '0' then
        -- integer operand
        dec.mantissa := fpr;
        dec.exponent := (others => '0');
        if (fpr(63) or any_exp or any_frac) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
    else
        -- floating-point operand
        if any_exp = '0' then
            -- zero or denormalized
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(exp_field), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;

        dec.mantissa := (others => '0');
        dec.mantissa(54) := any_exp;
        dec.mantissa(53 downto 2) := fpr(51 downto 0);

        cls := all_exp & any_exp & any_frac;
        if cls = "000" then
            dec.class := ZERO;
        elsif cls = "001" or cls = "010" or cls = "011" then
            -- "001" is a denormalized number
            dec.class := FINITE;
        elsif cls = "110" then
            dec.class := INFINITY;
        else
            dec.class := NAN;
        end if;
    end if;

    return dec;
end;
425
-- Assemble a 64-bit double-precision value from its components.
-- The mantissa is expected in 10.54 format (unit bit at bit 54).
--   ZERO     : only the sign bit is set
--   FINITE   : the exponent field is exp + 1023 when the unit bit is set,
--              otherwise it is left at zero (denormalized)
--   INFINITY : all-ones exponent, zero fraction
--   NAN      : all-ones exponent; the top fraction bit is also set
--              when quieten_nan = '1'
-- When single_prec = '1' the low 29 fraction bits are left at zero.
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    variable dp : std_ulogic_vector(63 downto 0);
begin
    dp := (63 => sign, others => '0');

    -- exponent field
    if class = INFINITY or class = NAN then
        dp(62 downto 52) := "11111111111";
    elsif class = FINITE and mantissa(54) = '1' then
        -- normalized number
        dp(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
    end if;

    -- fraction field
    if class = FINITE or class = NAN then
        dp(51 downto 29) := mantissa(53 downto 31);
        if class = NAN then
            dp(51) := quieten_nan or mantissa(53);
        end if;
        if single_prec = '0' then
            dp(28 downto 0) := mantissa(30 downto 2);
        end if;
    end if;

    return dp;
end;
457
-- Work out whether rounding needs to increment the mantissa.
-- Returns a 2-bit vector: bit 1 = increment, bit 0 = inexact.
-- For single precision, x is assumed to already include the bottom
-- 29 bits of the mantissa (usually arranged by setting set_x = 1 earlier).
-- rn(1 downto 0) selects the mode: "00" nearest, "01" towards zero,
-- "10"/"11" towards an infinity (increment when rn(0) equals the sign).
-- rn(2) = '1' disables the round-to-even treatment of ties.
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable below   : std_ulogic_vector(2 downto 0);  -- guard, round, sticky
    variable ulp     : std_ulogic;                     -- least significant kept bit
    variable inexact : std_ulogic;
    variable bump    : std_ulogic;
    variable ret     : std_ulogic_vector(1 downto 0);
begin
    if single_prec /= '0' then
        below := mantissa(30 downto 29) & x;
        ulp   := mantissa(31);
    else
        below := mantissa(1 downto 0) & x;
        ulp   := mantissa(2);
    end if;

    inexact := or (below);
    bump := '0';

    if rn(1 downto 0) = "00" then
        -- round to nearest
        if below = "100" and rn(2) = '0' then
            bump := ulp;        -- tie, round to even
        else
            bump := below(2);
        end if;
    elsif rn(1 downto 0) = "01" then
        -- round towards zero: never increment
        null;
    elsif rn(0) = sign then
        -- round towards +/- inf, in the direction of greater magnitude
        bump := inexact;
    end if;

    ret(1) := bump;
    ret(0) := inexact;
    return ret;
end;
495
-- Compute the 5-bit result flags to write into the FPSCR for a result
-- of the given sign and class. unitbit is the unit bit of the mantissa;
-- a FINITE result with unitbit = '0' (denormalized) gets the first flag set.
-- NOTE(review): the patterns appear to follow the Power ISA FPRF
-- encoding (C, FL, FG, FE, FU) -- confirm against the ISA.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
begin
    if class = NAN then
        return "10001";
    elsif class = INFINITY then
        return '0' & sign & (not sign) & "01";
    elsif class = ZERO then
        return sign & "0010";
    else
        -- FINITE
        return (not unitbit) & sign & (not sign) & "00";
    end if;
end;
511
512 begin
513 fpu_multiply_0: entity work.multiply
514 port map (
515 clk => clk,
516 m_in => f_to_multiply,
517 m_out => multiply_to_f
518 );
519
-- State register. On each rising clock edge r takes the value of rin,
-- except during reset, when the control fields are forced to their
-- idle values (all other fields of r keep their previous contents).
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            -- a new instruction must only arrive while the FPU is idle
            assert r.state = IDLE or e_in.valid /= '1' severity failure;
            r <= rin;
        else
            r.state <= IDLE;
            r.fpscr <= (others => '0');
            r.busy <= '0';
            r.instr_done <= '0';
            r.do_intr <= '0';
            r.writing_back <= '0';
        end if;
    end if;
end process;
536
-- Synchronous read of the inverse lookup table.
-- The low 8 address bits are bits 53..46 of the B mantissa. The upper
-- 2 address bits are mantissa bits 55..54 when r.is_sqrt = '1' and
-- "00" otherwise (which addresses the first 256 entries).
-- The table entry is extended with a leading '1' bit.
lut_access: process(clk)
    variable block_sel : std_ulogic_vector(1 downto 0);
    variable addr      : std_ulogic_vector(9 downto 0);
    variable index     : natural;
begin
    if rising_edge(clk) then
        block_sel := "00";
        if r.is_sqrt = '1' then
            block_sel := r.b.mantissa(55 downto 54);
        end if;
        addr := block_sel & r.b.mantissa(53 downto 46);
        index := to_integer(unsigned(addr));
        inverse_est <= '1' & inverse_table(index);
    end if;
end process;
552
-- Status outputs, taken directly from the state register.
e_out.busy      <= r.busy;
e_out.interrupt <= r.do_intr;
e_out.exception <= r.fpscr(FPSCR_FEX);

-- Result outputs. The result is only flagged valid when the
-- instruction has finished without requesting an interrupt.
w_out.valid           <= (not r.do_intr) and r.instr_done;
w_out.write_enable    <= r.writing_back;
w_out.write_reg       <= r.dest_fpr;
w_out.write_data      <= fp_result;

-- CR update: requested for finished instructions that have Rc set or
-- that are compares. The 4-bit CR result is replicated eight times;
-- presumably write_cr_mask then selects the field that is written.
w_out.write_cr_enable <= (r.rc or r.is_cmp) and r.instr_done;
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_data   <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result & r.cr_result & r.cr_result;
565
566 fpu_1: process(all)
567 variable v : reg_type;
568 variable adec : fpu_reg_type;
569 variable bdec : fpu_reg_type;
570 variable cdec : fpu_reg_type;
571 variable fpscr_mask : std_ulogic_vector(31 downto 0);
572 variable illegal : std_ulogic;
573 variable j, k : integer;
574 variable flm : std_ulogic_vector(7 downto 0);
575 variable int_input : std_ulogic;
576 variable mask : std_ulogic_vector(63 downto 0);
577 variable in_a0 : std_ulogic_vector(63 downto 0);
578 variable in_b0 : std_ulogic_vector(63 downto 0);
579 variable misc : std_ulogic_vector(63 downto 0);
580 variable shift_res : std_ulogic_vector(63 downto 0);
581 variable round : std_ulogic_vector(1 downto 0);
582 variable update_fx : std_ulogic;
583 variable arith_done : std_ulogic;
584 variable invalid : std_ulogic;
585 variable zero_divide : std_ulogic;
586 variable mant_nz : std_ulogic;
587 variable min_exp : signed(EXP_BITS-1 downto 0);
588 variable max_exp : signed(EXP_BITS-1 downto 0);
589 variable bias_exp : signed(EXP_BITS-1 downto 0);
590 variable new_exp : signed(EXP_BITS-1 downto 0);
591 variable exp_tiny : std_ulogic;
592 variable exp_huge : std_ulogic;
593 variable renormalize : std_ulogic;
594 variable clz : std_ulogic_vector(5 downto 0);
595 variable set_x : std_ulogic;
596 variable mshift : signed(EXP_BITS-1 downto 0);
597 variable need_check : std_ulogic;
598 variable msb : std_ulogic;
599 variable is_add : std_ulogic;
600 variable qnan_result : std_ulogic;
601 variable longmask : std_ulogic;
602 variable set_a : std_ulogic;
603 variable set_b : std_ulogic;
604 variable set_c : std_ulogic;
605 variable px_nz : std_ulogic;
606 variable maddend : std_ulogic_vector(127 downto 0);
607 variable set_y : std_ulogic;
608 variable pcmpb_eq : std_ulogic;
609 variable pcmpb_lt : std_ulogic;
610 variable pshift : std_ulogic;
611 variable renorm_sqrt : std_ulogic;
612 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
613 variable shiftin : std_ulogic;
614 begin
615 v := r;
616 illegal := '0';
617 v.busy := '0';
618 int_input := '0';
619
620 -- capture incoming instruction
621 if e_in.valid = '1' then
622 v.insn := e_in.insn;
623 v.op := e_in.op;
624 v.fe_mode := or (e_in.fe_mode);
625 v.dest_fpr := e_in.frt;
626 v.single_prec := e_in.single;
627 v.int_result := '0';
628 v.rc := e_in.rc;
629 v.is_cmp := e_in.out_cr;
630 if e_in.out_cr = '0' then
631 v.cr_mask := num_to_fxm(1);
632 else
633 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
634 end if;
635 int_input := '0';
636 if e_in.op = OP_FPOP_I then
637 int_input := '1';
638 end if;
639 v.quieten_nan := '1';
640 v.tiny := '0';
641 v.denorm := '0';
642 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
643 v.is_subtract := '0';
644 v.is_multiply := '0';
645 v.is_sqrt := '0';
646 v.add_bsmall := '0';
647 v.doing_ftdiv := "00";
648
649 adec := decode_dp(e_in.fra, int_input);
650 bdec := decode_dp(e_in.frb, int_input);
651 cdec := decode_dp(e_in.frc, int_input);
652 v.a := adec;
653 v.b := bdec;
654 v.c := cdec;
655
656 v.exp_cmp := '0';
657 if adec.exponent > bdec.exponent then
658 v.exp_cmp := '1';
659 end if;
660 end if;
661
662 r_hi_nz <= or (r.r(55 downto 31));
663 r_lo_nz <= or (r.r(30 downto 2));
664
665 if r.single_prec = '0' then
666 if r.doing_ftdiv(1) = '0' then
667 max_exp := to_signed(1023, EXP_BITS);
668 else
669 max_exp := to_signed(1020, EXP_BITS);
670 end if;
671 if r.doing_ftdiv(0) = '0' then
672 min_exp := to_signed(-1022, EXP_BITS);
673 else
674 min_exp := to_signed(-1021, EXP_BITS);
675 end if;
676 bias_exp := to_signed(1536, EXP_BITS);
677 else
678 max_exp := to_signed(127, EXP_BITS);
679 min_exp := to_signed(-126, EXP_BITS);
680 bias_exp := to_signed(192, EXP_BITS);
681 end if;
682 new_exp := r.result_exp - r.shift;
683 exp_tiny := '0';
684 exp_huge := '0';
685 if new_exp < min_exp then
686 exp_tiny := '1';
687 end if;
688 if new_exp > max_exp then
689 exp_huge := '1';
690 end if;
691
692 -- Compare P with zero and with B
693 px_nz := or (r.p(57 downto 4));
694 pcmpb_eq := '0';
695 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
696 pcmpb_eq := '1';
697 end if;
698 pcmpb_lt := '0';
699 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
700 pcmpb_lt := '1';
701 end if;
702
703 v.writing_back := '0';
704 v.instr_done := '0';
705 v.update_fprf := '0';
706 v.shift := to_signed(0, EXP_BITS);
707 v.first := '0';
708 opsel_a <= AIN_R;
709 opsel_ainv <= '0';
710 opsel_amask <= '0';
711 opsel_b <= BIN_ZERO;
712 opsel_binv <= '0';
713 opsel_r <= RES_SUM;
714 carry_in <= '0';
715 misc_sel <= "0000";
716 fpscr_mask := (others => '1');
717 update_fx := '0';
718 arith_done := '0';
719 invalid := '0';
720 zero_divide := '0';
721 renormalize := '0';
722 set_x := '0';
723 qnan_result := '0';
724 longmask := r.single_prec;
725 set_a := '0';
726 set_b := '0';
727 set_c := '0';
728 f_to_multiply.is_32bit <= '0';
729 f_to_multiply.valid <= '0';
730 msel_1 <= MUL1_A;
731 msel_2 <= MUL2_C;
732 msel_add <= MULADD_ZERO;
733 msel_inv <= '0';
734 set_y := '0';
735 pshift := '0';
736 renorm_sqrt := '0';
737 shiftin := '0';
738 case r.state is
739 when IDLE =>
740 if e_in.valid = '1' then
741 case e_in.insn(5 downto 1) is
742 when "00000" =>
743 if e_in.insn(8) = '1' then
744 if e_in.insn(6) = '0' then
745 v.state := DO_FTDIV;
746 else
747 v.state := DO_FTSQRT;
748 end if;
749 elsif e_in.insn(7) = '1' then
750 v.state := DO_MCRFS;
751 else
752 v.state := DO_FCMP;
753 end if;
754 when "00110" =>
755 if e_in.insn(10) = '0' then
756 if e_in.insn(8) = '0' then
757 v.state := DO_MTFSB;
758 else
759 v.state := DO_MTFSFI;
760 end if;
761 else
762 v.state := DO_FMRG;
763 end if;
764 when "00111" =>
765 if e_in.insn(8) = '0' then
766 v.state := DO_MFFS;
767 else
768 v.state := DO_MTFSF;
769 end if;
770 when "01000" =>
771 if e_in.insn(9 downto 8) /= "11" then
772 v.state := DO_FMR;
773 else
774 v.state := DO_FRI;
775 end if;
776 when "01100" =>
777 v.state := DO_FRSP;
778 when "01110" =>
779 if int_input = '1' then
780 -- fcfid[u][s]
781 v.state := DO_FCFID;
782 else
783 v.state := DO_FCTI;
784 end if;
785 when "01111" =>
786 v.round_mode := "001";
787 v.state := DO_FCTI;
788 when "10010" =>
789 v.state := DO_FDIV;
790 when "10100" | "10101" =>
791 v.state := DO_FADD;
792 when "10110" =>
793 v.is_sqrt := '1';
794 v.state := DO_FSQRT;
795 when "10111" =>
796 v.state := DO_FSEL;
797 when "11000" =>
798 v.state := DO_FRE;
799 when "11001" =>
800 v.is_multiply := '1';
801 v.state := DO_FMUL;
802 when "11010" =>
803 v.is_sqrt := '1';
804 v.state := DO_FRSQRTE;
805 when others =>
806 illegal := '1';
807 end case;
808 end if;
809 v.x := '0';
810 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
811
when DO_MCRFS =>
    -- mcrfs: copy the 4-bit FPSCR field selected by BFA into cr_result
    -- and clear that field in the FPSCR.  Field 0 is the most
    -- significant nibble, so field i occupies bits 31-4*i downto 28-4*i.
    -- Bits that are set in x"6007F8FF" are never cleared by this
    -- instruction, whatever field is selected.
    v.state := IDLE;
    v.instr_done := '1';
    j := to_integer(unsigned(insn_bfa(r.insn)));
    for fld in 7 downto 0 loop
        if j = fld then
            k := 28 - 4 * fld;
            fpscr_mask(k + 3 downto k) := "0000";
            v.cr_result := r.fpscr(k + 3 downto k);
        end if;
    end loop;
    v.fpscr := (fpscr_mask or x"6007F8FF") and r.fpscr;
824
when DO_FTDIV =>
    -- ftdiv: cr_result(2) carries fg_flag and cr_result(1) carries
    -- fe_flag.  Operand classes that decide fe_flag on their own are
    -- handled here; otherwise the exponent range checks are carried
    -- out in state FTDIV_1.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- fg_flag: A is infinite, or B is zero, infinite or denormalized.
    -- Mantissas are held in 10.54 format, so bit 54 is the units bit
    -- and a finite operand with that bit clear is denormalized (the
    -- same test DO_FMUL, DO_FDIV and DO_FSQRT use before
    -- renormalizing).  Bit 53 is just the top fraction bit and says
    -- nothing about normalization.
    if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- fe_flag: A is NaN or infinite, B is NaN, zero or infinite, or
    -- A is finite with an exponent of -970 or less.
    if r.a.class = NAN or r.a.class = INFINITY or
        r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
        (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
        v.cr_result(1) := '1';
    else
        -- fe_flag still depends on the exponent ranges: select the
        -- narrowed ftdiv limits and finish in FTDIV_1.
        v.doing_ftdiv := "11";
        v.first := '1';
        v.state := FTDIV_1;
        v.instr_done := '0';
    end if;
843
when DO_FTSQRT =>
    -- ftsqrt: cr_result(2) carries fg_flag and cr_result(1) carries
    -- fe_flag.  Everything is decided in this single state.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- fg_flag: B is zero, infinite or denormalized.  Mantissas are
    -- held in 10.54 format, so bit 54 is the units bit and a finite
    -- operand with that bit clear is denormalized (the same test
    -- DO_FSQRT uses before renormalizing).
    if r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- fe_flag: B is NaN, infinite, zero or negative, or its exponent
    -- is -970 or less.  The flag has to be SET here; cr_result was
    -- cleared to "0000" above, so writing '0' would leave fe_flag
    -- permanently clear.
    if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
        or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
        v.cr_result(1) := '1';
    end if;
856
857 when DO_FCMP =>
858 -- fcmp[uo]
859 v.instr_done := '1';
860 v.state := IDLE;
861 update_fx := '1';
862 opsel_a <= AIN_B;
863 opsel_r <= RES_SUM;
864 v.result_exp := r.b.exponent;
865 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
866 (r.b.class = NAN and r.b.mantissa(53) = '0') then
867 -- Signalling NAN
868 v.fpscr(FPSCR_VXSNAN) := '1';
869 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
870 v.fpscr(FPSCR_VXVC) := '1';
871 end if;
872 invalid := '1';
873 v.cr_result := "0001"; -- unordered
874 elsif r.a.class = NAN or r.b.class = NAN then
875 if r.insn(6) = '1' then
876 -- fcmpo
877 v.fpscr(FPSCR_VXVC) := '1';
878 invalid := '1';
879 end if;
880 v.cr_result := "0001"; -- unordered
881 elsif r.a.class = ZERO and r.b.class = ZERO then
882 v.cr_result := "0010"; -- equal
883 elsif r.a.negative /= r.b.negative then
884 v.cr_result := r.a.negative & r.b.negative & "00";
885 elsif r.a.class = ZERO then
886 -- A and B are the same sign from here down
887 v.cr_result := not r.b.negative & r.b.negative & "00";
888 elsif r.a.class = INFINITY then
889 if r.b.class = INFINITY then
890 v.cr_result := "0010";
891 else
892 v.cr_result := r.a.negative & not r.a.negative & "00";
893 end if;
894 elsif r.b.class = ZERO then
895 -- A is finite from here down
896 v.cr_result := r.a.negative & not r.a.negative & "00";
897 elsif r.b.class = INFINITY then
898 v.cr_result := not r.b.negative & r.b.negative & "00";
899 elsif r.exp_cmp = '1' then
900 -- A and B are both finite from here down
901 v.cr_result := r.a.negative & not r.a.negative & "00";
902 elsif r.a.exponent /= r.b.exponent then
903 -- A exponent is smaller than B
904 v.cr_result := not r.a.negative & r.a.negative & "00";
905 else
906 -- Prepare to subtract mantissas, put B in R
907 v.cr_result := "0000";
908 v.instr_done := '0';
909 v.state := CMP_1;
910 end if;
911 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
912
when DO_MTFSB =>
    -- mtfsb0 / mtfsb1: copy instruction bit 6 into the FPSCR bit named
    -- by the BT field.  BT counts from the most-significant end, so
    -- BT = n addresses v.fpscr(31 - n).
    v.state := IDLE;
    v.instr_done := '1';
    j := to_integer(unsigned(insn_bt(r.insn)));
    for bitpos in 31 downto 0 loop
        if j = 31 - bitpos then
            v.fpscr(bitpos) := r.insn(6);
        end if;
    end loop;
923
when DO_MTFSFI =>
    -- mtfsfi: write the immediate U into the 4-bit FPSCR field selected
    -- by BF (field 0 is the most significant nibble).  Nothing is
    -- written when instruction bit 16 is set.
    v.state := IDLE;
    v.instr_done := '1';
    j := to_integer(unsigned(insn_bf(r.insn)));
    for fld in 0 to 7 loop
        if r.insn(16) = '0' and fld = j then
            k := 28 - 4 * fld;
            v.fpscr(k + 3 downto k) := insn_u(r.insn);
        end if;
    end loop;
937
when DO_FMRG =>
    -- fmrgew / fmrgow: the merged value comes from the miscellaneous
    -- result path; instruction bit 8 picks which of the two variants
    -- misc_sel requests.  The result is written back as a raw integer.
    v.state := IDLE;
    v.instr_done := '1';
    v.writing_back := '1';
    v.int_result := '1';
    misc_sel <= "01" & r.insn(8) & '0';
    opsel_r <= RES_MISC;
946
947 when DO_MFFS =>
948 v.int_result := '1';
949 v.writing_back := '1';
950 opsel_r <= RES_MISC;
951 case r.insn(20 downto 16) is
952 when "00000" =>
953 -- mffs
954 when "00001" =>
955 -- mffsce
956 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
957 when "10100" | "10101" =>
958 -- mffscdrn[i] (but we don't implement DRN)
959 fpscr_mask := x"000000FF";
960 when "10110" =>
961 -- mffscrn
962 fpscr_mask := x"000000FF";
963 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
964 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
965 when "10111" =>
966 -- mffscrni
967 fpscr_mask := x"000000FF";
968 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
969 when "11000" =>
970 -- mffsl
971 fpscr_mask := x"0007F0FF";
972 when others =>
973 illegal := '1';
974 end case;
975 v.instr_done := '1';
976 v.state := IDLE;
977
when DO_MTFSF =>
    -- mtfsf: copy selected 4-bit fields of B's mantissa into the FPSCR.
    -- The field mask normally comes from instruction bits 24..17;
    -- instruction bit 25 forces every field to be selected and,
    -- failing that, instruction bit 16 forces none.
    flm := r.insn(24 downto 17);
    if r.insn(25) = '1' then
        flm := x"FF";
    elsif r.insn(16) = '1' then
        flm := x"00";
    end if;
    -- mask bit n governs FPSCR bits 4n+3 downto 4n
    for fld in 0 to 7 loop
        k := 4 * fld;
        if flm(fld) = '1' then
            v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
        end if;
    end loop;
    v.state := IDLE;
    v.instr_done := '1';
994
when DO_FMR =>
    -- Sign-manipulating moves.  The value, class and exponent all come
    -- from B unchanged (NaNs are not quietened); only the sign differs
    -- between the variants, chosen by instruction bits 9..6 in
    -- priority order.
    v.state := IDLE;
    v.instr_done := '1';
    v.writing_back := '1';
    v.quieten_nan := '0';
    opsel_a <= AIN_B;
    v.result_exp := r.b.exponent;
    v.result_class := r.b.class;
    if r.insn(9) = '1' then
        -- fabs: force positive
        v.result_sign := '0';
    elsif r.insn(8) = '1' then
        -- fnabs: force negative
        v.result_sign := '1';
    elsif r.insn(7) = '1' then
        -- fmr: keep B's sign
        v.result_sign := r.b.negative;
    elsif r.insn(6) = '1' then
        -- fneg: flip B's sign
        v.result_sign := not r.b.negative;
    else
        -- fcpsgn: take the sign from A
        v.result_sign := r.a.negative;
    end if;
1014
1015 when DO_FRI => -- fri[nzpm]
1016 opsel_a <= AIN_B;
1017 v.result_class := r.b.class;
1018 v.result_sign := r.b.negative;
1019 v.result_exp := r.b.exponent;
1020 v.fpscr(FPSCR_FR) := '0';
1021 v.fpscr(FPSCR_FI) := '0';
1022 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1023 -- Signalling NAN
1024 v.fpscr(FPSCR_VXSNAN) := '1';
1025 invalid := '1';
1026 end if;
1027 if r.b.class = FINITE then
1028 if r.b.exponent >= to_signed(52, EXP_BITS) then
1029 -- integer already, no rounding required
1030 arith_done := '1';
1031 else
1032 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1033 v.state := FRI_1;
1034 v.round_mode := '1' & r.insn(7 downto 6);
1035 end if;
1036 else
1037 arith_done := '1';
1038 end if;
1039
1040 when DO_FRSP =>
1041 opsel_a <= AIN_B;
1042 v.result_class := r.b.class;
1043 v.result_sign := r.b.negative;
1044 v.result_exp := r.b.exponent;
1045 v.fpscr(FPSCR_FR) := '0';
1046 v.fpscr(FPSCR_FI) := '0';
1047 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1048 -- Signalling NAN
1049 v.fpscr(FPSCR_VXSNAN) := '1';
1050 invalid := '1';
1051 end if;
1052 set_x := '1';
1053 if r.b.class = FINITE then
1054 if r.b.exponent < to_signed(-126, EXP_BITS) then
1055 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1056 v.state := ROUND_UFLOW;
1057 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1058 v.state := ROUND_OFLOW;
1059 else
1060 v.shift := to_signed(-2, EXP_BITS);
1061 v.state := ROUNDING;
1062 end if;
1063 else
1064 arith_done := '1';
1065 end if;
1066
1067 when DO_FCTI =>
1068 -- instr bit 9: 1=dword 0=word
1069 -- instr bit 8: 1=unsigned 0=signed
1070 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1071 opsel_a <= AIN_B;
1072 v.result_class := r.b.class;
1073 v.result_sign := r.b.negative;
1074 v.result_exp := r.b.exponent;
1075 v.fpscr(FPSCR_FR) := '0';
1076 v.fpscr(FPSCR_FI) := '0';
1077 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1078 -- Signalling NAN
1079 v.fpscr(FPSCR_VXSNAN) := '1';
1080 invalid := '1';
1081 end if;
1082
1083 v.int_result := '1';
1084 case r.b.class is
1085 when ZERO =>
1086 arith_done := '1';
1087 when FINITE =>
1088 if r.b.exponent >= to_signed(64, EXP_BITS) or
1089 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1090 v.state := INT_OFLOW;
1091 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1092 -- integer already, no rounding required,
1093 -- shift into final position
1094 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1095 if r.insn(8) = '1' and r.b.negative = '1' then
1096 v.state := INT_OFLOW;
1097 else
1098 v.state := INT_ISHIFT;
1099 end if;
1100 else
1101 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1102 v.state := INT_SHIFT;
1103 end if;
1104 when INFINITY | NAN =>
1105 v.state := INT_OFLOW;
1106 end case;
1107
1108 when DO_FCFID =>
1109 v.result_sign := '0';
1110 opsel_a <= AIN_B;
1111 if r.insn(8) = '0' and r.b.negative = '1' then
1112 -- fcfid[s] with negative operand, set R = -B
1113 opsel_ainv <= '1';
1114 carry_in <= '1';
1115 v.result_sign := '1';
1116 end if;
1117 v.result_class := r.b.class;
1118 v.result_exp := to_signed(54, EXP_BITS);
1119 v.fpscr(FPSCR_FR) := '0';
1120 v.fpscr(FPSCR_FI) := '0';
1121 if r.b.class = ZERO then
1122 arith_done := '1';
1123 else
1124 v.state := FINISH;
1125 end if;
1126
1127 when DO_FADD =>
1128 -- fadd[s] and fsub[s]
1129 opsel_a <= AIN_A;
1130 v.result_sign := r.a.negative;
1131 v.result_class := r.a.class;
1132 v.result_exp := r.a.exponent;
1133 v.fpscr(FPSCR_FR) := '0';
1134 v.fpscr(FPSCR_FI) := '0';
1135 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1136 if r.a.class = FINITE and r.b.class = FINITE then
1137 v.is_subtract := not is_add;
1138 v.add_bsmall := r.exp_cmp;
1139 if r.exp_cmp = '0' then
1140 v.shift := r.a.exponent - r.b.exponent;
1141 v.result_sign := r.b.negative xnor r.insn(1);
1142 if r.a.exponent = r.b.exponent then
1143 v.state := ADD_2;
1144 else
1145 v.state := ADD_SHIFT;
1146 end if;
1147 else
1148 opsel_a <= AIN_B;
1149 v.shift := r.b.exponent - r.a.exponent;
1150 v.result_exp := r.b.exponent;
1151 v.state := ADD_SHIFT;
1152 end if;
1153 else
1154 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1155 (r.b.class = NAN and r.b.mantissa(53) = '0') then
1156 -- Signalling NAN
1157 v.fpscr(FPSCR_VXSNAN) := '1';
1158 invalid := '1';
1159 end if;
1160 if r.a.class = NAN then
1161 -- nothing to do, result is A
1162 elsif r.b.class = NAN then
1163 v.result_class := NAN;
1164 v.result_sign := r.b.negative;
1165 opsel_a <= AIN_B;
1166 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1167 -- invalid operation, construct QNaN
1168 v.fpscr(FPSCR_VXISI) := '1';
1169 qnan_result := '1';
1170 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1171 -- return -0 for rounding to -infinity
1172 v.result_sign := r.round_mode(1) and r.round_mode(0);
1173 elsif r.a.class = INFINITY or r.b.class = ZERO then
1174 -- nothing to do, result is A
1175 else
1176 -- result is +/- B
1177 v.result_sign := r.b.negative xnor r.insn(1);
1178 v.result_class := r.b.class;
1179 v.result_exp := r.b.exponent;
1180 opsel_a <= AIN_B;
1181 end if;
1182 arith_done := '1';
1183 end if;
1184
1185 when DO_FMUL =>
1186 -- fmul[s]
1187 opsel_a <= AIN_A;
1188 v.result_sign := r.a.negative;
1189 v.result_class := r.a.class;
1190 v.result_exp := r.a.exponent;
1191 v.fpscr(FPSCR_FR) := '0';
1192 v.fpscr(FPSCR_FI) := '0';
1193 if r.a.class = FINITE and r.c.class = FINITE then
1194 v.result_sign := r.a.negative xor r.c.negative;
1195 v.result_exp := r.a.exponent + r.c.exponent;
1196 -- Renormalize denorm operands
1197 if r.a.mantissa(54) = '0' then
1198 v.state := RENORM_A;
1199 elsif r.c.mantissa(54) = '0' then
1200 opsel_a <= AIN_C;
1201 v.state := RENORM_C;
1202 else
1203 f_to_multiply.valid <= '1';
1204 v.state := MULT_1;
1205 end if;
1206 else
1207 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1208 (r.c.class = NAN and r.c.mantissa(53) = '0') then
1209 -- Signalling NAN
1210 v.fpscr(FPSCR_VXSNAN) := '1';
1211 invalid := '1';
1212 end if;
1213 if r.a.class = NAN then
1214 -- result is A
1215 elsif r.c.class = NAN then
1216 v.result_class := NAN;
1217 v.result_sign := r.c.negative;
1218 opsel_a <= AIN_C;
1219 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1220 (r.a.class = ZERO and r.c.class = INFINITY) then
1221 -- invalid operation, construct QNaN
1222 v.fpscr(FPSCR_VXIMZ) := '1';
1223 qnan_result := '1';
1224 elsif r.a.class = ZERO or r.a.class = INFINITY then
1225 -- result is +/- A
1226 v.result_sign := r.a.negative xor r.c.negative;
1227 else
1228 -- r.c.class is ZERO or INFINITY
1229 v.result_class := r.c.class;
1230 v.result_sign := r.a.negative xor r.c.negative;
1231 end if;
1232 arith_done := '1';
1233 end if;
1234
when DO_FDIV =>
    -- fdiv[s]: set up the quotient's sign and exponent, then either
    -- start the division proper (renormalizing denormal operands
    -- first) or produce the special-case result directly.
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.count := "00";
    opsel_a <= AIN_A;
    v.result_class := r.a.class;
    v.result_sign := r.a.negative xor r.b.negative;
    v.result_exp := r.a.exponent - r.b.exponent;
    if r.a.class = FINITE and r.b.class = FINITE then
        -- Renormalize denorm operands (units bit 54 clear) before dividing
        if r.a.mantissa(54) = '0' then
            v.state := RENORM_A;
        elsif r.b.mantissa(54) = '0' then
            opsel_a <= AIN_B;
            v.state := RENORM_B;
        else
            v.first := '1';
            v.state := DIV_2;
        end if;
    else
        -- At least one operand is zero, infinite or a NaN
        arith_done := '1';
        if (r.a.class = NAN and r.a.mantissa(53) = '0') or
            (r.b.class = NAN and r.b.mantissa(53) = '0') then
            -- Signalling NAN
            v.fpscr(FPSCR_VXSNAN) := '1';
            invalid := '1';
        end if;
        if r.a.class = NAN then
            -- result is A, with A's own sign
            v.result_sign := r.a.negative;
        else
            case r.b.class is
                when NAN =>
                    -- result is B
                    v.result_class := NAN;
                    v.result_sign := r.b.negative;
                    opsel_a <= AIN_B;
                when INFINITY =>
                    if r.a.class = INFINITY then
                        -- infinity / infinity is invalid
                        v.fpscr(FPSCR_VXIDI) := '1';
                        qnan_result := '1';
                    else
                        v.result_class := ZERO;
                    end if;
                when ZERO =>
                    if r.a.class = ZERO then
                        -- zero / zero is invalid
                        v.fpscr(FPSCR_VXZDZ) := '1';
                        qnan_result := '1';
                    else
                        if r.a.class = FINITE then
                            zero_divide := '1';
                        end if;
                        v.result_class := INFINITY;
                    end if;
                when FINITE =>
                    -- A is zero or infinite: result_class stays r.a.class
                    null;
            end case;
        end if;
    end if;
1291
when DO_FSEL =>
    -- fsel: the result is C when A is zero or is non-negative and not
    -- a NaN; otherwise it is B.  The chosen operand is passed through
    -- untouched (NaNs are not quietened).
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.quieten_nan := '0';
    arith_done := '1';
    if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
        opsel_a <= AIN_C;
        v.result_class := r.c.class;
        v.result_exp := r.c.exponent;
        v.result_sign := r.c.negative;
    else
        opsel_a <= AIN_B;
        v.result_class := r.b.class;
        v.result_exp := r.b.exponent;
        v.result_sign := r.b.negative;
    end if;
1309
1310 when DO_FSQRT =>
1311 opsel_a <= AIN_B;
1312 v.result_class := r.b.class;
1313 v.result_sign := r.b.negative;
1314 v.fpscr(FPSCR_FR) := '0';
1315 v.fpscr(FPSCR_FI) := '0';
1316 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1317 v.fpscr(FPSCR_VXSNAN) := '1';
1318 invalid := '1';
1319 end if;
1320 case r.b.class is
1321 when FINITE =>
1322 v.result_exp := r.b.exponent;
1323 if r.b.negative = '1' then
1324 v.fpscr(FPSCR_VXSQRT) := '1';
1325 qnan_result := '1';
1326 arith_done := '1';
1327 elsif r.b.mantissa(54) = '0' then
1328 v.state := RENORM_B;
1329 elsif r.b.exponent(0) = '0' then
1330 v.state := SQRT_1;
1331 else
1332 v.shift := to_signed(1, EXP_BITS);
1333 v.state := RENORM_B2;
1334 end if;
1335 when NAN | ZERO =>
1336 -- result is B
1337 arith_done := '1';
1338 when INFINITY =>
1339 if r.b.negative = '1' then
1340 v.fpscr(FPSCR_VXSQRT) := '1';
1341 qnan_result := '1';
1342 -- else result is B
1343 end if;
1344 arith_done := '1';
1345 end case;
1346
1347 when DO_FRE =>
1348 opsel_a <= AIN_B;
1349 v.result_class := r.b.class;
1350 v.result_sign := r.b.negative;
1351 v.fpscr(FPSCR_FR) := '0';
1352 v.fpscr(FPSCR_FI) := '0';
1353 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1354 v.fpscr(FPSCR_VXSNAN) := '1';
1355 invalid := '1';
1356 end if;
1357 case r.b.class is
1358 when FINITE =>
1359 v.result_exp := - r.b.exponent;
1360 if r.b.mantissa(54) = '0' then
1361 v.state := RENORM_B;
1362 else
1363 v.state := FRE_1;
1364 end if;
1365 when NAN =>
1366 -- result is B
1367 arith_done := '1';
1368 when INFINITY =>
1369 v.result_class := ZERO;
1370 arith_done := '1';
1371 when ZERO =>
1372 v.result_class := INFINITY;
1373 zero_divide := '1';
1374 arith_done := '1';
1375 end case;
1376
1377 when DO_FRSQRTE =>
1378 opsel_a <= AIN_B;
1379 v.result_class := r.b.class;
1380 v.result_sign := r.b.negative;
1381 v.fpscr(FPSCR_FR) := '0';
1382 v.fpscr(FPSCR_FI) := '0';
1383 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1384 v.fpscr(FPSCR_VXSNAN) := '1';
1385 invalid := '1';
1386 end if;
1387 v.shift := to_signed(1, EXP_BITS);
1388 case r.b.class is
1389 when FINITE =>
1390 v.result_exp := r.b.exponent;
1391 if r.b.negative = '1' then
1392 v.fpscr(FPSCR_VXSQRT) := '1';
1393 qnan_result := '1';
1394 arith_done := '1';
1395 elsif r.b.mantissa(54) = '0' then
1396 v.state := RENORM_B;
1397 elsif r.b.exponent(0) = '0' then
1398 v.state := RSQRT_1;
1399 else
1400 v.state := RENORM_B2;
1401 end if;
1402 when NAN =>
1403 -- result is B
1404 arith_done := '1';
1405 when INFINITY =>
1406 if r.b.negative = '1' then
1407 v.fpscr(FPSCR_VXSQRT) := '1';
1408 qnan_result := '1';
1409 else
1410 v.result_class := ZERO;
1411 end if;
1412 arith_done := '1';
1413 when ZERO =>
1414 v.result_class := INFINITY;
1415 zero_divide := '1';
1416 arith_done := '1';
1417 end case;
1418
1419 when RENORM_A =>
1420 renormalize := '1';
1421 v.state := RENORM_A2;
1422
1423 when RENORM_A2 =>
1424 set_a := '1';
1425 v.result_exp := new_exp;
1426 if r.insn(4) = '1' then
1427 opsel_a <= AIN_C;
1428 if r.c.mantissa(54) = '1' then
1429 v.first := '1';
1430 v.state := MULT_1;
1431 else
1432 v.state := RENORM_C;
1433 end if;
1434 else
1435 opsel_a <= AIN_B;
1436 if r.b.mantissa(54) = '1' then
1437 v.first := '1';
1438 v.state := DIV_2;
1439 else
1440 v.state := RENORM_B;
1441 end if;
1442 end if;
1443
1444 when RENORM_B =>
1445 renormalize := '1';
1446 renorm_sqrt := r.is_sqrt;
1447 v.state := RENORM_B2;
1448
1449 when RENORM_B2 =>
1450 set_b := '1';
1451 if r.is_sqrt = '0' then
1452 v.result_exp := r.result_exp + r.shift;
1453 else
1454 v.result_exp := new_exp;
1455 end if;
1456 v.state := LOOKUP;
1457
1458 when RENORM_C =>
1459 renormalize := '1';
1460 v.state := RENORM_C2;
1461
1462 when RENORM_C2 =>
1463 set_c := '1';
1464 v.result_exp := new_exp;
1465 v.first := '1';
1466 v.state := MULT_1;
1467
1468 when ADD_SHIFT =>
1469 opsel_r <= RES_SHIFT;
1470 set_x := '1';
1471 longmask := '0';
1472 v.state := ADD_2;
1473
1474 when ADD_2 =>
1475 if r.add_bsmall = '1' then
1476 opsel_a <= AIN_A;
1477 else
1478 opsel_a <= AIN_B;
1479 end if;
1480 opsel_b <= BIN_R;
1481 opsel_binv <= r.is_subtract;
1482 carry_in <= r.is_subtract and not r.x;
1483 v.shift := to_signed(-1, EXP_BITS);
1484 v.state := ADD_3;
1485
1486 when ADD_3 =>
1487 -- check for overflow or negative result (can't get both)
1488 if r.r(63) = '1' then
1489 -- result is opposite sign to expected
1490 v.result_sign := not r.result_sign;
1491 opsel_ainv <= '1';
1492 carry_in <= '1';
1493 v.state := FINISH;
1494 elsif r.r(55) = '1' then
1495 -- sum overflowed, shift right
1496 opsel_r <= RES_SHIFT;
1497 set_x := '1';
1498 v.shift := to_signed(-2, EXP_BITS);
1499 if exp_huge = '1' then
1500 v.state := ROUND_OFLOW;
1501 else
1502 v.state := ROUNDING;
1503 end if;
1504 elsif r.r(54) = '1' then
1505 set_x := '1';
1506 v.shift := to_signed(-2, EXP_BITS);
1507 v.state := ROUNDING;
1508 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1509 -- r.x must be zero at this point
1510 v.result_class := ZERO;
1511 if r.is_subtract = '1' then
1512 -- set result sign depending on rounding mode
1513 v.result_sign := r.round_mode(1) and r.round_mode(0);
1514 end if;
1515 arith_done := '1';
1516 else
1517 renormalize := '1';
1518 v.state := NORMALIZE;
1519 end if;
1520
1521 when CMP_1 =>
1522 opsel_a <= AIN_A;
1523 opsel_b <= BIN_R;
1524 opsel_binv <= '1';
1525 carry_in <= '1';
1526 v.state := CMP_2;
1527
when CMP_2 =>
    -- Second step of a compare whose operands share sign and exponent:
    -- R holds the mantissa difference formed in CMP_1.  Translate it
    -- into the 4-bit compare result and mirror that into the FPSCR
    -- FL..FU field.
    v.state := IDLE;
    v.instr_done := '1';
    if r.r(63) = '1' then
        -- difference is negative: A is smaller in magnitude
        v.cr_result := not r.a.negative & r.a.negative & "00";
    elsif (r_hi_nz or r_lo_nz) = '0' then
        -- difference is zero: equal
        v.cr_result := "0010";
    else
        -- A is larger in magnitude
        v.cr_result := r.a.negative & not r.a.negative & "00";
    end if;
    v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1540
1541 when MULT_1 =>
1542 f_to_multiply.valid <= r.first;
1543 opsel_r <= RES_MULT;
1544 if multiply_to_f.valid = '1' then
1545 v.state := FINISH;
1546 end if;
1547
1548 when LOOKUP =>
1549 opsel_a <= AIN_B;
1550 -- wait one cycle for inverse_table[B] lookup
1551 v.first := '1';
1552 if r.insn(4) = '0' then
1553 if r.insn(3) = '0' then
1554 v.state := DIV_2;
1555 else
1556 v.state := SQRT_1;
1557 end if;
1558 elsif r.insn(2) = '0' then
1559 v.state := FRE_1;
1560 else
1561 v.state := RSQRT_1;
1562 end if;
1563
1564 when DIV_2 =>
1565 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1566 msel_1 <= MUL1_B;
1567 msel_add <= MULADD_CONST;
1568 msel_inv <= '1';
1569 if r.count = 0 then
1570 msel_2 <= MUL2_LUT;
1571 else
1572 msel_2 <= MUL2_P;
1573 end if;
1574 set_y := r.first;
1575 pshift := '1';
1576 f_to_multiply.valid <= r.first;
1577 if multiply_to_f.valid = '1' then
1578 v.first := '1';
1579 v.count := r.count + 1;
1580 v.state := DIV_3;
1581 end if;
1582
1583 when DIV_3 =>
1584 -- compute Y = P = P * Y
1585 msel_1 <= MUL1_Y;
1586 msel_2 <= MUL2_P;
1587 f_to_multiply.valid <= r.first;
1588 pshift := '1';
1589 if multiply_to_f.valid = '1' then
1590 v.first := '1';
1591 if r.count = 3 then
1592 v.state := DIV_4;
1593 else
1594 v.state := DIV_2;
1595 end if;
1596 end if;
1597
1598 when DIV_4 =>
1599 -- compute R = P = A * Y (quotient)
1600 msel_1 <= MUL1_A;
1601 msel_2 <= MUL2_P;
1602 set_y := r.first;
1603 f_to_multiply.valid <= r.first;
1604 pshift := '1';
1605 if multiply_to_f.valid = '1' then
1606 opsel_r <= RES_MULT;
1607 v.first := '1';
1608 v.state := DIV_5;
1609 end if;
1610
1611 when DIV_5 =>
1612 -- compute P = A - B * R (remainder)
1613 msel_1 <= MUL1_B;
1614 msel_2 <= MUL2_R;
1615 msel_add <= MULADD_A;
1616 msel_inv <= '1';
1617 f_to_multiply.valid <= r.first;
1618 if multiply_to_f.valid = '1' then
1619 v.state := DIV_6;
1620 end if;
1621
when DIV_6 =>
    -- Final quotient correction: P holds the remainder A - B * R.
    v.state := FINISH;
    if pcmpb_lt = '0' then
        -- remainder >= B: bump the quotient by one; the result is
        -- inexact unless the remainder was exactly B
        carry_in <= '1';
        v.x := not pcmpb_eq;
    else
        -- remainder < B: quotient stands, inexact if anything is left over
        v.x := r.p(58) or px_nz;
    end if;
1633
1634 when FRE_1 =>
1635 opsel_r <= RES_MISC;
1636 misc_sel <= "0111";
1637 v.shift := to_signed(1, EXP_BITS);
1638 v.state := NORMALIZE;
1639
when FTDIV_1 =>
    -- Exponent range checks for ftdiv.  This state is visited at most
    -- twice.  On the first visit (r.first = '1', entered from DO_FTDIV
    -- with doing_ftdiv = "11") exp_tiny/exp_huge reflect
    -- r.result_exp - r.shift against the narrowed ftdiv limits.  If that
    -- passes and A is non-zero, A's exponent is loaded into the shift
    -- amount and doing_ftdiv becomes "10", so the second visit
    -- (r.first = '0') tests r.result_exp minus A's exponent.  fe_flag is
    -- whatever the most recent check produced.
    v.cr_result(1) := exp_tiny or exp_huge;
    if (exp_tiny or exp_huge) = '0' and r.a.class /= ZERO and r.first /= '0' then
        -- first check passed: stay in this state for the second one
        v.doing_ftdiv := "10";
        v.shift := r.a.exponent;
    else
        v.state := IDLE;
        v.instr_done := '1';
    end if;
1649
1650 when RSQRT_1 =>
1651 opsel_r <= RES_MISC;
1652 misc_sel <= "0111";
1653 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1654 v.result_exp := - sqrt_exp;
1655 v.shift := to_signed(1, EXP_BITS);
1656 v.state := NORMALIZE;
1657
1658 when SQRT_1 =>
1659 -- put invsqr[B] in R and compute P = invsqr[B] * B
1660 -- also transfer B (in R) to A
1661 set_a := '1';
1662 opsel_r <= RES_MISC;
1663 misc_sel <= "0111";
1664 msel_1 <= MUL1_B;
1665 msel_2 <= MUL2_LUT;
1666 f_to_multiply.valid <= '1';
1667 v.shift := to_signed(-1, EXP_BITS);
1668 v.count := "00";
1669 v.state := SQRT_2;
1670
1671 when SQRT_2 =>
1672 -- shift R right one place
1673 -- not expecting multiplier result yet
1674 opsel_r <= RES_SHIFT;
1675 v.first := '1';
1676 v.state := SQRT_3;
1677
1678 when SQRT_3 =>
1679 -- put R into Y, wait for product from multiplier
1680 msel_2 <= MUL2_R;
1681 set_y := r.first;
1682 pshift := '1';
1683 if multiply_to_f.valid = '1' then
1684 -- put result into R
1685 opsel_r <= RES_MULT;
1686 v.first := '1';
1687 v.state := SQRT_4;
1688 end if;
1689
1690 when SQRT_4 =>
1691 -- compute 1.5 - Y * P
1692 msel_1 <= MUL1_Y;
1693 msel_2 <= MUL2_P;
1694 msel_add <= MULADD_CONST;
1695 msel_inv <= '1';
1696 f_to_multiply.valid <= r.first;
1697 pshift := '1';
1698 if multiply_to_f.valid = '1' then
1699 v.state := SQRT_5;
1700 end if;
1701
1702 when SQRT_5 =>
1703 -- compute Y = Y * P
1704 msel_1 <= MUL1_Y;
1705 msel_2 <= MUL2_P;
1706 f_to_multiply.valid <= '1';
1707 v.first := '1';
1708 v.state := SQRT_6;
1709
1710 when SQRT_6 =>
1711 -- pipeline in R = R * P
1712 msel_1 <= MUL1_R;
1713 msel_2 <= MUL2_P;
1714 f_to_multiply.valid <= r.first;
1715 pshift := '1';
1716 if multiply_to_f.valid = '1' then
1717 v.first := '1';
1718 v.state := SQRT_7;
1719 end if;
1720
when SQRT_7 =>
    -- The first of the two pipelined multiplies has finished: latch
    -- its product into Y (on the first cycle in this state only), then
    -- wait for the second product and move it into R.
    pshift := '1';
    msel_2 <= MUL2_P;
    set_y := r.first;
    if multiply_to_f.valid = '1' then
        opsel_r <= RES_MULT;
        v.count := r.count + 1;
        v.first := '1';
        -- loop back through SQRT_4 while the iteration count (before
        -- this increment) is below 2, then go on to the remainder step
        v.state := SQRT_8;
        if r.count < 2 then
            v.state := SQRT_4;
        end if;
    end if;
1739
1740 when SQRT_8 =>
1741 -- compute P = A - R * R, which can be +ve or -ve
1742 -- we arranged for B to be put into A earlier
1743 msel_1 <= MUL1_R;
1744 msel_2 <= MUL2_R;
1745 msel_add <= MULADD_A;
1746 msel_inv <= '1';
1747 pshift := '1';
1748 f_to_multiply.valid <= r.first;
1749 if multiply_to_f.valid = '1' then
1750 v.first := '1';
1751 v.state := SQRT_9;
1752 end if;
1753
1754 when SQRT_9 =>
1755 -- compute P = P * Y
1756 -- since Y is an estimate of 1/sqrt(B), this makes P an
1757 -- estimate of the adjustment needed to R. Since the error
1758 -- could be negative and we have an unsigned multiplier, the
1759 -- upper bits can be wrong, but it turns out the lowest 8 bits
1760 -- are correct and are all we need (given 3 iterations through
1761 -- SQRT_4 to SQRT_7).
1762 msel_1 <= MUL1_Y;
1763 msel_2 <= MUL2_P;
1764 pshift := '1';
1765 f_to_multiply.valid <= r.first;
1766 if multiply_to_f.valid = '1' then
1767 v.state := SQRT_10;
1768 end if;
1769
1770 when SQRT_10 =>
1771 -- Add the bottom 8 bits of P, sign-extended,
1772 -- divided by 4, onto R.
1773 -- The division by 4 is because R is 10.54 format
1774 -- whereas P is 8.56 format.
1775 opsel_b <= BIN_PS6;
1776 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1777 v.result_exp := sqrt_exp;
1778 v.shift := to_signed(1, EXP_BITS);
1779 v.first := '1';
1780 v.state := SQRT_11;
1781
1782 when SQRT_11 =>
1783 -- compute P = A - R * R (remainder)
1784 -- also put 2 * R + 1 into B for comparison with P
1785 msel_1 <= MUL1_R;
1786 msel_2 <= MUL2_R;
1787 msel_add <= MULADD_A;
1788 msel_inv <= '1';
1789 f_to_multiply.valid <= r.first;
1790 shiftin := '1';
1791 set_b := r.first;
1792 if multiply_to_f.valid = '1' then
1793 v.state := SQRT_12;
1794 end if;
1795
when SQRT_12 =>
    -- Final square-root correction: P holds the remainder A - R * R
    -- and B holds 2 * R + 1.
    v.state := FINISH;
    if pcmpb_lt = '0' then
        -- remainder >= 2*R + 1: the root is one too small; the result
        -- is inexact unless the remainder was exactly 2*R + 1
        carry_in <= '1';
        v.x := not pcmpb_eq;
    else
        -- root stands, inexact if the remainder is non-zero
        v.x := r.p(58) or px_nz;
    end if;
1807
1808 when INT_SHIFT =>
1809 opsel_r <= RES_SHIFT;
1810 set_x := '1';
1811 v.state := INT_ROUND;
1812 v.shift := to_signed(-2, EXP_BITS);
1813
1814 when INT_ROUND =>
1815 opsel_r <= RES_SHIFT;
1816 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
1817 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
1818 -- Check for negative values that don't round to 0 for fcti*u*
1819 if r.insn(8) = '1' and r.result_sign = '1' and
1820 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
1821 v.state := INT_OFLOW;
1822 else
1823 v.state := INT_FINAL;
1824 end if;
1825
1826 when INT_ISHIFT =>
1827 opsel_r <= RES_SHIFT;
1828 v.state := INT_FINAL;
1829
1830 when INT_FINAL =>
1831 -- Negate if necessary, and increment for rounding if needed
1832 opsel_ainv <= r.result_sign;
1833 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
1834 -- Check for possible overflows
1835 case r.insn(9 downto 8) is
1836 when "00" => -- fctiw[z]
1837 need_check := r.r(31) or (r.r(30) and not r.result_sign);
1838 when "01" => -- fctiwu[z]
1839 need_check := r.r(31);
1840 when "10" => -- fctid[z]
1841 need_check := r.r(63) or (r.r(62) and not r.result_sign);
1842 when others => -- fctidu[z]
1843 need_check := r.r(63);
1844 end case;
1845 if need_check = '1' then
1846 v.state := INT_CHECK;
1847 else
1848 if r.fpscr(FPSCR_FI) = '1' then
1849 v.fpscr(FPSCR_XX) := '1';
1850 end if;
1851 arith_done := '1';
1852 end if;
1853
1854 when INT_CHECK =>
1855 if r.insn(9) = '0' then
1856 msb := r.r(31);
1857 else
1858 msb := r.r(63);
1859 end if;
1860 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
1861 if (r.insn(8) = '0' and msb /= r.result_sign) or
1862 (r.insn(8) = '1' and msb /= '1') then
1863 opsel_r <= RES_MISC;
1864 v.fpscr(FPSCR_VXCVI) := '1';
1865 invalid := '1';
1866 else
1867 if r.fpscr(FPSCR_FI) = '1' then
1868 v.fpscr(FPSCR_XX) := '1';
1869 end if;
1870 end if;
1871 arith_done := '1';
1872
1873 when INT_OFLOW =>
1874 opsel_r <= RES_MISC;
1875 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
1876 if r.b.class = NAN then
1877 misc_sel(0) <= '1';
1878 end if;
1879 v.fpscr(FPSCR_VXCVI) := '1';
1880 invalid := '1';
1881 arith_done := '1';
1882
1883 when FRI_1 =>
1884 opsel_r <= RES_SHIFT;
1885 set_x := '1';
1886 v.shift := to_signed(-2, EXP_BITS);
1887 v.state := ROUNDING;
1888
when FINISH =>
    -- Common exit for arithmetic results held in R.  For a multiply,
    -- any non-zero low bits left in P make the result inexact.
    if r.is_multiply = '1' then
        if px_nz = '1' then
            v.x := '1';
        end if;
    end if;
    if r.r(63 downto 54) = "0000000001" then
        -- already normalized (leading one in bit 54): go straight to
        -- the underflow / overflow / rounding decision
        set_x := '1';
        if exp_tiny = '1' then
            v.shift := new_exp - min_exp;
            v.state := ROUND_UFLOW;
        elsif exp_huge = '1' then
            v.state := ROUND_OFLOW;
        else
            v.shift := to_signed(-2, EXP_BITS);
            v.state := ROUNDING;
        end if;
    else
        renormalize := '1';
        v.state := NORMALIZE;
    end if;
1908
when NORMALIZE =>
    -- Shift so we have 9 leading zeroes (we know R is non-zero), then
    -- choose the next step from the exponent range flags.
    set_x := '1';
    opsel_r <= RES_SHIFT;
    if exp_tiny = '1' then
        -- too small: denormalize by the distance below min_exp
        v.state := ROUND_UFLOW;
        v.shift := new_exp - min_exp;
    elsif exp_huge = '1' then
        v.state := ROUND_OFLOW;
    else
        v.state := ROUNDING;
        v.shift := to_signed(-2, EXP_BITS);
    end if;
1922
-- ROUND_UFLOW: result exponent is below the minimum. Marks the result as
-- tiny (ROUNDING uses r.tiny to raise UX when the result is inexact) and
-- branches on whether the underflow exception is enabled (FPSCR UE).
1923 when ROUND_UFLOW =>
1924 v.tiny := '1';
1925 if r.fpscr(FPSCR_UE) = '0' then
1926 -- disabled underflow exception case
1927 -- have to denormalize before rounding
-- The denormalizing shift count was loaded into r.shift by the
-- previous state; bits shifted out are captured via set_x.
1928 opsel_r <= RES_SHIFT;
1929 set_x := '1';
1930 v.shift := to_signed(-2, EXP_BITS);
1931 v.state := ROUNDING;
1932 else
1933 -- enabled underflow exception case
1934 -- if denormalized, have to normalize before rounding
-- UX is raised unconditionally here and the exponent is adjusted
-- upwards by bias_exp before rounding.
1935 v.fpscr(FPSCR_UX) := '1';
1936 v.result_exp := r.result_exp + bias_exp;
1937 if r.r(54) = '0' then
1938 renormalize := '1';
1939 v.state := NORMALIZE;
1940 else
1941 v.shift := to_signed(-2, EXP_BITS);
1942 v.state := ROUNDING;
1943 end if;
1944 end if;
1945
-- ROUND_OFLOW: result exponent is above the maximum. Always raises OX,
-- then branches on whether the overflow exception is enabled (FPSCR OE).
1946 when ROUND_OFLOW =>
1947 v.fpscr(FPSCR_OX) := '1';
1948 if r.fpscr(FPSCR_OE) = '0' then
1949 -- disabled overflow exception
1950 -- result depends on rounding mode
1951 v.fpscr(FPSCR_XX) := '1';
1952 v.fpscr(FPSCR_FI) := '1';
-- Infinity is produced for mode "00", or for modes "1x" when the
-- low mode bit equals the result sign; FR is set in that case.
-- NOTE(review): assumes round_mode(1 downto 0) carries the FPSCR RN
-- encoding (00 nearest, 10 toward +inf, 11 toward -inf) -- confirm.
1953 if r.round_mode(1 downto 0) = "00" or
1954 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
1955 v.result_class := INFINITY;
1956 v.fpscr(FPSCR_FR) := '1';
1957 else
1958 v.fpscr(FPSCR_FR) := '0';
1959 end if;
1960 -- construct largest representable number
-- Exponent and mantissa are loaded in both cases; misc_sel becomes
-- "0010" (DP) or "0011" (SP) depending on single_prec.
1961 v.result_exp := max_exp;
1962 opsel_r <= RES_MISC;
1963 misc_sel <= "001" & r.single_prec;
1964 arith_done := '1';
1965 else
1966 -- enabled overflow exception
-- Adjust the exponent downwards by bias_exp and round normally.
1967 v.result_exp := r.result_exp - bias_exp;
1968 v.shift := to_signed(-2, EXP_BITS);
1969 v.state := ROUNDING;
1970 end if;
1971
-- ROUNDING: first rounding step. The A input is R with the bits under the
-- mask cleared (opsel_amask); fp_rounding returns two bits that are stored
-- to FPSCR FR..FI, so round(1) is the "increment" decision and round(0)
-- the "inexact" indication.
1972 when ROUNDING =>
1973 opsel_amask <= '1';
1974 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
1975 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
1976 if round(1) = '1' then
1977 -- set mask to increment the LSB for the precision
-- B input = mask with carry-in '1', so the adder contributes mask + 1.
-- NOTE(review): relies on opsel_r defaulting to RES_SUM and opsel_a to
-- AIN_R; the defaults are set outside this section -- confirm.
1978 opsel_b <= BIN_MASK;
1979 carry_in <= '1';
-- Shift of -1 is used by ROUNDING_2 if the increment carries into bit 55.
1980 v.shift := to_signed(-1, EXP_BITS);
1981 v.state := ROUNDING_2;
1982 else
1983 if r.r(54) = '0' then
1984 -- result after masking could be zero, or could be a
1985 -- denormalized result that needs to be renormalized
1986 renormalize := '1';
1987 v.state := ROUNDING_3;
1988 else
1989 arith_done := '1';
1990 end if;
1991 end if;
-- Inexact result: raise XX, and UX as well if the result was tiny.
1992 if round(0) = '1' then
1993 v.fpscr(FPSCR_XX) := '1';
1994 if r.tiny = '1' then
1995 v.fpscr(FPSCR_UX) := '1';
1996 end if;
1997 end if;
1998
-- ROUNDING_2: examine R after the rounding increment performed in ROUNDING.
1999 when ROUNDING_2 =>
2000 -- Check for overflow during rounding
2001 v.x := '0';
2002 if r.r(55) = '1' then
-- The increment carried out of bit 54: take the shifter output
-- (ROUNDING left r.shift at -1); this may push the exponent out of range.
2003 opsel_r <= RES_SHIFT;
2004 if exp_huge = '1' then
2005 v.state := ROUND_OFLOW;
2006 else
2007 arith_done := '1';
2008 end if;
2009 elsif r.r(54) = '0' then
2010 -- Do CLZ so we can renormalize the result
2011 renormalize := '1';
2012 v.state := ROUNDING_3;
2013 else
2014 arith_done := '1';
2015 end if;
2016
-- ROUNDING_3: the rounded result has bit 54 clear, so it is either zero or
-- denormalized.
2017 when ROUNDING_3 =>
-- The low part of R only counts towards "nonzero" for double precision.
2018 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2019 if mant_nz = '0' then
2020 v.result_class := ZERO;
2021 if r.is_subtract = '1' then
2022 -- set result sign depending on rounding mode
-- i.e. negative only when both low round_mode bits are set
2023 v.result_sign := r.round_mode(1) and r.round_mode(0);
2024 end if;
2025 arith_done := '1';
2026 else
2027 -- Renormalize result after rounding
-- Apply the clz-based shift now; if the resulting exponent would be
-- below -1022, load the distance to -1022 as the next shift count and
-- shift back in DENORM.
2028 opsel_r <= RES_SHIFT;
2029 v.denorm := exp_tiny;
2030 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2031 if new_exp < to_signed(-1022, EXP_BITS) then
2032 v.state := DENORM;
2033 else
2034 arith_done := '1';
2035 end if;
2036 end if;
2037
-- DENORM: apply the shift count loaded by ROUNDING_3 and finish the
-- instruction.
2038 when DENORM =>
2039 opsel_r <= RES_SHIFT;
2040 arith_done := '1';
2041
2042 end case;
2043
-- Common completion handling, evaluated after the state machine case.
-- A zero-divide condition flagged by any state sets FPSCR ZX.
2044 if zero_divide = '1' then
2045 v.fpscr(FPSCR_ZX) := '1';
2046 end if;
-- A generated quiet NaN overrides whatever result the state selected:
-- positive NaN whose mantissa is misc table entry "0001", and the
-- operation is marked invalid.
2047 if qnan_result = '1' then
2048 invalid := '1';
2049 v.result_class := NAN;
2050 v.result_sign := '0';
2051 misc_sel <= "0001";
2052 opsel_r <= RES_MISC;
2053 end if;
-- End of an arithmetic instruction: return to IDLE, mark the instruction
-- done and request the FX update performed further down.
2054 if arith_done = '1' then
2055 -- Enabled invalid exception doesn't write result or FPRF
2056 -- Neither does enabled zero-divide exception
2057 if (invalid and r.fpscr(FPSCR_VE)) = '0' and
2058 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2059 v.writing_back := '1';
2060 v.update_fprf := '1';
2061 end if;
2062 v.instr_done := '1';
2063 v.state := IDLE;
2064 update_fx := '1';
2065 end if;
2066
2067 -- Multiplier and divide/square root data path
-- First multiplier operand: A or B mantissa, the Y register, or (default)
-- R. Mantissa and R sources are shifted left by two bits.
2068 case msel_1 is
2069 when MUL1_A =>
2070 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2071 when MUL1_B =>
2072 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2073 when MUL1_Y =>
2074 f_to_multiply.data1 <= r.y;
2075 when others =>
2076 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2077 end case;
-- Second multiplier operand: C mantissa, the lookup-table estimate
-- (inverse_est placed below 8 zero bits), the P register, or (default) R.
2078 case msel_2 is
2079 when MUL2_C =>
2080 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2081 when MUL2_LUT =>
2082 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2083 when MUL2_P =>
2084 f_to_multiply.data2 <= r.p;
2085 when others =>
2086 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2087 end case;
-- Addend for the multiply-add; zero unless msel_add selects otherwise.
2088 maddend := (others => '0');
2089 case msel_add is
2090 when MULADD_CONST =>
2091 -- addend is 2.0 or 1.5 in 16.112 format
-- (bit 112 has weight 1.0, so bit 113 is 2.0 and bits 112..111 are 1.5)
2092 if r.is_sqrt = '0' then
2093 maddend(113) := '1'; -- 2.0
2094 else
2095 maddend(112 downto 111) := "11"; -- 1.5
2096 end if;
2097 when MULADD_A =>
2098 -- addend is A in 16.112 format
-- (mantissa bit 54, weight 1.0 in 10.54 format, lands on bit 112)
2099 maddend(121 downto 58) := r.a.mantissa;
2100 when others =>
2101 end case;
-- msel_inv complements both the addend and the multiplier result.
2102 if msel_inv = '1' then
2103 f_to_multiply.addend <= not maddend;
2104 else
2105 f_to_multiply.addend <= maddend;
2106 end if;
2107 f_to_multiply.not_result <= msel_inv;
-- Optionally latch the second operand into Y.
-- NOTE(review): this reads the f_to_multiply signal assigned above in the
-- same process, so it depends on the process being re-evaluated when that
-- signal changes -- confirm against the sensitivity list.
2108 if set_y = '1' then
2109 v.y := f_to_multiply.data2;
2110 end if;
-- Capture the product into P when the multiplier reports a valid result:
-- either the low 64 bits or bits 119..56, selected by pshift.
2111 if multiply_to_f.valid = '1' then
2112 if pshift = '0' then
2113 v.p := multiply_to_f.result(63 downto 0);
2114 else
2115 v.p := multiply_to_f.result(119 downto 56);
2116 end if;
2117 end if;
2118
2119 -- Data path.
2120 -- This has A and B input multiplexers, an adder, a shifter,
2121 -- count-leading-zeroes logic, and a result mux.
-- Mask generation: the mask is derived from the current shift count,
-- lowered by a further 29 when longmask is set.
-- NOTE(review): longmask is driven outside this section; presumably it
-- widens the mask for single precision -- confirm.
2122 if longmask = '1' then
2123 mshift := r.shift + to_signed(-29, EXP_BITS);
2124 else
2125 mshift := r.shift;
2126 end if;
-- Below -64 the mask is all ones, at 0 or above it is all zeroes;
-- in between it comes from right_mask on the low six bits of mshift.
2127 if mshift < to_signed(-64, EXP_BITS) then
2128 mask := (others => '1');
2129 elsif mshift >= to_signed(0, EXP_BITS) then
2130 mask := (others => '0');
2131 else
2132 mask := right_mask(unsigned(mshift(5 downto 0)));
2133 end if;
-- A input multiplexer: R, or the mantissa of operand A, B or (default) C.
2134 case opsel_a is
2135 when AIN_R =>
2136 in_a0 := r.r;
2137 when AIN_A =>
2138 in_a0 := r.a.mantissa;
2139 when AIN_B =>
2140 in_a0 := r.b.mantissa;
2141 when others =>
2142 in_a0 := r.c.mantissa;
2143 end case;
-- Sticky bit: when set_x is requested, X is set if any bit of the selected
-- value lies under the mask. This is evaluated on the raw value, before
-- the optional inversion and masking below.
2144 if (or (mask and in_a0)) = '1' and set_x = '1' then
2145 v.x := '1';
2146 end if;
-- Optional one's complement of the A input.
2147 if opsel_ainv = '1' then
2148 in_a0 := not in_a0;
2149 end if;
-- Optionally clear the bits under the mask.
2150 if opsel_amask = '1' then
2151 in_a0 := in_a0 and not mask;
2152 end if;
2153 in_a <= in_a0;
-- B input multiplexer: zero, R, the mask, or (default) a small signed
-- value taken from P; optionally inverted by opsel_binv.
2154 case opsel_b is
2155 when BIN_ZERO =>
2156 in_b0 := (others => '0');
2157 when BIN_R =>
2158 in_b0 := r.r;
2159 when BIN_MASK =>
2160 in_b0 := mask;
2161 when others =>
2162 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2163 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2164 end case;
2165 if opsel_binv = '1' then
2166 in_b0 := not in_b0;
2167 end if;
2168 in_b <= in_b0;
-- Shifter: for shift counts in -64..63, shifter_64 is applied to R
-- extended on the right with shiftin and 55 zero bits, using the low seven
-- bits of r.shift; any other shift count yields an all-zero result.
2169 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2170 shift_res := shifter_64(r.r & shiftin & 55x"00000000000000",
2171 std_ulogic_vector(r.shift(6 downto 0)));
2172 else
2173 shift_res := (others => '0');
2174 end if;
-- Result multiplexer: selects the next value of R from the adder
-- (A + B + carry_in), the shifter, bits 121..58 of the multiplier result,
-- or (default) a constant/miscellaneous value chosen by misc_sel.
2175 case opsel_r is
2176 when RES_SUM =>
2177 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2178 when RES_SHIFT =>
2179 result <= shift_res;
2180 when RES_MULT =>
2181 result <= multiply_to_f.result(121 downto 58);
2182 when others =>
-- Miscellaneous values. Codes "1xxx" are the integer-conversion
-- saturation constants, indexed as '1' & insn(9 downto 8) & sign by
-- INT_CHECK and INT_OFLOW.
2183 case misc_sel is
2184 when "0000" =>
-- FPSCR contents, masked by fpscr_mask, in the low 32 bits
2185 misc := x"00000000" & (r.fpscr and fpscr_mask);
2186 when "0001" =>
2187 -- generated QNaN mantissa
2188 misc := x"0020000000000000";
2189 when "0010" =>
2190 -- mantissa of max representable DP number
2191 misc := x"007ffffffffffffc";
2192 when "0011" =>
2193 -- mantissa of max representable SP number
2194 misc := x"007fffff80000000";
2195 when "0100" =>
2196 -- fmrgow result
-- (low words of A and B)
2197 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2198 when "0110" =>
2199 -- fmrgew result
-- (high words of A and B)
2200 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2201 when "0111" =>
-- lookup-table estimate, placed below 10 zero bits
2202 misc := 10x"000" & inverse_est & 35x"000000000";
2203 when "1000" =>
2204 -- max positive result for fctiw[z]
2205 misc := x"000000007fffffff";
2206 when "1001" =>
2207 -- max negative result for fctiw[z]
2208 misc := x"ffffffff80000000";
2209 when "1010" =>
2210 -- max positive result for fctiwu[z]
2211 misc := x"00000000ffffffff";
2212 when "1011" =>
2213 -- max negative result for fctiwu[z]
2214 misc := x"0000000000000000";
2215 when "1100" =>
2216 -- max positive result for fctid[z]
2217 misc := x"7fffffffffffffff";
2218 when "1101" =>
2219 -- max negative result for fctid[z]
2220 misc := x"8000000000000000";
2221 when "1110" =>
2222 -- max positive result for fctidu[z]
2223 misc := x"ffffffffffffffff";
2224 when "1111" =>
2225 -- max negative result for fctidu[z]
2226 misc := x"0000000000000000";
2227 when others =>
-- unlisted codes (e.g. "0101") yield zero
2228 misc := x"0000000000000000";
2229 end case;
2230 result <= misc;
2231 end case;
-- R is always reloaded from the result signal.
2232 v.r := result;
2233
-- Operand write-back: when requested, replace the exponent and mantissa
-- of A, B or C with new_exp and the shifter output.
2234 if set_a = '1' then
2235 v.a.exponent := new_exp;
2236 v.a.mantissa := shift_res;
2237 end if;
2238 if set_b = '1' then
2239 v.b.exponent := new_exp;
2240 v.b.mantissa := shift_res;
2241 end if;
2242 if set_c = '1' then
2243 v.c.exponent := new_exp;
2244 v.c.mantissa := shift_res;
2245 end if;
2246
-- Whenever R takes the shifter output, the result exponent is replaced by
-- new_exp. Being later in the process, this overrides any v.result_exp
-- assignment made by a state that also selected RES_SHIFT.
2247 if opsel_r = RES_SHIFT then
2248 v.result_exp := new_exp;
2249 end if;
2250
-- Renormalization request: load the next shift count with the number of
-- leading zeroes in R minus 9, so the following shift leaves exactly nine
-- leading zeroes (leading one at bit 54). This overrides any v.shift
-- assigned earlier in the same pass.
2251 if renormalize = '1' then
2252 clz := count_left_zeroes(r.r);
2253 if renorm_sqrt = '1' then
2254 -- make denormalized value end up with even exponent
2255 clz(0) := '1';
2256 end if;
2257 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2258 end if;
2259
-- Result formatting: integer results pass R through unchanged; otherwise
-- the sign, class, exponent and mantissa are packed by pack_dp.
2260 if r.int_result = '1' then
2261 fp_result <= r.r;
2262 else
2263 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2264 r.single_prec, r.quieten_nan);
2265 end if;
-- FPRF update (FPSCR C..FU) uses the registered result, i.e. it happens
-- in the cycle after update_fprf was requested. The third argument is '1'
-- when bit 54 of R is set and the result was not flagged as denormal.
2266 if r.update_fprf = '1' then
2267 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2268 r.r(54) and not r.denorm);
2269 end if;
2270
-- FPSCR summary bits, recomputed from the updated FPSCR value:
-- VX is the OR of all invalid-operation cause bits (two ranges).
2271 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2272 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
-- FEX is set when any exception bit VX..XX is set together with its
-- corresponding enable bit VE..XE.
2273 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2274 v.fpscr(FPSCR_VE downto FPSCR_XE));
-- FX is set (never cleared here) when an exception bit is set now that
-- was not set in r.old_exc, but only when update_fx is requested.
2275 if update_fx = '1' and
2276 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2277 v.fpscr(FPSCR_FX) := '1';
2278 end if;
-- Record-form instructions copy FPSCR FX..OX into the CR result.
2279 if r.rc = '1' then
2280 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2281 end if;
2282
-- Final control outputs.
-- An illegal instruction cancels everything and returns the FPU to IDLE
-- without completing, interrupting, writing back or staying busy.
2283 if illegal = '1' then
2284 v.instr_done := '0';
2285 v.do_intr := '0';
2286 v.writing_back := '0';
2287 v.busy := '0';
2288 v.state := IDLE;
2289 else
-- Raise an interrupt when an instruction completes with FEX set while
-- fe_mode is on.
2290 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
-- Busy is set while a multi-cycle operation is in flight or an
-- interrupt is being raised; it is not cleared here.
2291 if v.state /= IDLE or v.do_intr = '1' then
2292 v.busy := '1';
2293 end if;
2294 end if;
2295
2296 rin <= v;
2297 e_out.illegal <= illegal;
2298 end process;
2299
2300 end architecture behaviour;