FPU: Add comments specifying the expectation of r.shift for each state
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Top-level interface of the floating-point unit.
-- The unit accepts a new instruction only while its state machine is idle
-- (an assertion in the clocked process fails if e_in.valid arrives otherwise).
14 entity fpu is
15 port (
16 clk : in std_ulogic; -- all state is updated on the rising edge
17 rst : in std_ulogic; -- synchronous reset, sampled on the rising edge of clk
18
19 e_in : in Execute1toFPUType; -- incoming instruction: valid, insn, op, fra/frb/frc operands, frt, rc, ...
20 e_out : out FPUToExecute1Type; -- busy, exception (FPSCR FEX bit) and interrupt status
21
22 w_out : out FPUToWritebackType -- result register write and CR write-back
23 );
24 end entity fpu;
25
26 architecture behaviour of fpu is
-- Classification of an operand or result, as assigned by decode_dp.
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
28
-- Width in bits of every internal (signed) exponent value.
29 constant EXP_BITS : natural := 13;
30
-- Unpacked form of one floating-point (or integer) operand.
31 type fpu_reg_type is record
32 class : fp_number_class; -- denormals are classified as FINITE
33 negative : std_ulogic; -- copy of bit 63 of the source register
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased; -1022 when the exponent field is zero
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format: unit bit at 54, fraction in 53..2
36 end record;
37
-- States of the FPU control state machine.
-- IDLE is the reset state; while in IDLE a valid instruction is decoded from
-- e_in.insn(5 downto 1) (plus a few other opcode bits) into one of the DO_*
-- entry states. The remaining states are presumably the later steps of the
-- multi-cycle operations; their transitions lie outside this declaration.
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
41 DO_FCFID, DO_FCTI,
42 DO_FRSP, DO_FRI,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
44 DO_FRE, DO_FRSQRTE,
45 DO_FSEL,
46 FRI_1,
47 ADD_SHIFT, ADD_2, ADD_3,
48 CMP_1, CMP_2,
49 MULT_1,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
52 LOOKUP,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
54 FRE_1,
55 RSQRT_1,
56 FTDIV_1,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
62 FINISH, NORMALIZE,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
65 DENORM,
66 RENORM_A, RENORM_A2,
67 RENORM_B, RENORM_B2,
68 RENORM_C, RENORM_C2);
69
-- Complete registered state of the FPU. The combinatorial process computes
-- the next value and the clocked process copies it into the register on each
-- rising clock edge when not in reset.
70 type reg_type is record
71 state : state_t; -- reset to IDLE
72 busy : std_ulogic; -- drives e_out.busy; next value defaults to '0' every cycle
73 instr_done : std_ulogic; -- w_out.valid is instr_done and not do_intr
74 do_intr : std_ulogic; -- drives e_out.interrupt
75 op : insn_type_t; -- captured from e_in.op
76 insn : std_ulogic_vector(31 downto 0); -- captured from e_in.insn
77 dest_fpr : gspr_index_t; -- captured from e_in.frt; drives w_out.write_reg
78 fe_mode : std_ulogic; -- OR of the bits of e_in.fe_mode
79 rc : std_ulogic; -- captured from e_in.rc
80 is_cmp : std_ulogic; -- captured from e_in.out_cr
81 single_prec : std_ulogic; -- captured from e_in.single
82 fpscr : std_ulogic_vector(31 downto 0); -- cleared on reset; bit FPSCR_FEX drives e_out.exception
83 a : fpu_reg_type; -- decoded e_in.fra
84 b : fpu_reg_type; -- decoded e_in.frb
85 c : fpu_reg_type; -- decoded e_in.frc
86 r : std_ulogic_vector(63 downto 0); -- 10.54 format
87 s : std_ulogic_vector(55 downto 0); -- extended fraction
88 x : std_ulogic;
89 p : std_ulogic_vector(63 downto 0); -- 8.56 format
90 y : std_ulogic_vector(63 downto 0); -- 8.56 format
91 result_sign : std_ulogic;
92 result_class : fp_number_class;
93 result_exp : signed(EXP_BITS-1 downto 0);
94 shift : signed(EXP_BITS-1 downto 0); -- next value defaults to 0 every cycle; new_exp = result_exp - shift
95 writing_back : std_ulogic; -- drives w_out.write_enable
96 int_result : std_ulogic; -- cleared when a new instruction is captured
97 cr_result : std_ulogic_vector(3 downto 0); -- replicated into all eight fields of w_out.write_cr_data
98 cr_mask : std_ulogic_vector(7 downto 0); -- drives w_out.write_cr_mask
99 old_exc : std_ulogic_vector(4 downto 0);
100 update_fprf : std_ulogic;
101 quieten_nan : std_ulogic; -- set to '1' when a new instruction is captured
102 tiny : std_ulogic;
103 denorm : std_ulogic;
104 round_mode : std_ulogic_vector(2 downto 0); -- '0' & FPSCR RN field at capture time
105 is_subtract : std_ulogic;
106 exp_cmp : std_ulogic; -- '1' when a.exponent > b.exponent at capture time
107 madd_cmp : std_ulogic; -- '1' when a.exponent + c.exponent + 1 >= b.exponent at capture time
108 add_bsmall : std_ulogic;
109 is_multiply : std_ulogic;
110 is_sqrt : std_ulogic; -- selects the reciprocal-square-root part of the lookup table
111 first : std_ulogic;
112 count : unsigned(1 downto 0);
113 doing_ftdiv : std_ulogic_vector(1 downto 0); -- bit 1 lowers max_exp to 1020, bit 0 raises min_exp to -1021
114 end record;
115
-- Storage type of the inverse-estimate table: 1024 entries of 18 bits.
116 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
117
-- r is the registered state, rin the next-state value loaded into r each cycle.
118 signal r, rin : reg_type;
119
120 signal fp_result : std_ulogic_vector(63 downto 0); -- drives w_out.write_data
-- Datapath selects; the combinatorial process sets a default for each of
-- them every cycle (see the opsel/msel constants for the encodings).
121 signal opsel_a : std_ulogic_vector(1 downto 0);
122 signal opsel_b : std_ulogic_vector(1 downto 0);
123 signal opsel_r : std_ulogic_vector(1 downto 0);
124 signal opsel_s : std_ulogic_vector(1 downto 0);
125 signal opsel_ainv : std_ulogic;
126 signal opsel_amask : std_ulogic;
127 signal opsel_binv : std_ulogic;
128 signal in_a : std_ulogic_vector(63 downto 0);
129 signal in_b : std_ulogic_vector(63 downto 0);
130 signal result : std_ulogic_vector(63 downto 0);
131 signal carry_in : std_ulogic;
132 signal lost_bits : std_ulogic;
133 signal r_hi_nz : std_ulogic; -- OR of r.r(55 downto 31)
134 signal r_lo_nz : std_ulogic; -- OR of r.r(30 downto 2)
135 signal s_nz : std_ulogic; -- OR of all bits of r.s
136 signal misc_sel : std_ulogic_vector(3 downto 0);
137 signal f_to_multiply : MultiplyInputType; -- input of the fpu_multiply_0 instance
138 signal multiply_to_f : MultiplyOutputType; -- output of the fpu_multiply_0 instance
139 signal msel_1 : std_ulogic_vector(1 downto 0);
140 signal msel_2 : std_ulogic_vector(1 downto 0);
141 signal msel_add : std_ulogic_vector(1 downto 0);
142 signal msel_inv : std_ulogic;
143 signal inverse_est : std_ulogic_vector(18 downto 0); -- registered table output with the implied leading '1' prepended
144
145 -- opsel values
-- Encodings for opsel_a (default AIN_R)
146 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
147 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
148 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
149 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
150
-- Encodings for opsel_b (default BIN_ZERO)
151 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
152 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
153 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
154 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
155
-- Encodings for opsel_r (default RES_SUM)
156 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
157 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
158 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
159 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
160
-- Encodings for opsel_s (default S_ZERO)
161 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
162 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
163 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
164 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
165
166 -- msel values
-- Encodings for msel_1 (default MUL1_A)
167 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
168 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
169 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
170 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
171
-- Encodings for msel_2 (default MUL2_C)
172 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
173 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
174 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
175 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
176
-- Encodings for msel_add (default MULADD_ZERO)
177 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
178 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
179 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
180 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
181
182 -- Inverse lookup table, indexed by the top 8 fraction bits
183 -- The first 256 entries are the reciprocal (1/x) lookup table,
184 -- and the remaining 768 entries are the reciprocal square root table.
185 -- Output range is [0.5, 1) in 0.19 format, though the top
186 -- bit isn't stored since it is always 1.
187 -- Each output value is the inverse of the center of the input
188 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
189 -- entry 1 is 1 / (1 + 3/512), etc.
190 signal inverse_table : lookup_table := (
191 -- 1/x lookup table
192 -- Unit bit is assumed to be 1, so input range is [1, 2)
193 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
194 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
195 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
196 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
197 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
198 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
199 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
200 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
201 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
202 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
203 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
204 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
205 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
206 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
207 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
208 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
209 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
210 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
211 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
212 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
213 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
214 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
215 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
216 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
217 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
218 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
219 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
220 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
221 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
222 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
223 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
224 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
225 -- 1/sqrt(x) lookup table
226 -- Input is in the range [1, 4), i.e. two bits to the left of the
227 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
228 -- 1.0 ... 1.9999
229 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
230 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
231 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
232 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
233 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
234 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
235 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
236 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
237 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
238 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
239 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
240 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
241 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
242 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
243 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
244 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
245 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
246 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
247 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
248 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
249 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
250 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
251 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
252 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
253 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
254 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
255 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
256 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
257 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
258 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
259 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
260 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
261 -- 2.0 ... 2.9999
262 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
263 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
264 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
265 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
266 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
267 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
268 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
269 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
270 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
271 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
272 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
273 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
274 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
275 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
276 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
277 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
278 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
279 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
280 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
281 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
282 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
283 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
284 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
285 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
286 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
287 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
288 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
289 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
290 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
291 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
292 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
293 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
294 -- 3.0 ... 3.9999
295 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
296 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
297 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
298 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
299 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
300 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
301 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
302 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
303 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
304 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
305 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
306 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
307 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
308 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
309 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
310 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
311 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
312 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
313 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
314 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
315 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
316 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
317 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
318 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
319 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
320 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
321 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
322 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
323 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
324 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
325 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
326 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
327 );
328
329 -- Left and right shifter with 120 bit input and 64 bit output.
330 -- Shifts inp left by shift bits and returns the upper 64 bits of
331 -- the result. The shift parameter is interpreted as a signed
332 -- number in the range -64..63, with negative values indicating
333 -- right shifts.
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    -- Barrel shifter made of three cascaded multiplexer stages.
    -- A shift of 0 returns inp(119 downto 56); a positive count moves the
    -- 64-bit window towards the low end of inp (left shift) and a negative
    -- count (two's complement, -64..-1) moves it above bit 119, filling
    -- with zeros (right shift).
    constant pad64 : std_ulogic_vector(63 downto 0) := (others => '0');
    constant pad32 : std_ulogic_vector(31 downto 0) := (others => '0');
    constant pad7  : std_ulogic_vector(6 downto 0) := (others => '0');
    variable coarse : std_ulogic_vector(94 downto 0);
    variable medium : std_ulogic_vector(70 downto 0);
    variable fine   : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: shift(6 downto 5) picks a 95-bit window in steps of 32 bits.
    if shift(6 downto 5) = "00" then
        coarse := inp(119 downto 25);
    elsif shift(6 downto 5) = "01" then
        coarse := inp(87 downto 0) & pad7;
    elsif shift(6 downto 5) = "10" then
        coarse := pad64 & inp(119 downto 89);
    else
        coarse := pad32 & inp(119 downto 57);
    end if;

    -- Stage 2: shift(4 downto 3) narrows to 71 bits in steps of 8 bits.
    -- Start from the "11" selection and override it for the other codes.
    medium := coarse(70 downto 0);
    for k in 0 to 2 loop
        if shift(4 downto 3) = std_ulogic_vector(to_unsigned(k, 2)) then
            medium := coarse(94 - 8 * k downto 24 - 8 * k);
        end if;
    end loop;

    -- Stage 3: shift(2 downto 0) selects the final 64 bits in steps of 1 bit.
    -- Start from the "111" selection and override it for the other codes.
    fine := medium(63 downto 0);
    for k in 0 to 6 loop
        if shift(2 downto 0) = std_ulogic_vector(to_unsigned(k, 3)) then
            fine := medium(70 - k downto 7 - k);
        end if;
    end loop;

    return fine;
end;
381
382 -- Generate a mask with 0-bits on the left and 1-bits on the right which
383 -- selects the bits that will be lost in doing a right shift. The shift
384 -- parameter is the bottom 6 bits of a negative shift count,
385 -- indicating a right shift.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    -- Returns a 64-bit mask whose low (64 - shift) bits are '1' and whose
    -- remaining high bits are '0'; shift = 0 therefore yields all ones.
    variable mask : std_ulogic_vector(63 downto 0);
begin
    -- Bit position pos is set when its distance from the top (63 - pos)
    -- is at least the shift value.
    for pos in mask'range loop
        if shift <= 63 - pos then
            mask(pos) := '1';
        else
            mask(pos) := '0';
        end if;
    end loop;
    return mask;
end;
397
398 -- Split a DP floating-point number into components and work out its class.
399 -- If is_int = 1, the input is considered an integer
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    -- Unpacks a 64-bit register value into sign, exponent, mantissa and class.
    -- With is_int = '0' the value is treated as a double-precision number;
    -- with any other value of is_int it is treated as a raw 64-bit integer.
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;   -- some exponent bit is set
    variable exp_all  : std_ulogic;   -- every exponent bit is set
    variable frac_any : std_ulogic;   -- some fraction bit is set
    variable key      : std_ulogic_vector(2 downto 0);
begin
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));
    dec.negative := fpr(63);

    if is_int /= '0' then
        -- Integer operand: the mantissa carries all 64 bits unchanged and
        -- the exponent is zero.
        dec.mantissa := fpr;
        dec.exponent := (others => '0');
        dec.class := ZERO;
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        end if;
    else
        -- Floating-point operand. A zero exponent field (zero or denormal)
        -- uses the minimum exponent; otherwise the bias of 1023 is removed.
        if exp_any = '0' then
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;

        -- 10.54 format: implicit unit bit at position 54, fraction in
        -- bits 53..2, everything else zero.
        dec.mantissa := (others => '0');
        dec.mantissa(54) := exp_any;
        dec.mantissa(53 downto 2) := fpr(51 downto 0);

        key := exp_all & exp_any & frac_any;
        case key is
            when "000" =>
                dec.class := ZERO;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                dec.class := FINITE;
            when "110" =>
                dec.class := INFINITY;
            when others =>
                dec.class := NAN;
        end case;
    end if;
    return dec;
end;
437
438 -- Construct a DP floating-point result from components
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    -- Assembles a 64-bit double-precision image from its components.
    -- mantissa is read at bit positions 54..2, i.e. it is expected in the
    -- 10.54 format used for operands. With single_prec /= '0' the low 29
    -- fraction bits of the result are left at zero.
    variable packed : std_ulogic_vector(63 downto 0);
begin
    packed := (others => '0');
    packed(63) := sign;

    -- Exponent field: all ones for infinity and NaN, biased exponent for a
    -- normalized finite number, zero otherwise (zero and denormals).
    if class = INFINITY or class = NAN then
        packed(62 downto 52) := "11111111111";
    elsif class = FINITE and mantissa(54) = '1' then
        packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
    end if;

    -- Fraction field: only finite numbers and NaNs carry fraction bits.
    if class = FINITE or class = NAN then
        packed(51 downto 29) := mantissa(53 downto 31);
        if single_prec = '0' then
            packed(28 downto 0) := mantissa(30 downto 2);
        end if;
    end if;

    -- For a NaN the top fraction bit can additionally be forced to '1'.
    if class = NAN then
        packed(51) := quieten_nan or mantissa(53);
    end if;

    return packed;
end;
469
470 -- Determine whether to increment when rounding
471 -- Returns rounding_inc & inexact
472 -- Assumes x includes the bottom 29 bits of the mantissa already
473 -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    -- Returns a 2-bit vector: bit 1 = increment the mantissa when rounding,
    -- bit 0 = result is inexact.
    -- rn(1 downto 0) selects the rounding mode ("00" nearest, "01" towards
    -- zero, "10"/"11" directed); rn(2) = '1' disables the tie-to-even rule
    -- in round-to-nearest mode.
    variable tail    : std_ulogic_vector(2 downto 0);  -- two bits below the LSB, then x
    variable low_bit : std_ulogic;                      -- least significant kept bit
    variable inexact : std_ulogic;
    variable verdict : std_ulogic_vector(1 downto 0);
begin
    -- Pick the rounding position for the selected precision.
    if single_prec /= '0' then
        tail := mantissa(30 downto 29) & x;
        low_bit := mantissa(31);
    else
        tail := mantissa(1 downto 0) & x;
        low_bit := mantissa(2);
    end if;

    inexact := or (tail);
    verdict := '0' & inexact;

    if rn(1 downto 0) = "00" then
        -- Round to nearest.
        if tail = "100" and rn(2) = '0' then
            -- Exact tie: round to even.
            verdict(1) := low_bit;
        else
            verdict(1) := tail(2);
        end if;
    elsif rn(1 downto 0) /= "01" then
        -- Directed rounding: increment only when rounding away from zero,
        -- i.e. when rn(0) matches the sign, and only if bits are lost.
        if rn(0) = sign then
            verdict(1) := inexact;
        end if;
    end if;
    -- rn(1 downto 0) = "01" (towards zero) never increments.

    return verdict;
end;
507
508 -- Determine result flags to write into the FPSCR
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
    -- Builds the 5-bit result-flag value for the FPSCR from the sign and
    -- class of a result; unitbit is the unit bit of a finite result, and
    -- the leading flag bit is set when it is '0'.
begin
    if class = ZERO then
        return sign & "0010";
    elsif class = INFINITY then
        return '0' & sign & (not sign) & "01";
    elsif class = NAN then
        return "10001";
    else
        -- FINITE
        return (not unitbit) & sign & (not sign) & "00";
    end if;
end;
523
524 begin
-- Multiplier instance; it is fed from f_to_multiply and its output is
-- returned on multiply_to_f.
fpu_multiply_0: entity work.multiply
    port map (
        m_in  => f_to_multiply,
        m_out => multiply_to_f,
        clk   => clk
        );
531
-- State register. On every rising clock edge either the control fields are
-- reset (synchronous reset; the remaining fields keep their value) or the
-- whole next-state record rin is loaded into r.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            -- A new instruction may only arrive while the FPU is idle.
            assert r.state = IDLE or e_in.valid /= '1' severity failure;
            r <= rin;
        else
            r.state <= IDLE;
            r.fpscr <= (others => '0');
            r.busy <= '0';
            r.instr_done <= '0';
            r.do_intr <= '0';
            r.writing_back <= '0';
        end if;
    end if;
end process;
548
549 -- synchronous reads from lookup table
-- Registered read of the inverse-estimate table. The index is built from
-- B's mantissa: bits 53..46 form the low 8 bits, and the top 2 bits are
-- mantissa bits 55..54 for square-root operations or "00" otherwise, which
-- selects the 1/x part of the table. The implied leading '1' of each entry
-- is prepended to the stored 18 bits.
lut_access: process(clk)
    variable block_sel : std_ulogic_vector(1 downto 0);
    variable index     : unsigned(9 downto 0);
begin
    if rising_edge(clk) then
        block_sel := "00";
        if r.is_sqrt = '1' then
            block_sel := r.b.mantissa(55 downto 54);
        end if;
        index(9 downto 8) := unsigned(block_sel);
        index(7 downto 0) := unsigned(r.b.mantissa(53 downto 46));
        inverse_est <= '1' & inverse_table(to_integer(index));
    end if;
end process;
564
-- Status reported back to execute1
e_out.busy      <= r.busy;
e_out.interrupt <= r.do_intr;
e_out.exception <= r.fpscr(FPSCR_FEX);

-- Register write-back; completion is not signalled as valid when an
-- interrupt is being raised
w_out.valid        <= r.instr_done and not r.do_intr;
w_out.write_enable <= r.writing_back;
w_out.write_reg    <= r.dest_fpr;
w_out.write_data   <= fp_result;

-- CR write-back: the 4-bit result is replicated into all eight CR fields
-- and write_cr_mask selects which of them is written
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_data   <= r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result;
577
578 fpu_1: process(all)
579 variable v : reg_type;
580 variable adec : fpu_reg_type;
581 variable bdec : fpu_reg_type;
582 variable cdec : fpu_reg_type;
583 variable fpscr_mask : std_ulogic_vector(31 downto 0);
584 variable illegal : std_ulogic;
585 variable j, k : integer;
586 variable flm : std_ulogic_vector(7 downto 0);
587 variable int_input : std_ulogic;
588 variable mask : std_ulogic_vector(63 downto 0);
589 variable in_a0 : std_ulogic_vector(63 downto 0);
590 variable in_b0 : std_ulogic_vector(63 downto 0);
591 variable misc : std_ulogic_vector(63 downto 0);
592 variable shift_res : std_ulogic_vector(63 downto 0);
593 variable round : std_ulogic_vector(1 downto 0);
594 variable update_fx : std_ulogic;
595 variable arith_done : std_ulogic;
596 variable invalid : std_ulogic;
597 variable zero_divide : std_ulogic;
598 variable mant_nz : std_ulogic;
599 variable min_exp : signed(EXP_BITS-1 downto 0);
600 variable max_exp : signed(EXP_BITS-1 downto 0);
601 variable bias_exp : signed(EXP_BITS-1 downto 0);
602 variable new_exp : signed(EXP_BITS-1 downto 0);
603 variable exp_tiny : std_ulogic;
604 variable exp_huge : std_ulogic;
605 variable renormalize : std_ulogic;
606 variable clz : std_ulogic_vector(5 downto 0);
607 variable set_x : std_ulogic;
608 variable mshift : signed(EXP_BITS-1 downto 0);
609 variable need_check : std_ulogic;
610 variable msb : std_ulogic;
611 variable is_add : std_ulogic;
612 variable longmask : std_ulogic;
613 variable set_a : std_ulogic;
614 variable set_b : std_ulogic;
615 variable set_c : std_ulogic;
616 variable set_y : std_ulogic;
617 variable set_s : std_ulogic;
618 variable qnan_result : std_ulogic;
619 variable px_nz : std_ulogic;
620 variable pcmpb_eq : std_ulogic;
621 variable pcmpb_lt : std_ulogic;
622 variable pshift : std_ulogic;
623 variable renorm_sqrt : std_ulogic;
624 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
625 variable shiftin : std_ulogic;
626 variable mulexp : signed(EXP_BITS-1 downto 0);
627 variable maddend : std_ulogic_vector(127 downto 0);
628 begin
629 v := r;
630 illegal := '0';
631 v.busy := '0';
632 int_input := '0';
633
634 -- capture incoming instruction
635 if e_in.valid = '1' then
636 v.insn := e_in.insn;
637 v.op := e_in.op;
638 v.fe_mode := or (e_in.fe_mode);
639 v.dest_fpr := e_in.frt;
640 v.single_prec := e_in.single;
641 v.int_result := '0';
642 v.rc := e_in.rc;
643 v.is_cmp := e_in.out_cr;
644 if e_in.out_cr = '0' then
645 v.cr_mask := num_to_fxm(1);
646 else
647 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
648 end if;
649 int_input := '0';
650 if e_in.op = OP_FPOP_I then
651 int_input := '1';
652 end if;
653 v.quieten_nan := '1';
654 v.tiny := '0';
655 v.denorm := '0';
656 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
657 v.is_subtract := '0';
658 v.is_multiply := '0';
659 v.is_sqrt := '0';
660 v.add_bsmall := '0';
661 v.doing_ftdiv := "00";
662
663 adec := decode_dp(e_in.fra, int_input);
664 bdec := decode_dp(e_in.frb, int_input);
665 cdec := decode_dp(e_in.frc, int_input);
666 v.a := adec;
667 v.b := bdec;
668 v.c := cdec;
669
670 v.exp_cmp := '0';
671 if adec.exponent > bdec.exponent then
672 v.exp_cmp := '1';
673 end if;
674 v.madd_cmp := '0';
675 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
676 v.madd_cmp := '1';
677 end if;
678 end if;
679
680 r_hi_nz <= or (r.r(55 downto 31));
681 r_lo_nz <= or (r.r(30 downto 2));
682 s_nz <= or (r.s);
683
684 if r.single_prec = '0' then
685 if r.doing_ftdiv(1) = '0' then
686 max_exp := to_signed(1023, EXP_BITS);
687 else
688 max_exp := to_signed(1020, EXP_BITS);
689 end if;
690 if r.doing_ftdiv(0) = '0' then
691 min_exp := to_signed(-1022, EXP_BITS);
692 else
693 min_exp := to_signed(-1021, EXP_BITS);
694 end if;
695 bias_exp := to_signed(1536, EXP_BITS);
696 else
697 max_exp := to_signed(127, EXP_BITS);
698 min_exp := to_signed(-126, EXP_BITS);
699 bias_exp := to_signed(192, EXP_BITS);
700 end if;
701 new_exp := r.result_exp - r.shift;
702 exp_tiny := '0';
703 exp_huge := '0';
704 if new_exp < min_exp then
705 exp_tiny := '1';
706 end if;
707 if new_exp > max_exp then
708 exp_huge := '1';
709 end if;
710
711 -- Compare P with zero and with B
712 px_nz := or (r.p(57 downto 4));
713 pcmpb_eq := '0';
714 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
715 pcmpb_eq := '1';
716 end if;
717 pcmpb_lt := '0';
718 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
719 pcmpb_lt := '1';
720 end if;
721
722 v.writing_back := '0';
723 v.instr_done := '0';
724 v.update_fprf := '0';
725 v.shift := to_signed(0, EXP_BITS);
726 v.first := '0';
727 opsel_a <= AIN_R;
728 opsel_ainv <= '0';
729 opsel_amask <= '0';
730 opsel_b <= BIN_ZERO;
731 opsel_binv <= '0';
732 opsel_r <= RES_SUM;
733 opsel_s <= S_ZERO;
734 carry_in <= '0';
735 misc_sel <= "0000";
736 fpscr_mask := (others => '1');
737 update_fx := '0';
738 arith_done := '0';
739 invalid := '0';
740 zero_divide := '0';
741 renormalize := '0';
742 set_x := '0';
743 qnan_result := '0';
744 longmask := r.single_prec;
745 set_a := '0';
746 set_b := '0';
747 set_c := '0';
748 set_s := '0';
749 f_to_multiply.is_32bit <= '0';
750 f_to_multiply.valid <= '0';
751 msel_1 <= MUL1_A;
752 msel_2 <= MUL2_C;
753 msel_add <= MULADD_ZERO;
754 msel_inv <= '0';
755 set_y := '0';
756 pshift := '0';
757 renorm_sqrt := '0';
758 shiftin := '0';
759 case r.state is
760 when IDLE =>
761 if e_in.valid = '1' then
762 case e_in.insn(5 downto 1) is
763 when "00000" =>
764 if e_in.insn(8) = '1' then
765 if e_in.insn(6) = '0' then
766 v.state := DO_FTDIV;
767 else
768 v.state := DO_FTSQRT;
769 end if;
770 elsif e_in.insn(7) = '1' then
771 v.state := DO_MCRFS;
772 else
773 v.state := DO_FCMP;
774 end if;
775 when "00110" =>
776 if e_in.insn(10) = '0' then
777 if e_in.insn(8) = '0' then
778 v.state := DO_MTFSB;
779 else
780 v.state := DO_MTFSFI;
781 end if;
782 else
783 v.state := DO_FMRG;
784 end if;
785 when "00111" =>
786 if e_in.insn(8) = '0' then
787 v.state := DO_MFFS;
788 else
789 v.state := DO_MTFSF;
790 end if;
791 when "01000" =>
792 if e_in.insn(9 downto 8) /= "11" then
793 v.state := DO_FMR;
794 else
795 v.state := DO_FRI;
796 end if;
797 when "01100" =>
798 v.state := DO_FRSP;
799 when "01110" =>
800 if int_input = '1' then
801 -- fcfid[u][s]
802 v.state := DO_FCFID;
803 else
804 v.state := DO_FCTI;
805 end if;
806 when "01111" =>
807 v.round_mode := "001";
808 v.state := DO_FCTI;
809 when "10010" =>
810 v.state := DO_FDIV;
811 when "10100" | "10101" =>
812 v.state := DO_FADD;
813 when "10110" =>
814 v.is_sqrt := '1';
815 v.state := DO_FSQRT;
816 when "10111" =>
817 v.state := DO_FSEL;
818 when "11000" =>
819 v.state := DO_FRE;
820 when "11001" =>
821 v.is_multiply := '1';
822 v.state := DO_FMUL;
823 when "11010" =>
824 v.is_sqrt := '1';
825 v.state := DO_FRSQRTE;
826 when "11100" | "11101" | "11110" | "11111" =>
827 v.state := DO_FMADD;
828 when others =>
829 illegal := '1';
830 end case;
831 end if;
832 v.x := '0';
833 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
834 set_s := '1';
835
836 when DO_MCRFS =>
837 j := to_integer(unsigned(insn_bfa(r.insn)));
838 for i in 0 to 7 loop
839 if i = j then
840 k := (7 - i) * 4;
841 v.cr_result := r.fpscr(k + 3 downto k);
842 fpscr_mask(k + 3 downto k) := "0000";
843 end if;
844 end loop;
845 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
846 v.instr_done := '1';
847 v.state := IDLE;
848
when DO_FTDIV =>
    -- ftdiv: classify the operands of a prospective divide and report
    -- the outcome in the CR field.  By default the instruction finishes
    -- this cycle with an all-zero CR field; only the final else branch
    -- below defers completion to state FTDIV_1.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- CR bit 2 (fg_flag in the Power ISA description of ftdiv): A is
    -- infinite, or B is zero, infinite or denormalized.  The mantissa is
    -- held in 10.54 format, so a FINITE operand is denormalized when its
    -- unit bit, mantissa(54), is clear.  Bit 53 is merely the top
    -- fraction bit and is zero for many normalized values (e.g. 1.0).
    if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- CR bit 1 (fe_flag): cases that can be decided from the operand
    -- classes and A's exponent alone.
    if r.a.class = NAN or r.a.class = INFINITY or
        r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
        (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
        v.cr_result(1) := '1';
    else
        -- The remaining exponent-range checks are made in FTDIV_1, which
        -- uses the ftdiv-specific exponent limits selected by doing_ftdiv.
        v.doing_ftdiv := "11";
        v.first := '1';
        v.state := FTDIV_1;
        v.instr_done := '0';
    end if;
867
when DO_FTSQRT =>
    -- ftsqrt: classify operand B of a prospective square root and report
    -- the outcome in the CR field.  Always completes in this cycle.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- CR bit 2 (fg_flag in the Power ISA description of ftsqrt): B is
    -- zero, infinite or denormalized.  The mantissa is held in 10.54
    -- format, so a FINITE operand is denormalized when its unit bit,
    -- mantissa(54), is clear; bit 53 is only the top fraction bit.
    if r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- CR bit 1 (fe_flag): B is NaN, infinite, zero or negative, or its
    -- exponent is too small.  The flag must be SET here; writing '0'
    -- after the field was cleared above would leave it permanently zero.
    if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
        or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
        v.cr_result(1) := '1';
    end if;
880
881 when DO_FCMP =>
882 -- fcmp[uo]
883 v.instr_done := '1';
884 v.state := IDLE;
885 update_fx := '1';
886 opsel_a <= AIN_B;
887 opsel_r <= RES_SUM;
888 v.result_exp := r.b.exponent;
889 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
890 (r.b.class = NAN and r.b.mantissa(53) = '0') then
891 -- Signalling NAN
892 v.fpscr(FPSCR_VXSNAN) := '1';
893 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
894 v.fpscr(FPSCR_VXVC) := '1';
895 end if;
896 invalid := '1';
897 v.cr_result := "0001"; -- unordered
898 elsif r.a.class = NAN or r.b.class = NAN then
899 if r.insn(6) = '1' then
900 -- fcmpo
901 v.fpscr(FPSCR_VXVC) := '1';
902 invalid := '1';
903 end if;
904 v.cr_result := "0001"; -- unordered
905 elsif r.a.class = ZERO and r.b.class = ZERO then
906 v.cr_result := "0010"; -- equal
907 elsif r.a.negative /= r.b.negative then
908 v.cr_result := r.a.negative & r.b.negative & "00";
909 elsif r.a.class = ZERO then
910 -- A and B are the same sign from here down
911 v.cr_result := not r.b.negative & r.b.negative & "00";
912 elsif r.a.class = INFINITY then
913 if r.b.class = INFINITY then
914 v.cr_result := "0010";
915 else
916 v.cr_result := r.a.negative & not r.a.negative & "00";
917 end if;
918 elsif r.b.class = ZERO then
919 -- A is finite from here down
920 v.cr_result := r.a.negative & not r.a.negative & "00";
921 elsif r.b.class = INFINITY then
922 v.cr_result := not r.b.negative & r.b.negative & "00";
923 elsif r.exp_cmp = '1' then
924 -- A and B are both finite from here down
925 v.cr_result := r.a.negative & not r.a.negative & "00";
926 elsif r.a.exponent /= r.b.exponent then
927 -- A exponent is smaller than B
928 v.cr_result := not r.a.negative & r.a.negative & "00";
929 else
930 -- Prepare to subtract mantissas, put B in R
931 v.cr_result := "0000";
932 v.instr_done := '0';
933 v.state := CMP_1;
934 end if;
935 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
936
937 when DO_MTFSB =>
938 -- mtfsb{0,1}
939 j := to_integer(unsigned(insn_bt(r.insn)));
940 for i in 0 to 31 loop
941 if i = j then
942 v.fpscr(31 - i) := r.insn(6);
943 end if;
944 end loop;
945 v.instr_done := '1';
946 v.state := IDLE;
947
948 when DO_MTFSFI =>
949 -- mtfsfi
950 j := to_integer(unsigned(insn_bf(r.insn)));
951 if r.insn(16) = '0' then
952 for i in 0 to 7 loop
953 if i = j then
954 k := (7 - i) * 4;
955 v.fpscr(k + 3 downto k) := insn_u(r.insn);
956 end if;
957 end loop;
958 end if;
959 v.instr_done := '1';
960 v.state := IDLE;
961
962 when DO_FMRG =>
963 -- fmrgew, fmrgow
964 opsel_r <= RES_MISC;
965 misc_sel <= "01" & r.insn(8) & '0';
966 v.int_result := '1';
967 v.writing_back := '1';
968 v.instr_done := '1';
969 v.state := IDLE;
970
971 when DO_MFFS =>
972 v.int_result := '1';
973 v.writing_back := '1';
974 opsel_r <= RES_MISC;
975 case r.insn(20 downto 16) is
976 when "00000" =>
977 -- mffs
978 when "00001" =>
979 -- mffsce
980 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
981 when "10100" | "10101" =>
982 -- mffscdrn[i] (but we don't implement DRN)
983 fpscr_mask := x"000000FF";
984 when "10110" =>
985 -- mffscrn
986 fpscr_mask := x"000000FF";
987 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
988 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
989 when "10111" =>
990 -- mffscrni
991 fpscr_mask := x"000000FF";
992 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
993 when "11000" =>
994 -- mffsl
995 fpscr_mask := x"0007F0FF";
996 when others =>
997 illegal := '1';
998 end case;
999 v.instr_done := '1';
1000 v.state := IDLE;
1001
1002 when DO_MTFSF =>
1003 if r.insn(25) = '1' then
1004 flm := x"FF";
1005 elsif r.insn(16) = '1' then
1006 flm := x"00";
1007 else
1008 flm := r.insn(24 downto 17);
1009 end if;
1010 for i in 0 to 7 loop
1011 k := i * 4;
1012 if flm(i) = '1' then
1013 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1014 end if;
1015 end loop;
1016 v.instr_done := '1';
1017 v.state := IDLE;
1018
1019 when DO_FMR =>
1020 opsel_a <= AIN_B;
1021 v.result_class := r.b.class;
1022 v.result_exp := r.b.exponent;
1023 v.quieten_nan := '0';
1024 if r.insn(9) = '1' then
1025 v.result_sign := '0'; -- fabs
1026 elsif r.insn(8) = '1' then
1027 v.result_sign := '1'; -- fnabs
1028 elsif r.insn(7) = '1' then
1029 v.result_sign := r.b.negative; -- fmr
1030 elsif r.insn(6) = '1' then
1031 v.result_sign := not r.b.negative; -- fneg
1032 else
1033 v.result_sign := r.a.negative; -- fcpsgn
1034 end if;
1035 v.writing_back := '1';
1036 v.instr_done := '1';
1037 v.state := IDLE;
1038
1039 when DO_FRI => -- fri[nzpm]
1040 opsel_a <= AIN_B;
1041 v.result_class := r.b.class;
1042 v.result_sign := r.b.negative;
1043 v.result_exp := r.b.exponent;
1044 v.fpscr(FPSCR_FR) := '0';
1045 v.fpscr(FPSCR_FI) := '0';
1046 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1047 -- Signalling NAN
1048 v.fpscr(FPSCR_VXSNAN) := '1';
1049 invalid := '1';
1050 end if;
1051 if r.b.class = FINITE then
1052 if r.b.exponent >= to_signed(52, EXP_BITS) then
1053 -- integer already, no rounding required
1054 arith_done := '1';
1055 else
1056 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1057 v.state := FRI_1;
1058 v.round_mode := '1' & r.insn(7 downto 6);
1059 end if;
1060 else
1061 arith_done := '1';
1062 end if;
1063
1064 when DO_FRSP =>
1065 opsel_a <= AIN_B;
1066 v.result_class := r.b.class;
1067 v.result_sign := r.b.negative;
1068 v.result_exp := r.b.exponent;
1069 v.fpscr(FPSCR_FR) := '0';
1070 v.fpscr(FPSCR_FI) := '0';
1071 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1072 -- Signalling NAN
1073 v.fpscr(FPSCR_VXSNAN) := '1';
1074 invalid := '1';
1075 end if;
1076 set_x := '1';
1077 if r.b.class = FINITE then
1078 if r.b.exponent < to_signed(-126, EXP_BITS) then
1079 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1080 v.state := ROUND_UFLOW;
1081 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1082 v.state := ROUND_OFLOW;
1083 else
1084 v.shift := to_signed(-2, EXP_BITS);
1085 v.state := ROUNDING;
1086 end if;
1087 else
1088 arith_done := '1';
1089 end if;
1090
1091 when DO_FCTI =>
1092 -- instr bit 9: 1=dword 0=word
1093 -- instr bit 8: 1=unsigned 0=signed
1094 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1095 opsel_a <= AIN_B;
1096 v.result_class := r.b.class;
1097 v.result_sign := r.b.negative;
1098 v.result_exp := r.b.exponent;
1099 v.fpscr(FPSCR_FR) := '0';
1100 v.fpscr(FPSCR_FI) := '0';
1101 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1102 -- Signalling NAN
1103 v.fpscr(FPSCR_VXSNAN) := '1';
1104 invalid := '1';
1105 end if;
1106
1107 v.int_result := '1';
1108 case r.b.class is
1109 when ZERO =>
1110 arith_done := '1';
1111 when FINITE =>
1112 if r.b.exponent >= to_signed(64, EXP_BITS) or
1113 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1114 v.state := INT_OFLOW;
1115 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1116 -- integer already, no rounding required,
1117 -- shift into final position
1118 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1119 if r.insn(8) = '1' and r.b.negative = '1' then
1120 v.state := INT_OFLOW;
1121 else
1122 v.state := INT_ISHIFT;
1123 end if;
1124 else
1125 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1126 v.state := INT_SHIFT;
1127 end if;
1128 when INFINITY | NAN =>
1129 v.state := INT_OFLOW;
1130 end case;
1131
1132 when DO_FCFID =>
1133 v.result_sign := '0';
1134 opsel_a <= AIN_B;
1135 if r.insn(8) = '0' and r.b.negative = '1' then
1136 -- fcfid[s] with negative operand, set R = -B
1137 opsel_ainv <= '1';
1138 carry_in <= '1';
1139 v.result_sign := '1';
1140 end if;
1141 v.result_class := r.b.class;
1142 v.result_exp := to_signed(54, EXP_BITS);
1143 v.fpscr(FPSCR_FR) := '0';
1144 v.fpscr(FPSCR_FI) := '0';
1145 if r.b.class = ZERO then
1146 arith_done := '1';
1147 else
1148 v.state := FINISH;
1149 end if;
1150
1151 when DO_FADD =>
1152 -- fadd[s] and fsub[s]
1153 opsel_a <= AIN_A;
1154 v.result_sign := r.a.negative;
1155 v.result_class := r.a.class;
1156 v.result_exp := r.a.exponent;
1157 v.fpscr(FPSCR_FR) := '0';
1158 v.fpscr(FPSCR_FI) := '0';
1159 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1160 if r.a.class = FINITE and r.b.class = FINITE then
1161 v.is_subtract := not is_add;
1162 v.add_bsmall := r.exp_cmp;
1163 if r.exp_cmp = '0' then
1164 v.shift := r.a.exponent - r.b.exponent;
1165 v.result_sign := r.b.negative xnor r.insn(1);
1166 if r.a.exponent = r.b.exponent then
1167 v.state := ADD_2;
1168 else
1169 v.state := ADD_SHIFT;
1170 end if;
1171 else
1172 opsel_a <= AIN_B;
1173 v.shift := r.b.exponent - r.a.exponent;
1174 v.result_exp := r.b.exponent;
1175 v.state := ADD_SHIFT;
1176 end if;
1177 else
1178 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1179 (r.b.class = NAN and r.b.mantissa(53) = '0') then
1180 -- Signalling NAN
1181 v.fpscr(FPSCR_VXSNAN) := '1';
1182 invalid := '1';
1183 end if;
1184 if r.a.class = NAN then
1185 -- nothing to do, result is A
1186 elsif r.b.class = NAN then
1187 v.result_class := NAN;
1188 v.result_sign := r.b.negative;
1189 opsel_a <= AIN_B;
1190 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1191 -- invalid operation, construct QNaN
1192 v.fpscr(FPSCR_VXISI) := '1';
1193 qnan_result := '1';
1194 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1195 -- return -0 for rounding to -infinity
1196 v.result_sign := r.round_mode(1) and r.round_mode(0);
1197 elsif r.a.class = INFINITY or r.b.class = ZERO then
1198 -- nothing to do, result is A
1199 else
1200 -- result is +/- B
1201 v.result_sign := r.b.negative xnor r.insn(1);
1202 v.result_class := r.b.class;
1203 v.result_exp := r.b.exponent;
1204 opsel_a <= AIN_B;
1205 end if;
1206 arith_done := '1';
1207 end if;
1208
1209 when DO_FMUL =>
1210 -- fmul[s]
1211 opsel_a <= AIN_A;
1212 v.result_sign := r.a.negative;
1213 v.result_class := r.a.class;
1214 v.result_exp := r.a.exponent;
1215 v.fpscr(FPSCR_FR) := '0';
1216 v.fpscr(FPSCR_FI) := '0';
1217 if r.a.class = FINITE and r.c.class = FINITE then
1218 v.result_sign := r.a.negative xor r.c.negative;
1219 v.result_exp := r.a.exponent + r.c.exponent;
1220 -- Renormalize denorm operands
1221 if r.a.mantissa(54) = '0' then
1222 v.state := RENORM_A;
1223 elsif r.c.mantissa(54) = '0' then
1224 opsel_a <= AIN_C;
1225 v.state := RENORM_C;
1226 else
1227 f_to_multiply.valid <= '1';
1228 v.state := MULT_1;
1229 end if;
1230 else
1231 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1232 (r.c.class = NAN and r.c.mantissa(53) = '0') then
1233 -- Signalling NAN
1234 v.fpscr(FPSCR_VXSNAN) := '1';
1235 invalid := '1';
1236 end if;
1237 if r.a.class = NAN then
1238 -- result is A
1239 elsif r.c.class = NAN then
1240 v.result_class := NAN;
1241 v.result_sign := r.c.negative;
1242 opsel_a <= AIN_C;
1243 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1244 (r.a.class = ZERO and r.c.class = INFINITY) then
1245 -- invalid operation, construct QNaN
1246 v.fpscr(FPSCR_VXIMZ) := '1';
1247 qnan_result := '1';
1248 elsif r.a.class = ZERO or r.a.class = INFINITY then
1249 -- result is +/- A
1250 v.result_sign := r.a.negative xor r.c.negative;
1251 else
1252 -- r.c.class is ZERO or INFINITY
1253 v.result_class := r.c.class;
1254 v.result_sign := r.a.negative xor r.c.negative;
1255 end if;
1256 arith_done := '1';
1257 end if;
1258
when DO_FDIV =>
    -- fdiv[s]: the provisional result takes A's class and mantissa,
    -- with the sign and exponent of the quotient A / B.
    opsel_a <= AIN_A;
    v.result_class := r.a.class;
    v.result_sign := r.a.negative xor r.b.negative;
    v.result_exp := r.a.exponent - r.b.exponent;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.count := "00";
    if r.a.class /= FINITE or r.b.class /= FINITE then
        -- At least one operand is zero, infinity or NaN, so the result
        -- is determined here without running the divide iteration.
        if (r.a.class = NAN and r.a.mantissa(53) = '0') or
            (r.b.class = NAN and r.b.mantissa(53) = '0') then
            -- Signalling NaN operand
            v.fpscr(FPSCR_VXSNAN) := '1';
            invalid := '1';
        end if;
        if r.a.class = NAN then
            -- result is A, keeping A's own sign
            v.result_sign := r.a.negative;
        elsif r.b.class = NAN then
            -- result is B
            v.result_class := NAN;
            v.result_sign := r.b.negative;
            opsel_a <= AIN_B;
        elsif r.b.class = INFINITY then
            if r.a.class = INFINITY then
                -- infinity / infinity: invalid operation, QNaN result
                v.fpscr(FPSCR_VXIDI) := '1';
                qnan_result := '1';
            else
                -- anything else divided by infinity is zero
                v.result_class := ZERO;
            end if;
        elsif r.b.class = ZERO then
            if r.a.class = ZERO then
                -- zero / zero: invalid operation, QNaN result
                v.fpscr(FPSCR_VXZDZ) := '1';
                qnan_result := '1';
            else
                -- only a finite dividend raises zero-divide
                if r.a.class = FINITE then
                    zero_divide := '1';
                end if;
                v.result_class := INFINITY;
            end if;
        -- otherwise B is FINITE and the class copied from A above stands
        end if;
        arith_done := '1';
    elsif r.a.mantissa(54) = '0' then
        -- A is denormalized: renormalize it first
        v.state := RENORM_A;
    elsif r.b.mantissa(54) = '0' then
        -- B is denormalized: renormalize it first
        opsel_a <= AIN_B;
        v.state := RENORM_B;
    else
        -- both operands normalized, start the divide sequence
        v.first := '1';
        v.state := DIV_2;
    end if;
1315
when DO_FSEL =>
    -- fsel: the result is C when A is zero (either sign) or a
    -- non-negative non-NaN value, and B otherwise.  The selected
    -- operand is passed through unchanged (NaNs are not quietened).
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.quieten_nan := '0';
    arith_done := '1';
    if r.a.class /= ZERO and (r.a.negative /= '0' or r.a.class = NAN) then
        -- A is negative (and non-zero) or a NaN: select B
        opsel_a <= AIN_B;
        v.result_class := r.b.class;
        v.result_sign := r.b.negative;
        v.result_exp := r.b.exponent;
    else
        -- A >= 0: select C
        opsel_a <= AIN_C;
        v.result_class := r.c.class;
        v.result_sign := r.c.negative;
        v.result_exp := r.c.exponent;
    end if;
1333
1334 when DO_FSQRT =>
1335 opsel_a <= AIN_B;
1336 v.result_class := r.b.class;
1337 v.result_sign := r.b.negative;
1338 v.fpscr(FPSCR_FR) := '0';
1339 v.fpscr(FPSCR_FI) := '0';
1340 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1341 v.fpscr(FPSCR_VXSNAN) := '1';
1342 invalid := '1';
1343 end if;
1344 case r.b.class is
1345 when FINITE =>
1346 v.result_exp := r.b.exponent;
1347 if r.b.negative = '1' then
1348 v.fpscr(FPSCR_VXSQRT) := '1';
1349 qnan_result := '1';
1350 arith_done := '1';
1351 elsif r.b.mantissa(54) = '0' then
1352 v.state := RENORM_B;
1353 elsif r.b.exponent(0) = '0' then
1354 v.state := SQRT_1;
1355 else
1356 v.shift := to_signed(1, EXP_BITS);
1357 v.state := RENORM_B2;
1358 end if;
1359 when NAN | ZERO =>
1360 -- result is B
1361 arith_done := '1';
1362 when INFINITY =>
1363 if r.b.negative = '1' then
1364 v.fpscr(FPSCR_VXSQRT) := '1';
1365 qnan_result := '1';
1366 -- else result is B
1367 end if;
1368 arith_done := '1';
1369 end case;
1370
1371 when DO_FRE =>
1372 opsel_a <= AIN_B;
1373 v.result_class := r.b.class;
1374 v.result_sign := r.b.negative;
1375 v.fpscr(FPSCR_FR) := '0';
1376 v.fpscr(FPSCR_FI) := '0';
1377 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1378 v.fpscr(FPSCR_VXSNAN) := '1';
1379 invalid := '1';
1380 end if;
1381 case r.b.class is
1382 when FINITE =>
1383 v.result_exp := - r.b.exponent;
1384 if r.b.mantissa(54) = '0' then
1385 v.state := RENORM_B;
1386 else
1387 v.state := FRE_1;
1388 end if;
1389 when NAN =>
1390 -- result is B
1391 arith_done := '1';
1392 when INFINITY =>
1393 v.result_class := ZERO;
1394 arith_done := '1';
1395 when ZERO =>
1396 v.result_class := INFINITY;
1397 zero_divide := '1';
1398 arith_done := '1';
1399 end case;
1400
1401 when DO_FRSQRTE =>
1402 opsel_a <= AIN_B;
1403 v.result_class := r.b.class;
1404 v.result_sign := r.b.negative;
1405 v.fpscr(FPSCR_FR) := '0';
1406 v.fpscr(FPSCR_FI) := '0';
1407 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1408 v.fpscr(FPSCR_VXSNAN) := '1';
1409 invalid := '1';
1410 end if;
1411 v.shift := to_signed(1, EXP_BITS);
1412 case r.b.class is
1413 when FINITE =>
1414 v.result_exp := r.b.exponent;
1415 if r.b.negative = '1' then
1416 v.fpscr(FPSCR_VXSQRT) := '1';
1417 qnan_result := '1';
1418 arith_done := '1';
1419 elsif r.b.mantissa(54) = '0' then
1420 v.state := RENORM_B;
1421 elsif r.b.exponent(0) = '0' then
1422 v.state := RSQRT_1;
1423 else
1424 v.state := RENORM_B2;
1425 end if;
1426 when NAN =>
1427 -- result is B
1428 arith_done := '1';
1429 when INFINITY =>
1430 if r.b.negative = '1' then
1431 v.fpscr(FPSCR_VXSQRT) := '1';
1432 qnan_result := '1';
1433 else
1434 v.result_class := ZERO;
1435 end if;
1436 arith_done := '1';
1437 when ZERO =>
1438 v.result_class := INFINITY;
1439 zero_divide := '1';
1440 arith_done := '1';
1441 end case;
1442
1443 when DO_FMADD =>
1444 -- fmadd, fmsub, fnmadd, fnmsub
1445 opsel_a <= AIN_A;
1446 v.result_sign := r.a.negative;
1447 v.result_class := r.a.class;
1448 v.result_exp := r.a.exponent;
1449 v.fpscr(FPSCR_FR) := '0';
1450 v.fpscr(FPSCR_FI) := '0';
1451 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1452 if r.a.class = FINITE and r.c.class = FINITE and
1453 (r.b.class = FINITE or r.b.class = ZERO) then
1454 v.is_subtract := not is_add;
1455 mulexp := r.a.exponent + r.c.exponent;
1456 v.result_exp := mulexp;
1457 opsel_a <= AIN_B;
1458 -- Make sure A and C are normalized
1459 if r.a.mantissa(54) = '0' then
1460 opsel_a <= AIN_A;
1461 v.state := RENORM_A;
1462 elsif r.c.mantissa(54) = '0' then
1463 opsel_a <= AIN_C;
1464 v.state := RENORM_C;
1465 elsif r.b.class = ZERO then
1466 -- no addend, degenerates to multiply
1467 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1468 f_to_multiply.valid <= '1';
1469 v.is_multiply := '1';
1470 v.state := MULT_1;
1471 elsif r.madd_cmp = '0' then
1472 -- addend is bigger, do multiply first
1473 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1474 f_to_multiply.valid <= '1';
1475 v.state := FMADD_1;
1476 else
1477 -- product is bigger, shift B right and use it as the
1478 -- addend to the multiplier
1479 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1480 -- for subtract, multiplier does B - A * C
1481 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1482 v.result_exp := r.b.exponent;
1483 v.state := FMADD_2;
1484 end if;
1485 else
1486 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1487 (r.b.class = NAN and r.b.mantissa(53) = '0') or
1488 (r.c.class = NAN and r.c.mantissa(53) = '0') then
1489 -- Signalling NAN
1490 v.fpscr(FPSCR_VXSNAN) := '1';
1491 invalid := '1';
1492 end if;
1493 if r.a.class = NAN then
1494 -- nothing to do, result is A
1495 elsif r.b.class = NAN then
1496 -- result is B
1497 v.result_class := NAN;
1498 v.result_sign := r.b.negative;
1499 opsel_a <= AIN_B;
1500 elsif r.c.class = NAN then
1501 -- result is C
1502 v.result_class := NAN;
1503 v.result_sign := r.c.negative;
1504 opsel_a <= AIN_C;
1505 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1506 (r.a.class = INFINITY and r.c.class = ZERO) then
1507 -- invalid operation, construct QNaN
1508 v.fpscr(FPSCR_VXIMZ) := '1';
1509 qnan_result := '1';
1510 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1511 if r.b.class = INFINITY and is_add = '0' then
1512 -- invalid operation, construct QNaN
1513 v.fpscr(FPSCR_VXISI) := '1';
1514 qnan_result := '1';
1515 else
1516 -- result is infinity
1517 v.result_class := INFINITY;
1518 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1519 end if;
1520 else
1521 -- Here A is zero, C is zero, or B is infinity
1522 -- Result is +/-B in all of those cases
1523 v.result_class := r.b.class;
1524 v.result_exp := r.b.exponent;
1525 if v.result_class /= ZERO or is_add = '1' then
1526 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1527 else
1528 -- have to be careful about rule for 0 - 0 result sign
1529 v.result_sign := (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1530 end if;
1531 opsel_a <= AIN_B;
1532 end if;
1533 arith_done := '1';
1534 end if;
1535
1536 when RENORM_A =>
1537 renormalize := '1';
1538 v.state := RENORM_A2;
1539
1540 when RENORM_A2 =>
1541 set_a := '1';
1542 v.result_exp := new_exp;
1543 if r.insn(4) = '1' then
1544 opsel_a <= AIN_C;
1545 if r.c.mantissa(54) = '1' then
1546 if r.insn(3) = '0' or r.b.class = ZERO then
1547 v.first := '1';
1548 v.state := MULT_1;
1549 else
1550 v.madd_cmp := '0';
1551 if new_exp + 1 >= r.b.exponent then
1552 v.madd_cmp := '1';
1553 end if;
1554 v.state := DO_FMADD;
1555 end if;
1556 else
1557 v.state := RENORM_C;
1558 end if;
1559 else
1560 opsel_a <= AIN_B;
1561 if r.b.mantissa(54) = '1' then
1562 v.first := '1';
1563 v.state := DIV_2;
1564 else
1565 v.state := RENORM_B;
1566 end if;
1567 end if;
1568
1569 when RENORM_B =>
1570 renormalize := '1';
1571 renorm_sqrt := r.is_sqrt;
1572 v.state := RENORM_B2;
1573
1574 when RENORM_B2 =>
1575 set_b := '1';
1576 if r.is_sqrt = '0' then
1577 v.result_exp := r.result_exp + r.shift;
1578 else
1579 v.result_exp := new_exp;
1580 end if;
1581 v.state := LOOKUP;
1582
1583 when RENORM_C =>
1584 renormalize := '1';
1585 v.state := RENORM_C2;
1586
1587 when RENORM_C2 =>
1588 set_c := '1';
1589 v.result_exp := new_exp;
1590 if r.insn(3) = '0' or r.b.class = ZERO then
1591 v.first := '1';
1592 v.state := MULT_1;
1593 else
1594 v.madd_cmp := '0';
1595 if new_exp + 1 >= r.b.exponent then
1596 v.madd_cmp := '1';
1597 end if;
1598 v.state := DO_FMADD;
1599 end if;
1600
1601 when ADD_SHIFT =>
1602 -- r.shift = - exponent difference
1603 opsel_r <= RES_SHIFT;
1604 v.x := s_nz;
1605 set_x := '1';
1606 longmask := '0';
1607 v.state := ADD_2;
1608
1609 when ADD_2 =>
1610 if r.add_bsmall = '1' then
1611 opsel_a <= AIN_A;
1612 else
1613 opsel_a <= AIN_B;
1614 end if;
1615 opsel_b <= BIN_R;
1616 opsel_binv <= r.is_subtract;
1617 carry_in <= r.is_subtract and not r.x;
1618 v.shift := to_signed(-1, EXP_BITS);
1619 v.state := ADD_3;
1620
1621 when ADD_3 =>
1622 -- check for overflow or negative result (can't get both)
1623 -- r.shift = -1
1624 if r.r(63) = '1' then
1625 -- result is opposite sign to expected
1626 v.result_sign := not r.result_sign;
1627 opsel_ainv <= '1';
1628 carry_in <= '1';
1629 v.state := FINISH;
1630 elsif r.r(55) = '1' then
1631 -- sum overflowed, shift right
1632 opsel_r <= RES_SHIFT;
1633 set_x := '1';
1634 v.shift := to_signed(-2, EXP_BITS);
1635 if exp_huge = '1' then
1636 v.state := ROUND_OFLOW;
1637 else
1638 v.state := ROUNDING;
1639 end if;
1640 elsif r.r(54) = '1' then
1641 set_x := '1';
1642 v.shift := to_signed(-2, EXP_BITS);
1643 v.state := ROUNDING;
1644 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1645 -- r.x must be zero at this point
1646 v.result_class := ZERO;
1647 if r.is_subtract = '1' then
1648 -- set result sign depending on rounding mode
1649 v.result_sign := r.round_mode(1) and r.round_mode(0);
1650 end if;
1651 arith_done := '1';
1652 else
1653 renormalize := '1';
1654 v.state := NORMALIZE;
1655 end if;
1656
1657 when CMP_1 =>
1658 opsel_a <= AIN_A;
1659 opsel_b <= BIN_R;
1660 opsel_binv <= '1';
1661 carry_in <= '1';
1662 v.state := CMP_2;
1663
1664 when CMP_2 =>
1665 if r.r(63) = '1' then
1666 -- A is smaller in magnitude
1667 v.cr_result := not r.a.negative & r.a.negative & "00";
1668 elsif (r_hi_nz or r_lo_nz) = '0' then
1669 v.cr_result := "0010";
1670 else
1671 v.cr_result := r.a.negative & not r.a.negative & "00";
1672 end if;
1673 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1674 v.instr_done := '1';
1675 v.state := IDLE;
1676
1677 when MULT_1 =>
1678 f_to_multiply.valid <= r.first;
1679 opsel_r <= RES_MULT;
1680 if multiply_to_f.valid = '1' then
1681 v.state := FINISH;
1682 end if;
1683
1684 when FMADD_1 =>
1685 -- Addend is bigger here
1686 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1687 -- note v.shift is at most -2 here
1688 v.shift := r.result_exp - r.b.exponent;
1689 opsel_r <= RES_MULT;
1690 opsel_s <= S_MULT;
1691 set_s := '1';
1692 f_to_multiply.valid <= r.first;
1693 if multiply_to_f.valid = '1' then
1694 v.state := ADD_SHIFT;
1695 end if;
1696
1697 when FMADD_2 =>
1698 -- Product is potentially bigger here
1699 -- r.shift = addend exp - product exp + 64
1700 set_s := '1';
1701 opsel_s <= S_SHIFT;
1702 v.shift := r.shift - to_signed(64, EXP_BITS);
1703 v.state := FMADD_3;
1704
1705 when FMADD_3 =>
1706 -- r.shift = addend exp - product exp
1707 opsel_r <= RES_SHIFT;
1708 v.first := '1';
1709 v.state := FMADD_4;
1710
1711 when FMADD_4 =>
1712 msel_add <= MULADD_RS;
1713 f_to_multiply.valid <= r.first;
1714 msel_inv <= r.is_subtract;
1715 opsel_r <= RES_MULT;
1716 opsel_s <= S_MULT;
1717 set_s := '1';
1718 v.shift := to_signed(56, EXP_BITS);
1719 if multiply_to_f.valid = '1' then
1720 if multiply_to_f.result(121) = '1' then
1721 v.state := FMADD_5;
1722 else
1723 v.state := FMADD_6;
1724 end if;
1725 end if;
1726
1727 when FMADD_5 =>
1728 -- negate R:S:X
1729 v.result_sign := not r.result_sign;
1730 opsel_ainv <= '1';
1731 carry_in <= not (s_nz or r.x);
1732 opsel_s <= S_NEG;
1733 set_s := '1';
1734 v.shift := to_signed(56, EXP_BITS);
1735 v.state := FMADD_6;
1736
1737 when FMADD_6 =>
1738 -- r.shift = 56 (or 0, but only if r is now nonzero)
1739 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1740 if s_nz = '0' then
1741 -- must be a subtraction, and r.x must be zero
1742 v.result_class := ZERO;
1743 v.result_sign := r.round_mode(1) and r.round_mode(0);
1744 arith_done := '1';
1745 else
1746 -- R is all zeroes but there are non-zero bits in S
1747 -- so shift them into R and set S to 0
1748 opsel_r <= RES_SHIFT;
1749 set_s := '1';
1750 -- stay in state FMADD_6
1751 end if;
1752 elsif r.r(56 downto 54) = "001" then
1753 v.state := FINISH;
1754 else
1755 renormalize := '1';
1756 v.state := NORMALIZE;
1757 end if;
1758
when LOOKUP =>
    -- Spend one cycle waiting for the inverse_table[B] lookup, then
    -- dispatch on instruction bits 4..2 to the state that consumes it.
    opsel_a <= AIN_B;
    v.first := '1';
    if r.insn(4) = '0' and r.insn(3) = '0' then
        v.state := DIV_2;
    elsif r.insn(4) = '0' then
        v.state := SQRT_1;
    elsif r.insn(2) = '0' then
        v.state := FRE_1;
    else
        v.state := RSQRT_1;
    end if;
1774
1775 when DIV_2 =>
1776 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1777 msel_1 <= MUL1_B;
1778 msel_add <= MULADD_CONST;
1779 msel_inv <= '1';
1780 if r.count = 0 then
1781 msel_2 <= MUL2_LUT;
1782 else
1783 msel_2 <= MUL2_P;
1784 end if;
1785 set_y := r.first;
1786 pshift := '1';
1787 f_to_multiply.valid <= r.first;
1788 if multiply_to_f.valid = '1' then
1789 v.first := '1';
1790 v.count := r.count + 1;
1791 v.state := DIV_3;
1792 end if;
1793
1794 when DIV_3 =>
1795 -- compute Y = P = P * Y
1796 msel_1 <= MUL1_Y;
1797 msel_2 <= MUL2_P;
1798 f_to_multiply.valid <= r.first;
1799 pshift := '1';
1800 if multiply_to_f.valid = '1' then
1801 v.first := '1';
1802 if r.count = 3 then
1803 v.state := DIV_4;
1804 else
1805 v.state := DIV_2;
1806 end if;
1807 end if;
1808
1809 when DIV_4 =>
1810 -- compute R = P = A * Y (quotient)
1811 msel_1 <= MUL1_A;
1812 msel_2 <= MUL2_P;
1813 set_y := r.first;
1814 f_to_multiply.valid <= r.first;
1815 pshift := '1';
1816 if multiply_to_f.valid = '1' then
1817 opsel_r <= RES_MULT;
1818 v.first := '1';
1819 v.state := DIV_5;
1820 end if;
1821
1822 when DIV_5 =>
1823 -- compute P = A - B * R (remainder)
1824 msel_1 <= MUL1_B;
1825 msel_2 <= MUL2_R;
1826 msel_add <= MULADD_A;
1827 msel_inv <= '1';
1828 f_to_multiply.valid <= r.first;
1829 if multiply_to_f.valid = '1' then
1830 v.state := DIV_6;
1831 end if;
1832
1833 when DIV_6 =>
1834 -- test if remainder is 0 or >= B
1835 if pcmpb_lt = '1' then
1836 -- quotient is correct, set X if remainder non-zero
1837 v.x := r.p(58) or px_nz;
1838 else
1839 -- quotient needs to be incremented by 1
1840 carry_in <= '1';
1841 v.x := not pcmpb_eq;
1842 end if;
1843 v.state := FINISH;
1844
1845 when FRE_1 =>
1846 opsel_r <= RES_MISC;
1847 misc_sel <= "0111";
1848 v.shift := to_signed(1, EXP_BITS);
1849 v.state := NORMALIZE;
1850
1851 when FTDIV_1 =>
1852 v.cr_result(1) := exp_tiny or exp_huge;
1853 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1854 v.instr_done := '1';
1855 v.state := IDLE;
1856 else
1857 v.shift := r.a.exponent;
1858 v.doing_ftdiv := "10";
1859 end if;
1860
1861 when RSQRT_1 =>
1862 opsel_r <= RES_MISC;
1863 misc_sel <= "0111";
1864 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1865 v.result_exp := - sqrt_exp;
1866 v.shift := to_signed(1, EXP_BITS);
1867 v.state := NORMALIZE;
1868
1869 when SQRT_1 =>
1870 -- put invsqr[B] in R and compute P = invsqr[B] * B
1871 -- also transfer B (in R) to A
1872 set_a := '1';
1873 opsel_r <= RES_MISC;
1874 misc_sel <= "0111";
1875 msel_1 <= MUL1_B;
1876 msel_2 <= MUL2_LUT;
1877 f_to_multiply.valid <= '1';
1878 v.shift := to_signed(-1, EXP_BITS);
1879 v.count := "00";
1880 v.state := SQRT_2;
1881
1882 when SQRT_2 =>
1883 -- shift R right one place
1884 -- not expecting multiplier result yet
1885 -- r.shift = -1
1886 opsel_r <= RES_SHIFT;
1887 v.first := '1';
1888 v.state := SQRT_3;
1889
1890 when SQRT_3 =>
1891 -- put R into Y, wait for product from multiplier
1892 msel_2 <= MUL2_R;
1893 set_y := r.first;
1894 pshift := '1';
1895 if multiply_to_f.valid = '1' then
1896 -- put result into R
1897 opsel_r <= RES_MULT;
1898 v.first := '1';
1899 v.state := SQRT_4;
1900 end if;
1901
1902 when SQRT_4 =>
1903 -- compute 1.5 - Y * P
1904 msel_1 <= MUL1_Y;
1905 msel_2 <= MUL2_P;
1906 msel_add <= MULADD_CONST;
1907 msel_inv <= '1';
1908 f_to_multiply.valid <= r.first;
1909 pshift := '1';
1910 if multiply_to_f.valid = '1' then
1911 v.state := SQRT_5;
1912 end if;
1913
1914 when SQRT_5 =>
1915 -- compute Y = Y * P
1916 msel_1 <= MUL1_Y;
1917 msel_2 <= MUL2_P;
1918 f_to_multiply.valid <= '1';
1919 v.first := '1';
1920 v.state := SQRT_6;
1921
1922 when SQRT_6 =>
1923 -- pipeline in R = R * P
1924 msel_1 <= MUL1_R;
1925 msel_2 <= MUL2_P;
1926 f_to_multiply.valid <= r.first;
1927 pshift := '1';
1928 if multiply_to_f.valid = '1' then
1929 v.first := '1';
1930 v.state := SQRT_7;
1931 end if;
1932
1933 when SQRT_7 =>
-- Last state of one refinement pass (SQRT_4 .. SQRT_7).
1934 -- first multiply is done, put result in Y
1935 msel_2 <= MUL2_P;
-- set_y latches f_to_multiply.data2 (here P, selected by MUL2_P) into Y;
-- gating it with r.first makes it happen only in the first cycle spent
-- in this state.
1936 set_y := r.first;
1937 -- wait for second multiply (should be here already)
1938 pshift := '1';
1939 if multiply_to_f.valid = '1' then
1940 -- put result into R
1941 opsel_r <= RES_MULT;
1942 v.first := '1';
-- r.count was cleared to "00" in SQRT_1. The test below uses the old
-- value, so the loop body runs for count = 0, 1 and 2: three passes.
1943 v.count := r.count + 1;
1944 if r.count < 2 then
1945 v.state := SQRT_4;
1946 else
-- NOTE(review): redundant, v.first was already set to '1' above.
1947 v.first := '1';
1948 v.state := SQRT_8;
1949 end if;
1950 end if;
1951
1952 when SQRT_8 =>
1953 -- compute P = A - R * R, which can be +ve or -ve
1954 -- we arranged for B to be put into A earlier
1955 msel_1 <= MUL1_R;
1956 msel_2 <= MUL2_R;
1957 msel_add <= MULADD_A;
1958 msel_inv <= '1';
1959 pshift := '1';
1960 f_to_multiply.valid <= r.first;
1961 if multiply_to_f.valid = '1' then
1962 v.first := '1';
1963 v.state := SQRT_9;
1964 end if;
1965
1966 when SQRT_9 =>
1967 -- compute P = P * Y
1968 -- since Y is an estimate of 1/sqrt(B), this makes P an
1969 -- estimate of the adjustment needed to R. Since the error
1970 -- could be negative and we have an unsigned multiplier, the
1971 -- upper bits can be wrong, but it turns out the lowest 8 bits
1972 -- are correct and are all we need (given 3 iterations through
1973 -- SQRT_4 to SQRT_7).
1974 msel_1 <= MUL1_Y;
1975 msel_2 <= MUL2_P;
1976 pshift := '1';
1977 f_to_multiply.valid <= r.first;
1978 if multiply_to_f.valid = '1' then
1979 v.state := SQRT_10;
1980 end if;
1981
1982 when SQRT_10 =>
1983 -- Add the bottom 8 bits of P, sign-extended,
1984 -- divided by 4, onto R.
1985 -- The division by 4 is because R is 10.54 format
1986 -- whereas P is 8.56 format.
-- BIN_PS6 makes the B input of the adder r.p(7 downto 2) sign-extended
-- to 64 bits. The A input and the result select are not set here, so
-- they come from defaults assigned outside this part of the file
-- (presumably R and the adder sum; confirm).
1987 opsel_b <= BIN_PS6;
-- Result exponent is B's exponent arithmetically shifted right by one
-- (sign bit replicated), i.e. halved.
1988 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1989 v.result_exp := sqrt_exp;
-- Shift count of 1 is consumed by SQRT_11, which (per its own comment)
-- forms 2 * R + 1 through the shifter with shiftin set.
1990 v.shift := to_signed(1, EXP_BITS);
1991 v.first := '1';
1992 v.state := SQRT_11;
1993
1994 when SQRT_11 =>
1995 -- compute P = A - R * R (remainder)
1996 -- also put 2 * R + 1 into B for comparison with P
1997 msel_1 <= MUL1_R;
1998 msel_2 <= MUL2_R;
1999 msel_add <= MULADD_A;
2000 msel_inv <= '1';
2001 f_to_multiply.valid <= r.first;
2002 shiftin := '1';
2003 set_b := r.first;
2004 if multiply_to_f.valid = '1' then
2005 v.state := SQRT_12;
2006 end if;
2007
2008 when SQRT_12 =>
2009 -- test if remainder is 0 or >= B = 2*R + 1
2010 if pcmpb_lt = '1' then
2011 -- square root is correct, set X if remainder non-zero
2012 v.x := r.p(58) or px_nz;
2013 else
2014 -- square root needs to be incremented by 1
2015 carry_in <= '1';
2016 v.x := not pcmpb_eq;
2017 end if;
2018 v.state := FINISH;
2019
2020 when INT_SHIFT =>
2021 -- r.shift = b.exponent - 52
2022 opsel_r <= RES_SHIFT;
2023 set_x := '1';
2024 v.state := INT_ROUND;
2025 v.shift := to_signed(-2, EXP_BITS);
2026
2027 when INT_ROUND =>
-- Second step of float-to-integer conversion: apply the final two-place
-- shift to R and decide the rounding direction.
2028 -- r.shift = -2
2029 opsel_r <= RES_SHIFT;
-- fp_rounding is defined elsewhere. The third argument is the position
-- where ROUNDING passes r.single_prec; here it is tied to '0'.
2030 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
-- The FR bit stored here is read back by INT_FINAL as the increment
-- request.
2031 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2032 -- Check for negative values that don't round to 0 for fcti*u*
-- insn(8) = '1' selects the unsigned forms (see the opcode comments in
-- INT_FINAL). r_hi_nz / r_lo_nz are computed outside this chunk,
-- presumably "upper / lower part of R is non-zero"; confirm.
2033 if r.insn(8) = '1' and r.result_sign = '1' and
2034 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2035 v.state := INT_OFLOW;
2036 else
2037 v.state := INT_FINAL;
2038 end if;
2039
2040 when INT_ISHIFT =>
2041 -- r.shift = b.exponent - 54
-- Single shift, then straight to INT_FINAL: no INT_ROUND step (and so
-- no rounding decision) on this path.
2042 opsel_r <= RES_SHIFT;
2043 v.state := INT_FINAL;
2044
2045 when INT_FINAL =>
2046 -- Negate if necessary, and increment for rounding if needed
-- One adder pass does both jobs. With FR as the round-up request:
--   sign = 0:  A + FR
--   sign = 1:  not A + (not FR)  =  -(A + FR)
-- hence carry_in = FR xor sign.
2047 opsel_ainv <= r.result_sign;
2048 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2049 -- Check for possible overflows
-- insn(9) selects word ('0') or doubleword ('1') results, insn(8)
-- selects signed ('0') or unsigned ('1'). The test looks at r.r as it
-- is before this cycle's add: the top bit of the target width, plus
-- for positive signed values the bit below it.
2050 case r.insn(9 downto 8) is
2051 when "00" => -- fctiw[z]
2052 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2053 when "01" => -- fctiwu[z]
2054 need_check := r.r(31);
2055 when "10" => -- fctid[z]
2056 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2057 when others => -- fctidu[z]
2058 need_check := r.r(63);
2059 end case;
2060 if need_check = '1' then
-- Close to the limit: let INT_CHECK inspect the value after the add.
2061 v.state := INT_CHECK;
2062 else
-- Cannot overflow: finish now, flagging inexact if rounding said so.
2063 if r.fpscr(FPSCR_FI) = '1' then
2064 v.fpscr(FPSCR_XX) := '1';
2065 end if;
2066 arith_done := '1';
2067 end if;
2068
2069 when INT_CHECK =>
-- Overflow check on the value produced by INT_FINAL's add (now in r.r).
-- Pick the top bit of the result width: bit 31 for the word forms
-- (insn(9) = '0'), bit 63 for the doubleword forms.
2070 if r.insn(9) = '0' then
2071 msb := r.r(31);
2072 else
2073 msb := r.r(63);
2074 end if;
-- Selector for the saturation constants ("1000" .. "1111") in the
-- RES_MISC table; only used if opsel_r is switched to RES_MISC below.
2075 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
-- Signed forms overflow when the top bit disagrees with the sign.
-- Unsigned forms overflow when the top bit is clear: within this part
-- of the file INT_CHECK is only entered from INT_FINAL with that bit
-- set before the add, so a clear bit means the add carried out.
2076 if (r.insn(8) = '0' and msb /= r.result_sign) or
2077 (r.insn(8) = '1' and msb /= '1') then
2078 opsel_r <= RES_MISC;
2079 v.fpscr(FPSCR_VXCVI) := '1';
2080 invalid := '1';
2081 else
2082 if r.fpscr(FPSCR_FI) = '1' then
2083 v.fpscr(FPSCR_XX) := '1';
2084 end if;
2085 end if;
2086 arith_done := '1';
2087
2088 when INT_OFLOW =>
-- Conversion result is out of range: replace it with the saturation
-- constant chosen by misc_sel = '1' & width/signedness & sign from the
-- RES_MISC table, and raise VXCVI.
2089 opsel_r <= RES_MISC;
2090 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2091 if r.b.class = NAN then
-- A NaN operand overrides the sign bit of the selector, so the
-- "max negative" entries ("1001", "1011", "1101", "1111") are used.
2092 misc_sel(0) <= '1';
2093 end if;
2094 v.fpscr(FPSCR_VXCVI) := '1';
2095 invalid := '1';
2096 arith_done := '1';
2097
2098 when FRI_1 =>
2099 -- r.shift = b.exponent - 52
2100 opsel_r <= RES_SHIFT;
2101 set_x := '1';
2102 v.shift := to_signed(-2, EXP_BITS);
2103 v.state := ROUNDING;
2104
2105 when FINISH =>
2106 if r.is_multiply = '1' and px_nz = '1' then
2107 v.x := '1';
2108 end if;
2109 if r.r(63 downto 54) /= "0000000001" then
2110 renormalize := '1';
2111 v.state := NORMALIZE;
2112 else
2113 set_x := '1';
2114 if exp_tiny = '1' then
2115 v.shift := new_exp - min_exp;
2116 v.state := ROUND_UFLOW;
2117 elsif exp_huge = '1' then
2118 v.state := ROUND_OFLOW;
2119 else
2120 v.shift := to_signed(-2, EXP_BITS);
2121 v.state := ROUNDING;
2122 end if;
2123 end if;
2124
2125 when NORMALIZE =>
2126 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2127 -- r.shift = clz(r.r) - 9
2128 opsel_r <= RES_SHIFT;
2129 set_x := '1';
2130 if exp_tiny = '1' then
2131 v.shift := new_exp - min_exp;
2132 v.state := ROUND_UFLOW;
2133 elsif exp_huge = '1' then
2134 v.state := ROUND_OFLOW;
2135 else
2136 v.shift := to_signed(-2, EXP_BITS);
2137 v.state := ROUNDING;
2138 end if;
2139
2140 when ROUND_UFLOW =>
2141 -- r.shift = - amount by which exponent underflows
2142 v.tiny := '1';
2143 if r.fpscr(FPSCR_UE) = '0' then
2144 -- disabled underflow exception case
2145 -- have to denormalize before rounding
2146 opsel_r <= RES_SHIFT;
2147 set_x := '1';
2148 v.shift := to_signed(-2, EXP_BITS);
2149 v.state := ROUNDING;
2150 else
2151 -- enabled underflow exception case
2152 -- if denormalized, have to normalize before rounding
2153 v.fpscr(FPSCR_UX) := '1';
2154 v.result_exp := r.result_exp + bias_exp;
2155 if r.r(54) = '0' then
2156 renormalize := '1';
2157 v.state := NORMALIZE;
2158 else
2159 v.shift := to_signed(-2, EXP_BITS);
2160 v.state := ROUNDING;
2161 end if;
2162 end if;
2163
2164 when ROUND_OFLOW =>
-- Exponent overflow. OX is always set; what happens next depends on
-- whether the overflow exception is enabled (FPSCR OE).
2165 v.fpscr(FPSCR_OX) := '1';
2166 if r.fpscr(FPSCR_OE) = '0' then
2167 -- disabled overflow exception
2168 -- result depends on rounding mode
2169 v.fpscr(FPSCR_XX) := '1';
2170 v.fpscr(FPSCR_FI) := '1';
-- Infinity is produced when the low two round_mode bits are "00", or
-- when bit 1 is set and bit 0 equals the result sign. Presumably this
-- is round-to-nearest and rounding toward the infinity of the same
-- sign, following the Power ISA RN encoding; confirm against where
-- r.round_mode is loaded.
2171 if r.round_mode(1 downto 0) = "00" or
2172 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2173 v.result_class := INFINITY;
2174 v.fpscr(FPSCR_FR) := '1';
2175 else
2176 v.fpscr(FPSCR_FR) := '0';
2177 end if;
2178 -- construct largest representable number
-- Done unconditionally, also when the class was just set to INFINITY.
-- misc_sel "0010" / "0011" pick the max DP / max SP mantissa.
2179 v.result_exp := max_exp;
2180 opsel_r <= RES_MISC;
2181 misc_sel <= "001" & r.single_prec;
2182 arith_done := '1';
2183 else
2184 -- enabled overflow exception
-- Bring the exponent back by bias_exp and round normally.
2185 v.result_exp := r.result_exp - bias_exp;
2186 v.shift := to_signed(-2, EXP_BITS);
2187 v.state := ROUNDING;
2188 end if;
2189
2190 when ROUNDING =>
-- r.shift = -2 on every entry path visible in this part of the file
-- (FRI_1, FINISH, NORMALIZE, ROUND_UFLOW, ROUND_OFLOW); the data path
-- derives `mask` from it.
-- opsel_amask makes the adder's A input (in_a0 and not mask), i.e. the
-- bits covered by the mask are cleared.
2191 opsel_amask <= '1';
2192 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2193 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2194 if round(1) = '1' then
2195 -- set mask to increment the LSB for the precision
-- (A and not mask) + mask + 1 adds one at the first bit position
-- above the mask.
2196 opsel_b <= BIN_MASK;
2197 carry_in <= '1';
-- ROUNDING_2 expects r.shift = -1 in case the increment overflowed.
2198 v.shift := to_signed(-1, EXP_BITS);
2199 v.state := ROUNDING_2;
2200 else
2201 if r.r(54) = '0' then
2202 -- result after masking could be zero, or could be a
2203 -- denormalized result that needs to be renormalized
2204 renormalize := '1';
2205 v.state := ROUNDING_3;
2206 else
2207 arith_done := '1';
2208 end if;
2209 end if;
-- round(0) is the bit stored into FPSCR FI above: the result is
-- inexact, and additionally counts as underflow if r.tiny was set by
-- ROUND_UFLOW.
2210 if round(0) = '1' then
2211 v.fpscr(FPSCR_XX) := '1';
2212 if r.tiny = '1' then
2213 v.fpscr(FPSCR_UX) := '1';
2214 end if;
2215 end if;
2216
2217 when ROUNDING_2 =>
2218 -- Check for overflow during rounding
2219 -- r.shift = -1
2220 v.x := '0';
2221 if r.r(55) = '1' then
2222 opsel_r <= RES_SHIFT;
2223 if exp_huge = '1' then
2224 v.state := ROUND_OFLOW;
2225 else
2226 arith_done := '1';
2227 end if;
2228 elsif r.r(54) = '0' then
2229 -- Do CLZ so we can renormalize the result
2230 renormalize := '1';
2231 v.state := ROUNDING_3;
2232 else
2233 arith_done := '1';
2234 end if;
2235
2236 when ROUNDING_3 =>
2237 -- r.shift = clz(r.r) - 9
2238 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2239 if mant_nz = '0' then
2240 v.result_class := ZERO;
2241 if r.is_subtract = '1' then
2242 -- set result sign depending on rounding mode
2243 v.result_sign := r.round_mode(1) and r.round_mode(0);
2244 end if;
2245 arith_done := '1';
2246 else
2247 -- Renormalize result after rounding
2248 opsel_r <= RES_SHIFT;
2249 v.denorm := exp_tiny;
2250 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2251 if new_exp < to_signed(-1022, EXP_BITS) then
2252 v.state := DENORM;
2253 else
2254 arith_done := '1';
2255 end if;
2256 end if;
2257
2258 when DENORM =>
2259 -- r.shift = result_exp - -1022
2260 opsel_r <= RES_SHIFT;
2261 arith_done := '1';
2262
2263 end case;
2264
2265 if zero_divide = '1' then
2266 v.fpscr(FPSCR_ZX) := '1';
2267 end if;
2268 if qnan_result = '1' then
2269 invalid := '1';
2270 v.result_class := NAN;
2271 v.result_sign := '0';
2272 misc_sel <= "0001";
2273 opsel_r <= RES_MISC;
2274 end if;
2275 if arith_done = '1' then
2276 -- Enabled invalid exception doesn't write result or FPRF
2277 -- Neither does enabled zero-divide exception
2278 if (invalid and r.fpscr(FPSCR_VE)) = '0' and
2279 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2280 v.writing_back := '1';
2281 v.update_fprf := '1';
2282 end if;
2283 v.instr_done := '1';
2284 v.state := IDLE;
2285 update_fx := '1';
2286 end if;
2287
2288 -- Multiplier and divide/square root data path
-- Operand 1 select. A, B and R are presented shifted left two bits
-- (bits 61..0 followed by "00"); Y is passed through unshifted.
2289 case msel_1 is
2290 when MUL1_A =>
2291 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2292 when MUL1_B =>
2293 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2294 when MUL1_Y =>
2295 f_to_multiply.data1 <= r.y;
2296 when others =>
2297 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2298 end case;
-- Operand 2 select. C and R get the same two-bit left shift, P is
-- passed through unshifted, and the lookup-table estimate is placed
-- below eight leading zero bits.
2299 case msel_2 is
2300 when MUL2_C =>
2301 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2302 when MUL2_LUT =>
2303 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2304 when MUL2_P =>
2305 f_to_multiply.data2 <= r.p;
2306 when others =>
2307 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2308 end case;
-- Addend select; stays zero unless one of the cases below fills it in.
2309 maddend := (others => '0');
2310 case msel_add is
2311 when MULADD_CONST =>
2312 -- addend is 2.0 or 1.5 in 16.112 format
2313 if r.is_sqrt = '0' then
2314 maddend(113) := '1'; -- 2.0
2315 else
2316 maddend(112 downto 111) := "11"; -- 1.5
2317 end if;
2318 when MULADD_A =>
2319 -- addend is A in 16.112 format
2320 maddend(121 downto 58) := r.a.mantissa;
2321 when MULADD_RS =>
2322 -- addend is concatenation of R and S in 16.112 format
2323 maddend := "000000" & r.r & r.s & "00";
2324 when others =>
2325 end case;
-- msel_inv sends the bitwise complement of the addend and also raises
-- not_result. Assuming the multiplier then returns
-- not (product + addend), the net effect is addend - product, which
-- matches the "A - B * R" / "1.5 - Y * P" comments in the states that
-- set msel_inv. The multiplier is defined elsewhere; confirm.
2326 if msel_inv = '1' then
2327 f_to_multiply.addend <= not maddend;
2328 else
2329 f_to_multiply.addend <= maddend;
2330 end if;
2331 f_to_multiply.not_result <= msel_inv;
-- Y is loaded from whatever is currently selected as operand 2.
2332 if set_y = '1' then
2333 v.y := f_to_multiply.data2;
2334 end if;
-- Capture a 64-bit window of the multiplier result into P when it is
-- valid: bits 119..56 when pshift is set, otherwise the low 64 bits.
2335 if multiply_to_f.valid = '1' then
2336 if pshift = '0' then
2337 v.p := multiply_to_f.result(63 downto 0);
2338 else
2339 v.p := multiply_to_f.result(119 downto 56);
2340 end if;
2341 end if;
2342
2343 -- Data path.
2344 -- This has A and B input multiplexers, an adder, a shifter,
2345 -- count-leading-zeroes logic, and a result mux.
2346 if longmask = '1' then
-- Widen the mask by 29 more bit positions. longmask is set outside
-- this chunk; presumably it is the single-precision case, since
-- 52 - 23 = 29 (confirm).
2347 mshift := r.shift + to_signed(-29, EXP_BITS);
2348 else
2349 mshift := r.shift;
2350 end if;
-- Saturate at both ends: everything masked below -64, nothing masked
-- for a zero or positive count. In between, right_mask (defined
-- elsewhere) builds the mask from the low six bits of mshift.
-- NOTE(review): mshift = -64 takes the right_mask branch with low bits
-- "000000"; confirm right_mask gives the intended value there.
2351 if mshift < to_signed(-64, EXP_BITS) then
2352 mask := (others => '1');
2353 elsif mshift >= to_signed(0, EXP_BITS) then
2354 mask := (others => '0');
2355 else
2356 mask := right_mask(unsigned(mshift(5 downto 0)));
2357 end if;
2358 case opsel_a is
2359 when AIN_R =>
2360 in_a0 := r.r;
2361 when AIN_A =>
2362 in_a0 := r.a.mantissa;
2363 when AIN_B =>
2364 in_a0 := r.b.mantissa;
2365 when others =>
2366 in_a0 := r.c.mantissa;
2367 end case;
2368 if (or (mask and in_a0)) = '1' and set_x = '1' then
2369 v.x := '1';
2370 end if;
2371 if opsel_ainv = '1' then
2372 in_a0 := not in_a0;
2373 end if;
2374 if opsel_amask = '1' then
2375 in_a0 := in_a0 and not mask;
2376 end if;
2377 in_a <= in_a0;
2378 case opsel_b is
2379 when BIN_ZERO =>
2380 in_b0 := (others => '0');
2381 when BIN_R =>
2382 in_b0 := r.r;
2383 when BIN_MASK =>
2384 in_b0 := mask;
2385 when others =>
2386 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2387 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2388 end case;
2389 if opsel_binv = '1' then
2390 in_b0 := not in_b0;
2391 end if;
2392 in_b <= in_b0;
2393 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2394 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2395 std_ulogic_vector(r.shift(6 downto 0)));
2396 else
2397 shift_res := (others => '0');
2398 end if;
2399 case opsel_r is
2400 when RES_SUM =>
2401 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2402 when RES_SHIFT =>
2403 result <= shift_res;
2404 when RES_MULT =>
2405 result <= multiply_to_f.result(121 downto 58);
2406 when others =>
2407 case misc_sel is
2408 when "0000" =>
2409 misc := x"00000000" & (r.fpscr and fpscr_mask);
2410 when "0001" =>
2411 -- generated QNaN mantissa
2412 misc := x"0020000000000000";
2413 when "0010" =>
2414 -- mantissa of max representable DP number
2415 misc := x"007ffffffffffffc";
2416 when "0011" =>
2417 -- mantissa of max representable SP number
2418 misc := x"007fffff80000000";
2419 when "0100" =>
2420 -- fmrgow result
2421 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2422 when "0110" =>
2423 -- fmrgew result
2424 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2425 when "0111" =>
2426 misc := 10x"000" & inverse_est & 35x"000000000";
2427 when "1000" =>
2428 -- max positive result for fctiw[z]
2429 misc := x"000000007fffffff";
2430 when "1001" =>
2431 -- max negative result for fctiw[z]
2432 misc := x"ffffffff80000000";
2433 when "1010" =>
2434 -- max positive result for fctiwu[z]
2435 misc := x"00000000ffffffff";
2436 when "1011" =>
2437 -- max negative result for fctiwu[z]
2438 misc := x"0000000000000000";
2439 when "1100" =>
2440 -- max positive result for fctid[z]
2441 misc := x"7fffffffffffffff";
2442 when "1101" =>
2443 -- max negative result for fctid[z]
2444 misc := x"8000000000000000";
2445 when "1110" =>
2446 -- max positive result for fctidu[z]
2447 misc := x"ffffffffffffffff";
2448 when "1111" =>
2449 -- max negative result for fctidu[z]
2450 misc := x"0000000000000000";
2451 when others =>
2452 misc := x"0000000000000000";
2453 end case;
2454 result <= misc;
2455 end case;
2456 v.r := result;
2457 if set_s = '1' then
2458 case opsel_s is
2459 when S_NEG =>
2460 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2461 when S_MULT =>
2462 v.s := multiply_to_f.result(57 downto 2);
2463 when S_SHIFT =>
2464 v.s := shift_res(63 downto 8);
2465 if shift_res(7 downto 0) /= x"00" then
2466 v.x := '1';
2467 end if;
2468 when others =>
2469 v.s := (others => '0');
2470 end case;
2471 end if;
2472
2473 if set_a = '1' then
2474 v.a.exponent := new_exp;
2475 v.a.mantissa := shift_res;
2476 end if;
2477 if set_b = '1' then
2478 v.b.exponent := new_exp;
2479 v.b.mantissa := shift_res;
2480 end if;
2481 if set_c = '1' then
2482 v.c.exponent := new_exp;
2483 v.c.mantissa := shift_res;
2484 end if;
2485
2486 if opsel_r = RES_SHIFT then
2487 v.result_exp := new_exp;
2488 end if;
2489
2490 if renormalize = '1' then
2491 clz := count_left_zeroes(r.r);
2492 if renorm_sqrt = '1' then
2493 -- make denormalized value end up with even exponent
2494 clz(0) := '1';
2495 end if;
2496 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2497 end if;
2498
2499 if r.int_result = '1' then
2500 fp_result <= r.r;
2501 else
2502 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2503 r.single_prec, r.quieten_nan);
2504 end if;
2505 if r.update_fprf = '1' then
2506 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2507 r.r(54) and not r.denorm);
2508 end if;
2509
-- Recompute the FPSCR summary bits from the next-state value v.fpscr.
-- VX is the OR of the two ranges of individual invalid-operation bits.
2510 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2511 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
-- FEX is set when any of the five exception bits VX..XX is set together
-- with the enable bit at the same position in VE..XE.
2512 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2513 v.fpscr(FPSCR_VE downto FPSCR_XE));
-- FX is only raised (never cleared here) when the instruction asked for
-- it via update_fx and some exception bit is set that was not set in
-- r.old_exc.
2514 if update_fx = '1' and
2515 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2516 v.fpscr(FPSCR_FX) := '1';
2517 end if;
-- Record-form instructions return the FPSCR bits FX..OX as the CR field.
2518 if r.rc = '1' then
2519 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2520 end if;
2521
2522 if illegal = '1' then
-- Illegal instruction: cancel completion, interrupt and writeback and
-- return to IDLE.
2523 v.instr_done := '0';
2524 v.do_intr := '0';
2525 v.writing_back := '0';
2526 v.busy := '0';
2527 v.state := IDLE;
2528 else
-- Request an interrupt when the instruction completes with FEX set and
-- r.fe_mode is on.
2529 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
-- Busy while the state machine is leaving or staying out of IDLE, or
-- while an interrupt is being requested. This block only sets busy;
-- any clearing apart from the illegal path happens outside this chunk.
2530 if v.state /= IDLE or v.do_intr = '1' then
2531 v.busy := '1';
2532 end if;
2533 end if;
2534
2535 rin <= v;
2536 e_out.illegal <= illegal;
2537 end process;
2538
2539 end architecture behaviour;