core: Restore bypass path from execute1
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Port interface of the floating-point unit.
entity fpu is
    port (
        -- Clock, and reset (reset is tested inside the clocked process)
        clk   : in  std_ulogic;
        rst   : in  std_ulogic;

        -- Instruction and operands in; busy/exception/interrupt status out
        e_in  : in  Execute1toFPUType;
        e_out : out FPUToExecute1Type;

        -- Register and CR result out
        w_out : out FPUToWritebackType
    );
end entity fpu;
25
26 architecture behaviour of fpu is
-- Classification of an unpacked operand or result
type fp_number_class is (ZERO, FINITE, INFINITY, NAN);

-- Width in bits of the internal signed exponent fields
constant EXP_BITS : natural := 13;

-- Unpacked form of a register operand (produced by decode_dp)
type fpu_reg_type is record
    class    : fp_number_class;
    negative : std_ulogic;
    exponent : signed(EXP_BITS-1 downto 0);     -- unbiased
    mantissa : std_ulogic_vector(63 downto 0);  -- 10.54 format
end record;
37
-- States of the FPU state machine.  The order of the literals is
-- unchanged; they are only laid out in groups by name prefix.
type state_t is (
    IDLE,
    -- DO_* states
    DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
    DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
    DO_FCFID, DO_FCTI,
    DO_FRSP, DO_FRI,
    DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
    DO_FRE, DO_FRSQRTE,
    DO_FSEL,
    -- FRI / ADD / CMP / MULT / FMADD states
    FRI_1,
    ADD_1, ADD_SHIFT, ADD_2, ADD_3,
    CMP_1, CMP_2,
    MULT_1,
    FMADD_1, FMADD_2, FMADD_3,
    FMADD_4, FMADD_5, FMADD_6,
    -- LOOKUP / DIV / FRE / RSQRT / FTDIV / SQRT states
    LOOKUP,
    DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
    FRE_1,
    RSQRT_1,
    FTDIV_1,
    SQRT_1, SQRT_2, SQRT_3, SQRT_4,
    SQRT_5, SQRT_6, SQRT_7, SQRT_8,
    SQRT_9, SQRT_10, SQRT_11, SQRT_12,
    -- INT_* states
    INT_SHIFT, INT_ROUND, INT_ISHIFT,
    INT_FINAL, INT_CHECK, INT_OFLOW,
    -- FINISH / NORMALIZE / ROUND* / DENORM / RENORM_* states
    FINISH, NORMALIZE,
    ROUND_UFLOW, ROUND_OFLOW,
    ROUNDING, ROUNDING_2, ROUNDING_3,
    DENORM,
    RENORM_A, RENORM_A2,
    RENORM_B, RENORM_B2,
    RENORM_C, RENORM_C2,
    -- NAN_RESULT / EXC_RESULT states
    NAN_RESULT, EXC_RESULT
);
70
-- All registered state of the FPU (signal r holds the current value,
-- rin the value to be loaded on the next clock edge).
-- Field order is unchanged from the original declaration.
type reg_type is record
    -- state machine and handshake
    state        : state_t;
    busy         : std_ulogic;                      -- drives e_out.busy
    instr_done   : std_ulogic;                      -- gates w_out.valid
    do_intr      : std_ulogic;                      -- drives e_out.interrupt

    -- values captured from e_in when an instruction is accepted
    op           : insn_type_t;
    insn         : std_ulogic_vector(31 downto 0);
    instr_tag    : instr_tag_t;
    dest_fpr     : gspr_index_t;
    fe_mode      : std_ulogic;
    rc           : std_ulogic;
    is_cmp       : std_ulogic;
    single_prec  : std_ulogic;

    fpscr        : std_ulogic_vector(31 downto 0);

    -- unpacked operands
    a            : fpu_reg_type;
    b            : fpu_reg_type;
    c            : fpu_reg_type;

    -- working registers
    r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
    s            : std_ulogic_vector(55 downto 0);  -- extended fraction
    x            : std_ulogic;
    p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
    y            : std_ulogic_vector(63 downto 0);  -- 8.56 format

    -- result description
    result_sign  : std_ulogic;
    result_class : fp_number_class;
    result_exp   : signed(EXP_BITS-1 downto 0);
    shift        : signed(EXP_BITS-1 downto 0);
    writing_back : std_ulogic;                      -- drives w_out.write_enable
    int_result   : std_ulogic;
    cr_result    : std_ulogic_vector(3 downto 0);
    cr_mask      : std_ulogic_vector(7 downto 0);

    -- bookkeeping and control flags
    old_exc      : std_ulogic_vector(4 downto 0);
    update_fprf  : std_ulogic;
    quieten_nan  : std_ulogic;
    tiny         : std_ulogic;
    denorm       : std_ulogic;
    round_mode   : std_ulogic_vector(2 downto 0);
    is_subtract  : std_ulogic;
    exp_cmp      : std_ulogic;                      -- set when a.exponent > b.exponent
    madd_cmp     : std_ulogic;
    add_bsmall   : std_ulogic;
    is_multiply  : std_ulogic;
    is_sqrt      : std_ulogic;                      -- selects the lookup-table block
    first        : std_ulogic;
    count        : unsigned(1 downto 0);
    doing_ftdiv  : std_ulogic_vector(1 downto 0);
    opsel_a      : std_ulogic_vector(1 downto 0);
    use_a        : std_ulogic;
    use_b        : std_ulogic;
    use_c        : std_ulogic;
    invalid      : std_ulogic;
    negate       : std_ulogic;
    longmask     : std_ulogic;
end record;
124
-- Storage type of the inverse-estimate table: 1024 words of 18 bits
type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);

-- Current and next register state
signal r, rin        : reg_type;

-- Data written back through w_out.write_data
signal fp_result     : std_ulogic_vector(63 downto 0);

-- Operand/result selectors (see the AIN_*, BIN_*, RES_*, S_* constants)
signal opsel_b       : std_ulogic_vector(1 downto 0);
signal opsel_r       : std_ulogic_vector(1 downto 0);
signal opsel_s       : std_ulogic_vector(1 downto 0);
signal opsel_ainv    : std_ulogic;
signal opsel_mask    : std_ulogic;
signal opsel_binv    : std_ulogic;

signal in_a          : std_ulogic_vector(63 downto 0);
signal in_b          : std_ulogic_vector(63 downto 0);
signal result        : std_ulogic_vector(63 downto 0);
signal carry_in      : std_ulogic;
signal lost_bits     : std_ulogic;

-- Non-zero detectors on slices of r.r and on r.s
signal r_hi_nz       : std_ulogic;
signal r_lo_nz       : std_ulogic;
signal s_nz          : std_ulogic;

signal misc_sel      : std_ulogic_vector(3 downto 0);

-- Connections to the multiplier instance and its input selectors
-- (see the MUL1_*, MUL2_*, MULADD_* constants)
signal f_to_multiply : MultiplyInputType;
signal multiply_to_f : MultiplyOutputType;
signal msel_1        : std_ulogic_vector(1 downto 0);
signal msel_2        : std_ulogic_vector(1 downto 0);
signal msel_add      : std_ulogic_vector(1 downto 0);
signal msel_inv      : std_ulogic;

-- Registered lookup-table output with the implied leading 1 restored
signal inverse_est   : std_ulogic_vector(18 downto 0);
152
-- opsel values

-- values for r.opsel_a
constant AIN_R        : std_ulogic_vector(1 downto 0) := "00";
constant AIN_A        : std_ulogic_vector(1 downto 0) := "01";
constant AIN_B        : std_ulogic_vector(1 downto 0) := "10";
constant AIN_C        : std_ulogic_vector(1 downto 0) := "11";

-- values for opsel_b
constant BIN_ZERO     : std_ulogic_vector(1 downto 0) := "00";
constant BIN_R        : std_ulogic_vector(1 downto 0) := "01";
constant BIN_RND      : std_ulogic_vector(1 downto 0) := "10";
constant BIN_PS6      : std_ulogic_vector(1 downto 0) := "11";

-- values for opsel_r
constant RES_SUM      : std_ulogic_vector(1 downto 0) := "00";
constant RES_SHIFT    : std_ulogic_vector(1 downto 0) := "01";
constant RES_MULT     : std_ulogic_vector(1 downto 0) := "10";
constant RES_MISC     : std_ulogic_vector(1 downto 0) := "11";

-- values for opsel_s
constant S_ZERO       : std_ulogic_vector(1 downto 0) := "00";
constant S_NEG        : std_ulogic_vector(1 downto 0) := "01";
constant S_SHIFT      : std_ulogic_vector(1 downto 0) := "10";
constant S_MULT       : std_ulogic_vector(1 downto 0) := "11";

-- msel values

-- values for msel_1
constant MUL1_A       : std_ulogic_vector(1 downto 0) := "00";
constant MUL1_B       : std_ulogic_vector(1 downto 0) := "01";
constant MUL1_Y       : std_ulogic_vector(1 downto 0) := "10";
constant MUL1_R       : std_ulogic_vector(1 downto 0) := "11";

-- values for msel_2
constant MUL2_C       : std_ulogic_vector(1 downto 0) := "00";
constant MUL2_LUT     : std_ulogic_vector(1 downto 0) := "01";
constant MUL2_P       : std_ulogic_vector(1 downto 0) := "10";
constant MUL2_R       : std_ulogic_vector(1 downto 0) := "11";

-- values for msel_add
constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";
constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
189
-- Inverse lookup table, read with a 10-bit address: a 2-bit block
-- select followed by the top 8 fraction bits of the operand.
-- The first 256 entries are the reciprocal (1/x) lookup table,
-- and the remaining 768 entries are the reciprocal square root table.
-- Output range is [0.5, 1) in 0.19 format, though the top
-- bit isn't stored since it is always 1.
-- Each output value is the inverse of the center of the input
-- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
-- entry 1 is 1 / (1 + 3/512), etc.
198 signal inverse_table : lookup_table := (
199 -- 1/x lookup table
200 -- Unit bit is assumed to be 1, so input range is [1, 2)
201 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
202 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
203 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
204 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
205 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
206 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
207 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
208 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
209 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
210 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
211 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
212 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
213 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
214 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
215 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
216 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
217 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
218 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
219 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
220 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
221 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
222 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
223 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
224 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
225 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
226 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
227 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
228 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
229 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
230 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
231 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
232 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
233 -- 1/sqrt(x) lookup table
234 -- Input is in the range [1, 4), i.e. two bits to the left of the
235 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
236 -- 1.0 ... 1.9999
237 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
238 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
239 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
240 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
241 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
242 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
243 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
244 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
245 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
246 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
247 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
248 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
249 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
250 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
251 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
252 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
253 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
254 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
255 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
256 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
257 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
258 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
259 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
260 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
261 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
262 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
263 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
264 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
265 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
266 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
267 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
268 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
269 -- 2.0 ... 2.9999
270 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
271 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
272 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
273 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
274 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
275 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
276 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
277 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
278 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
279 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
280 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
281 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
282 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
283 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
284 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
285 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
286 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
287 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
288 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
289 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
290 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
291 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
292 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
293 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
294 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
295 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
296 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
297 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
298 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
299 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
300 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
301 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
302 -- 3.0 ... 3.9999
303 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
304 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
305 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
306 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
307 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
308 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
309 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
310 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
311 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
312 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
313 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
314 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
315 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
316 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
317 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
318 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
319 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
320 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
321 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
322 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
323 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
324 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
325 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
326 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
327 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
328 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
329 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
330 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
331 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
332 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
333 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
334 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
335 );
336
-- Combined left/right shifter: 120-bit input, 64-bit output.
-- The result is the upper 64 bits of inp shifted left by `shift`,
-- where `shift` is a 7-bit two's-complement count in -64..63.
-- A negative count shifts right, with zeros entering from the left.
-- The shift is applied in three stages, selected by shift bits
-- 6:5 (multiples of 32), 4:3 (multiples of 8) and 2:0 (single bits).
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    variable coarse : std_ulogic_vector(94 downto 0);
    variable medium : std_ulogic_vector(70 downto 0);
    variable fine   : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: shift by 0, +32, -64 or -32, keeping a 95-bit window
    if shift(6 downto 5) = "00" then
        coarse := inp(119 downto 25);
    elsif shift(6 downto 5) = "01" then
        coarse := inp(87 downto 0) & "0000000";
    elsif shift(6 downto 5) = "10" then
        coarse := x"0000000000000000" & inp(119 downto 89);
    else
        coarse := x"00000000" & inp(119 downto 57);
    end if;

    -- Stage 2: shift left by 0, 8 or 16; any other selector value
    -- keeps the default, which is the shift-by-24 window
    medium := coarse(70 downto 0);
    for n in 0 to 2 loop
        if shift(4 downto 3) = std_ulogic_vector(to_unsigned(n, 2)) then
            medium := coarse(94 - 8 * n downto 24 - 8 * n);
        end if;
    end loop;

    -- Stage 3: shift left by 0..6; any other selector value keeps
    -- the default, which is the shift-by-7 window
    fine := medium(63 downto 0);
    for n in 0 to 6 loop
        if shift(2 downto 0) = std_ulogic_vector(to_unsigned(n, 3)) then
            fine := medium(70 - n downto 7 - n);
        end if;
    end loop;

    return fine;
end;
389
-- Build a 64-bit mask with 0-bits on the left and 1-bits on the right
-- that selects the bits which will be lost in a right shift.
-- `shift` is the bottom 6 bits of a negative shift count (i.e. a
-- right shift); the mask has (64 - shift) one-bits at the low end.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0);
begin
    for bitpos in 63 downto 0 loop
        -- bit position p is set when (63 - p) >= shift
        if shift <= (63 - bitpos) then
            mask(bitpos) := '1';
        else
            mask(bitpos) := '0';
        end if;
    end loop;
    return mask;
end;
405
-- Unpack a 64-bit register value into sign, exponent, mantissa and class.
-- With is_int = '0' the value is treated as a double-precision number:
-- the exponent is unbiased (a zero exponent field decodes as -1022)
-- and the mantissa is laid out as 9 zero bits, the implied unit bit,
-- the 52 fraction bits and 2 zero bits.
-- Otherwise the value is treated as an integer: the mantissa is the
-- raw value, the exponent is 0 and the class is ZERO or FINITE.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;    -- some exponent bit is set
    variable exp_all  : std_ulogic;    -- every exponent bit is set
    variable frac_any : std_ulogic;    -- some fraction bit is set
    variable selector : std_ulogic_vector(2 downto 0);
begin
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));
    dec.negative := fpr(63);

    -- Integer operand: no field extraction needed
    if is_int /= '0' then
        dec.exponent := (others => '0');
        dec.mantissa := fpr;
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
        return dec;
    end if;

    -- Floating-point operand
    if exp_any = '0' then
        -- zero exponent field (zero or denormalized value)
        dec.exponent := to_signed(-1022, EXP_BITS);
    else
        dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
    end if;
    dec.mantissa := "000000000" & exp_any & fpr(51 downto 0) & "00";

    selector := exp_all & exp_any & frac_any;
    case selector is
        when "000" =>
            dec.class := ZERO;
        when "001" | "010" | "011" =>
            -- "001" is a denormalized number
            dec.class := FINITE;
        when "110" =>
            dec.class := INFINITY;
        when others =>
            dec.class := NAN;
    end case;
    return dec;
end;
445
-- Assemble a 64-bit double-precision image from its components.
--   sign         goes to bit 63
--   class        selects how the exponent and fraction fields are filled
--   exp          unbiased exponent, used only for normalized FINITE values
--   mantissa     fraction is taken from bits 53..2 (bit 54 is the unit bit)
--   single_prec  when not '0', the low 29 fraction bits are left at zero
--   quieten_nan  ORed into the top fraction bit of a NaN result
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    constant EXP_ALL_ONES : std_ulogic_vector(10 downto 0) := "11111111111";
    variable packed       : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    packed(63) := sign;

    -- Exponent field: biased exponent for normalized numbers, all ones
    -- for infinities and NaNs, zero otherwise (zero and denormals)
    if class = FINITE then
        if mantissa(54) = '1' then
            packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
        end if;
    elsif class = INFINITY or class = NAN then
        packed(62 downto 52) := EXP_ALL_ONES;
    end if;

    -- Fraction field: only finite numbers and NaNs carry one
    if class = FINITE or class = NAN then
        packed(51 downto 29) := mantissa(53 downto 31);
        if single_prec = '0' then
            packed(28 downto 0) := mantissa(30 downto 2);
        end if;
        if class = NAN then
            packed(51) := quieten_nan or mantissa(53);
        end if;
    end if;

    return packed;
end;
477
-- Decide whether rounding must increment the mantissa.
-- Returns a 2-bit vector: bit 1 = increment, bit 0 = inexact.
-- The two mantissa bits just below the result LSB, followed by x, are
-- examined; for single precision x is assumed to already include the
-- bottom 29 bits of the mantissa (usually arranged by setting
-- set_x = 1 earlier).
-- rn(1 downto 0) selects the mode: "00" nearest, "01" towards zero,
-- otherwise towards +/- infinity, incrementing when rn(0) = sign.
-- In nearest mode an exact tie takes the LSB (round to even) only
-- when rn(2) = '0'.
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable below    : std_ulogic_vector(2 downto 0);  -- two bits under the LSB, then x
    variable keep_lsb : std_ulogic;                     -- lowest bit that is kept
    variable inexact  : std_ulogic;
    variable bump     : std_ulogic := '0';
    variable ret      : std_ulogic_vector(1 downto 0);
begin
    if single_prec /= '0' then
        keep_lsb := mantissa(31);
        below := mantissa(30 downto 29) & x;
    else
        keep_lsb := mantissa(2);
        below := mantissa(1 downto 0) & x;
    end if;
    inexact := or (below);

    if rn(1 downto 0) = "00" then
        -- round to nearest
        if below = "100" and rn(2) = '0' then
            bump := keep_lsb;       -- tie, round to even
        else
            bump := below(2);
        end if;
    elsif rn(1 downto 0) /= "01" then
        -- round towards +/- inf ("01", round towards zero, never increments)
        if rn(0) = sign then
            -- round towards greater magnitude
            bump := inexact;
        end if;
    end if;

    ret := bump & inexact;
    return ret;
end;
515
-- Work out the 5-bit result-flags value to write into the FPSCR for a
-- result of the given sign and class.  unitbit is only used for
-- FINITE results, where its complement becomes the first flag.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
    variable flags : std_ulogic_vector(0 to 4);
begin
    -- Elements are listed leftmost flag first
    case class is
        when NAN =>
            flags := "10001";
        when INFINITY =>
            flags := ('0', sign, not sign, '0', '1');
        when FINITE =>
            flags := (not unitbit, sign, not sign, '0', '0');
        when ZERO =>
            flags := (sign, '0', '0', '1', '0');
    end case;
    return flags;
end;
531
532 begin
-- Multiplier instance; fed through f_to_multiply, result in multiply_to_f
fpu_multiply_0: entity work.multiply
    port map (clk   => clk,
              m_in  => f_to_multiply,
              m_out => multiply_to_f);
539
-- State register: loads rin into r on every rising clock edge, or
-- forces the control fields to their idle values while rst is '1'.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            -- A valid instruction must only arrive while the FPU is idle
            assert (r.state = IDLE) or (e_in.valid /= '1') severity failure;
            r <= rin;
        else
            -- Synchronous reset; only these fields are reset
            r.writing_back <= '0';
            r.fpscr <= (others => '0');
            r.do_intr <= '0';
            r.instr_done <= '0';
            r.busy <= '0';
            r.state <= IDLE;
        end if;
    end if;
end process;
556
-- Synchronous read of the inverse-estimate table.
-- The address is a 2-bit block select followed by mantissa bits 53..46
-- of operand B.  The block select is mantissa bits 55..54 when
-- r.is_sqrt = '1', and "00" otherwise.  The stored 18 bits are
-- extended with a leading '1' to form the 19-bit estimate.
lut_access: process(clk)
    variable block_sel : std_ulogic_vector(1 downto 0);
    variable index     : std_ulogic_vector(9 downto 0);
begin
    if rising_edge(clk) then
        block_sel := "00";
        if r.is_sqrt = '1' then
            block_sel := r.b.mantissa(55 downto 54);
        end if;
        index := block_sel & r.b.mantissa(53 downto 46);
        inverse_est <= '1' & inverse_table(to_integer(unsigned(index)));
    end if;
end process;
572
-- Status outputs on e_out
e_out.busy      <= r.busy;
e_out.exception <= r.fpscr(FPSCR_FEX);
e_out.interrupt <= r.do_intr;

-- Result outputs on w_out; valid is suppressed when an interrupt is taken
w_out.valid           <= r.instr_done and not r.do_intr;
w_out.instr_tag       <= r.instr_tag;
w_out.write_enable    <= r.writing_back;
w_out.write_reg       <= r.dest_fpr;
w_out.write_data      <= fp_result;

-- CR update: the 4-bit result is replicated into all eight CR fields
-- and write_cr_mask is driven from r.cr_mask
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_data   <= r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result;
586
587 fpu_1: process(all)
588 variable v : reg_type;
589 variable adec : fpu_reg_type;
590 variable bdec : fpu_reg_type;
591 variable cdec : fpu_reg_type;
592 variable fpscr_mask : std_ulogic_vector(31 downto 0);
593 variable illegal : std_ulogic;
594 variable j, k : integer;
595 variable flm : std_ulogic_vector(7 downto 0);
596 variable int_input : std_ulogic;
597 variable mask : std_ulogic_vector(63 downto 0);
598 variable in_a0 : std_ulogic_vector(63 downto 0);
599 variable in_b0 : std_ulogic_vector(63 downto 0);
600 variable misc : std_ulogic_vector(63 downto 0);
601 variable shift_res : std_ulogic_vector(63 downto 0);
602 variable round : std_ulogic_vector(1 downto 0);
603 variable update_fx : std_ulogic;
604 variable arith_done : std_ulogic;
605 variable invalid : std_ulogic;
606 variable zero_divide : std_ulogic;
607 variable mant_nz : std_ulogic;
608 variable min_exp : signed(EXP_BITS-1 downto 0);
609 variable max_exp : signed(EXP_BITS-1 downto 0);
610 variable bias_exp : signed(EXP_BITS-1 downto 0);
611 variable new_exp : signed(EXP_BITS-1 downto 0);
612 variable exp_tiny : std_ulogic;
613 variable exp_huge : std_ulogic;
614 variable renormalize : std_ulogic;
615 variable clz : std_ulogic_vector(5 downto 0);
616 variable set_x : std_ulogic;
617 variable mshift : signed(EXP_BITS-1 downto 0);
618 variable need_check : std_ulogic;
619 variable msb : std_ulogic;
620 variable is_add : std_ulogic;
621 variable set_a : std_ulogic;
622 variable set_b : std_ulogic;
623 variable set_c : std_ulogic;
624 variable set_y : std_ulogic;
625 variable set_s : std_ulogic;
626 variable qnan_result : std_ulogic;
627 variable px_nz : std_ulogic;
628 variable pcmpb_eq : std_ulogic;
629 variable pcmpb_lt : std_ulogic;
630 variable pshift : std_ulogic;
631 variable renorm_sqrt : std_ulogic;
632 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
633 variable shiftin : std_ulogic;
634 variable mulexp : signed(EXP_BITS-1 downto 0);
635 variable maddend : std_ulogic_vector(127 downto 0);
636 variable sum : std_ulogic_vector(63 downto 0);
637 variable round_inc : std_ulogic_vector(63 downto 0);
638 begin
639 v := r;
640 illegal := '0';
641 v.busy := '0';
642 int_input := '0';
643
644 -- capture incoming instruction
645 if e_in.valid = '1' then
646 v.insn := e_in.insn;
647 v.op := e_in.op;
648 v.instr_tag := e_in.itag;
649 v.fe_mode := or (e_in.fe_mode);
650 v.dest_fpr := e_in.frt;
651 v.single_prec := e_in.single;
652 v.longmask := e_in.single;
653 v.int_result := '0';
654 v.rc := e_in.rc;
655 v.is_cmp := e_in.out_cr;
656 if e_in.out_cr = '0' then
657 v.cr_mask := num_to_fxm(1);
658 else
659 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
660 end if;
661 int_input := '0';
662 if e_in.op = OP_FPOP_I then
663 int_input := '1';
664 end if;
665 v.quieten_nan := '1';
666 v.tiny := '0';
667 v.denorm := '0';
668 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
669 v.is_subtract := '0';
670 v.is_multiply := '0';
671 v.is_sqrt := '0';
672 v.add_bsmall := '0';
673 v.doing_ftdiv := "00";
674
675 adec := decode_dp(e_in.fra, int_input);
676 bdec := decode_dp(e_in.frb, int_input);
677 cdec := decode_dp(e_in.frc, int_input);
678 v.a := adec;
679 v.b := bdec;
680 v.c := cdec;
681
682 v.exp_cmp := '0';
683 if adec.exponent > bdec.exponent then
684 v.exp_cmp := '1';
685 end if;
686 v.madd_cmp := '0';
687 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
688 v.madd_cmp := '1';
689 end if;
690 end if;
691
-- Non-zero detectors over slices of R and over S
r_hi_nz <= or (r.r(55 downto 31));
r_lo_nz <= or (r.r(30 downto 2));
s_nz <= or (r.s);

-- Exponent range and bias for the precision in use.  While an ftdiv
-- check is in progress (doing_ftdiv bits set) the double-precision
-- limits are tightened.
if r.single_prec /= '0' then
    max_exp := to_signed(127, EXP_BITS);
    min_exp := to_signed(-126, EXP_BITS);
    bias_exp := to_signed(192, EXP_BITS);
else
    max_exp := to_signed(1020, EXP_BITS);
    if r.doing_ftdiv(1) = '0' then
        max_exp := to_signed(1023, EXP_BITS);
    end if;
    min_exp := to_signed(-1021, EXP_BITS);
    if r.doing_ftdiv(0) = '0' then
        min_exp := to_signed(-1022, EXP_BITS);
    end if;
    bias_exp := to_signed(1536, EXP_BITS);
end if;

-- Candidate exponent after applying the pending shift, and whether it
-- falls outside [min_exp, max_exp]
new_exp := r.result_exp - r.shift;
if new_exp < min_exp then
    exp_tiny := '1';
else
    exp_tiny := '0';
end if;
if new_exp > max_exp then
    exp_huge := '1';
else
    exp_huge := '0';
end if;

-- Compare P with zero and with B
px_nz := or (r.p(57 downto 4));
if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
    pcmpb_eq := '1';
else
    pcmpb_eq := '0';
end if;
if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
    pcmpb_lt := '1';
else
    pcmpb_lt := '0';
end if;
733
-- Default values for this cycle; the state arms below override the
-- ones they need.

-- registered one-cycle flags and selections
v.writing_back := '0';
v.instr_done := '0';
v.update_fprf := '0';
v.first := '0';
v.shift := to_signed(0, EXP_BITS);
v.opsel_a := AIN_R;

-- adder / result datapath controls
opsel_ainv <= '0';
opsel_mask <= '0';
opsel_b <= BIN_ZERO;
opsel_binv <= '0';
opsel_r <= RES_SUM;
opsel_s <= S_ZERO;
carry_in <= '0';
misc_sel <= "0000";

-- multiplier controls
f_to_multiply.is_32bit <= '0';
f_to_multiply.valid <= '0';
msel_1 <= MUL1_A;
msel_2 <= MUL2_C;
msel_add <= MULADD_ZERO;
msel_inv <= '0';

-- per-cycle request variables
fpscr_mask := (others => '1');
update_fx := '0';
arith_done := '0';
invalid := '0';
zero_divide := '0';
qnan_result := '0';
renormalize := '0';
renorm_sqrt := '0';
set_x := '0';
set_a := '0';
set_b := '0';
set_c := '0';
set_s := '0';
set_y := '0';
pshift := '0';
shiftin := '0';
770 case r.state is
when IDLE =>
    -- Waiting for an instruction.  Clear per-instruction bookkeeping
    -- every idle cycle and snapshot the current exception bits.
    v.use_a := '0';
    v.use_b := '0';
    v.use_c := '0';
    v.invalid := '0';
    v.negate := '0';
    v.x := '0';
    v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
    set_s := '1';

    -- Dispatch on instruction bits 5..1, refined by bits 6..10, to the
    -- first state of the operation.  v.opsel_a selects which operand
    -- the first state of the operation will find selected.
    if e_in.valid = '1' then
        case e_in.insn(5 downto 1) is
            when "00000" =>
                if e_in.insn(8) = '1' then
                    if e_in.insn(6) = '0' then
                        v.state := DO_FTDIV;
                    else
                        v.state := DO_FTSQRT;
                    end if;
                elsif e_in.insn(7) = '1' then
                    v.state := DO_MCRFS;
                else
                    v.state := DO_FCMP;
                    v.opsel_a := AIN_B;
                end if;

            when "00110" =>
                if e_in.insn(10) /= '0' then
                    v.state := DO_FMRG;
                elsif e_in.insn(8) = '0' then
                    v.state := DO_MTFSB;
                else
                    v.state := DO_MTFSFI;
                end if;

            when "00111" =>
                if e_in.insn(8) = '0' then
                    v.state := DO_MFFS;
                else
                    v.state := DO_MTFSF;
                end if;

            when "01000" =>
                v.opsel_a := AIN_B;
                if e_in.insn(9 downto 8) = "11" then
                    v.state := DO_FRI;
                else
                    v.state := DO_FMR;
                end if;

            when "01100" =>
                v.state := DO_FRSP;
                v.opsel_a := AIN_B;

            when "01110" =>
                v.opsel_a := AIN_B;
                if int_input = '1' then
                    -- fcfid[u][s]
                    v.state := DO_FCFID;
                else
                    v.state := DO_FCTI;
                end if;

            when "01111" =>
                -- same DO_FCTI path with the rounding mode forced to "001"
                v.state := DO_FCTI;
                v.opsel_a := AIN_B;
                v.round_mode := "001";

            when "10010" =>
                -- select A, unless B is denormalized and A is not
                if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
                    v.opsel_a := AIN_B;
                else
                    v.opsel_a := AIN_A;
                end if;
                v.state := DO_FDIV;

            when "10100" | "10101" =>
                v.state := DO_FADD;
                v.opsel_a := AIN_A;

            when "10110" =>
                v.state := DO_FSQRT;
                v.opsel_a := AIN_B;
                v.is_sqrt := '1';

            when "10111" =>
                v.state := DO_FSEL;

            when "11000" =>
                v.state := DO_FRE;
                v.opsel_a := AIN_B;

            when "11001" =>
                v.is_multiply := '1';
                -- select A, unless C is denormalized and A is not
                if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
                    v.opsel_a := AIN_C;
                else
                    v.opsel_a := AIN_A;
                end if;
                v.state := DO_FMUL;

            when "11010" =>
                v.state := DO_FRSQRTE;
                v.opsel_a := AIN_B;
                v.is_sqrt := '1';

            when "11100" | "11101" | "11110" | "11111" =>
                -- select a denormalized A first, then a denormalized C,
                -- otherwise B
                if v.a.mantissa(54) = '0' then
                    v.opsel_a := AIN_A;
                elsif v.c.mantissa(54) = '0' then
                    v.opsel_a := AIN_C;
                else
                    v.opsel_a := AIN_B;
                end if;
                v.state := DO_FMADD;

            when others =>
                illegal := '1';
        end case;
    end if;
875
when DO_MCRFS =>
    -- Return the FPSCR field selected by the BFA field in cr_result and
    -- clear that field in FPSCR, except for bits protected by the
    -- constant keep-mask below.
    v.instr_done := '1';
    v.state := IDLE;
    j := to_integer(unsigned(insn_bfa(r.insn)));
    for i in 0 to 7 loop
        if i = j then
            -- field i lives in FPSCR bits k+3..k; field 0 is the top nibble
            k := (7 - i) * 4;
            fpscr_mask(k + 3 downto k) := "0000";
            v.cr_result := r.fpscr(k + 3 downto k);
        end if;
    end loop;
    -- any bit set in x"6007F8FF" is kept regardless of fpscr_mask
    v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
888
when DO_FTDIV =>
    -- ftdiv: produce the test-for-software-divide flags in cr_result.
    -- cr_result(2) is fg_flag and cr_result(1) is fe_flag.
    -- By default the instruction completes in this cycle; the final
    -- else branch below continues into FTDIV_1 for the exponent-range
    -- checks instead.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";

    -- fg_flag: A infinite, or B zero, infinite or denormalized.
    -- A finite operand is denormalized when its implicit integer bit,
    -- mantissa(54), is clear; this is the bit every other denormal
    -- test in this unit uses.  Bit 53 is the top fraction bit (the one
    -- the signalling-NaN tests look at), so testing it here would flag
    -- normal numbers and miss some denormals.
    if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;

    -- fe_flag: decided here for NaN / infinity / zero operands and for
    -- a finite A with exponent <= -970; otherwise go on to FTDIV_1.
    if r.a.class = NAN or r.a.class = INFINITY or
        r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
        (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
        v.cr_result(1) := '1';
    else
        v.doing_ftdiv := "11";
        v.first := '1';
        v.state := FTDIV_1;
        v.instr_done := '0';
    end if;
907
when DO_FTSQRT =>
    -- ftsqrt: produce the test-for-software-square-root flags in
    -- cr_result.  cr_result(2) is fg_flag and cr_result(1) is fe_flag.
    -- The instruction always completes in this cycle.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";

    -- fg_flag: B zero, infinite or denormalized.  A finite operand is
    -- denormalized when its implicit integer bit, mantissa(54), is
    -- clear (bit 53 is the top fraction bit).
    if r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;

    -- fe_flag: B is NaN, infinite, zero or negative, or its exponent is
    -- <= -970.  The flag has to be SET here; cr_result was just cleared
    -- to "0000", so assigning '0' would leave fe_flag permanently clear.
    if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
        or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
        v.cr_result(1) := '1';
    end if;
920
when DO_FCMP =>
    -- fcmp[uo]; entered with r.opsel_a = AIN_B.
    -- cr_result encoding used below: bit 3 = A < B, bit 2 = A > B,
    -- "0010" = equal, "0001" = unordered.
    -- Most cases are decided from class, sign and exponent alone; only
    -- finite operands with equal sign and exponent need the mantissa
    -- subtraction done in CMP_1 / CMP_2.
    update_fx := '1';
    v.result_exp := r.b.exponent;
    v.instr_done := '1';
    v.state := IDLE;

    if (r.a.class = NAN and r.a.mantissa(53) = '0') or
        (r.b.class = NAN and r.b.mantissa(53) = '0') then
        -- Signalling NAN
        invalid := '1';
        v.fpscr(FPSCR_VXSNAN) := '1';
        if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
            v.fpscr(FPSCR_VXVC) := '1';
        end if;
        v.cr_result := "0001";          -- unordered
    elsif r.a.class = NAN or r.b.class = NAN then
        -- quiet NaN: only fcmpo (insn bit 6 set) raises invalid
        if r.insn(6) = '1' then
            invalid := '1';
            v.fpscr(FPSCR_VXVC) := '1';
        end if;
        v.cr_result := "0001";          -- unordered
    elsif r.a.class = ZERO and r.b.class = ZERO then
        v.cr_result := "0010";          -- equal
    elsif r.a.negative /= r.b.negative then
        -- opposite signs: the negative operand is the smaller one
        v.cr_result := r.a.negative & r.b.negative & "00";
    elsif r.a.class = ZERO then
        -- A and B are the same sign from here down
        v.cr_result := not r.b.negative & r.b.negative & "00";
    elsif r.a.class = INFINITY then
        if r.b.class = INFINITY then
            v.cr_result := "0010";
        else
            v.cr_result := r.a.negative & not r.a.negative & "00";
        end if;
    elsif r.b.class = ZERO then
        -- A is finite from here down
        v.cr_result := r.a.negative & not r.a.negative & "00";
    elsif r.b.class = INFINITY then
        v.cr_result := not r.b.negative & r.b.negative & "00";
    elsif r.exp_cmp = '1' then
        -- A and B are both finite from here down;
        -- A has the larger exponent
        v.cr_result := r.a.negative & not r.a.negative & "00";
    elsif r.a.exponent /= r.b.exponent then
        -- A exponent is smaller than B
        v.cr_result := not r.a.negative & r.a.negative & "00";
    else
        -- Equal exponents: prepare to subtract mantissas, put B in R
        v.cr_result := "0000";
        v.opsel_a := AIN_A;
        v.instr_done := '0';
        v.state := CMP_1;
    end if;

    -- mirror the comparison result into the FPSCR FL..FU bits
    v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
976
when DO_MTFSB =>
    -- mtfsb{0,1}: write instruction bit 6 into the FPSCR bit named by
    -- the BT field.  BT counts from the most significant end, hence
    -- the 31 - i index.
    v.instr_done := '1';
    v.state := IDLE;
    j := to_integer(unsigned(insn_bt(r.insn)));
    for i in 0 to 31 loop
        if i = j then
            v.fpscr(31 - i) := r.insn(6);
        end if;
    end loop;
987
when DO_MTFSFI =>
    -- mtfsfi: write the immediate from insn_u into the 4-bit FPSCR
    -- field selected by BF.  Nothing is written when instruction
    -- bit 16 is set.
    v.instr_done := '1';
    v.state := IDLE;
    j := to_integer(unsigned(insn_bf(r.insn)));
    if r.insn(16) = '0' then
        for i in 0 to 7 loop
            if i = j then
                -- field i lives in FPSCR bits k+3..k; field 0 is the top nibble
                k := (7 - i) * 4;
                v.fpscr(k + 3 downto k) := insn_u(r.insn);
            end if;
        end loop;
    end if;
1001
when DO_FMRG =>
    -- fmrgew, fmrgow: the result comes from the miscellaneous result
    -- source, with instruction bit 8 forming part of its selector.  It
    -- is written back as an integer result and completes in one cycle.
    misc_sel <= "01" & r.insn(8) & '0';
    opsel_r <= RES_MISC;
    v.int_result := '1';
    v.writing_back := '1';
    v.instr_done := '1';
    v.state := IDLE;
1010
when DO_MFFS =>
    -- mffs family: the result comes from the miscellaneous result
    -- source and is written back as an integer result.  Instruction
    -- bits 20..16 select the variant; the variants differ in the value
    -- left in fpscr_mask and in which FPSCR bits they update.
    opsel_r <= RES_MISC;
    v.int_result := '1';
    v.writing_back := '1';
    v.instr_done := '1';
    v.state := IDLE;
    case r.insn(20 downto 16) is
        when "00000" =>
            -- mffs
            null;
        when "00001" =>
            -- mffsce: also clears the VE..XE bits
            v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
        when "10100" | "10101" =>
            -- mffscdrn[i] (but we don't implement DRN)
            fpscr_mask := x"000000FF";
        when "10110" =>
            -- mffscrn: new RN comes from operand B
            fpscr_mask := x"000000FF";
            v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
                r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
        when "10111" =>
            -- mffscrni: new RN comes from instruction bits 12..11
            fpscr_mask := x"000000FF";
            v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
        when "11000" =>
            -- mffsl
            fpscr_mask := x"0007F0FF";
        when others =>
            illegal := '1';
    end case;
1041
when DO_MTFSF =>
    -- mtfsf: copy selected 4-bit fields of operand B into FPSCR.
    -- The field mask flm is all ones when instruction bit 25 is set,
    -- all zeroes when bit 16 is set, otherwise instruction bits 24..17.
    v.instr_done := '1';
    v.state := IDLE;
    if r.insn(25) = '1' then
        flm := x"FF";
    elsif r.insn(16) = '1' then
        flm := x"00";
    else
        flm := r.insn(24 downto 17);
    end if;
    for i in 0 to 7 loop
        -- mask bit i controls FPSCR bits 4*i+3 .. 4*i
        k := i * 4;
        if flm(i) = '1' then
            v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
        end if;
    end loop;
1058
when DO_FMR =>
    -- Sign-manipulating moves; entered with r.opsel_a = AIN_B.
    -- The result takes B's class and exponent; only the sign differs
    -- per instruction.  NaNs are not quietened.
    v.quieten_nan := '0';
    v.result_class := r.b.class;
    v.result_exp := r.b.exponent;
    if r.insn(9) = '1' then
        v.result_sign := '0';              -- fabs
    elsif r.insn(8) = '1' then
        v.result_sign := '1';              -- fnabs
    elsif r.insn(7) = '1' then
        v.result_sign := r.b.negative;     -- fmr
    elsif r.insn(6) = '1' then
        v.result_sign := not r.b.negative; -- fneg
    else
        v.result_sign := r.a.negative;     -- fcpsgn
    end if;
    v.writing_back := '1';
    v.instr_done := '1';
    v.state := IDLE;
1078
when DO_FRI =>    -- fri[nzpm]
    -- Round to integral value; entered with r.opsel_a = AIN_B.
    v.result_class := r.b.class;
    v.result_sign := r.b.negative;
    v.result_exp := r.b.exponent;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    if r.b.class = NAN and r.b.mantissa(53) = '0' then
        -- Signalling NAN
        invalid := '1';
        v.fpscr(FPSCR_VXSNAN) := '1';
    end if;
    if r.b.class /= FINITE then
        -- zero, infinity and NaN need no rounding
        arith_done := '1';
    elsif r.b.exponent >= to_signed(52, EXP_BITS) then
        -- integer already, no rounding required
        arith_done := '1';
    else
        -- set up the shift count for FRI_1; the rounding mode comes
        -- from instruction bits 7..6 with a leading 1 bit
        v.shift := r.b.exponent - to_signed(52, EXP_BITS);
        v.round_mode := '1' & r.insn(7 downto 6);
        v.state := FRI_1;
    end if;
1103
when DO_FRSP =>
    -- Round to single precision; entered with r.opsel_a = AIN_B and
    -- r.shift = 0.
    v.result_class := r.b.class;
    v.result_sign := r.b.negative;
    v.result_exp := r.b.exponent;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    set_x := '1';
    if r.b.class = NAN and r.b.mantissa(53) = '0' then
        -- Signalling NAN
        invalid := '1';
        v.fpscr(FPSCR_VXSNAN) := '1';
    end if;
    if r.b.class /= FINITE then
        arith_done := '1';
    elsif r.b.exponent < to_signed(-126, EXP_BITS) then
        -- below the single-precision minimum exponent: set the shift
        -- to the (negative) distance from -126
        v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
        v.state := ROUND_UFLOW;
    elsif r.b.exponent > to_signed(127, EXP_BITS) then
        -- above the single-precision maximum exponent
        v.state := ROUND_OFLOW;
    else
        v.state := ROUNDING;
    end if;
1129
when DO_FCTI =>
    -- Convert floating point to integer.
    -- instr bit 9: 1=dword 0=word
    -- instr bit 8: 1=unsigned 0=signed
    -- instr bit 1: 1=round to zero 0=use fpscr[RN]
    -- Entered with r.opsel_a = AIN_B.
    v.int_result := '1';
    v.result_class := r.b.class;
    v.result_sign := r.b.negative;
    v.result_exp := r.b.exponent;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    if r.b.class = NAN and r.b.mantissa(53) = '0' then
        -- Signalling NAN
        invalid := '1';
        v.fpscr(FPSCR_VXSNAN) := '1';
    end if;

    case r.b.class is
        when ZERO =>
            arith_done := '1';
        when INFINITY | NAN =>
            v.state := INT_OFLOW;
        when FINITE =>
            if r.b.exponent >= to_signed(64, EXP_BITS) or
                (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
                -- exponent too large for the destination width
                v.state := INT_OFLOW;
            elsif r.b.exponent >= to_signed(52, EXP_BITS) then
                -- integer already, no rounding required,
                -- shift into final position
                v.shift := r.b.exponent - to_signed(54, EXP_BITS);
                if r.insn(8) = '1' and r.b.negative = '1' then
                    -- negative value in an unsigned conversion
                    v.state := INT_OFLOW;
                else
                    v.state := INT_ISHIFT;
                end if;
            else
                v.shift := r.b.exponent - to_signed(52, EXP_BITS);
                v.state := INT_SHIFT;
            end if;
    end case;
1170
when DO_FCFID =>
    -- Convert integer to floating point; entered with
    -- r.opsel_a = AIN_B.
    if r.insn(8) = '0' and r.b.negative = '1' then
        -- fcfid[s] with negative operand, set R = -B
        opsel_ainv <= '1';
        carry_in <= '1';
        v.result_sign := '1';
    else
        v.result_sign := '0';
    end if;
    v.result_class := r.b.class;
    v.result_exp := to_signed(54, EXP_BITS);
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    if r.b.class /= ZERO then
        v.state := FINISH;
    else
        arith_done := '1';
    end if;
1189
when DO_FADD =>
    -- fadd[s] and fsub[s]; entered with r.opsel_a = AIN_A.
    v.result_sign := r.a.negative;
    v.result_class := r.a.class;
    v.result_exp := r.a.exponent;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.use_a := '1';
    v.use_b := '1';
    -- is_add = 1 when the magnitudes are to be added
    is_add := r.a.negative xor r.b.negative xor r.insn(1);

    if r.a.class = FINITE and r.b.class = FINITE then
        v.is_subtract := not is_add;
        v.add_bsmall := r.exp_cmp;
        v.opsel_a := AIN_B;
        if r.exp_cmp /= '0' then
            -- A has the larger exponent
            v.state := ADD_1;
        else
            v.shift := r.a.exponent - r.b.exponent;
            v.result_sign := r.b.negative xnor r.insn(1);
            if r.a.exponent = r.b.exponent then
                -- equal exponents: no alignment shift needed
                v.state := ADD_2;
            else
                v.longmask := '0';
                v.state := ADD_SHIFT;
            end if;
        end if;
    elsif r.a.class = NAN or r.b.class = NAN then
        v.state := NAN_RESULT;
    elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
        -- invalid operation, construct QNaN
        v.fpscr(FPSCR_VXISI) := '1';
        qnan_result := '1';
        arith_done := '1';
    elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
        -- return -0 for rounding to -infinity
        v.result_sign := r.round_mode(1) and r.round_mode(0);
        arith_done := '1';
    elsif r.a.class = INFINITY or r.b.class = ZERO then
        -- result is A
        v.opsel_a := AIN_A;
        v.state := EXC_RESULT;
    else
        -- result is +/- B
        v.opsel_a := AIN_B;
        v.negate := not r.insn(1);
        v.state := EXC_RESULT;
    end if;
1240
when DO_FMUL =>
    -- fmul[s]
    -- r.opsel_a = AIN_A unless C is denorm and A isn't
    v.result_sign := r.a.negative xor r.c.negative;
    v.result_class := r.a.class;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.use_a := '1';
    v.use_c := '1';

    if r.a.class = FINITE and r.c.class = FINITE then
        v.result_exp := r.a.exponent + r.c.exponent;
        -- Renormalize denorm operands before multiplying
        if r.a.mantissa(54) = '0' then
            v.state := RENORM_A;
        elsif r.c.mantissa(54) = '0' then
            v.state := RENORM_C;
        else
            f_to_multiply.valid <= '1';
            v.state := MULT_1;
        end if;
    elsif r.a.class = NAN or r.c.class = NAN then
        v.state := NAN_RESULT;
    elsif (r.a.class = INFINITY and r.c.class = ZERO) or
        (r.a.class = ZERO and r.c.class = INFINITY) then
        -- invalid operation, construct QNaN
        v.fpscr(FPSCR_VXIMZ) := '1';
        qnan_result := '1';
    elsif r.a.class = ZERO or r.a.class = INFINITY then
        -- result is +/- A
        arith_done := '1';
    else
        -- r.c.class is ZERO or INFINITY
        v.opsel_a := AIN_C;
        v.negate := r.a.negative;
        v.state := EXC_RESULT;
    end if;
1279
when DO_FDIV =>
    -- Divide A by B.
    -- r.opsel_a = AIN_A unless B is denorm and A isn't
    v.result_class := r.a.class;
    v.result_sign := r.a.negative xor r.b.negative;
    v.result_exp := r.a.exponent - r.b.exponent;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.use_a := '1';
    v.use_b := '1';
    v.count := "00";

    if r.a.class = FINITE and r.b.class = FINITE then
        -- Renormalize denorm operands
        if r.a.mantissa(54) = '0' then
            v.state := RENORM_A;
        elsif r.b.mantissa(54) = '0' then
            v.state := RENORM_B;
        else
            v.first := '1';
            v.state := DIV_2;
        end if;
    elsif r.a.class = NAN or r.b.class = NAN then
        v.state := NAN_RESULT;
    elsif r.b.class = INFINITY then
        if r.a.class = INFINITY then
            -- infinity / infinity
            v.fpscr(FPSCR_VXIDI) := '1';
            qnan_result := '1';
        else
            v.result_class := ZERO;
        end if;
        arith_done := '1';
    elsif r.b.class = ZERO then
        if r.a.class = ZERO then
            -- zero / zero
            v.fpscr(FPSCR_VXZDZ) := '1';
            qnan_result := '1';
        else
            -- only a finite dividend raises zero-divide
            if r.a.class = FINITE then
                zero_divide := '1';
            end if;
            v.result_class := INFINITY;
        end if;
        arith_done := '1';
    else
        -- r.b.class = FINITE, result_class = r.a.class
        arith_done := '1';
    end if;
1326
when DO_FSEL =>
    -- fsel: choose C when A is zero or is a non-negative non-NaN,
    -- otherwise choose B.  The chosen operand is passed on through
    -- EXC_RESULT without quietening NaNs.
    v.quieten_nan := '0';
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
        v.opsel_a := AIN_C;
    else
        v.opsel_a := AIN_B;
    end if;
    v.state := EXC_RESULT;
1337
when DO_FSQRT =>
    -- Square root of B; entered with r.opsel_a = AIN_B.
    v.result_class := r.b.class;
    v.result_sign := r.b.negative;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.use_b := '1';
    case r.b.class is
        when NAN =>
            v.state := NAN_RESULT;
        when ZERO =>
            -- result is B
            arith_done := '1';
        when INFINITY =>
            if r.b.negative = '1' then
                v.fpscr(FPSCR_VXSQRT) := '1';
                qnan_result := '1';
            -- else result is B
            end if;
            arith_done := '1';
        when FINITE =>
            v.result_exp := r.b.exponent;
            if r.b.negative = '1' then
                -- square root of a negative number
                v.fpscr(FPSCR_VXSQRT) := '1';
                qnan_result := '1';
            elsif r.b.mantissa(54) = '0' then
                -- denormalized operand
                v.state := RENORM_B;
            elsif r.b.exponent(0) = '0' then
                -- even exponent
                v.state := SQRT_1;
            else
                -- odd exponent: go through RENORM_B2 with a shift of 1
                v.shift := to_signed(1, EXP_BITS);
                v.state := RENORM_B2;
            end if;
    end case;
1372
when DO_FRE =>
    -- Reciprocal estimate of B; entered with r.opsel_a = AIN_B.
    v.result_class := r.b.class;
    v.result_sign := r.b.negative;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.use_b := '1';
    case r.b.class is
        when NAN =>
            v.state := NAN_RESULT;
        when ZERO =>
            -- 1/0: infinity, with zero-divide raised
            v.result_class := INFINITY;
            zero_divide := '1';
            arith_done := '1';
        when INFINITY =>
            -- 1/infinity: zero
            v.result_class := ZERO;
            arith_done := '1';
        when FINITE =>
            v.result_exp := - r.b.exponent;
            if r.b.mantissa(54) = '0' then
                -- denormalized operand
                v.state := RENORM_B;
            else
                v.state := FRE_1;
            end if;
    end case;
1398
when DO_FRSQRTE =>
    -- Reciprocal square root estimate of B; entered with
    -- r.opsel_a = AIN_B.
    v.result_class := r.b.class;
    v.result_sign := r.b.negative;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.use_b := '1';
    v.shift := to_signed(1, EXP_BITS);
    case r.b.class is
        when NAN =>
            v.state := NAN_RESULT;
        when ZERO =>
            -- infinity, with zero-divide raised
            v.result_class := INFINITY;
            zero_divide := '1';
            arith_done := '1';
        when INFINITY =>
            if r.b.negative = '1' then
                v.fpscr(FPSCR_VXSQRT) := '1';
                qnan_result := '1';
            else
                v.result_class := ZERO;
            end if;
            arith_done := '1';
        when FINITE =>
            v.result_exp := r.b.exponent;
            if r.b.negative = '1' then
                -- square root of a negative number
                v.fpscr(FPSCR_VXSQRT) := '1';
                qnan_result := '1';
            elsif r.b.mantissa(54) = '0' then
                -- denormalized operand
                v.state := RENORM_B;
            elsif r.b.exponent(0) = '0' then
                -- even exponent
                v.state := RSQRT_1;
            else
                -- odd exponent
                v.state := RENORM_B2;
            end if;
    end case;
1435
when DO_FMADD =>
    -- fmadd, fmsub, fnmadd, fnmsub
    -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
    -- else AIN_B
    v.result_sign := r.a.negative;
    v.result_class := r.a.class;
    v.result_exp := r.a.exponent;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.use_a := '1';
    v.use_b := '1';
    v.use_c := '1';
    -- is_add = 1 when product and addend magnitudes are to be added
    is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);

    if r.a.class = FINITE and r.c.class = FINITE and
        (r.b.class = FINITE or r.b.class = ZERO) then
        v.is_subtract := not is_add;
        mulexp := r.a.exponent + r.c.exponent;
        v.result_exp := mulexp;
        -- Make sure A and C are normalized
        if r.a.mantissa(54) = '0' then
            v.state := RENORM_A;
        elsif r.c.mantissa(54) = '0' then
            v.state := RENORM_C;
        elsif r.b.class = ZERO then
            -- no addend, degenerates to multiply
            v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
            v.is_multiply := '1';
            f_to_multiply.valid <= '1';
            v.state := MULT_1;
        elsif r.madd_cmp = '0' then
            -- addend is bigger, do multiply first
            v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
            f_to_multiply.valid <= '1';
            v.state := FMADD_1;
        else
            -- product is bigger, shift B right and use it as the
            -- addend to the multiplier
            v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
            -- for subtract, multiplier does B - A * C
            v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
            v.result_exp := r.b.exponent;
            v.state := FMADD_2;
        end if;
    elsif r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
        v.state := NAN_RESULT;
    elsif (r.a.class = ZERO and r.c.class = INFINITY) or
        (r.a.class = INFINITY and r.c.class = ZERO) then
        -- invalid operation, construct QNaN
        v.fpscr(FPSCR_VXIMZ) := '1';
        qnan_result := '1';
    elsif r.a.class = INFINITY or r.c.class = INFINITY then
        if r.b.class = INFINITY and is_add = '0' then
            -- invalid operation, construct QNaN
            v.fpscr(FPSCR_VXISI) := '1';
            qnan_result := '1';
        else
            -- result is infinity
            v.result_class := INFINITY;
            v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
            arith_done := '1';
        end if;
    else
        -- Here A is zero, C is zero, or B is infinity
        -- Result is +/-B in all of those cases
        v.opsel_a := AIN_B;
        if r.b.class /= ZERO or is_add = '1' then
            v.negate := not (r.insn(1) xor r.insn(2));
        else
            -- have to be careful about rule for 0 - 0 result sign
            v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
        end if;
        v.state := EXC_RESULT;
    end if;
1511
when RENORM_A =>
    -- Request renormalization and pre-select the other operand for
    -- RENORM_A2: C when instruction bit 4 is set (fmul/fmadd),
    -- B otherwise (fdiv).
    if r.insn(4) = '1' then
        v.opsel_a := AIN_C;
    else
        v.opsel_a := AIN_B;
    end if;
    renormalize := '1';
    v.state := RENORM_A2;
1520
when RENORM_A2 =>
    -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
    -- Store the renormalized A, take the adjusted exponent, then
    -- either renormalize the second operand or start the operation.
    set_a := '1';
    v.result_exp := new_exp;
    if r.insn(4) /= '1' then
        -- fdiv path
        if r.b.mantissa(54) = '1' then
            v.first := '1';
            v.state := DIV_2;
        else
            v.state := RENORM_B;
        end if;
    elsif r.c.mantissa(54) /= '1' then
        -- C still needs renormalizing
        v.state := RENORM_C;
    elsif r.insn(3) = '0' or r.b.class = ZERO then
        -- plain multiply, or multiply-add with a zero addend
        v.first := '1';
        v.state := MULT_1;
    else
        -- multiply-add: redo the product/addend exponent comparison
        -- with the adjusted exponent and re-enter DO_FMADD
        if new_exp + 1 >= r.b.exponent then
            v.madd_cmp := '1';
        else
            v.madd_cmp := '0';
        end if;
        v.opsel_a := AIN_B;
        v.state := DO_FMADD;
    end if;
1549
when RENORM_B =>
    -- Request renormalization; renorm_sqrt follows the is_sqrt flag.
    renorm_sqrt := r.is_sqrt;
    renormalize := '1';
    v.state := RENORM_B2;
1554
when RENORM_B2 =>
    -- Store the renormalized B and adjust the result exponent: the
    -- shift is added when is_sqrt is '0'; otherwise new_exp
    -- (result_exp - shift) is used.
    set_b := '1';
    if r.is_sqrt /= '0' then
        v.result_exp := new_exp;
    else
        v.result_exp := r.result_exp + r.shift;
    end if;
    v.opsel_a := AIN_B;
    v.state := LOOKUP;
1564
when RENORM_C =>
    -- Request renormalization; the result is stored in RENORM_C2.
    v.state := RENORM_C2;
    renormalize := '1';
1568
when RENORM_C2 =>
    -- Store the renormalized C and take the adjusted exponent, then
    -- start the multiply or re-enter DO_FMADD.
    set_c := '1';
    v.result_exp := new_exp;
    if r.insn(3) = '0' or r.b.class = ZERO then
        -- plain multiply, or multiply-add with a zero addend
        v.first := '1';
        v.state := MULT_1;
    else
        -- redo the product/addend exponent comparison with the
        -- adjusted exponent
        if new_exp + 1 >= r.b.exponent then
            v.madd_cmp := '1';
        else
            v.madd_cmp := '0';
        end if;
        v.opsel_a := AIN_B;
        v.state := DO_FMADD;
    end if;
1583
when ADD_1 =>
    -- transferring B to R
    -- Set the shift to exponent(B) - exponent(A) and start from B's
    -- exponent before going to ADD_SHIFT.
    v.result_exp := r.b.exponent;
    v.shift := r.b.exponent - r.a.exponent;
    v.longmask := '0';
    v.state := ADD_SHIFT;
1590
when ADD_SHIFT =>
    -- r.shift = - exponent difference, r.longmask = 0
    -- Take the shifter output as the result, record in X whether S is
    -- non-zero, restore longmask and select the other operand for
    -- ADD_2.
    opsel_r <= RES_SHIFT;
    set_x := '1';
    v.x := s_nz;
    v.longmask := r.single_prec;
    if r.add_bsmall /= '1' then
        v.opsel_a := AIN_B;
    else
        v.opsel_a := AIN_A;
    end if;
    v.state := ADD_2;
1603
when ADD_2 =>
    -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
    -- Add R to the selected operand.  For a subtraction R is inverted,
    -- and the carry-in is suppressed when X is set.
    opsel_b <= BIN_R;
    opsel_binv <= r.is_subtract;
    carry_in <= r.is_subtract and not r.x;
    -- ADD_3 expects r.shift = -1
    v.shift := to_signed(-1, EXP_BITS);
    v.state := ADD_3;
1611
when ADD_3 =>
    -- check for overflow or negative result (can't get both)
    -- r.shift = -1
    if r.r(63) = '1' then
        -- result is opposite sign to expected: negate R and flip the
        -- result sign
        opsel_ainv <= '1';
        carry_in <= '1';
        v.result_sign := not r.result_sign;
        v.state := FINISH;
    elsif r.r(55) = '1' then
        -- sum overflowed, shift right
        opsel_r <= RES_SHIFT;
        set_x := '1';
        if exp_huge = '1' then
            v.state := ROUND_OFLOW;
        else
            v.state := ROUNDING;
        end if;
    elsif r.r(54) = '1' then
        -- bit 54 set: go straight to rounding
        set_x := '1';
        v.state := ROUNDING;
    elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
        -- sum is zero; r.x must be zero at this point
        v.result_class := ZERO;
        if r.is_subtract = '1' then
            -- set result sign depending on rounding mode
            v.result_sign := r.round_mode(1) and r.round_mode(0);
        end if;
        arith_done := '1';
    else
        -- non-zero result with bit 54 clear: normalize first
        renormalize := '1';
        v.state := NORMALIZE;
    end if;
1645
when CMP_1 =>
    -- r.opsel_a = AIN_A
    -- Subtract R from the selected operand (R inverted, carry-in 1);
    -- CMP_2 inspects the difference.
    carry_in <= '1';
    opsel_binv <= '1';
    opsel_b <= BIN_R;
    v.state := CMP_2;
1652
when CMP_2 =>
    -- Finish the comparison from the mantissa difference now in R and
    -- mirror the result into the FPSCR FL..FU bits.
    if r.r(63) = '1' then
        -- A is smaller in magnitude
        v.cr_result := not r.a.negative & r.a.negative & "00";
    elsif (r_hi_nz or r_lo_nz) = '0' then
        -- difference is zero: equal
        v.cr_result := "0010";
    else
        -- A is larger in magnitude
        v.cr_result := r.a.negative & not r.a.negative & "00";
    end if;
    v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
    v.state := IDLE;
    v.instr_done := '1';
1665
when MULT_1 =>
    -- Select the multiplier output as the result and wait here until
    -- the multiplier reports a valid product.  The multiply is
    -- requested only while r.first is set.
    opsel_r <= RES_MULT;
    f_to_multiply.valid <= r.first;
    if multiply_to_f.valid = '1' then
        v.state := FINISH;
    end if;
1672
when FMADD_1 =>
    -- Addend is bigger here
    -- Wait for the product, capturing it into R and S, then go on to
    -- ADD_SHIFT.
    opsel_r <= RES_MULT;
    opsel_s <= S_MULT;
    set_s := '1';
    f_to_multiply.valid <= r.first;
    v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
    -- note v.shift is at most -2 here
    v.shift := r.result_exp - r.b.exponent;
    if multiply_to_f.valid = '1' then
        v.longmask := '0';
        v.state := ADD_SHIFT;
    end if;
1686
when FMADD_2 =>
    -- Product is potentially bigger here
    -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
    -- Load S from the shifter, then remove the +64 offset from the
    -- shift count for FMADD_3.
    opsel_s <= S_SHIFT;
    set_s := '1';
    v.shift := r.shift - to_signed(64, EXP_BITS);
    v.state := FMADD_3;
1694
when FMADD_3 =>
    -- r.shift = addend exp - product exp
    -- Take the shifter output into R and set r.first for FMADD_4.
    opsel_r <= RES_SHIFT;
    v.state := FMADD_4;
    v.first := '1';
1700
when FMADD_4 =>
    -- Multiply with R:S as the multiplier's addend (inverted for a
    -- subtraction), capturing the multiplier output into R and S.
    -- Stay here until the multiplier reports a valid result.
    msel_add <= MULADD_RS;
    msel_inv <= r.is_subtract;
    f_to_multiply.valid <= r.first;
    opsel_r <= RES_MULT;
    opsel_s <= S_MULT;
    set_s := '1';
    if multiply_to_f.valid = '1' then
        v.state := FMADD_5;
    end if;
1711
when FMADD_5 =>
    -- negate R:S:X if negative
    if r.r(63) = '1' then
        v.result_sign := not r.result_sign;
        opsel_ainv <= '1';
        -- carry into R only when S and X are both zero
        carry_in <= not (s_nz or r.x);
        opsel_s <= S_NEG;
        set_s := '1';
    end if;
    -- FMADD_6 expects r.shift = 56
    v.shift := to_signed(56, EXP_BITS);
    v.state := FMADD_6;
1723
when FMADD_6 =>
    -- r.shift = 56 (or 0, but only if r is now nonzero)
    if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
        -- R is all zeroes
        if s_nz /= '0' then
            -- there are non-zero bits in S, so shift them into R and
            -- set S to 0; stay in state FMADD_6
            opsel_r <= RES_SHIFT;
            set_s := '1';
        else
            -- must be a subtraction, and r.x must be zero
            v.result_class := ZERO;
            v.result_sign := r.round_mode(1) and r.round_mode(0);
            arith_done := '1';
        end if;
    elsif r.r(56 downto 54) = "001" then
        -- leading one already at bit 54
        v.state := FINISH;
    else
        renormalize := '1';
        v.state := NORMALIZE;
    end if;
1745
when LOOKUP =>
    -- r.opsel_a = AIN_B
    -- wait one cycle for inverse_table[B] lookup
    -- The next state is chosen from instruction bits 4..2.
    v.first := '1';
    if r.insn(4) /= '0' then
        if r.insn(2) = '0' then
            v.state := FRE_1;
        else
            v.state := RSQRT_1;
        end if;
    elsif r.insn(3) = '0' then
        v.state := DIV_2;
    else
        v.state := SQRT_1;
    end if;
1761
when DIV_2 =>
    -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
    msel_1 <= MUL1_B;
    if r.count = 0 then
        msel_2 <= MUL2_LUT;
    else
        msel_2 <= MUL2_P;
    end if;
    msel_add <= MULADD_CONST;
    msel_inv <= '1';
    pshift := '1';
    set_y := r.first;
    f_to_multiply.valid <= r.first;
    -- once the product is back, count this pass and go to DIV_3
    if multiply_to_f.valid = '1' then
        v.count := r.count + 1;
        v.first := '1';
        v.state := DIV_3;
    end if;
1780
when DIV_3 =>
    -- compute Y = P = P * Y
    msel_1 <= MUL1_Y;
    msel_2 <= MUL2_P;
    pshift := '1';
    f_to_multiply.valid <= r.first;
    -- loop back to DIV_2 until the pass counter reaches 3
    if multiply_to_f.valid = '1' then
        v.first := '1';
        if r.count = 3 then
            v.state := DIV_4;
        else
            v.state := DIV_2;
        end if;
    end if;
1795
when DIV_4 =>
    -- compute R = P = A * Y (quotient)
    msel_1 <= MUL1_A;
    msel_2 <= MUL2_P;
    pshift := '1';
    set_y := r.first;
    f_to_multiply.valid <= r.first;
    -- capture the product into R when it arrives
    if multiply_to_f.valid = '1' then
        opsel_r <= RES_MULT;
        v.first := '1';
        v.state := DIV_5;
    end if;
1808
when DIV_5 =>
    -- compute P = A - B * R (remainder)
    msel_1 <= MUL1_B;
    msel_2 <= MUL2_R;
    msel_add <= MULADD_A;
    msel_inv <= '1';
    f_to_multiply.valid <= r.first;
    if multiply_to_f.valid = '1' then
        v.state := DIV_6;
    end if;
1819
when DIV_6 =>
    -- test if remainder is 0 or >= B
    if pcmpb_lt /= '1' then
        -- quotient needs to be incremented by 1; the result is
        -- inexact unless the remainder was exactly B
        carry_in <= '1';
        v.x := not pcmpb_eq;
    else
        -- quotient is correct, set X if remainder non-zero
        v.x := r.p(58) or px_nz;
    end if;
    v.state := FINISH;
1831
when FRE_1 =>
    -- Take the miscellaneous result selected by "0111" as the result
    -- and normalize it with a shift of 1.
    misc_sel <= "0111";
    opsel_r <= RES_MISC;
    v.shift := to_signed(1, EXP_BITS);
    v.state := NORMALIZE;
1837
when FTDIV_1 =>
    -- Exponent-range part of ftdiv.  cr_result(1) is set when new_exp
    -- falls outside the limits currently in force.  The state is
    -- visited at most twice: the first visit (r.first = 1) can request
    -- a second one with the shift set to A's exponent and doing_ftdiv
    -- changed to "10".
    v.cr_result(1) := exp_tiny or exp_huge;
    if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
        v.state := IDLE;
        v.instr_done := '1';
    else
        v.doing_ftdiv := "10";
        v.shift := r.a.exponent;
    end if;
1847
when RSQRT_1 =>
    -- Take the miscellaneous result selected by "0111" as the result.
    -- The result exponent is minus half of B's exponent (arithmetic
    -- shift right by one, sign bit replicated).
    misc_sel <= "0111";
    opsel_r <= RES_MISC;
    sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
    v.result_exp := - sqrt_exp;
    v.shift := to_signed(1, EXP_BITS);
    v.state := NORMALIZE;
1855
when SQRT_1 =>
    -- put invsqr[B] in R and compute P = invsqr[B] * B
    -- also transfer B (in R) to A
    set_a := '1';
    misc_sel <= "0111";
    opsel_r <= RES_MISC;
    msel_1 <= MUL1_B;
    msel_2 <= MUL2_LUT;
    f_to_multiply.valid <= '1';
    -- reset the iteration counter; SQRT_2 expects r.shift = -1
    v.count := "00";
    v.shift := to_signed(-1, EXP_BITS);
    v.state := SQRT_2;
1868
when SQRT_2 =>
    -- shift R right one place
    -- not expecting multiplier result yet
    -- r.shift = -1
    opsel_r <= RES_SHIFT;
    v.state := SQRT_3;
    v.first := '1';
1876
when SQRT_3 =>
    -- put R into Y, wait for product from multiplier
    msel_2 <= MUL2_R;
    pshift := '1';
    set_y := r.first;
    if multiply_to_f.valid = '1' then
        -- put result into R
        opsel_r <= RES_MULT;
        v.first := '1';
        v.state := SQRT_4;
    end if;
1888
1889 when SQRT_4 =>
1890 -- compute 1.5 - Y * P
1891 msel_1 <= MUL1_Y;
1892 msel_2 <= MUL2_P;
1893 msel_add <= MULADD_CONST;
1894 msel_inv <= '1';
1895 f_to_multiply.valid <= r.first;
1896 pshift := '1';
1897 if multiply_to_f.valid = '1' then
1898 v.state := SQRT_5;
1899 end if;
1900
1901 when SQRT_5 =>
1902 -- compute Y = Y * P
1903 msel_1 <= MUL1_Y;
1904 msel_2 <= MUL2_P;
1905 f_to_multiply.valid <= '1';
1906 v.first := '1';
1907 v.state := SQRT_6;
1908
1909 when SQRT_6 =>
1910 -- pipeline in R = R * P
1911 msel_1 <= MUL1_R;
1912 msel_2 <= MUL2_P;
1913 f_to_multiply.valid <= r.first;
1914 pshift := '1';
1915 if multiply_to_f.valid = '1' then
1916 v.first := '1';
1917 v.state := SQRT_7;
1918 end if;
1919
1920 when SQRT_7 =>
1921 -- first multiply is done, put result in Y
1922 msel_2 <= MUL2_P;
1923 set_y := r.first;
1924 -- wait for second multiply (should be here already)
1925 pshift := '1';
1926 if multiply_to_f.valid = '1' then
1927 -- put result into R
1928 opsel_r <= RES_MULT;
1929 v.first := '1';
1930 v.count := r.count + 1;
1931 if r.count < 2 then
1932 v.state := SQRT_4;
1933 else
1934 v.first := '1';
1935 v.state := SQRT_8;
1936 end if;
1937 end if;
1938
1939 when SQRT_8 =>
1940 -- compute P = A - R * R, which can be +ve or -ve
1941 -- we arranged for B to be put into A earlier
1942 msel_1 <= MUL1_R;
1943 msel_2 <= MUL2_R;
1944 msel_add <= MULADD_A;
1945 msel_inv <= '1';
1946 pshift := '1';
1947 f_to_multiply.valid <= r.first;
1948 if multiply_to_f.valid = '1' then
1949 v.first := '1';
1950 v.state := SQRT_9;
1951 end if;
1952
1953 when SQRT_9 =>
1954 -- compute P = P * Y
1955 -- since Y is an estimate of 1/sqrt(B), this makes P an
1956 -- estimate of the adjustment needed to R. Since the error
1957 -- could be negative and we have an unsigned multiplier, the
1958 -- upper bits can be wrong, but it turns out the lowest 8 bits
1959 -- are correct and are all we need (given 3 iterations through
1960 -- SQRT_4 to SQRT_7).
1961 msel_1 <= MUL1_Y;
1962 msel_2 <= MUL2_P;
1963 pshift := '1';
1964 f_to_multiply.valid <= r.first;
1965 if multiply_to_f.valid = '1' then
1966 v.state := SQRT_10;
1967 end if;
1968
1969 when SQRT_10 =>
1970 -- Add the bottom 8 bits of P, sign-extended,
1971 -- divided by 4, onto R.
1972 -- The division by 4 is because R is 10.54 format
1973 -- whereas P is 8.56 format.
1974 opsel_b <= BIN_PS6;
1975 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1976 v.result_exp := sqrt_exp;
1977 v.shift := to_signed(1, EXP_BITS);
1978 v.first := '1';
1979 v.state := SQRT_11;
1980
1981 when SQRT_11 =>
1982 -- compute P = A - R * R (remainder)
1983 -- also put 2 * R + 1 into B for comparison with P
1984 msel_1 <= MUL1_R;
1985 msel_2 <= MUL2_R;
1986 msel_add <= MULADD_A;
1987 msel_inv <= '1';
1988 f_to_multiply.valid <= r.first;
1989 shiftin := '1';
1990 set_b := r.first;
1991 if multiply_to_f.valid = '1' then
1992 v.state := SQRT_12;
1993 end if;
1994
1995 when SQRT_12 =>
1996 -- test if remainder is 0 or >= B = 2*R + 1
1997 if pcmpb_lt = '1' then
1998 -- square root is correct, set X if remainder non-zero
1999 v.x := r.p(58) or px_nz;
2000 else
2001 -- square root needs to be incremented by 1
2002 carry_in <= '1';
2003 v.x := not pcmpb_eq;
2004 end if;
2005 v.state := FINISH;
2006
2007 when INT_SHIFT =>
2008 -- r.shift = b.exponent - 52
2009 opsel_r <= RES_SHIFT;
2010 set_x := '1';
2011 v.state := INT_ROUND;
2012 v.shift := to_signed(-2, EXP_BITS);
2013
2014 when INT_ROUND =>
2015 -- r.shift = -2
2016 opsel_r <= RES_SHIFT;
2017 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2018 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2019 -- Check for negative values that don't round to 0 for fcti*u*
2020 if r.insn(8) = '1' and r.result_sign = '1' and
2021 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2022 v.state := INT_OFLOW;
2023 else
2024 v.state := INT_FINAL;
2025 end if;
2026
2027 when INT_ISHIFT =>
2028 -- r.shift = b.exponent - 54
2029 opsel_r <= RES_SHIFT;
2030 v.state := INT_FINAL;
2031
2032 when INT_FINAL =>
2033 -- Negate if necessary, and increment for rounding if needed
2034 opsel_ainv <= r.result_sign;
2035 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2036 -- Check for possible overflows
2037 case r.insn(9 downto 8) is
2038 when "00" => -- fctiw[z]
2039 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2040 when "01" => -- fctiwu[z]
2041 need_check := r.r(31);
2042 when "10" => -- fctid[z]
2043 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2044 when others => -- fctidu[z]
2045 need_check := r.r(63);
2046 end case;
2047 if need_check = '1' then
2048 v.state := INT_CHECK;
2049 else
2050 if r.fpscr(FPSCR_FI) = '1' then
2051 v.fpscr(FPSCR_XX) := '1';
2052 end if;
2053 arith_done := '1';
2054 end if;
2055
2056 when INT_CHECK =>
2057 if r.insn(9) = '0' then
2058 msb := r.r(31);
2059 else
2060 msb := r.r(63);
2061 end if;
2062 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2063 if (r.insn(8) = '0' and msb /= r.result_sign) or
2064 (r.insn(8) = '1' and msb /= '1') then
2065 opsel_r <= RES_MISC;
2066 v.fpscr(FPSCR_VXCVI) := '1';
2067 invalid := '1';
2068 else
2069 if r.fpscr(FPSCR_FI) = '1' then
2070 v.fpscr(FPSCR_XX) := '1';
2071 end if;
2072 end if;
2073 arith_done := '1';
2074
2075 when INT_OFLOW =>
2076 opsel_r <= RES_MISC;
2077 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2078 if r.b.class = NAN then
2079 misc_sel(0) <= '1';
2080 end if;
2081 v.fpscr(FPSCR_VXCVI) := '1';
2082 invalid := '1';
2083 arith_done := '1';
2084
2085 when FRI_1 =>
2086 -- r.shift = b.exponent - 52
2087 opsel_r <= RES_SHIFT;
2088 set_x := '1';
2089 v.state := ROUNDING;
2090
2091 when FINISH =>
2092 if r.is_multiply = '1' and px_nz = '1' then
2093 v.x := '1';
2094 end if;
2095 if r.r(63 downto 54) /= "0000000001" then
2096 renormalize := '1';
2097 v.state := NORMALIZE;
2098 else
2099 set_x := '1';
2100 if exp_tiny = '1' then
2101 v.shift := new_exp - min_exp;
2102 v.state := ROUND_UFLOW;
2103 elsif exp_huge = '1' then
2104 v.state := ROUND_OFLOW;
2105 else
2106 v.state := ROUNDING;
2107 end if;
2108 end if;
2109
2110 when NORMALIZE =>
2111 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2112 -- r.shift = clz(r.r) - 9
2113 opsel_r <= RES_SHIFT;
2114 set_x := '1';
2115 if exp_tiny = '1' then
2116 v.shift := new_exp - min_exp;
2117 v.state := ROUND_UFLOW;
2118 elsif exp_huge = '1' then
2119 v.state := ROUND_OFLOW;
2120 else
2121 v.state := ROUNDING;
2122 end if;
2123
2124 when ROUND_UFLOW =>
2125 -- r.shift = - amount by which exponent underflows
2126 v.tiny := '1';
2127 if r.fpscr(FPSCR_UE) = '0' then
2128 -- disabled underflow exception case
2129 -- have to denormalize before rounding
2130 opsel_r <= RES_SHIFT;
2131 set_x := '1';
2132 v.state := ROUNDING;
2133 else
2134 -- enabled underflow exception case
2135 -- if denormalized, have to normalize before rounding
2136 v.fpscr(FPSCR_UX) := '1';
2137 v.result_exp := r.result_exp + bias_exp;
2138 if r.r(54) = '0' then
2139 renormalize := '1';
2140 v.state := NORMALIZE;
2141 else
2142 v.state := ROUNDING;
2143 end if;
2144 end if;
2145
2146 when ROUND_OFLOW =>
2147 v.fpscr(FPSCR_OX) := '1';
2148 if r.fpscr(FPSCR_OE) = '0' then
2149 -- disabled overflow exception
2150 -- result depends on rounding mode
2151 v.fpscr(FPSCR_XX) := '1';
2152 v.fpscr(FPSCR_FI) := '1';
2153 if r.round_mode(1 downto 0) = "00" or
2154 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2155 v.result_class := INFINITY;
2156 v.fpscr(FPSCR_FR) := '1';
2157 else
2158 v.fpscr(FPSCR_FR) := '0';
2159 end if;
2160 -- construct largest representable number
2161 v.result_exp := max_exp;
2162 opsel_r <= RES_MISC;
2163 misc_sel <= "001" & r.single_prec;
2164 arith_done := '1';
2165 else
2166 -- enabled overflow exception
2167 v.result_exp := r.result_exp - bias_exp;
2168 v.state := ROUNDING;
2169 end if;
2170
2171 when ROUNDING =>
2172 opsel_mask <= '1';
2173 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2174 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2175 if round(1) = '1' then
2176 -- increment the LSB for the precision
2177 opsel_b <= BIN_RND;
2178 v.shift := to_signed(-1, EXP_BITS);
2179 v.state := ROUNDING_2;
2180 else
2181 if r.r(54) = '0' then
2182 -- result after masking could be zero, or could be a
2183 -- denormalized result that needs to be renormalized
2184 renormalize := '1';
2185 v.state := ROUNDING_3;
2186 else
2187 arith_done := '1';
2188 end if;
2189 end if;
2190 if round(0) = '1' then
2191 v.fpscr(FPSCR_XX) := '1';
2192 if r.tiny = '1' then
2193 v.fpscr(FPSCR_UX) := '1';
2194 end if;
2195 end if;
2196
2197 when ROUNDING_2 =>
2198 -- Check for overflow during rounding
2199 -- r.shift = -1
2200 v.x := '0';
2201 if r.r(55) = '1' then
2202 opsel_r <= RES_SHIFT;
2203 if exp_huge = '1' then
2204 v.state := ROUND_OFLOW;
2205 else
2206 arith_done := '1';
2207 end if;
2208 elsif r.r(54) = '0' then
2209 -- Do CLZ so we can renormalize the result
2210 renormalize := '1';
2211 v.state := ROUNDING_3;
2212 else
2213 arith_done := '1';
2214 end if;
2215
2216 when ROUNDING_3 =>
2217 -- r.shift = clz(r.r) - 9
2218 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2219 if mant_nz = '0' then
2220 v.result_class := ZERO;
2221 if r.is_subtract = '1' then
2222 -- set result sign depending on rounding mode
2223 v.result_sign := r.round_mode(1) and r.round_mode(0);
2224 end if;
2225 arith_done := '1';
2226 else
2227 -- Renormalize result after rounding
2228 opsel_r <= RES_SHIFT;
2229 v.denorm := exp_tiny;
2230 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2231 if new_exp < to_signed(-1022, EXP_BITS) then
2232 v.state := DENORM;
2233 else
2234 arith_done := '1';
2235 end if;
2236 end if;
2237
2238 when DENORM =>
2239 -- r.shift = result_exp - -1022
2240 opsel_r <= RES_SHIFT;
2241 arith_done := '1';
2242
2243 when NAN_RESULT =>
2244 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2245 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2246 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2247 -- Signalling NAN
2248 v.fpscr(FPSCR_VXSNAN) := '1';
2249 invalid := '1';
2250 end if;
2251 if r.use_a = '1' and r.a.class = NAN then
2252 v.opsel_a := AIN_A;
2253 elsif r.use_b = '1' and r.b.class = NAN then
2254 v.opsel_a := AIN_B;
2255 elsif r.use_c = '1' and r.c.class = NAN then
2256 v.opsel_a := AIN_C;
2257 end if;
2258 v.state := EXC_RESULT;
2259
2260 when EXC_RESULT =>
2261 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2262 case r.opsel_a is
2263 when AIN_B =>
2264 v.result_sign := r.b.negative xor r.negate;
2265 v.result_exp := r.b.exponent;
2266 v.result_class := r.b.class;
2267 when AIN_C =>
2268 v.result_sign := r.c.negative xor r.negate;
2269 v.result_exp := r.c.exponent;
2270 v.result_class := r.c.class;
2271 when others =>
2272 v.result_sign := r.a.negative xor r.negate;
2273 v.result_exp := r.a.exponent;
2274 v.result_class := r.a.class;
2275 end case;
2276 arith_done := '1';
2277
2278 end case;
2279
2280 if zero_divide = '1' then
2281 v.fpscr(FPSCR_ZX) := '1';
2282 end if;
2283 if qnan_result = '1' then
2284 invalid := '1';
2285 v.result_class := NAN;
2286 v.result_sign := '0';
2287 misc_sel <= "0001";
2288 opsel_r <= RES_MISC;
2289 arith_done := '1';
2290 end if;
2291 if invalid = '1' then
2292 v.invalid := '1';
2293 end if;
2294 if arith_done = '1' then
2295 -- Enabled invalid exception doesn't write result or FPRF
2296 -- Neither does enabled zero-divide exception
2297 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2298 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2299 v.writing_back := '1';
2300 v.update_fprf := '1';
2301 end if;
2302 v.instr_done := '1';
2303 v.state := IDLE;
2304 update_fx := '1';
2305 end if;
2306
2307 -- Multiplier and divide/square root data path
2308 case msel_1 is
2309 when MUL1_A =>
2310 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2311 when MUL1_B =>
2312 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2313 when MUL1_Y =>
2314 f_to_multiply.data1 <= r.y;
2315 when others =>
2316 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2317 end case;
2318 case msel_2 is
2319 when MUL2_C =>
2320 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2321 when MUL2_LUT =>
2322 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2323 when MUL2_P =>
2324 f_to_multiply.data2 <= r.p;
2325 when others =>
2326 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2327 end case;
2328 maddend := (others => '0');
2329 case msel_add is
2330 when MULADD_CONST =>
2331 -- addend is 2.0 or 1.5 in 16.112 format
2332 if r.is_sqrt = '0' then
2333 maddend(113) := '1'; -- 2.0
2334 else
2335 maddend(112 downto 111) := "11"; -- 1.5
2336 end if;
2337 when MULADD_A =>
2338 -- addend is A in 16.112 format
2339 maddend(121 downto 58) := r.a.mantissa;
2340 when MULADD_RS =>
2341 -- addend is concatenation of R and S in 16.112 format
2342 maddend := "000000" & r.r & r.s & "00";
2343 when others =>
2344 end case;
2345 if msel_inv = '1' then
2346 f_to_multiply.addend <= not maddend;
2347 else
2348 f_to_multiply.addend <= maddend;
2349 end if;
2350 f_to_multiply.not_result <= msel_inv;
2351 if set_y = '1' then
2352 v.y := f_to_multiply.data2;
2353 end if;
2354 if multiply_to_f.valid = '1' then
2355 if pshift = '0' then
2356 v.p := multiply_to_f.result(63 downto 0);
2357 else
2358 v.p := multiply_to_f.result(119 downto 56);
2359 end if;
2360 end if;
2361
2362 -- Data path.
2363 -- This has A and B input multiplexers, an adder, a shifter,
2364 -- count-leading-zeroes logic, and a result mux.
2365 if r.longmask = '1' then
2366 mshift := r.shift + to_signed(-29, EXP_BITS);
2367 else
2368 mshift := r.shift;
2369 end if;
2370 if mshift < to_signed(-64, EXP_BITS) then
2371 mask := (others => '1');
2372 elsif mshift >= to_signed(0, EXP_BITS) then
2373 mask := (others => '0');
2374 else
2375 mask := right_mask(unsigned(mshift(5 downto 0)));
2376 end if;
2377 case r.opsel_a is
2378 when AIN_R =>
2379 in_a0 := r.r;
2380 when AIN_A =>
2381 in_a0 := r.a.mantissa;
2382 when AIN_B =>
2383 in_a0 := r.b.mantissa;
2384 when others =>
2385 in_a0 := r.c.mantissa;
2386 end case;
2387 if (or (mask and in_a0)) = '1' and set_x = '1' then
2388 v.x := '1';
2389 end if;
2390 if opsel_ainv = '1' then
2391 in_a0 := not in_a0;
2392 end if;
2393 in_a <= in_a0;
2394 case opsel_b is
2395 when BIN_ZERO =>
2396 in_b0 := (others => '0');
2397 when BIN_R =>
2398 in_b0 := r.r;
2399 when BIN_RND =>
2400 round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
2401 in_b0 := round_inc;
2402 when others =>
2403 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2404 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2405 end case;
2406 if opsel_binv = '1' then
2407 in_b0 := not in_b0;
2408 end if;
2409 in_b <= in_b0;
2410 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2411 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2412 std_ulogic_vector(r.shift(6 downto 0)));
2413 else
2414 shift_res := (others => '0');
2415 end if;
2416 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2417 if opsel_mask = '1' then
2418 sum(1 downto 0) := "00";
2419 if r.single_prec = '1' then
2420 sum(30 downto 2) := (others => '0');
2421 end if;
2422 end if;
2423 case opsel_r is
2424 when RES_SUM =>
2425 result <= sum;
2426 when RES_SHIFT =>
2427 result <= shift_res;
2428 when RES_MULT =>
2429 result <= multiply_to_f.result(121 downto 58);
2430 when others =>
2431 case misc_sel is
2432 when "0000" =>
2433 misc := x"00000000" & (r.fpscr and fpscr_mask);
2434 when "0001" =>
2435 -- generated QNaN mantissa
2436 misc := x"0020000000000000";
2437 when "0010" =>
2438 -- mantissa of max representable DP number
2439 misc := x"007ffffffffffffc";
2440 when "0011" =>
2441 -- mantissa of max representable SP number
2442 misc := x"007fffff80000000";
2443 when "0100" =>
2444 -- fmrgow result
2445 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2446 when "0110" =>
2447 -- fmrgew result
2448 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2449 when "0111" =>
2450 misc := 10x"000" & inverse_est & 35x"000000000";
2451 when "1000" =>
2452 -- max positive result for fctiw[z]
2453 misc := x"000000007fffffff";
2454 when "1001" =>
2455 -- max negative result for fctiw[z]
2456 misc := x"ffffffff80000000";
2457 when "1010" =>
2458 -- max positive result for fctiwu[z]
2459 misc := x"00000000ffffffff";
2460 when "1011" =>
2461 -- result for a negative or NaN operand of fctiwu[z] (zero)
2462 misc := x"0000000000000000";
2463 when "1100" =>
2464 -- max positive result for fctid[z]
2465 misc := x"7fffffffffffffff";
2466 when "1101" =>
2467 -- max negative result for fctid[z]
2468 misc := x"8000000000000000";
2469 when "1110" =>
2470 -- max positive result for fctidu[z]
2471 misc := x"ffffffffffffffff";
2472 when "1111" =>
2473 -- result for a negative or NaN operand of fctidu[z] (zero)
2474 misc := x"0000000000000000";
2475 when others =>
2476 misc := x"0000000000000000";
2477 end case;
2478 result <= misc;
2479 end case;
2480 v.r := result;
2481 if set_s = '1' then
2482 case opsel_s is
2483 when S_NEG =>
2484 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2485 when S_MULT =>
2486 v.s := multiply_to_f.result(57 downto 2);
2487 when S_SHIFT =>
2488 v.s := shift_res(63 downto 8);
2489 if shift_res(7 downto 0) /= x"00" then
2490 v.x := '1';
2491 end if;
2492 when others =>
2493 v.s := (others => '0');
2494 end case;
2495 end if;
2496
2497 if set_a = '1' then
2498 v.a.exponent := new_exp;
2499 v.a.mantissa := shift_res;
2500 end if;
2501 if set_b = '1' then
2502 v.b.exponent := new_exp;
2503 v.b.mantissa := shift_res;
2504 end if;
2505 if set_c = '1' then
2506 v.c.exponent := new_exp;
2507 v.c.mantissa := shift_res;
2508 end if;
2509
2510 if opsel_r = RES_SHIFT then
2511 v.result_exp := new_exp;
2512 end if;
2513
2514 if renormalize = '1' then
2515 clz := count_left_zeroes(r.r);
2516 if renorm_sqrt = '1' then
2517 -- make denormalized value end up with even exponent
2518 clz(0) := '1';
2519 end if;
2520 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2521 end if;
2522
2523 if r.int_result = '1' then
2524 fp_result <= r.r;
2525 else
2526 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2527 r.single_prec, r.quieten_nan);
2528 end if;
2529 if r.update_fprf = '1' then
2530 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2531 r.r(54) and not r.denorm);
2532 end if;
2533
2534 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2535 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2536 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2537 v.fpscr(FPSCR_VE downto FPSCR_XE));
2538 if update_fx = '1' and
2539 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2540 v.fpscr(FPSCR_FX) := '1';
2541 end if;
2542 if r.rc = '1' then
2543 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2544 end if;
2545
2546 if illegal = '1' then
2547 v.instr_done := '0';
2548 v.do_intr := '0';
2549 v.writing_back := '0';
2550 v.busy := '0';
2551 v.state := IDLE;
2552 else
2553 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2554 if v.state /= IDLE or v.do_intr = '1' then
2555 v.busy := '1';
2556 end if;
2557 end if;
2558
2559 rin <= v;
2560 e_out.illegal <= illegal;
2561 end process;
2562
2563 end architecture behaviour;