FPU: Implement floating multiply-add instructions
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Top-level interface of the floating-point unit.
entity fpu is
    port (
        -- Clock and reset; reset is sampled on the rising clock edge
        clk, rst : in std_ulogic;

        -- Input record and output record exchanged with the issuing stage
        -- (record layouts are declared in work.common)
        e_in     : in Execute1toFPUType;
        e_out    : out FPUToExecute1Type;

        -- Result record (register write and CR write)
        w_out    : out FPUToWritebackType
    );
end entity fpu;
25
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
28
29 constant EXP_BITS : natural := 13;
30
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
36 end record;
37
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
41 DO_FCFID, DO_FCTI,
42 DO_FRSP, DO_FRI,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
44 DO_FRE, DO_FRSQRTE,
45 DO_FSEL,
46 FRI_1,
47 ADD_SHIFT, ADD_2, ADD_3,
48 CMP_1, CMP_2,
49 MULT_1,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
52 LOOKUP,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
54 FRE_1,
55 RSQRT_1,
56 FTDIV_1,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
62 FINISH, NORMALIZE,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
65 DENORM,
66 RENORM_A, RENORM_A2,
67 RENORM_B, RENORM_B2,
68 RENORM_C, RENORM_C2);
69
70 type reg_type is record
71 state : state_t;
72 busy : std_ulogic;
73 instr_done : std_ulogic;
74 do_intr : std_ulogic;
75 op : insn_type_t;
76 insn : std_ulogic_vector(31 downto 0);
77 dest_fpr : gspr_index_t;
78 fe_mode : std_ulogic;
79 rc : std_ulogic;
80 is_cmp : std_ulogic;
81 single_prec : std_ulogic;
82 fpscr : std_ulogic_vector(31 downto 0);
83 a : fpu_reg_type;
84 b : fpu_reg_type;
85 c : fpu_reg_type;
86 r : std_ulogic_vector(63 downto 0); -- 10.54 format
87 s : std_ulogic_vector(55 downto 0); -- extended fraction
88 x : std_ulogic;
89 p : std_ulogic_vector(63 downto 0); -- 8.56 format
90 y : std_ulogic_vector(63 downto 0); -- 8.56 format
91 result_sign : std_ulogic;
92 result_class : fp_number_class;
93 result_exp : signed(EXP_BITS-1 downto 0);
94 shift : signed(EXP_BITS-1 downto 0);
95 writing_back : std_ulogic;
96 int_result : std_ulogic;
97 cr_result : std_ulogic_vector(3 downto 0);
98 cr_mask : std_ulogic_vector(7 downto 0);
99 old_exc : std_ulogic_vector(4 downto 0);
100 update_fprf : std_ulogic;
101 quieten_nan : std_ulogic;
102 tiny : std_ulogic;
103 denorm : std_ulogic;
104 round_mode : std_ulogic_vector(2 downto 0);
105 is_subtract : std_ulogic;
106 exp_cmp : std_ulogic;
107 madd_cmp : std_ulogic;
108 add_bsmall : std_ulogic;
109 is_multiply : std_ulogic;
110 is_sqrt : std_ulogic;
111 first : std_ulogic;
112 count : unsigned(1 downto 0);
113 doing_ftdiv : std_ulogic_vector(1 downto 0);
114 end record;
115
116 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
117
118 signal r, rin : reg_type;
119
120 signal fp_result : std_ulogic_vector(63 downto 0);
121 signal opsel_a : std_ulogic_vector(1 downto 0);
122 signal opsel_b : std_ulogic_vector(1 downto 0);
123 signal opsel_r : std_ulogic_vector(1 downto 0);
124 signal opsel_s : std_ulogic_vector(1 downto 0);
125 signal opsel_ainv : std_ulogic;
126 signal opsel_amask : std_ulogic;
127 signal opsel_binv : std_ulogic;
128 signal in_a : std_ulogic_vector(63 downto 0);
129 signal in_b : std_ulogic_vector(63 downto 0);
130 signal result : std_ulogic_vector(63 downto 0);
131 signal carry_in : std_ulogic;
132 signal lost_bits : std_ulogic;
133 signal r_hi_nz : std_ulogic;
134 signal r_lo_nz : std_ulogic;
135 signal s_nz : std_ulogic;
136 signal misc_sel : std_ulogic_vector(3 downto 0);
137 signal f_to_multiply : MultiplyInputType;
138 signal multiply_to_f : MultiplyOutputType;
139 signal msel_1 : std_ulogic_vector(1 downto 0);
140 signal msel_2 : std_ulogic_vector(1 downto 0);
141 signal msel_add : std_ulogic_vector(1 downto 0);
142 signal msel_inv : std_ulogic;
143 signal inverse_est : std_ulogic_vector(18 downto 0);
144
145 -- opsel values
146 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
147 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
148 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
149 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
150
151 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
152 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
153 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
154 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
155
156 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
157 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
158 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
159 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
160
161 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
162 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
163 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
164 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
165
166 -- msel values
167 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
168 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
169 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
170 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
171
172 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
173 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
174 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
175 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
176
177 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
178 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
179 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
180 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
181
182 -- Inverse lookup table, indexed by the top 8 fraction bits
183 -- The first 256 entries are the reciprocal (1/x) lookup table,
184 -- and the remaining 768 entries are the reciprocal square root table.
185 -- Output range is [0.5, 1) in 0.19 format, though the top
186 -- bit isn't stored since it is always 1.
187 -- Each output value is the inverse of the center of the input
188 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
189 -- entry 1 is 1 / (1 + 3/512), etc.
190 signal inverse_table : lookup_table := (
191 -- 1/x lookup table
192 -- Unit bit is assumed to be 1, so input range is [1, 2)
193 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
194 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
195 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
196 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
197 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
198 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
199 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
200 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
201 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
202 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
203 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
204 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
205 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
206 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
207 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
208 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
209 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
210 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
211 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
212 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
213 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
214 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
215 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
216 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
217 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
218 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
219 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
220 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
221 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
222 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
223 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
224 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
225 -- 1/sqrt(x) lookup table
226 -- Input is in the range [1, 4), i.e. two bits to the left of the
227 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
228 -- 1.0 ... 1.9999
229 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
230 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
231 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
232 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
233 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
234 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
235 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
236 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
237 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
238 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
239 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
240 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
241 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
242 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
243 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
244 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
245 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
246 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
247 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
248 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
249 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
250 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
251 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
252 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
253 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
254 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
255 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
256 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
257 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
258 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
259 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
260 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
261 -- 2.0 ... 2.9999
262 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
263 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
264 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
265 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
266 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
267 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
268 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
269 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
270 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
271 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
272 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
273 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
274 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
275 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
276 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
277 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
278 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
279 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
280 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
281 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
282 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
283 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
284 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
285 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
286 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
287 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
288 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
289 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
290 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
291 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
292 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
293 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
294 -- 3.0 ... 3.9999
295 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
296 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
297 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
298 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
299 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
300 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
301 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
302 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
303 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
304 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
305 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
306 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
307 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
308 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
309 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
310 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
311 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
312 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
313 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
314 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
315 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
316 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
317 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
318 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
319 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
320 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
321 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
322 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
323 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
324 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
325 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
326 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
327 );
328
329 -- Left and right shifter with 120 bit input and 64 bit output.
330 -- Shifts inp left by shift bits and returns the upper 64 bits of
331 -- the result. The shift parameter is interpreted as a signed
332 -- number in the range -64..63, with negative values indicating
333 -- right shifts.
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    variable stage1  : std_ulogic_vector(94 downto 0);
    variable stage2  : std_ulogic_vector(70 downto 0);
    variable shifted : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: shift(6 downto 5) picks one of four 95-bit windows of inp,
    -- i.e. a coarse shift of 0, +32, -64 or -32 bit positions.  Positions
    -- that fall outside inp are filled with zeros.
    if shift(6 downto 5) = "00" then
        stage1 := inp(119 downto 25);
    elsif shift(6 downto 5) = "01" then
        stage1 := inp(87 downto 0) & "0000000";
    elsif shift(6 downto 5) = "10" then
        stage1 := x"0000000000000000" & inp(119 downto 89);
    else
        stage1 := x"00000000" & inp(119 downto 57);
    end if;

    -- Stage 2: shift(4 downto 3) moves the 71-bit window down by 8 bits per
    -- step.  The lowest window is the default, selected by "11" (and by any
    -- value matching none of the codes below).
    stage2 := stage1(70 downto 0);
    for step in 0 to 2 loop
        if shift(4 downto 3) = std_ulogic_vector(to_unsigned(step, 2)) then
            stage2 := stage1(94 - 8 * step downto 24 - 8 * step);
        end if;
    end loop;

    -- Stage 3: shift(2 downto 0) moves the 64-bit window down by one bit per
    -- step.  The lowest window is the default, selected by "111" (and by any
    -- value matching none of the codes below).
    shifted := stage2(63 downto 0);
    for step in 0 to 6 loop
        if shift(2 downto 0) = std_ulogic_vector(to_unsigned(step, 3)) then
            shifted := stage2(70 - step downto 7 - step);
        end if;
    end loop;

    return shifted;
end;
381
382 -- Generate a mask with 0-bits on the left and 1-bits on the right which
383 -- selects the bits that will be lost in doing a right shift. The shift
384 -- parameter is the bottom 6 bits of a negative shift count,
385 -- indicating a right shift.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0);
begin
    -- Bit b of the mask is set exactly when shift <= 63 - b, so the mask has
    -- (64 - shift) one-bits at the least-significant end and zeros above them.
    for bitpos in mask'range loop
        if shift <= 63 - bitpos then
            mask(bitpos) := '1';
        else
            mask(bitpos) := '0';
        end if;
    end loop;
    return mask;
end;
397
398 -- Split a DP floating-point number into components and work out its class.
399 -- If is_int = 1, the input is considered an integer
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    -- Reductions over the exponent field (bits 62..52) and the fraction
    -- field (bits 51..0) of the raw register value.
    constant exp_any  : std_ulogic := or (fpr(62 downto 52));
    constant exp_all  : std_ulogic := and (fpr(62 downto 52));
    constant frac_any : std_ulogic := or (fpr(51 downto 0));
    constant kind     : std_ulogic_vector(2 downto 0) := exp_all & exp_any & frac_any;
    variable dec      : fpu_reg_type;
begin
    dec.negative := fpr(63);

    if is_int /= '0' then
        -- Integer operand: the raw 64 bits become the mantissa, the exponent
        -- is zero, and the class only distinguishes zero from non-zero.
        dec.mantissa := fpr;
        dec.exponent := (others => '0');
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
        return dec;
    end if;

    -- Floating-point operand: remove the bias of 1023.  A zero exponent
    -- field (zero or denormal) is given the exponent -1022.
    if exp_any = '0' then
        dec.exponent := to_signed(-1022, EXP_BITS);
    else
        dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
    end if;

    -- The unit bit (bit 54) is 1 only when the exponent field is non-zero;
    -- the fraction follows it, with two zero bits appended at the bottom.
    dec.mantissa := "000000000" & exp_any & fpr(51 downto 0) & "00";

    -- Classify from (exponent all ones, exponent non-zero, fraction non-zero).
    if kind = "000" then
        dec.class := ZERO;
    elsif kind = "110" then
        dec.class := INFINITY;
    elsif kind = "001" or kind = "010" or kind = "011" then
        -- "001" is a denormalized number
        dec.class := FINITE;
    else
        dec.class := NAN;
    end if;
    return dec;
end;
437
438 -- Construct a DP floating-point result from components
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    variable dword : std_ulogic_vector(63 downto 0);
begin
    -- Start from a signed zero; ZERO results need nothing more.
    dword := (63 => sign, others => '0');

    -- FINITE and NAN results both carry fraction bits taken from the
    -- mantissa.  The low 29 fraction bits are left at zero for
    -- single-precision results.
    if class = FINITE or class = NAN then
        dword(51 downto 29) := mantissa(53 downto 31);
        if single_prec = '0' then
            dword(28 downto 0) := mantissa(30 downto 2);
        end if;
    end if;

    case class is
        when FINITE =>
            -- Only a normalized number (unit bit set) gets a biased
            -- exponent; otherwise the exponent field stays zero.
            if mantissa(54) = '1' then
                dword(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
            end if;
        when NAN =>
            dword(62 downto 52) := "11111111111";
            -- The top fraction bit is forced to 1 when quieten_nan is set.
            dword(51) := quieten_nan or mantissa(53);
        when INFINITY =>
            dword(62 downto 52) := "11111111111";
        when ZERO =>
            null;
    end case;
    return dword;
end;
469
470 -- Determine whether to increment when rounding
471 -- Returns rounding_inc & inexact
472 -- Assumes x includes the bottom 29 bits of the mantissa already
473 -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable guard_bits : std_ulogic_vector(1 downto 0);
    variable grx        : std_ulogic_vector(2 downto 0);
    variable lsb        : std_ulogic;
    variable inexact    : std_ulogic;
    variable round_up   : std_ulogic;
    variable verdict    : std_ulogic_vector(1 downto 0);
begin
    -- Pick the result LSB and the two bits just below it for the
    -- selected precision.
    if single_prec = '0' then
        guard_bits := mantissa(1 downto 0);
        lsb := mantissa(2);
    else
        guard_bits := mantissa(30 downto 29);
        lsb := mantissa(31);
    end if;
    grx := guard_bits & x;

    -- The result is inexact if any bit below the LSB is set.
    inexact := or (grx);
    round_up := '0';

    if rn(1 downto 0) = "00" then
        -- Round to nearest: an exact tie goes to even unless rn(2) is set,
        -- in which case the top guard bit decides, as for non-ties.
        if grx = "100" and rn(2) = '0' then
            round_up := lsb;
        else
            round_up := grx(2);
        end if;
    elsif rn(1 downto 0) = "01" then
        -- Round towards zero: never increment.
        null;
    elsif rn(0) = sign then
        -- Round towards +/- infinity in the direction of greater
        -- magnitude: increment whenever the result is inexact.
        round_up := inexact;
    end if;

    -- Bit 1 is the rounding increment, bit 0 the inexact flag.
    verdict := round_up & inexact;
    return verdict;
end;
507
508 -- Determine result flags to write into the FPSCR
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
begin
    -- Every branch returns a 5-bit vector.
    if class = NAN then
        return "10001";
    elsif class = ZERO then
        return sign & "0010";
    elsif class = INFINITY then
        return '0' & sign & (not sign) & "01";
    else
        -- FINITE: the leading bit is set when the unit bit is clear.
        return (not unitbit) & sign & (not sign) & "00";
    end if;
end;
523
524 begin
-- Multiplier instance: operands are supplied through f_to_multiply and
-- the result is read back from multiply_to_f.
fpu_multiply_0: entity work.multiply
    port map (
        m_in  => f_to_multiply,
        m_out => multiply_to_f,
        clk   => clk
    );
531
-- State register: loads rin into r on every rising clock edge.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            -- A valid instruction may only be presented while in IDLE.
            assert (r.state = IDLE) or (e_in.valid /= '1') severity failure;
            r <= rin;
        else
            -- Reset clears only these fields; every other field of r keeps
            -- its previous value.
            r.writing_back <= '0';
            r.fpscr <= (others => '0');
            r.do_intr <= '0';
            r.instr_done <= '0';
            r.busy <= '0';
            r.state <= IDLE;
        end if;
    end if;
end process;
548
549 -- synchronous reads from lookup table
lut_access: process(clk)
    variable index : std_ulogic_vector(9 downto 0);
begin
    if rising_edge(clk) then
        -- The low 8 address bits are always mantissa bits 53..46 of B.
        -- The upper 2 bits are zero unless a square root is in progress,
        -- in which case they are mantissa bits 55..54 of B.
        index := "00" & r.b.mantissa(53 downto 46);
        if r.is_sqrt = '1' then
            index(9 downto 8) := r.b.mantissa(55 downto 54);
        end if;
        -- The table stores 18 bits per entry; a leading 1 is prepended to
        -- form the 19-bit estimate.
        inverse_est <= '1' & inverse_table(to_integer(unsigned(index)));
    end if;
end process;
564
-- Status outputs in e_out, driven directly from the state register
e_out.busy      <= r.busy;
e_out.interrupt <= r.do_intr;
e_out.exception <= r.fpscr(FPSCR_FEX);

-- Register-file result; valid is suppressed when an interrupt is raised
w_out.valid        <= r.instr_done and not r.do_intr;
w_out.write_enable <= r.writing_back;
w_out.write_reg    <= r.dest_fpr;
w_out.write_data   <= fp_result;

-- CR result: the 4-bit value is replicated eight times and
-- write_cr_mask is driven from r.cr_mask
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_data   <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result & r.cr_result & r.cr_result;
577
578 fpu_1: process(all)
579 variable v : reg_type;
580 variable adec : fpu_reg_type;
581 variable bdec : fpu_reg_type;
582 variable cdec : fpu_reg_type;
583 variable fpscr_mask : std_ulogic_vector(31 downto 0);
584 variable illegal : std_ulogic;
585 variable j, k : integer;
586 variable flm : std_ulogic_vector(7 downto 0);
587 variable int_input : std_ulogic;
588 variable mask : std_ulogic_vector(63 downto 0);
589 variable in_a0 : std_ulogic_vector(63 downto 0);
590 variable in_b0 : std_ulogic_vector(63 downto 0);
591 variable misc : std_ulogic_vector(63 downto 0);
592 variable shift_res : std_ulogic_vector(63 downto 0);
593 variable round : std_ulogic_vector(1 downto 0);
594 variable update_fx : std_ulogic;
595 variable arith_done : std_ulogic;
596 variable invalid : std_ulogic;
597 variable zero_divide : std_ulogic;
598 variable mant_nz : std_ulogic;
599 variable min_exp : signed(EXP_BITS-1 downto 0);
600 variable max_exp : signed(EXP_BITS-1 downto 0);
601 variable bias_exp : signed(EXP_BITS-1 downto 0);
602 variable new_exp : signed(EXP_BITS-1 downto 0);
603 variable exp_tiny : std_ulogic;
604 variable exp_huge : std_ulogic;
605 variable renormalize : std_ulogic;
606 variable clz : std_ulogic_vector(5 downto 0);
607 variable set_x : std_ulogic;
608 variable mshift : signed(EXP_BITS-1 downto 0);
609 variable need_check : std_ulogic;
610 variable msb : std_ulogic;
611 variable is_add : std_ulogic;
612 variable longmask : std_ulogic;
613 variable set_a : std_ulogic;
614 variable set_b : std_ulogic;
615 variable set_c : std_ulogic;
616 variable set_y : std_ulogic;
617 variable set_s : std_ulogic;
618 variable qnan_result : std_ulogic;
619 variable px_nz : std_ulogic;
620 variable pcmpb_eq : std_ulogic;
621 variable pcmpb_lt : std_ulogic;
622 variable pshift : std_ulogic;
623 variable renorm_sqrt : std_ulogic;
624 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
625 variable shiftin : std_ulogic;
626 variable mulexp : signed(EXP_BITS-1 downto 0);
627 variable maddend : std_ulogic_vector(127 downto 0);
628 begin
629 v := r;
630 illegal := '0';
631 v.busy := '0';
632 int_input := '0';
633
634 -- capture incoming instruction
635 if e_in.valid = '1' then
636 v.insn := e_in.insn;
637 v.op := e_in.op;
638 v.fe_mode := or (e_in.fe_mode);
639 v.dest_fpr := e_in.frt;
640 v.single_prec := e_in.single;
641 v.int_result := '0';
642 v.rc := e_in.rc;
643 v.is_cmp := e_in.out_cr;
644 if e_in.out_cr = '0' then
645 v.cr_mask := num_to_fxm(1);
646 else
647 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
648 end if;
649 int_input := '0';
650 if e_in.op = OP_FPOP_I then
651 int_input := '1';
652 end if;
653 v.quieten_nan := '1';
654 v.tiny := '0';
655 v.denorm := '0';
656 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
657 v.is_subtract := '0';
658 v.is_multiply := '0';
659 v.is_sqrt := '0';
660 v.add_bsmall := '0';
661 v.doing_ftdiv := "00";
662
663 adec := decode_dp(e_in.fra, int_input);
664 bdec := decode_dp(e_in.frb, int_input);
665 cdec := decode_dp(e_in.frc, int_input);
666 v.a := adec;
667 v.b := bdec;
668 v.c := cdec;
669
670 v.exp_cmp := '0';
671 if adec.exponent > bdec.exponent then
672 v.exp_cmp := '1';
673 end if;
674 v.madd_cmp := '0';
675 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
676 v.madd_cmp := '1';
677 end if;
678 end if;
679
680 r_hi_nz <= or (r.r(55 downto 31));
681 r_lo_nz <= or (r.r(30 downto 2));
682 s_nz <= or (r.s);
683
684 if r.single_prec = '0' then
685 if r.doing_ftdiv(1) = '0' then
686 max_exp := to_signed(1023, EXP_BITS);
687 else
688 max_exp := to_signed(1020, EXP_BITS);
689 end if;
690 if r.doing_ftdiv(0) = '0' then
691 min_exp := to_signed(-1022, EXP_BITS);
692 else
693 min_exp := to_signed(-1021, EXP_BITS);
694 end if;
695 bias_exp := to_signed(1536, EXP_BITS);
696 else
697 max_exp := to_signed(127, EXP_BITS);
698 min_exp := to_signed(-126, EXP_BITS);
699 bias_exp := to_signed(192, EXP_BITS);
700 end if;
701 new_exp := r.result_exp - r.shift;
702 exp_tiny := '0';
703 exp_huge := '0';
704 if new_exp < min_exp then
705 exp_tiny := '1';
706 end if;
707 if new_exp > max_exp then
708 exp_huge := '1';
709 end if;
710
-- Compare P with zero and with B.
-- px_nz:    any of P(57 downto 4) is set
-- pcmpb_eq: P(59 downto 4) equals the low 56 bits of B's mantissa
-- pcmpb_lt: P(59 downto 4) is below them as an unsigned number
px_nz := or (r.p(57 downto 4));
if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
    pcmpb_eq := '1';
else
    pcmpb_eq := '0';
end if;
if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
    pcmpb_lt := '1';
else
    pcmpb_lt := '0';
end if;
721
722 v.writing_back := '0';
723 v.instr_done := '0';
724 v.update_fprf := '0';
725 v.shift := to_signed(0, EXP_BITS);
726 v.first := '0';
727 opsel_a <= AIN_R;
728 opsel_ainv <= '0';
729 opsel_amask <= '0';
730 opsel_b <= BIN_ZERO;
731 opsel_binv <= '0';
732 opsel_r <= RES_SUM;
733 opsel_s <= S_ZERO;
734 carry_in <= '0';
735 misc_sel <= "0000";
736 fpscr_mask := (others => '1');
737 update_fx := '0';
738 arith_done := '0';
739 invalid := '0';
740 zero_divide := '0';
741 renormalize := '0';
742 set_x := '0';
743 qnan_result := '0';
744 longmask := r.single_prec;
745 set_a := '0';
746 set_b := '0';
747 set_c := '0';
748 set_s := '0';
749 f_to_multiply.is_32bit <= '0';
750 f_to_multiply.valid <= '0';
751 msel_1 <= MUL1_A;
752 msel_2 <= MUL2_C;
753 msel_add <= MULADD_ZERO;
754 msel_inv <= '0';
755 set_y := '0';
756 pshift := '0';
757 renorm_sqrt := '0';
758 shiftin := '0';
759 case r.state is
760 when IDLE =>
761 if e_in.valid = '1' then
762 case e_in.insn(5 downto 1) is
763 when "00000" =>
764 if e_in.insn(8) = '1' then
765 if e_in.insn(6) = '0' then
766 v.state := DO_FTDIV;
767 else
768 v.state := DO_FTSQRT;
769 end if;
770 elsif e_in.insn(7) = '1' then
771 v.state := DO_MCRFS;
772 else
773 v.state := DO_FCMP;
774 end if;
775 when "00110" =>
776 if e_in.insn(10) = '0' then
777 if e_in.insn(8) = '0' then
778 v.state := DO_MTFSB;
779 else
780 v.state := DO_MTFSFI;
781 end if;
782 else
783 v.state := DO_FMRG;
784 end if;
785 when "00111" =>
786 if e_in.insn(8) = '0' then
787 v.state := DO_MFFS;
788 else
789 v.state := DO_MTFSF;
790 end if;
791 when "01000" =>
792 if e_in.insn(9 downto 8) /= "11" then
793 v.state := DO_FMR;
794 else
795 v.state := DO_FRI;
796 end if;
797 when "01100" =>
798 v.state := DO_FRSP;
799 when "01110" =>
800 if int_input = '1' then
801 -- fcfid[u][s]
802 v.state := DO_FCFID;
803 else
804 v.state := DO_FCTI;
805 end if;
806 when "01111" =>
807 v.round_mode := "001";
808 v.state := DO_FCTI;
809 when "10010" =>
810 v.state := DO_FDIV;
811 when "10100" | "10101" =>
812 v.state := DO_FADD;
813 when "10110" =>
814 v.is_sqrt := '1';
815 v.state := DO_FSQRT;
816 when "10111" =>
817 v.state := DO_FSEL;
818 when "11000" =>
819 v.state := DO_FRE;
820 when "11001" =>
821 v.is_multiply := '1';
822 v.state := DO_FMUL;
823 when "11010" =>
824 v.is_sqrt := '1';
825 v.state := DO_FRSQRTE;
826 when "11100" | "11101" | "11110" | "11111" =>
827 v.state := DO_FMADD;
828 when others =>
829 illegal := '1';
830 end case;
831 end if;
832 v.x := '0';
833 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
834 set_s := '1';
835
836 when DO_MCRFS =>
837 j := to_integer(unsigned(insn_bfa(r.insn)));
838 for i in 0 to 7 loop
839 if i = j then
840 k := (7 - i) * 4;
841 v.cr_result := r.fpscr(k + 3 downto k);
842 fpscr_mask(k + 3 downto k) := "0000";
843 end if;
844 end loop;
845 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
846 v.instr_done := '1';
847 v.state := IDLE;
848
when DO_FTDIV =>
    -- ftdiv: build the test result in cr_result.  By default the
    -- instruction completes in this cycle; only when none of the
    -- operand-class tests fire do we continue to FTDIV_1 for the
    -- exponent range checks.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- cr_result(2) (fg_flag in the Power ISA): A is infinite, or B is
    -- zero, infinite or denormalized.  A finite operand is
    -- denormalized when the unit bit of its mantissa, bit 54, is
    -- clear; this is the bit tested before entering the RENORM_*
    -- states.  Bit 53 is the top fraction bit (the quiet bit tested
    -- for NaNs) and says nothing about normalization.
    if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- cr_result(1) (fe_flag): either operand is NaN or infinite, B is
    -- zero, or A is finite with an exponent of -970 or less.
    if r.a.class = NAN or r.a.class = INFINITY or
        r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
        (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
        v.cr_result(1) := '1';
    else
        -- Remaining fe_flag conditions depend on exponent ranges;
        -- doing_ftdiv selects the tightened limits used for exp_tiny
        -- and exp_huge, which FTDIV_1 examines.
        v.doing_ftdiv := "11";
        v.first := '1';
        v.state := FTDIV_1;
        v.instr_done := '0';
    end if;
867
when DO_FTSQRT =>
    -- ftsqrt: build the test result in cr_result; always completes
    -- in this cycle.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- cr_result(2) (fg_flag in the Power ISA): B is zero, infinite or
    -- denormalized.  A finite operand is denormalized when the unit
    -- bit of its mantissa, bit 54, is clear (the bit tested before
    -- entering the RENORM_* states); bit 53 is the top fraction bit.
    if r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- cr_result(1) (fe_flag): B is NaN, infinite, zero or negative,
    -- or its exponent is -970 or less.  The flag has to be set here;
    -- writing '0' would be a no-op because cr_result was cleared
    -- above, leaving fe_flag permanently 0.
    if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
        or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
        v.cr_result(1) := '1';
    end if;
880
881 when DO_FCMP =>
882 -- fcmp[uo]
883 v.instr_done := '1';
884 v.state := IDLE;
885 update_fx := '1';
886 opsel_a <= AIN_B;
887 opsel_r <= RES_SUM;
888 v.result_exp := r.b.exponent;
889 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
890 (r.b.class = NAN and r.b.mantissa(53) = '0') then
891 -- Signalling NAN
892 v.fpscr(FPSCR_VXSNAN) := '1';
893 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
894 v.fpscr(FPSCR_VXVC) := '1';
895 end if;
896 invalid := '1';
897 v.cr_result := "0001"; -- unordered
898 elsif r.a.class = NAN or r.b.class = NAN then
899 if r.insn(6) = '1' then
900 -- fcmpo
901 v.fpscr(FPSCR_VXVC) := '1';
902 invalid := '1';
903 end if;
904 v.cr_result := "0001"; -- unordered
905 elsif r.a.class = ZERO and r.b.class = ZERO then
906 v.cr_result := "0010"; -- equal
907 elsif r.a.negative /= r.b.negative then
908 v.cr_result := r.a.negative & r.b.negative & "00";
909 elsif r.a.class = ZERO then
910 -- A and B are the same sign from here down
911 v.cr_result := not r.b.negative & r.b.negative & "00";
912 elsif r.a.class = INFINITY then
913 if r.b.class = INFINITY then
914 v.cr_result := "0010";
915 else
916 v.cr_result := r.a.negative & not r.a.negative & "00";
917 end if;
918 elsif r.b.class = ZERO then
919 -- A is finite from here down
920 v.cr_result := r.a.negative & not r.a.negative & "00";
921 elsif r.b.class = INFINITY then
922 v.cr_result := not r.b.negative & r.b.negative & "00";
923 elsif r.exp_cmp = '1' then
924 -- A and B are both finite from here down
925 v.cr_result := r.a.negative & not r.a.negative & "00";
926 elsif r.a.exponent /= r.b.exponent then
927 -- A exponent is smaller than B
928 v.cr_result := not r.a.negative & r.a.negative & "00";
929 else
930 -- Prepare to subtract mantissas, put B in R
931 v.cr_result := "0000";
932 v.instr_done := '0';
933 v.state := CMP_1;
934 end if;
935 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
936
937 when DO_MTFSB =>
938 -- mtfsb{0,1}
939 j := to_integer(unsigned(insn_bt(r.insn)));
940 for i in 0 to 31 loop
941 if i = j then
942 v.fpscr(31 - i) := r.insn(6);
943 end if;
944 end loop;
945 v.instr_done := '1';
946 v.state := IDLE;
947
948 when DO_MTFSFI =>
949 -- mtfsfi
950 j := to_integer(unsigned(insn_bf(r.insn)));
951 if r.insn(16) = '0' then
952 for i in 0 to 7 loop
953 if i = j then
954 k := (7 - i) * 4;
955 v.fpscr(k + 3 downto k) := insn_u(r.insn);
956 end if;
957 end loop;
958 end if;
959 v.instr_done := '1';
960 v.state := IDLE;
961
962 when DO_FMRG =>
963 -- fmrgew, fmrgow
964 opsel_r <= RES_MISC;
965 misc_sel <= "01" & r.insn(8) & '0';
966 v.int_result := '1';
967 v.writing_back := '1';
968 v.instr_done := '1';
969 v.state := IDLE;
970
971 when DO_MFFS =>
972 v.int_result := '1';
973 v.writing_back := '1';
974 opsel_r <= RES_MISC;
975 case r.insn(20 downto 16) is
976 when "00000" =>
977 -- mffs
978 when "00001" =>
979 -- mffsce
980 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
981 when "10100" | "10101" =>
982 -- mffscdrn[i] (but we don't implement DRN)
983 fpscr_mask := x"000000FF";
984 when "10110" =>
985 -- mffscrn
986 fpscr_mask := x"000000FF";
987 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
988 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
989 when "10111" =>
990 -- mffscrni
991 fpscr_mask := x"000000FF";
992 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
993 when "11000" =>
994 -- mffsl
995 fpscr_mask := x"0007F0FF";
996 when others =>
997 illegal := '1';
998 end case;
999 v.instr_done := '1';
1000 v.state := IDLE;
1001
1002 when DO_MTFSF =>
1003 if r.insn(25) = '1' then
1004 flm := x"FF";
1005 elsif r.insn(16) = '1' then
1006 flm := x"00";
1007 else
1008 flm := r.insn(24 downto 17);
1009 end if;
1010 for i in 0 to 7 loop
1011 k := i * 4;
1012 if flm(i) = '1' then
1013 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1014 end if;
1015 end loop;
1016 v.instr_done := '1';
1017 v.state := IDLE;
1018
1019 when DO_FMR =>
1020 opsel_a <= AIN_B;
1021 v.result_class := r.b.class;
1022 v.result_exp := r.b.exponent;
1023 v.quieten_nan := '0';
1024 if r.insn(9) = '1' then
1025 v.result_sign := '0'; -- fabs
1026 elsif r.insn(8) = '1' then
1027 v.result_sign := '1'; -- fnabs
1028 elsif r.insn(7) = '1' then
1029 v.result_sign := r.b.negative; -- fmr
1030 elsif r.insn(6) = '1' then
1031 v.result_sign := not r.b.negative; -- fneg
1032 else
1033 v.result_sign := r.a.negative; -- fcpsgn
1034 end if;
1035 v.writing_back := '1';
1036 v.instr_done := '1';
1037 v.state := IDLE;
1038
1039 when DO_FRI => -- fri[nzpm]
1040 opsel_a <= AIN_B;
1041 v.result_class := r.b.class;
1042 v.result_sign := r.b.negative;
1043 v.result_exp := r.b.exponent;
1044 v.fpscr(FPSCR_FR) := '0';
1045 v.fpscr(FPSCR_FI) := '0';
1046 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1047 -- Signalling NAN
1048 v.fpscr(FPSCR_VXSNAN) := '1';
1049 invalid := '1';
1050 end if;
1051 if r.b.class = FINITE then
1052 if r.b.exponent >= to_signed(52, EXP_BITS) then
1053 -- integer already, no rounding required
1054 arith_done := '1';
1055 else
1056 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1057 v.state := FRI_1;
1058 v.round_mode := '1' & r.insn(7 downto 6);
1059 end if;
1060 else
1061 arith_done := '1';
1062 end if;
1063
1064 when DO_FRSP =>
1065 opsel_a <= AIN_B;
1066 v.result_class := r.b.class;
1067 v.result_sign := r.b.negative;
1068 v.result_exp := r.b.exponent;
1069 v.fpscr(FPSCR_FR) := '0';
1070 v.fpscr(FPSCR_FI) := '0';
1071 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1072 -- Signalling NAN
1073 v.fpscr(FPSCR_VXSNAN) := '1';
1074 invalid := '1';
1075 end if;
1076 set_x := '1';
1077 if r.b.class = FINITE then
1078 if r.b.exponent < to_signed(-126, EXP_BITS) then
1079 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1080 v.state := ROUND_UFLOW;
1081 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1082 v.state := ROUND_OFLOW;
1083 else
1084 v.shift := to_signed(-2, EXP_BITS);
1085 v.state := ROUNDING;
1086 end if;
1087 else
1088 arith_done := '1';
1089 end if;
1090
1091 when DO_FCTI =>
1092 -- instr bit 9: 1=dword 0=word
1093 -- instr bit 8: 1=unsigned 0=signed
1094 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1095 opsel_a <= AIN_B;
1096 v.result_class := r.b.class;
1097 v.result_sign := r.b.negative;
1098 v.result_exp := r.b.exponent;
1099 v.fpscr(FPSCR_FR) := '0';
1100 v.fpscr(FPSCR_FI) := '0';
1101 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1102 -- Signalling NAN
1103 v.fpscr(FPSCR_VXSNAN) := '1';
1104 invalid := '1';
1105 end if;
1106
1107 v.int_result := '1';
1108 case r.b.class is
1109 when ZERO =>
1110 arith_done := '1';
1111 when FINITE =>
1112 if r.b.exponent >= to_signed(64, EXP_BITS) or
1113 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1114 v.state := INT_OFLOW;
1115 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1116 -- integer already, no rounding required,
1117 -- shift into final position
1118 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1119 if r.insn(8) = '1' and r.b.negative = '1' then
1120 v.state := INT_OFLOW;
1121 else
1122 v.state := INT_ISHIFT;
1123 end if;
1124 else
1125 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1126 v.state := INT_SHIFT;
1127 end if;
1128 when INFINITY | NAN =>
1129 v.state := INT_OFLOW;
1130 end case;
1131
1132 when DO_FCFID =>
1133 v.result_sign := '0';
1134 opsel_a <= AIN_B;
1135 if r.insn(8) = '0' and r.b.negative = '1' then
1136 -- fcfid[s] with negative operand, set R = -B
1137 opsel_ainv <= '1';
1138 carry_in <= '1';
1139 v.result_sign := '1';
1140 end if;
1141 v.result_class := r.b.class;
1142 v.result_exp := to_signed(54, EXP_BITS);
1143 v.fpscr(FPSCR_FR) := '0';
1144 v.fpscr(FPSCR_FI) := '0';
1145 if r.b.class = ZERO then
1146 arith_done := '1';
1147 else
1148 v.state := FINISH;
1149 end if;
1150
1151 when DO_FADD =>
1152 -- fadd[s] and fsub[s]
1153 opsel_a <= AIN_A;
1154 v.result_sign := r.a.negative;
1155 v.result_class := r.a.class;
1156 v.result_exp := r.a.exponent;
1157 v.fpscr(FPSCR_FR) := '0';
1158 v.fpscr(FPSCR_FI) := '0';
1159 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1160 if r.a.class = FINITE and r.b.class = FINITE then
1161 v.is_subtract := not is_add;
1162 v.add_bsmall := r.exp_cmp;
1163 if r.exp_cmp = '0' then
1164 v.shift := r.a.exponent - r.b.exponent;
1165 v.result_sign := r.b.negative xnor r.insn(1);
1166 if r.a.exponent = r.b.exponent then
1167 v.state := ADD_2;
1168 else
1169 v.state := ADD_SHIFT;
1170 end if;
1171 else
1172 opsel_a <= AIN_B;
1173 v.shift := r.b.exponent - r.a.exponent;
1174 v.result_exp := r.b.exponent;
1175 v.state := ADD_SHIFT;
1176 end if;
1177 else
1178 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1179 (r.b.class = NAN and r.b.mantissa(53) = '0') then
1180 -- Signalling NAN
1181 v.fpscr(FPSCR_VXSNAN) := '1';
1182 invalid := '1';
1183 end if;
1184 if r.a.class = NAN then
1185 -- nothing to do, result is A
1186 elsif r.b.class = NAN then
1187 v.result_class := NAN;
1188 v.result_sign := r.b.negative;
1189 opsel_a <= AIN_B;
1190 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1191 -- invalid operation, construct QNaN
1192 v.fpscr(FPSCR_VXISI) := '1';
1193 qnan_result := '1';
1194 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1195 -- return -0 for rounding to -infinity
1196 v.result_sign := r.round_mode(1) and r.round_mode(0);
1197 elsif r.a.class = INFINITY or r.b.class = ZERO then
1198 -- nothing to do, result is A
1199 else
1200 -- result is +/- B
1201 v.result_sign := r.b.negative xnor r.insn(1);
1202 v.result_class := r.b.class;
1203 v.result_exp := r.b.exponent;
1204 opsel_a <= AIN_B;
1205 end if;
1206 arith_done := '1';
1207 end if;
1208
1209 when DO_FMUL =>
1210 -- fmul[s]
1211 opsel_a <= AIN_A;
1212 v.result_sign := r.a.negative;
1213 v.result_class := r.a.class;
1214 v.result_exp := r.a.exponent;
1215 v.fpscr(FPSCR_FR) := '0';
1216 v.fpscr(FPSCR_FI) := '0';
1217 if r.a.class = FINITE and r.c.class = FINITE then
1218 v.result_sign := r.a.negative xor r.c.negative;
1219 v.result_exp := r.a.exponent + r.c.exponent;
1220 -- Renormalize denorm operands
1221 if r.a.mantissa(54) = '0' then
1222 v.state := RENORM_A;
1223 elsif r.c.mantissa(54) = '0' then
1224 opsel_a <= AIN_C;
1225 v.state := RENORM_C;
1226 else
1227 f_to_multiply.valid <= '1';
1228 v.state := MULT_1;
1229 end if;
1230 else
1231 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
1232 (r.c.class = NAN and r.c.mantissa(53) = '0') then
1233 -- Signalling NAN
1234 v.fpscr(FPSCR_VXSNAN) := '1';
1235 invalid := '1';
1236 end if;
1237 if r.a.class = NAN then
1238 -- result is A
1239 elsif r.c.class = NAN then
1240 v.result_class := NAN;
1241 v.result_sign := r.c.negative;
1242 opsel_a <= AIN_C;
1243 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1244 (r.a.class = ZERO and r.c.class = INFINITY) then
1245 -- invalid operation, construct QNaN
1246 v.fpscr(FPSCR_VXIMZ) := '1';
1247 qnan_result := '1';
1248 elsif r.a.class = ZERO or r.a.class = INFINITY then
1249 -- result is +/- A
1250 v.result_sign := r.a.negative xor r.c.negative;
1251 else
1252 -- r.c.class is ZERO or INFINITY
1253 v.result_class := r.c.class;
1254 v.result_sign := r.a.negative xor r.c.negative;
1255 end if;
1256 arith_done := '1';
1257 end if;
1258
when DO_FDIV =>
    -- fdiv[s]: A / B.  Defaults describe the finite/finite case:
    -- sign is the xor of the operand signs, exponent is the
    -- difference of the operand exponents, class follows A.
    opsel_a <= AIN_A;
    v.result_class := r.a.class;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.result_sign := r.a.negative xor r.b.negative;
    v.result_exp := r.a.exponent - r.b.exponent;
    v.count := "00";
    if r.a.class = FINITE and r.b.class = FINITE then
        -- Renormalize denorm operands
        if r.a.mantissa(54) = '0' then
            v.state := RENORM_A;
        elsif r.b.mantissa(54) = '0' then
            opsel_a <= AIN_B;
            v.state := RENORM_B;
        else
            v.first := '1';
            v.state := DIV_2;
        end if;
    else
        if (r.a.class = NAN and r.a.mantissa(53) = '0') or
            (r.b.class = NAN and r.b.mantissa(53) = '0') then
            -- Signalling NAN
            v.fpscr(FPSCR_VXSNAN) := '1';
            invalid := '1';
        end if;
        if r.a.class = NAN then
            -- result is A, including its sign
            v.result_sign := r.a.negative;
        elsif r.b.class = NAN then
            -- result is B
            v.result_class := NAN;
            v.result_sign := r.b.negative;
            opsel_a <= AIN_B;
        elsif r.b.class = INFINITY then
            if r.a.class = INFINITY then
                -- infinity / infinity: invalid operation
                v.fpscr(FPSCR_VXIDI) := '1';
                qnan_result := '1';
            else
                v.result_class := ZERO;
            end if;
        elsif r.b.class = ZERO then
            if r.a.class = ZERO then
                -- zero / zero: invalid operation
                v.fpscr(FPSCR_VXZDZ) := '1';
                qnan_result := '1';
            else
                -- only finite / zero raises zero-divide
                if r.a.class = FINITE then
                    zero_divide := '1';
                end if;
                v.result_class := INFINITY;
            end if;
        -- else r.b.class = FINITE, result_class = r.a.class
        end if;
        arith_done := '1';
    end if;
1315
when DO_FSEL =>
    -- fsel: pass C through when A is zero or a non-negative non-NaN,
    -- otherwise pass B through.  The chosen operand is copied
    -- unmodified, so NaNs are not quietened.
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    v.quieten_nan := '0';
    if r.a.class /= ZERO and (r.a.negative /= '0' or r.a.class = NAN) then
        -- A is negative (and non-zero) or a NaN: select B
        opsel_a <= AIN_B;
        v.result_class := r.b.class;
        v.result_exp := r.b.exponent;
        v.result_sign := r.b.negative;
    else
        -- select C
        opsel_a <= AIN_C;
        v.result_class := r.c.class;
        v.result_exp := r.c.exponent;
        v.result_sign := r.c.negative;
    end if;
    arith_done := '1';
1333
1334 when DO_FSQRT =>
1335 opsel_a <= AIN_B;
1336 v.result_class := r.b.class;
1337 v.result_sign := r.b.negative;
1338 v.fpscr(FPSCR_FR) := '0';
1339 v.fpscr(FPSCR_FI) := '0';
1340 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1341 v.fpscr(FPSCR_VXSNAN) := '1';
1342 invalid := '1';
1343 end if;
1344 case r.b.class is
1345 when FINITE =>
1346 v.result_exp := r.b.exponent;
1347 if r.b.negative = '1' then
1348 v.fpscr(FPSCR_VXSQRT) := '1';
1349 qnan_result := '1';
1350 arith_done := '1';
1351 elsif r.b.mantissa(54) = '0' then
1352 v.state := RENORM_B;
1353 elsif r.b.exponent(0) = '0' then
1354 v.state := SQRT_1;
1355 else
1356 v.shift := to_signed(1, EXP_BITS);
1357 v.state := RENORM_B2;
1358 end if;
1359 when NAN | ZERO =>
1360 -- result is B
1361 arith_done := '1';
1362 when INFINITY =>
1363 if r.b.negative = '1' then
1364 v.fpscr(FPSCR_VXSQRT) := '1';
1365 qnan_result := '1';
1366 -- else result is B
1367 end if;
1368 arith_done := '1';
1369 end case;
1370
1371 when DO_FRE =>
1372 opsel_a <= AIN_B;
1373 v.result_class := r.b.class;
1374 v.result_sign := r.b.negative;
1375 v.fpscr(FPSCR_FR) := '0';
1376 v.fpscr(FPSCR_FI) := '0';
1377 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1378 v.fpscr(FPSCR_VXSNAN) := '1';
1379 invalid := '1';
1380 end if;
1381 case r.b.class is
1382 when FINITE =>
1383 v.result_exp := - r.b.exponent;
1384 if r.b.mantissa(54) = '0' then
1385 v.state := RENORM_B;
1386 else
1387 v.state := FRE_1;
1388 end if;
1389 when NAN =>
1390 -- result is B
1391 arith_done := '1';
1392 when INFINITY =>
1393 v.result_class := ZERO;
1394 arith_done := '1';
1395 when ZERO =>
1396 v.result_class := INFINITY;
1397 zero_divide := '1';
1398 arith_done := '1';
1399 end case;
1400
1401 when DO_FRSQRTE =>
1402 opsel_a <= AIN_B;
1403 v.result_class := r.b.class;
1404 v.result_sign := r.b.negative;
1405 v.fpscr(FPSCR_FR) := '0';
1406 v.fpscr(FPSCR_FI) := '0';
1407 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1408 v.fpscr(FPSCR_VXSNAN) := '1';
1409 invalid := '1';
1410 end if;
1411 v.shift := to_signed(1, EXP_BITS);
1412 case r.b.class is
1413 when FINITE =>
1414 v.result_exp := r.b.exponent;
1415 if r.b.negative = '1' then
1416 v.fpscr(FPSCR_VXSQRT) := '1';
1417 qnan_result := '1';
1418 arith_done := '1';
1419 elsif r.b.mantissa(54) = '0' then
1420 v.state := RENORM_B;
1421 elsif r.b.exponent(0) = '0' then
1422 v.state := RSQRT_1;
1423 else
1424 v.state := RENORM_B2;
1425 end if;
1426 when NAN =>
1427 -- result is B
1428 arith_done := '1';
1429 when INFINITY =>
1430 if r.b.negative = '1' then
1431 v.fpscr(FPSCR_VXSQRT) := '1';
1432 qnan_result := '1';
1433 else
1434 v.result_class := ZERO;
1435 end if;
1436 arith_done := '1';
1437 when ZERO =>
1438 v.result_class := INFINITY;
1439 zero_divide := '1';
1440 arith_done := '1';
1441 end case;
1442
when DO_FMADD =>
    -- fmadd, fmsub, fnmadd, fnmsub
    -- This state is also re-entered from RENORM_A2 / RENORM_C2 once
    -- a denormal A or C has been normalized and the addend is
    -- non-zero.
    opsel_a <= AIN_A;
    v.result_sign := r.a.negative;
    v.result_class := r.a.class;
    v.result_exp := r.a.exponent;
    v.fpscr(FPSCR_FR) := '0';
    v.fpscr(FPSCR_FI) := '0';
    is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
    if r.a.class = FINITE and r.c.class = FINITE and
        (r.b.class = FINITE or r.b.class = ZERO) then
        v.is_subtract := not is_add;
        mulexp := r.a.exponent + r.c.exponent;
        v.result_exp := mulexp;
        opsel_a <= AIN_B;
        if r.b.class = ZERO then
            -- no addend, degenerates to multiply.  Record the
            -- product sign and the multiply flag before the
            -- normalization checks: with a denormal A or C the
            -- RENORM_* states go straight to MULT_1 and never come
            -- back here, so these must already be in place.
            v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
            v.is_multiply := '1';
        end if;
        -- Make sure A and C are normalized
        if r.a.mantissa(54) = '0' then
            opsel_a <= AIN_A;
            v.state := RENORM_A;
        elsif r.c.mantissa(54) = '0' then
            opsel_a <= AIN_C;
            v.state := RENORM_C;
        elsif r.b.class = ZERO then
            -- plain multiply, sign and is_multiply were set above
            f_to_multiply.valid <= '1';
            v.state := MULT_1;
        elsif r.madd_cmp = '0' then
            -- addend is bigger, do multiply first
            v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
            f_to_multiply.valid <= '1';
            v.state := FMADD_1;
        else
            -- product is bigger, shift B right and use it as the
            -- addend to the multiplier
            v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
            -- for subtract, multiplier does B - A * C
            v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
            v.result_exp := r.b.exponent;
            v.state := FMADD_2;
        end if;
    else
        if (r.a.class = NAN and r.a.mantissa(53) = '0') or
            (r.b.class = NAN and r.b.mantissa(53) = '0') or
            (r.c.class = NAN and r.c.mantissa(53) = '0') then
            -- Signalling NAN
            v.fpscr(FPSCR_VXSNAN) := '1';
            invalid := '1';
        end if;
        if r.a.class = NAN then
            -- nothing to do, result is A
        elsif r.b.class = NAN then
            -- result is B
            v.result_class := NAN;
            v.result_sign := r.b.negative;
            opsel_a <= AIN_B;
        elsif r.c.class = NAN then
            -- result is C
            v.result_class := NAN;
            v.result_sign := r.c.negative;
            opsel_a <= AIN_C;
        elsif (r.a.class = ZERO and r.c.class = INFINITY) or
            (r.a.class = INFINITY and r.c.class = ZERO) then
            -- invalid operation, construct QNaN
            v.fpscr(FPSCR_VXIMZ) := '1';
            qnan_result := '1';
        elsif r.a.class = INFINITY or r.c.class = INFINITY then
            if r.b.class = INFINITY and is_add = '0' then
                -- invalid operation, construct QNaN
                v.fpscr(FPSCR_VXISI) := '1';
                qnan_result := '1';
            else
                -- result is infinity
                v.result_class := INFINITY;
                v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
            end if;
        else
            -- Here A is zero, C is zero, or B is infinity
            -- Result is +/-B in all of those cases
            v.result_class := r.b.class;
            v.result_exp := r.b.exponent;
            if v.result_class /= ZERO or is_add = '1' then
                v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
            else
                -- have to be careful about rule for 0 - 0 result sign
                v.result_sign := (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
            end if;
            opsel_a <= AIN_B;
        end if;
        arith_done := '1';
    end if;
1535
1536 when RENORM_A =>
1537 renormalize := '1';
1538 v.state := RENORM_A2;
1539
1540 when RENORM_A2 =>
1541 set_a := '1';
1542 v.result_exp := new_exp;
1543 if r.insn(4) = '1' then
1544 opsel_a <= AIN_C;
1545 if r.c.mantissa(54) = '1' then
1546 if r.insn(3) = '0' or r.b.class = ZERO then
1547 v.first := '1';
1548 v.state := MULT_1;
1549 else
1550 v.madd_cmp := '0';
1551 if new_exp + 1 >= r.b.exponent then
1552 v.madd_cmp := '1';
1553 end if;
1554 v.state := DO_FMADD;
1555 end if;
1556 else
1557 v.state := RENORM_C;
1558 end if;
1559 else
1560 opsel_a <= AIN_B;
1561 if r.b.mantissa(54) = '1' then
1562 v.first := '1';
1563 v.state := DIV_2;
1564 else
1565 v.state := RENORM_B;
1566 end if;
1567 end if;
1568
1569 when RENORM_B =>
1570 renormalize := '1';
1571 renorm_sqrt := r.is_sqrt;
1572 v.state := RENORM_B2;
1573
1574 when RENORM_B2 =>
1575 set_b := '1';
1576 if r.is_sqrt = '0' then
1577 v.result_exp := r.result_exp + r.shift;
1578 else
1579 v.result_exp := new_exp;
1580 end if;
1581 v.state := LOOKUP;
1582
1583 when RENORM_C =>
1584 renormalize := '1';
1585 v.state := RENORM_C2;
1586
1587 when RENORM_C2 =>
1588 set_c := '1';
1589 v.result_exp := new_exp;
1590 if r.insn(3) = '0' or r.b.class = ZERO then
1591 v.first := '1';
1592 v.state := MULT_1;
1593 else
1594 v.madd_cmp := '0';
1595 if new_exp + 1 >= r.b.exponent then
1596 v.madd_cmp := '1';
1597 end if;
1598 v.state := DO_FMADD;
1599 end if;
1600
1601 when ADD_SHIFT =>
1602 opsel_r <= RES_SHIFT;
1603 v.x := s_nz;
1604 set_x := '1';
1605 longmask := '0';
1606 v.state := ADD_2;
1607
1608 when ADD_2 =>
1609 if r.add_bsmall = '1' then
1610 opsel_a <= AIN_A;
1611 else
1612 opsel_a <= AIN_B;
1613 end if;
1614 opsel_b <= BIN_R;
1615 opsel_binv <= r.is_subtract;
1616 carry_in <= r.is_subtract and not r.x;
1617 v.shift := to_signed(-1, EXP_BITS);
1618 v.state := ADD_3;
1619
1620 when ADD_3 =>
1621 -- check for overflow or negative result (can't get both)
1622 if r.r(63) = '1' then
1623 -- result is opposite sign to expected
1624 v.result_sign := not r.result_sign;
1625 opsel_ainv <= '1';
1626 carry_in <= '1';
1627 v.state := FINISH;
1628 elsif r.r(55) = '1' then
1629 -- sum overflowed, shift right
1630 opsel_r <= RES_SHIFT;
1631 set_x := '1';
1632 v.shift := to_signed(-2, EXP_BITS);
1633 if exp_huge = '1' then
1634 v.state := ROUND_OFLOW;
1635 else
1636 v.state := ROUNDING;
1637 end if;
1638 elsif r.r(54) = '1' then
1639 set_x := '1';
1640 v.shift := to_signed(-2, EXP_BITS);
1641 v.state := ROUNDING;
1642 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1643 -- r.x must be zero at this point
1644 v.result_class := ZERO;
1645 if r.is_subtract = '1' then
1646 -- set result sign depending on rounding mode
1647 v.result_sign := r.round_mode(1) and r.round_mode(0);
1648 end if;
1649 arith_done := '1';
1650 else
1651 renormalize := '1';
1652 v.state := NORMALIZE;
1653 end if;
1654
1655 when CMP_1 =>
1656 opsel_a <= AIN_A;
1657 opsel_b <= BIN_R;
1658 opsel_binv <= '1';
1659 carry_in <= '1';
1660 v.state := CMP_2;
1661
when CMP_2 =>
    -- Final step of a compare of same-sign, same-exponent finite
    -- operands: classify using the sign bit (R(63)) and the
    -- non-zero flags of R, then finish the instruction.
    v.instr_done := '1';
    v.state := IDLE;
    if r.r(63) /= '1' then
        if (r_hi_nz or r_lo_nz) = '0' then
            -- difference is zero: equal
            v.cr_result := "0010";
        else
            -- A is larger in magnitude
            v.cr_result := r.a.negative & not r.a.negative & "00";
        end if;
    else
        -- A is smaller in magnitude
        v.cr_result := not r.a.negative & r.a.negative & "00";
    end if;
    -- mirror the compare result into the FPSCR condition field
    v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1674
1675 when MULT_1 =>
1676 f_to_multiply.valid <= r.first;
1677 opsel_r <= RES_MULT;
1678 if multiply_to_f.valid = '1' then
1679 v.state := FINISH;
1680 end if;
1681
1682 when FMADD_1 =>
1683 -- Addend is bigger here
1684 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1685 -- note v.shift is at most -2 here
1686 v.shift := r.result_exp - r.b.exponent;
1687 opsel_r <= RES_MULT;
1688 opsel_s <= S_MULT;
1689 set_s := '1';
1690 f_to_multiply.valid <= r.first;
1691 if multiply_to_f.valid = '1' then
1692 v.state := ADD_SHIFT;
1693 end if;
1694
1695 when FMADD_2 =>
1696 -- Product is potentially bigger here
1697 set_s := '1';
1698 opsel_s <= S_SHIFT;
1699 v.shift := r.shift - to_signed(64, EXP_BITS);
1700 v.state := FMADD_3;
1701
1702 when FMADD_3 =>
1703 opsel_r <= RES_SHIFT;
1704 v.first := '1';
1705 v.state := FMADD_4;
1706
1707 when FMADD_4 =>
1708 msel_add <= MULADD_RS;
1709 f_to_multiply.valid <= r.first;
1710 msel_inv <= r.is_subtract;
1711 opsel_r <= RES_MULT;
1712 opsel_s <= S_MULT;
1713 set_s := '1';
1714 v.shift := to_signed(56, EXP_BITS);
1715 if multiply_to_f.valid = '1' then
1716 if multiply_to_f.result(121) = '1' then
1717 v.state := FMADD_5;
1718 else
1719 v.state := FMADD_6;
1720 end if;
1721 end if;
1722
1723 when FMADD_5 =>
1724 -- negate R:S:X
1725 v.result_sign := not r.result_sign;
1726 opsel_ainv <= '1';
1727 carry_in <= not (s_nz or r.x);
1728 opsel_s <= S_NEG;
1729 set_s := '1';
1730 v.shift := to_signed(56, EXP_BITS);
1731 v.state := FMADD_6;
1732
1733 when FMADD_6 =>
1734 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1735 if s_nz = '0' then
1736 -- must be a subtraction, and r.x must be zero
1737 v.result_class := ZERO;
1738 v.result_sign := r.round_mode(1) and r.round_mode(0);
1739 arith_done := '1';
1740 else
1741 -- R is all zeroes but there are non-zero bits in S
1742 -- so shift them into R and set S to 0
1743 opsel_r <= RES_SHIFT;
1744 set_s := '1';
1745 -- stay in state FMADD_6
1746 end if;
1747 elsif r.r(56 downto 54) = "001" then
1748 v.state := FINISH;
1749 else
1750 renormalize := '1';
1751 v.state := NORMALIZE;
1752 end if;
1753
when LOOKUP =>
    -- wait one cycle for inverse_table[B] lookup, then dispatch on
    -- instruction bits 4..2 to the state that consumes the estimate
    opsel_a <= AIN_B;
    v.first := '1';
    if r.insn(4) /= '0' then
        if r.insn(2) = '0' then
            v.state := FRE_1;
        else
            v.state := RSQRT_1;
        end if;
    elsif r.insn(3) = '0' then
        v.state := DIV_2;
    else
        v.state := SQRT_1;
    end if;
1769
1770 when DIV_2 =>
1771 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1772 msel_1 <= MUL1_B;
1773 msel_add <= MULADD_CONST;
1774 msel_inv <= '1';
1775 if r.count = 0 then
1776 msel_2 <= MUL2_LUT;
1777 else
1778 msel_2 <= MUL2_P;
1779 end if;
1780 set_y := r.first;
1781 pshift := '1';
1782 f_to_multiply.valid <= r.first;
1783 if multiply_to_f.valid = '1' then
1784 v.first := '1';
1785 v.count := r.count + 1;
1786 v.state := DIV_3;
1787 end if;
1788
1789 when DIV_3 =>
1790 -- compute Y = P = P * Y
1791 msel_1 <= MUL1_Y;
1792 msel_2 <= MUL2_P;
1793 f_to_multiply.valid <= r.first;
1794 pshift := '1';
1795 if multiply_to_f.valid = '1' then
1796 v.first := '1';
1797 if r.count = 3 then
1798 v.state := DIV_4;
1799 else
1800 v.state := DIV_2;
1801 end if;
1802 end if;
1803
1804 when DIV_4 =>
1805 -- compute R = P = A * Y (quotient)
1806 msel_1 <= MUL1_A;
1807 msel_2 <= MUL2_P;
1808 set_y := r.first;
1809 f_to_multiply.valid <= r.first;
1810 pshift := '1';
1811 if multiply_to_f.valid = '1' then
1812 opsel_r <= RES_MULT;
1813 v.first := '1';
1814 v.state := DIV_5;
1815 end if;
1816
1817 when DIV_5 =>
1818 -- compute P = A - B * R (remainder)
1819 msel_1 <= MUL1_B;
1820 msel_2 <= MUL2_R;
1821 msel_add <= MULADD_A;
1822 msel_inv <= '1';
1823 f_to_multiply.valid <= r.first;
1824 if multiply_to_f.valid = '1' then
1825 v.state := DIV_6;
1826 end if;
1827
when DIV_6 =>
    -- test if remainder is 0 or >= B, and derive the sticky bit X
    v.state := FINISH;
    if pcmpb_lt /= '1' then
        -- remainder >= B: quotient needs to be incremented by 1;
        -- the result is inexact unless the remainder equals B
        carry_in <= '1';
        v.x := not pcmpb_eq;
    else
        -- quotient is correct, set X if remainder non-zero
        v.x := r.p(58) or px_nz;
    end if;
1839
1840 when FRE_1 =>
1841 opsel_r <= RES_MISC;
1842 misc_sel <= "0111";
1843 v.shift := to_signed(1, EXP_BITS);
1844 v.state := NORMALIZE;
1845
1846 when FTDIV_1 =>
1847 v.cr_result(1) := exp_tiny or exp_huge;
1848 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1849 v.instr_done := '1';
1850 v.state := IDLE;
1851 else
1852 v.shift := r.a.exponent;
1853 v.doing_ftdiv := "10";
1854 end if;
1855
1856 when RSQRT_1 =>
1857 opsel_r <= RES_MISC;
1858 misc_sel <= "0111";
1859 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1860 v.result_exp := - sqrt_exp;
1861 v.shift := to_signed(1, EXP_BITS);
1862 v.state := NORMALIZE;
1863
1864 when SQRT_1 =>
1865 -- put invsqr[B] in R and compute P = invsqr[B] * B
1866 -- also transfer B (in R) to A
1867 set_a := '1';
1868 opsel_r <= RES_MISC;
1869 misc_sel <= "0111";
1870 msel_1 <= MUL1_B;
1871 msel_2 <= MUL2_LUT;
1872 f_to_multiply.valid <= '1';
1873 v.shift := to_signed(-1, EXP_BITS);
1874 v.count := "00";
1875 v.state := SQRT_2;
1876
1877 when SQRT_2 =>
1878 -- shift R right one place
1879 -- not expecting multiplier result yet
1880 opsel_r <= RES_SHIFT;
1881 v.first := '1';
1882 v.state := SQRT_3;
1883
1884 when SQRT_3 =>
1885 -- put R into Y, wait for product from multiplier
1886 msel_2 <= MUL2_R;
1887 set_y := r.first;
1888 pshift := '1';
1889 if multiply_to_f.valid = '1' then
1890 -- put result into R
1891 opsel_r <= RES_MULT;
1892 v.first := '1';
1893 v.state := SQRT_4;
1894 end if;
1895
1896 when SQRT_4 =>
1897 -- compute 1.5 - Y * P
1898 msel_1 <= MUL1_Y;
1899 msel_2 <= MUL2_P;
1900 msel_add <= MULADD_CONST;
1901 msel_inv <= '1';
1902 f_to_multiply.valid <= r.first;
1903 pshift := '1';
1904 if multiply_to_f.valid = '1' then
1905 v.state := SQRT_5;
1906 end if;
1907
1908 when SQRT_5 =>
1909 -- compute Y = Y * P
1910 msel_1 <= MUL1_Y;
1911 msel_2 <= MUL2_P;
1912 f_to_multiply.valid <= '1';
1913 v.first := '1';
1914 v.state := SQRT_6;
1915
1916 when SQRT_6 =>
1917 -- pipeline in R = R * P
1918 msel_1 <= MUL1_R;
1919 msel_2 <= MUL2_P;
1920 f_to_multiply.valid <= r.first;
1921 pshift := '1';
1922 if multiply_to_f.valid = '1' then
1923 v.first := '1';
1924 v.state := SQRT_7;
1925 end if;
1926
1927 when SQRT_7 =>
1928 -- first multiply is done, put result in Y
1929 msel_2 <= MUL2_P;
1930 set_y := r.first;
1931 -- wait for second multiply (should be here already)
1932 pshift := '1';
1933 if multiply_to_f.valid = '1' then
1934 -- put result into R
1935 opsel_r <= RES_MULT;
1936 v.first := '1';
1937 v.count := r.count + 1;
1938 if r.count < 2 then
1939 v.state := SQRT_4;
1940 else
1941 v.first := '1';
1942 v.state := SQRT_8;
1943 end if;
1944 end if;
1945
1946 when SQRT_8 =>
1947 -- compute P = A - R * R, which can be +ve or -ve
1948 -- we arranged for B to be put into A earlier
1949 msel_1 <= MUL1_R;
1950 msel_2 <= MUL2_R;
1951 msel_add <= MULADD_A;
1952 msel_inv <= '1';
1953 pshift := '1';
1954 f_to_multiply.valid <= r.first;
1955 if multiply_to_f.valid = '1' then
1956 v.first := '1';
1957 v.state := SQRT_9;
1958 end if;
1959
1960 when SQRT_9 =>
1961 -- compute P = P * Y
1962 -- since Y is an estimate of 1/sqrt(B), this makes P an
1963 -- estimate of the adjustment needed to R. Since the error
1964 -- could be negative and we have an unsigned multiplier, the
1965 -- upper bits can be wrong, but it turns out the lowest 8 bits
1966 -- are correct and are all we need (given 3 iterations through
1967 -- SQRT_4 to SQRT_7).
1968 msel_1 <= MUL1_Y;
1969 msel_2 <= MUL2_P;
1970 pshift := '1';
1971 f_to_multiply.valid <= r.first;
1972 if multiply_to_f.valid = '1' then
1973 v.state := SQRT_10;
1974 end if;
1975
1976 when SQRT_10 =>
1977 -- Add the bottom 8 bits of P, sign-extended,
1978 -- divided by 4, onto R.
1979 -- The division by 4 is because R is 10.54 format
1980 -- whereas P is 8.56 format.
1981 opsel_b <= BIN_PS6;
1982 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1983 v.result_exp := sqrt_exp;
1984 v.shift := to_signed(1, EXP_BITS);
1985 v.first := '1';
1986 v.state := SQRT_11;
1987
1988 when SQRT_11 =>
1989 -- compute P = A - R * R (remainder)
1990 -- also put 2 * R + 1 into B for comparison with P
1991 msel_1 <= MUL1_R;
1992 msel_2 <= MUL2_R;
1993 msel_add <= MULADD_A;
1994 msel_inv <= '1';
1995 f_to_multiply.valid <= r.first;
1996 shiftin := '1';
1997 set_b := r.first;
1998 if multiply_to_f.valid = '1' then
1999 v.state := SQRT_12;
2000 end if;
2001
2002 when SQRT_12 =>
2003 -- test if remainder is 0 or >= B = 2*R + 1
2004 if pcmpb_lt = '1' then
2005 -- square root is correct, set X if remainder non-zero
2006 v.x := r.p(58) or px_nz;
2007 else
2008 -- square root needs to be incremented by 1
2009 carry_in <= '1';
2010 v.x := not pcmpb_eq;
2011 end if;
2012 v.state := FINISH;
2013
2014 when INT_SHIFT =>
2015 opsel_r <= RES_SHIFT;
2016 set_x := '1';
2017 v.state := INT_ROUND;
2018 v.shift := to_signed(-2, EXP_BITS);
2019
2020 when INT_ROUND =>
2021 opsel_r <= RES_SHIFT;
2022 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2023 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2024 -- Check for negative values that don't round to 0 for fcti*u*
2025 if r.insn(8) = '1' and r.result_sign = '1' and
2026 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2027 v.state := INT_OFLOW;
2028 else
2029 v.state := INT_FINAL;
2030 end if;
2031
2032 when INT_ISHIFT =>
2033 opsel_r <= RES_SHIFT;
2034 v.state := INT_FINAL;
2035
2036 when INT_FINAL =>
2037 -- Negate if necessary, and increment for rounding if needed
2038 opsel_ainv <= r.result_sign;
2039 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2040 -- Check for possible overflows
2041 case r.insn(9 downto 8) is
2042 when "00" => -- fctiw[z]
2043 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2044 when "01" => -- fctiwu[z]
2045 need_check := r.r(31);
2046 when "10" => -- fctid[z]
2047 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2048 when others => -- fctidu[z]
2049 need_check := r.r(63);
2050 end case;
2051 if need_check = '1' then
2052 v.state := INT_CHECK;
2053 else
2054 if r.fpscr(FPSCR_FI) = '1' then
2055 v.fpscr(FPSCR_XX) := '1';
2056 end if;
2057 arith_done := '1';
2058 end if;
2059
2060 when INT_CHECK =>
2061 if r.insn(9) = '0' then
2062 msb := r.r(31);
2063 else
2064 msb := r.r(63);
2065 end if;
2066 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2067 if (r.insn(8) = '0' and msb /= r.result_sign) or
2068 (r.insn(8) = '1' and msb /= '1') then
2069 opsel_r <= RES_MISC;
2070 v.fpscr(FPSCR_VXCVI) := '1';
2071 invalid := '1';
2072 else
2073 if r.fpscr(FPSCR_FI) = '1' then
2074 v.fpscr(FPSCR_XX) := '1';
2075 end if;
2076 end if;
2077 arith_done := '1';
2078
2079 when INT_OFLOW =>
2080 opsel_r <= RES_MISC;
2081 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2082 if r.b.class = NAN then
2083 misc_sel(0) <= '1';
2084 end if;
2085 v.fpscr(FPSCR_VXCVI) := '1';
2086 invalid := '1';
2087 arith_done := '1';
2088
2089 when FRI_1 =>
2090 opsel_r <= RES_SHIFT;
2091 set_x := '1';
2092 v.shift := to_signed(-2, EXP_BITS);
2093 v.state := ROUNDING;
2094
2095 when FINISH =>
2096 if r.is_multiply = '1' and px_nz = '1' then
2097 v.x := '1';
2098 end if;
2099 if r.r(63 downto 54) /= "0000000001" then
2100 renormalize := '1';
2101 v.state := NORMALIZE;
2102 else
2103 set_x := '1';
2104 if exp_tiny = '1' then
2105 v.shift := new_exp - min_exp;
2106 v.state := ROUND_UFLOW;
2107 elsif exp_huge = '1' then
2108 v.state := ROUND_OFLOW;
2109 else
2110 v.shift := to_signed(-2, EXP_BITS);
2111 v.state := ROUNDING;
2112 end if;
2113 end if;
2114
2115 when NORMALIZE =>
2116 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2117 opsel_r <= RES_SHIFT;
2118 set_x := '1';
2119 if exp_tiny = '1' then
2120 v.shift := new_exp - min_exp;
2121 v.state := ROUND_UFLOW;
2122 elsif exp_huge = '1' then
2123 v.state := ROUND_OFLOW;
2124 else
2125 v.shift := to_signed(-2, EXP_BITS);
2126 v.state := ROUNDING;
2127 end if;
2128
2129 when ROUND_UFLOW =>
2130 v.tiny := '1';
2131 if r.fpscr(FPSCR_UE) = '0' then
2132 -- disabled underflow exception case
2133 -- have to denormalize before rounding
2134 opsel_r <= RES_SHIFT;
2135 set_x := '1';
2136 v.shift := to_signed(-2, EXP_BITS);
2137 v.state := ROUNDING;
2138 else
2139 -- enabled underflow exception case
2140 -- if denormalized, have to normalize before rounding
2141 v.fpscr(FPSCR_UX) := '1';
2142 v.result_exp := r.result_exp + bias_exp;
2143 if r.r(54) = '0' then
2144 renormalize := '1';
2145 v.state := NORMALIZE;
2146 else
2147 v.shift := to_signed(-2, EXP_BITS);
2148 v.state := ROUNDING;
2149 end if;
2150 end if;
2151
2152 when ROUND_OFLOW =>
2153 v.fpscr(FPSCR_OX) := '1';
2154 if r.fpscr(FPSCR_OE) = '0' then
2155 -- disabled overflow exception
2156 -- result depends on rounding mode
2157 v.fpscr(FPSCR_XX) := '1';
2158 v.fpscr(FPSCR_FI) := '1';
2159 if r.round_mode(1 downto 0) = "00" or
2160 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2161 v.result_class := INFINITY;
2162 v.fpscr(FPSCR_FR) := '1';
2163 else
2164 v.fpscr(FPSCR_FR) := '0';
2165 end if;
2166 -- construct largest representable number
2167 v.result_exp := max_exp;
2168 opsel_r <= RES_MISC;
2169 misc_sel <= "001" & r.single_prec;
2170 arith_done := '1';
2171 else
2172 -- enabled overflow exception
2173 v.result_exp := r.result_exp - bias_exp;
2174 v.shift := to_signed(-2, EXP_BITS);
2175 v.state := ROUNDING;
2176 end if;
2177
2178 when ROUNDING =>
2179 opsel_amask <= '1';
2180 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2181 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2182 if round(1) = '1' then
2183 -- set mask to increment the LSB for the precision
2184 opsel_b <= BIN_MASK;
2185 carry_in <= '1';
2186 v.shift := to_signed(-1, EXP_BITS);
2187 v.state := ROUNDING_2;
2188 else
2189 if r.r(54) = '0' then
2190 -- result after masking could be zero, or could be a
2191 -- denormalized result that needs to be renormalized
2192 renormalize := '1';
2193 v.state := ROUNDING_3;
2194 else
2195 arith_done := '1';
2196 end if;
2197 end if;
2198 if round(0) = '1' then
2199 v.fpscr(FPSCR_XX) := '1';
2200 if r.tiny = '1' then
2201 v.fpscr(FPSCR_UX) := '1';
2202 end if;
2203 end if;
2204
2205 when ROUNDING_2 =>
2206 -- Check for overflow during rounding
2207 v.x := '0';
2208 if r.r(55) = '1' then
2209 opsel_r <= RES_SHIFT;
2210 if exp_huge = '1' then
2211 v.state := ROUND_OFLOW;
2212 else
2213 arith_done := '1';
2214 end if;
2215 elsif r.r(54) = '0' then
2216 -- Do CLZ so we can renormalize the result
2217 renormalize := '1';
2218 v.state := ROUNDING_3;
2219 else
2220 arith_done := '1';
2221 end if;
2222
2223 when ROUNDING_3 =>
2224 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2225 if mant_nz = '0' then
2226 v.result_class := ZERO;
2227 if r.is_subtract = '1' then
2228 -- set result sign depending on rounding mode
2229 v.result_sign := r.round_mode(1) and r.round_mode(0);
2230 end if;
2231 arith_done := '1';
2232 else
2233 -- Renormalize result after rounding
2234 opsel_r <= RES_SHIFT;
2235 v.denorm := exp_tiny;
2236 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2237 if new_exp < to_signed(-1022, EXP_BITS) then
2238 v.state := DENORM;
2239 else
2240 arith_done := '1';
2241 end if;
2242 end if;
2243
2244 when DENORM =>
2245 opsel_r <= RES_SHIFT;
2246 arith_done := '1';
2247
2248 end case;
2249
2250 if zero_divide = '1' then
2251 v.fpscr(FPSCR_ZX) := '1';
2252 end if;
2253 if qnan_result = '1' then
2254 invalid := '1';
2255 v.result_class := NAN;
2256 v.result_sign := '0';
2257 misc_sel <= "0001";
2258 opsel_r <= RES_MISC;
2259 end if;
2260 if arith_done = '1' then
2261 -- Enabled invalid exception doesn't write result or FPRF
2262 -- Neither does enabled zero-divide exception
2263 if (invalid and r.fpscr(FPSCR_VE)) = '0' and
2264 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2265 v.writing_back := '1';
2266 v.update_fprf := '1';
2267 end if;
2268 v.instr_done := '1';
2269 v.state := IDLE;
2270 update_fx := '1';
2271 end if;
2272
2273 -- Multiplier and divide/square root data path
2274 case msel_1 is
2275 when MUL1_A =>
2276 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2277 when MUL1_B =>
2278 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2279 when MUL1_Y =>
2280 f_to_multiply.data1 <= r.y;
2281 when others =>
2282 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2283 end case;
2284 case msel_2 is
2285 when MUL2_C =>
2286 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2287 when MUL2_LUT =>
2288 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2289 when MUL2_P =>
2290 f_to_multiply.data2 <= r.p;
2291 when others =>
2292 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2293 end case;
2294 maddend := (others => '0');
2295 case msel_add is
2296 when MULADD_CONST =>
2297 -- addend is 2.0 or 1.5 in 16.112 format
2298 if r.is_sqrt = '0' then
2299 maddend(113) := '1'; -- 2.0
2300 else
2301 maddend(112 downto 111) := "11"; -- 1.5
2302 end if;
2303 when MULADD_A =>
2304 -- addend is A in 16.112 format
2305 maddend(121 downto 58) := r.a.mantissa;
2306 when MULADD_RS =>
2307 -- addend is concatenation of R and S in 16.112 format
2308 maddend := "000000" & r.r & r.s & "00";
2309 when others =>
2310 end case;
2311 if msel_inv = '1' then
2312 f_to_multiply.addend <= not maddend;
2313 else
2314 f_to_multiply.addend <= maddend;
2315 end if;
2316 f_to_multiply.not_result <= msel_inv;
2317 if set_y = '1' then
2318 v.y := f_to_multiply.data2;
2319 end if;
2320 if multiply_to_f.valid = '1' then
2321 if pshift = '0' then
2322 v.p := multiply_to_f.result(63 downto 0);
2323 else
2324 v.p := multiply_to_f.result(119 downto 56);
2325 end if;
2326 end if;
2327
2328 -- Data path.
2329 -- This has A and B input multiplexers, an adder, a shifter,
2330 -- count-leading-zeroes logic, and a result mux.
2331 if longmask = '1' then
2332 mshift := r.shift + to_signed(-29, EXP_BITS);
2333 else
2334 mshift := r.shift;
2335 end if;
2336 if mshift < to_signed(-64, EXP_BITS) then
2337 mask := (others => '1');
2338 elsif mshift >= to_signed(0, EXP_BITS) then
2339 mask := (others => '0');
2340 else
2341 mask := right_mask(unsigned(mshift(5 downto 0)));
2342 end if;
2343 case opsel_a is
2344 when AIN_R =>
2345 in_a0 := r.r;
2346 when AIN_A =>
2347 in_a0 := r.a.mantissa;
2348 when AIN_B =>
2349 in_a0 := r.b.mantissa;
2350 when others =>
2351 in_a0 := r.c.mantissa;
2352 end case;
2353 if (or (mask and in_a0)) = '1' and set_x = '1' then
2354 v.x := '1';
2355 end if;
2356 if opsel_ainv = '1' then
2357 in_a0 := not in_a0;
2358 end if;
2359 if opsel_amask = '1' then
2360 in_a0 := in_a0 and not mask;
2361 end if;
2362 in_a <= in_a0;
2363 case opsel_b is
2364 when BIN_ZERO =>
2365 in_b0 := (others => '0');
2366 when BIN_R =>
2367 in_b0 := r.r;
2368 when BIN_MASK =>
2369 in_b0 := mask;
2370 when others =>
2371 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2372 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2373 end case;
2374 if opsel_binv = '1' then
2375 in_b0 := not in_b0;
2376 end if;
2377 in_b <= in_b0;
2378 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2379 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2380 std_ulogic_vector(r.shift(6 downto 0)));
2381 else
2382 shift_res := (others => '0');
2383 end if;
2384 case opsel_r is
2385 when RES_SUM =>
2386 result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2387 when RES_SHIFT =>
2388 result <= shift_res;
2389 when RES_MULT =>
2390 result <= multiply_to_f.result(121 downto 58);
2391 when others =>
2392 case misc_sel is
2393 when "0000" =>
2394 misc := x"00000000" & (r.fpscr and fpscr_mask);
2395 when "0001" =>
2396 -- generated QNaN mantissa
2397 misc := x"0020000000000000";
2398 when "0010" =>
2399 -- mantissa of max representable DP number
2400 misc := x"007ffffffffffffc";
2401 when "0011" =>
2402 -- mantissa of max representable SP number
2403 misc := x"007fffff80000000";
2404 when "0100" =>
2405 -- fmrgow result
2406 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2407 when "0110" =>
2408 -- fmrgew result
2409 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2410 when "0111" =>
2411 misc := 10x"000" & inverse_est & 35x"000000000";
2412 when "1000" =>
2413 -- max positive result for fctiw[z]
2414 misc := x"000000007fffffff";
2415 when "1001" =>
2416 -- max negative result for fctiw[z]
2417 misc := x"ffffffff80000000";
2418 when "1010" =>
2419 -- max positive result for fctiwu[z]
2420 misc := x"00000000ffffffff";
2421 when "1011" =>
2422 -- max negative result for fctiwu[z]
2423 misc := x"0000000000000000";
2424 when "1100" =>
2425 -- max positive result for fctid[z]
2426 misc := x"7fffffffffffffff";
2427 when "1101" =>
2428 -- max negative result for fctid[z]
2429 misc := x"8000000000000000";
2430 when "1110" =>
2431 -- max positive result for fctidu[z]
2432 misc := x"ffffffffffffffff";
2433 when "1111" =>
2434 -- max negative result for fctidu[z]
2435 misc := x"0000000000000000";
2436 when others =>
2437 misc := x"0000000000000000";
2438 end case;
2439 result <= misc;
2440 end case;
2441 v.r := result;
2442 if set_s = '1' then
2443 case opsel_s is
2444 when S_NEG =>
2445 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2446 when S_MULT =>
2447 v.s := multiply_to_f.result(57 downto 2);
2448 when S_SHIFT =>
2449 v.s := shift_res(63 downto 8);
2450 if shift_res(7 downto 0) /= x"00" then
2451 v.x := '1';
2452 end if;
2453 when others =>
2454 v.s := (others => '0');
2455 end case;
2456 end if;
2457
2458 if set_a = '1' then
2459 v.a.exponent := new_exp;
2460 v.a.mantissa := shift_res;
2461 end if;
2462 if set_b = '1' then
2463 v.b.exponent := new_exp;
2464 v.b.mantissa := shift_res;
2465 end if;
2466 if set_c = '1' then
2467 v.c.exponent := new_exp;
2468 v.c.mantissa := shift_res;
2469 end if;
2470
2471 if opsel_r = RES_SHIFT then
2472 v.result_exp := new_exp;
2473 end if;
2474
2475 if renormalize = '1' then
2476 clz := count_left_zeroes(r.r);
2477 if renorm_sqrt = '1' then
2478 -- make denormalized value end up with even exponent
2479 clz(0) := '1';
2480 end if;
2481 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2482 end if;
2483
2484 if r.int_result = '1' then
2485 fp_result <= r.r;
2486 else
2487 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2488 r.single_prec, r.quieten_nan);
2489 end if;
2490 if r.update_fprf = '1' then
2491 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2492 r.r(54) and not r.denorm);
2493 end if;
2494
2495 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2496 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2497 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2498 v.fpscr(FPSCR_VE downto FPSCR_XE));
2499 if update_fx = '1' and
2500 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2501 v.fpscr(FPSCR_FX) := '1';
2502 end if;
2503 if r.rc = '1' then
2504 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2505 end if;
2506
2507 if illegal = '1' then
2508 v.instr_done := '0';
2509 v.do_intr := '0';
2510 v.writing_back := '0';
2511 v.busy := '0';
2512 v.state := IDLE;
2513 else
2514 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2515 if v.state /= IDLE or v.do_intr = '1' then
2516 v.busy := '1';
2517 end if;
2518 end if;
2519
2520 rin <= v;
2521 e_out.illegal <= illegal;
2522 end process;
2523
2524 end architecture behaviour;