Add Tercel PHY reset synchronization
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Port interface of the floating-point unit.
entity fpu is
    port (
        -- Clock, and a reset that is sampled on the rising clock edge.
        clk, rst : in std_ulogic;

        -- Instruction/operand input record and the status record
        -- (busy, exception) returned to the issuer.
        e_in     : in  Execute1toFPUType;
        e_out    : out FPUToExecute1Type;

        -- Result, CR and interrupt information for writeback.
        w_out    : out FPUToWritebackType
    );
end entity fpu;
25
26 architecture behaviour of fpu is
-- Classification of a decoded operand or of a result.
type fp_number_class is (ZERO, FINITE, INFINITY, NAN);

-- Width in bits of every internal signed exponent value.
constant EXP_BITS : natural := 13;

-- One operand after decoding: class, sign, exponent and mantissa.
type fpu_reg_type is record
    class    : fp_number_class;
    negative : std_ulogic;
    exponent : signed(EXP_BITS-1 downto 0);     -- unbiased
    mantissa : std_ulogic_vector(63 downto 0);  -- 10.54 format
end record;
37
-- States of the FPU state machine (held in reg_type.state).
-- The order of the literals is unchanged; they are only regrouped here.
type state_t is (
    -- Reset / waiting-for-instruction state.
    IDLE,
    -- DO_* states: the visible part of the IDLE dispatch enters
    -- DO_FTDIV, DO_FTSQRT, DO_MCRFS and DO_FCMP directly; the others are
    -- presumably per-instruction entry states as well.
    DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
    DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
    DO_FCFID, DO_FCTI,
    DO_FRSP, DO_FRI,
    DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
    DO_FRE, DO_FRSQRTE,
    DO_FSEL,
    -- Follow-on states, named after the operation they belong to.
    FRI_1,
    ADD_1, ADD_SHIFT, ADD_2, ADD_3,
    CMP_1, CMP_2,
    MULT_1,
    FMADD_1, FMADD_2, FMADD_3,
    FMADD_4, FMADD_5, FMADD_6,
    LOOKUP,
    DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
    FRE_1,
    RSQRT_1,
    FTDIV_1,
    SQRT_1, SQRT_2, SQRT_3, SQRT_4,
    SQRT_5, SQRT_6, SQRT_7, SQRT_8,
    SQRT_9, SQRT_10, SQRT_11, SQRT_12,
    INT_SHIFT, INT_ROUND, INT_ISHIFT,
    INT_FINAL, INT_CHECK, INT_OFLOW,
    -- Result completion states.
    FINISH, NORMALIZE,
    ROUND_UFLOW, ROUND_OFLOW,
    ROUNDING, ROUNDING_2, ROUNDING_3,
    DENORM,
    RENORM_A, RENORM_A2,
    RENORM_B, RENORM_B2,
    RENORM_C, RENORM_C2,
    NAN_RESULT, EXC_RESULT
);
70
-- All registered state of the FPU.  `r` holds the current value and
-- `rin` the value loaded on the next rising clock edge.
-- Field order is identical to the original declaration; adjacent fields
-- of the same type are merely declared together.
type reg_type is record
    state        : state_t;
    -- Handshake / completion flags (drive e_out.busy, w_out.valid,
    -- w_out.interrupt and w_out.srr1).
    busy, instr_done, do_intr, illegal : std_ulogic;
    -- Copy of the instruction captured when e_in.valid = '1'.
    op           : insn_type_t;
    insn         : std_ulogic_vector(31 downto 0);
    nia          : std_ulogic_vector(63 downto 0);
    instr_tag    : instr_tag_t;
    dest_fpr     : gspr_index_t;
    fe_mode, rc, is_cmp, single_prec : std_ulogic;
    fpscr        : std_ulogic_vector(31 downto 0);
    -- Decoded operands (results of decode_dp on e_in.fra/frb/frc).
    a, b, c      : fpu_reg_type;
    -- Working registers of the datapath.
    r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
    s            : std_ulogic_vector(55 downto 0);  -- extended fraction
    x            : std_ulogic;
    p, y         : std_ulogic_vector(63 downto 0);  -- 8.56 format
    -- Description of the result being built.
    result_sign  : std_ulogic;
    result_class : fp_number_class;
    result_exp   : signed(EXP_BITS-1 downto 0);
    shift        : signed(EXP_BITS-1 downto 0);
    writing_back, int_result : std_ulogic;
    cr_result    : std_ulogic_vector(3 downto 0);
    cr_mask      : std_ulogic_vector(7 downto 0);
    old_exc      : std_ulogic_vector(4 downto 0);
    update_fprf, quieten_nan, tiny, denorm : std_ulogic;
    round_mode   : std_ulogic_vector(2 downto 0);
    -- Per-operation control flags.
    is_subtract, exp_cmp, madd_cmp, add_bsmall : std_ulogic;
    is_multiply, is_sqrt, first : std_ulogic;
    count        : unsigned(1 downto 0);
    doing_ftdiv  : std_ulogic_vector(1 downto 0);
    opsel_a      : std_ulogic_vector(1 downto 0);
    use_a, use_b, use_c : std_ulogic;
    invalid, negate, longmask : std_ulogic;
end record;
126
-- Storage type of the inverse-estimate ROM: 1024 entries of 18 bits.
type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);

-- Current and next register state.
signal r, rin : reg_type;

-- Value presented on w_out.write_data.
signal fp_result : std_ulogic_vector(63 downto 0);

-- Datapath operand/result selectors (see the opsel constants).
signal opsel_b, opsel_r, opsel_s : std_ulogic_vector(1 downto 0);
signal opsel_ainv, opsel_mask, opsel_binv : std_ulogic;

-- Datapath operands and result.
signal in_a, in_b, result : std_ulogic_vector(63 downto 0);
signal carry_in, lost_bits : std_ulogic;

-- Non-zero detectors: r.r(55 downto 31), r.r(30 downto 2) and r.s.
signal r_hi_nz, r_lo_nz, s_nz : std_ulogic;

signal misc_sel : std_ulogic_vector(3 downto 0);

-- Interface to the multiplier instance fpu_multiply_0.
signal f_to_multiply : MultiplyInputType;
signal multiply_to_f : MultiplyOutputType;
signal msel_1, msel_2, msel_add : std_ulogic_vector(1 downto 0);
signal msel_inv : std_ulogic;

-- Registered lookup-table output: a leading '1' followed by the
-- 18-bit table entry.
signal inverse_est : std_ulogic_vector(18 downto 0);
154
-- opsel values
-- A-input selection (r.opsel_a)
constant AIN_R        : std_ulogic_vector(1 downto 0) := "00";
constant AIN_A        : std_ulogic_vector(1 downto 0) := "01";
constant AIN_B        : std_ulogic_vector(1 downto 0) := "10";
constant AIN_C        : std_ulogic_vector(1 downto 0) := "11";

-- B-input selection (opsel_b)
constant BIN_ZERO     : std_ulogic_vector(1 downto 0) := "00";
constant BIN_R        : std_ulogic_vector(1 downto 0) := "01";
constant BIN_RND      : std_ulogic_vector(1 downto 0) := "10";
constant BIN_PS6      : std_ulogic_vector(1 downto 0) := "11";

-- Result selection (opsel_r)
constant RES_SUM      : std_ulogic_vector(1 downto 0) := "00";
constant RES_SHIFT    : std_ulogic_vector(1 downto 0) := "01";
constant RES_MULT     : std_ulogic_vector(1 downto 0) := "10";
constant RES_MISC     : std_ulogic_vector(1 downto 0) := "11";

-- S-register selection (opsel_s)
constant S_ZERO       : std_ulogic_vector(1 downto 0) := "00";
constant S_NEG        : std_ulogic_vector(1 downto 0) := "01";
constant S_SHIFT      : std_ulogic_vector(1 downto 0) := "10";
constant S_MULT       : std_ulogic_vector(1 downto 0) := "11";

-- msel values
-- First multiplier operand (msel_1)
constant MUL1_A       : std_ulogic_vector(1 downto 0) := "00";
constant MUL1_B       : std_ulogic_vector(1 downto 0) := "01";
constant MUL1_Y       : std_ulogic_vector(1 downto 0) := "10";
constant MUL1_R       : std_ulogic_vector(1 downto 0) := "11";

-- Second multiplier operand (msel_2)
constant MUL2_C       : std_ulogic_vector(1 downto 0) := "00";
constant MUL2_LUT     : std_ulogic_vector(1 downto 0) := "01";
constant MUL2_P       : std_ulogic_vector(1 downto 0) := "10";
constant MUL2_R       : std_ulogic_vector(1 downto 0) := "11";

-- Multiplier addend (msel_add)
constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";
constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
191
-- Inverse lookup table, indexed by the top 8 fraction bits
-- The first 256 entries are the reciprocal (1/x) lookup table,
-- and the remaining 768 entries are the reciprocal square root table.
-- Output range is [0.5, 1) in 0.19 format, though the top
-- bit isn't stored since it is always 1.
-- Each output value is the inverse of the center of the input
-- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
-- entry 1 is 1 / (1 + 3/512), etc.
--
-- The table is read-only (its only use is the synchronous read in the
-- lut_access process), so it is declared as a constant rather than as
-- an undriven signal carrying an initial value.
constant inverse_table : lookup_table := (
    -- 1/x lookup table
    -- Unit bit is assumed to be 1, so input range is [1, 2)
    18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
    18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
    18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
    18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
    18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
    18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
    18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
    18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
    18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
    18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
    18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
    18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
    18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
    18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
    18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
    18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
    18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
    18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
    18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
    18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
    18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
    18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
    18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
    18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
    18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
    18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
    18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
    18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
    18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
    18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
    18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
    18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
    -- 1/sqrt(x) lookup table
    -- Input is in the range [1, 4), i.e. two bits to the left of the
    -- binary point.  Those 2 bits index the following 3 blocks of 256 values.
    -- 1.0 ... 1.9999
    18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
    18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
    18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
    18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
    18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
    18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
    18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
    18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
    18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
    18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
    18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
    18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
    18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
    18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
    18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
    18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
    18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
    18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
    18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
    18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
    18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
    18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
    18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
    18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
    18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
    18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
    18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
    18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
    18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
    18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
    18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
    18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
    -- 2.0 ... 2.9999
    18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
    18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
    18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
    18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
    18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
    18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
    18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
    18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
    18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
    18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
    18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
    18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
    18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
    18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
    18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
    18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
    18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
    18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
    18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
    18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
    18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
    18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
    18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
    18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
    18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
    18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
    18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
    18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
    18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
    18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
    18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
    18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
    -- 3.0 ... 3.9999
    18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
    18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
    18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
    18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
    18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
    18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
    18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
    18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
    18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
    18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
    18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
    18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
    18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
    18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
    18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
    18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
    18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
    18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
    18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
    18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
    18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
    18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
    18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
    18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
    18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
    18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
    18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
    18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
    18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
    18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
    18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
    18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
);
338
-- Left and right shifter with 120 bit input and 64 bit output.
-- Shifts inp left by shift bits and returns the upper 64 bits of
-- the result.  The shift parameter is interpreted as a signed
-- number in the range -64..63, with negative values indicating
-- right shifts.
--
-- The shift is done in three multiplexer stages: shift(6 downto 5)
-- moves by multiples of 32, shift(4 downto 3) by multiples of 8 and
-- shift(2 downto 0) by single bits.
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    variable coarse : std_ulogic_vector(94 downto 0);
    variable medium : std_ulogic_vector(70 downto 0);
    variable fine   : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: 0, +32, -64 or -32 positions.  Right shifts are formed by
    -- padding zeroes on the left; the +32 case pads zeroes on the right.
    case shift(6 downto 5) is
        when "00"   => coarse := inp(119 downto 25);
        when "01"   => coarse := inp(87 downto 0) & "0000000";
        when "10"   => coarse := x"0000000000000000" & inp(119 downto 89);
        when others => coarse := x"00000000" & inp(119 downto 57);
    end case;

    -- Stage 2: a further 0, 8, 16 or 24 positions to the left.  The
    -- default is the 24-position window, which is also what any selector
    -- value other than "00", "01" or "10" yields.
    medium := coarse(70 downto 0);
    for k in 0 to 2 loop
        if shift(4 downto 3) = std_ulogic_vector(to_unsigned(k, 2)) then
            medium := coarse(94 - 8 * k downto 24 - 8 * k);
        end if;
    end loop;

    -- Stage 3: a further 0..7 positions to the left, defaulting to 7.
    fine := medium(63 downto 0);
    for k in 0 to 6 loop
        if shift(2 downto 0) = std_ulogic_vector(to_unsigned(k, 3)) then
            fine := medium(70 - k downto 7 - k);
        end if;
    end loop;

    return fine;
end;
391
-- Generate a mask with 0-bits on the left and 1-bits on the right which
-- selects the bits that will be lost in doing a right shift.  The shift
-- parameter is the bottom 6 bits of a negative shift count,
-- indicating a right shift.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0);
begin
    -- Bit b of the mask is set exactly when 63 - b >= shift, so the mask
    -- has (64 - shift) one-bits at the least-significant end.
    for b in mask'range loop
        if 63 - b >= shift then
            mask(b) := '1';
        else
            mask(b) := '0';
        end if;
    end loop;
    return mask;
end;
407
-- Split a DP floating-point number into components and work out its class.
-- If is_int = 1, the input is considered an integer: the 64 bits are copied
-- to the mantissa unchanged, the exponent is zero and the class is ZERO
-- only when every bit is zero.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;  -- some exponent bit is 1
    variable exp_all  : std_ulogic;  -- every exponent bit is 1
    variable frac_any : std_ulogic;  -- some fraction bit is 1
    variable kind     : std_ulogic_vector(2 downto 0);
begin
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));

    dec.negative := fpr(63);

    if is_int /= '0' then
        -- Integer operand
        dec.mantissa := fpr;
        dec.exponent := (others => '0');
        dec.class := ZERO;
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        end if;
    else
        -- Floating-point operand: remove the bias of 1023.  A zero exponent
        -- field (zero or denormal) uses the minimum exponent, -1022.
        if exp_any = '0' then
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;
        -- 10.54 format; the unit bit (bit 54) is 1 unless the exponent
        -- field is zero.
        dec.mantissa := "000000000" & exp_any & fpr(51 downto 0) & "00";
        kind := exp_all & exp_any & frac_any;
        case kind is
            when "000" =>
                dec.class := ZERO;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                dec.class := FINITE;
            when "110" =>
                dec.class := INFINITY;
            when others =>
                dec.class := NAN;
        end case;
    end if;
    return dec;
end;
447
-- Construct a DP floating-point result from components.
-- mantissa is indexed with absolute positions (bit 54 is the unit bit,
-- bits 53 downto 2 the fraction), so the actual must cover those indices.
-- When single_prec = '1' the low 29 fraction bits of the result are left
-- at zero.
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    variable packed : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    packed(63) := sign;

    -- Exponent field: all ones for infinities and NaNs, the biased exponent
    -- for normalized finite numbers, and zero otherwise (zeroes, denormals).
    if class = INFINITY or class = NAN then
        packed(62 downto 52) := (others => '1');
    elsif class = FINITE and mantissa(54) = '1' then
        packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
    end if;

    -- Fraction field: only finite numbers and NaNs carry one.
    if class = FINITE or class = NAN then
        if class = NAN then
            -- the top fraction bit can be forced on to make the NaN quiet
            packed(51) := quieten_nan or mantissa(53);
        else
            packed(51) := mantissa(53);
        end if;
        packed(50 downto 29) := mantissa(52 downto 31);
        if single_prec = '0' then
            packed(28 downto 0) := mantissa(30 downto 2);
        end if;
    end if;

    return packed;
end;
479
-- Determine whether to increment when rounding
-- Returns rounding_inc & inexact
-- Assumes x includes the bottom 29 bits of the mantissa already
-- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
--
-- rn(1 downto 0) selects the mode: "00" nearest, "01" towards zero, anything
-- else towards an infinity, where the magnitude grows only if rn(0) equals
-- the sign.  rn(2) = '1' disables the round-to-even treatment of ties in
-- nearest mode.
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable tail     : std_ulogic_vector(2 downto 0);  -- guard, round, sticky
    variable even_bit : std_ulogic;                     -- LSB of the kept part
    variable inexact  : std_ulogic;
    variable bump     : std_ulogic := '0';
    variable verdict  : std_ulogic_vector(1 downto 0);
begin
    -- Pick the bits just below the retained precision.
    if single_prec /= '0' then
        tail := mantissa(30 downto 29) & x;
        even_bit := mantissa(31);
    else
        tail := mantissa(1 downto 0) & x;
        even_bit := mantissa(2);
    end if;

    -- Any discarded bit makes the result inexact.
    inexact := or (tail);

    case rn(1 downto 0) is
        when "00" =>
            -- round to nearest
            if tail = "100" and rn(2) = '0' then
                bump := even_bit;       -- tie, round to even
            else
                bump := tail(2);
            end if;
        when "01" =>
            -- round towards zero: never increment
            null;
        when others =>
            -- round towards +/- inf
            if rn(0) = sign then
                -- round towards greater magnitude
                bump := inexact;
            end if;
    end case;

    verdict := bump & inexact;
    return verdict;
end;
517
-- Determine result flags to write into the FPSCR.
-- Returns five bits, leftmost first:
--   ZERO     : sign, 0, 0, 1, 0
--   FINITE   : not unitbit, sign, not sign, 0, 0
--   INFINITY : 0, sign, not sign, 0, 1
--   NAN      : 1, 0, 0, 0, 1
-- NOTE(review): this looks like the Power ISA FPRF encoding
-- (C, FL, FG, FE, FU) - confirm against the ISA.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
    -- b4 is the leftmost bit of the returned vector, b0 the rightmost
    variable b4, b3, b2, b1, b0 : std_ulogic := '0';
begin
    case class is
        when ZERO =>
            b4 := sign;
            b1 := '1';
        when FINITE =>
            b4 := not unitbit;
            b3 := sign;
            b2 := not sign;
        when INFINITY =>
            b3 := sign;
            b2 := not sign;
            b0 := '1';
        when NAN =>
            b4 := '1';
            b0 := '1';
    end case;
    return b4 & b3 & b2 & b1 & b0;
end;
533
534 begin
-- Multiplier instance, fed from f_to_multiply and returning multiply_to_f.
fpu_multiply_0: entity work.multiply
    port map (clk => clk, m_in => f_to_multiply, m_out => multiply_to_f);
541
-- State register: loads rin into r on every rising clock edge.  While
-- rst = '1' only the control fields listed below are forced to their idle
-- values; the remaining fields of r are left as they are.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            -- A new instruction may only arrive while the unit is idle.
            assert (r.state = IDLE) or (e_in.valid /= '1') severity failure;
            r <= rin;
        else
            r.writing_back <= '0';
            r.fpscr <= (others => '0');
            r.do_intr <= '0';
            r.instr_done <= '0';
            r.busy <= '0';
            r.state <= IDLE;
        end if;
    end if;
end process;
558
-- synchronous reads from lookup table
-- The address is a 2-bit block number followed by mantissa bits 53..46 of
-- operand B.  Block 0 is used unless r.is_sqrt = '1', in which case
-- mantissa bits 55..54 of B give the block number.
lut_access: process(clk)
    variable block_sel : std_ulogic_vector(1 downto 0);
    variable index     : unsigned(9 downto 0);
begin
    if rising_edge(clk) then
        block_sel := "00";
        if r.is_sqrt = '1' then
            block_sel := r.b.mantissa(55 downto 54);
        end if;
        index := unsigned(block_sel) & unsigned(r.b.mantissa(53 downto 46));
        -- the stored entries omit the leading 1 bit, so restore it here
        inverse_est <= '1' & inverse_table(to_integer(index));
    end if;
end process;
574
-- Status returned on e_out
e_out.busy      <= r.busy;
e_out.exception <= r.fpscr(FPSCR_FEX);

-- Completion and register write-back
w_out.valid        <= r.instr_done and not r.do_intr;
w_out.instr_tag    <= r.instr_tag;
w_out.write_enable <= r.writing_back;
w_out.write_reg    <= r.dest_fpr;
w_out.write_data   <= fp_result;

-- CR update: the 4-bit result is replicated eight times and
-- write_cr_mask carries r.cr_mask alongside it.
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_data   <= r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result;

-- Interrupt reporting: vector 0x700, with srr1 index 3 (= 47-44) set for
-- an illegal instruction and index 4 (= 47-43) set otherwise.
w_out.interrupt <= r.do_intr;
w_out.intr_vec  <= 16#700#;
w_out.srr0      <= r.nia;
w_out.srr1      <= (3 => r.illegal, 4 => not r.illegal, others => '0');
591
592 fpu_1: process(all)
593 variable v : reg_type;
594 variable adec : fpu_reg_type;
595 variable bdec : fpu_reg_type;
596 variable cdec : fpu_reg_type;
597 variable fpscr_mask : std_ulogic_vector(31 downto 0);
598 variable illegal : std_ulogic;
599 variable j, k : integer;
600 variable flm : std_ulogic_vector(7 downto 0);
601 variable int_input : std_ulogic;
602 variable mask : std_ulogic_vector(63 downto 0);
603 variable in_a0 : std_ulogic_vector(63 downto 0);
604 variable in_b0 : std_ulogic_vector(63 downto 0);
605 variable misc : std_ulogic_vector(63 downto 0);
606 variable shift_res : std_ulogic_vector(63 downto 0);
607 variable round : std_ulogic_vector(1 downto 0);
608 variable update_fx : std_ulogic;
609 variable arith_done : std_ulogic;
610 variable invalid : std_ulogic;
611 variable zero_divide : std_ulogic;
612 variable mant_nz : std_ulogic;
613 variable min_exp : signed(EXP_BITS-1 downto 0);
614 variable max_exp : signed(EXP_BITS-1 downto 0);
615 variable bias_exp : signed(EXP_BITS-1 downto 0);
616 variable new_exp : signed(EXP_BITS-1 downto 0);
617 variable exp_tiny : std_ulogic;
618 variable exp_huge : std_ulogic;
619 variable renormalize : std_ulogic;
620 variable clz : std_ulogic_vector(5 downto 0);
621 variable set_x : std_ulogic;
622 variable mshift : signed(EXP_BITS-1 downto 0);
623 variable need_check : std_ulogic;
624 variable msb : std_ulogic;
625 variable is_add : std_ulogic;
626 variable set_a : std_ulogic;
627 variable set_b : std_ulogic;
628 variable set_c : std_ulogic;
629 variable set_y : std_ulogic;
630 variable set_s : std_ulogic;
631 variable qnan_result : std_ulogic;
632 variable px_nz : std_ulogic;
633 variable pcmpb_eq : std_ulogic;
634 variable pcmpb_lt : std_ulogic;
635 variable pshift : std_ulogic;
636 variable renorm_sqrt : std_ulogic;
637 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
638 variable shiftin : std_ulogic;
639 variable mulexp : signed(EXP_BITS-1 downto 0);
640 variable maddend : std_ulogic_vector(127 downto 0);
641 variable sum : std_ulogic_vector(63 downto 0);
642 variable round_inc : std_ulogic_vector(63 downto 0);
643 begin
644 v := r;
645 illegal := '0';
646 v.busy := '0';
647 int_input := '0';
648
649 -- capture incoming instruction
650 if e_in.valid = '1' then
651 v.insn := e_in.insn;
652 v.nia := e_in.nia;
653 v.op := e_in.op;
654 v.instr_tag := e_in.itag;
655 v.fe_mode := or (e_in.fe_mode);
656 v.dest_fpr := e_in.frt;
657 v.single_prec := e_in.single;
658 v.longmask := e_in.single;
659 v.int_result := '0';
660 v.rc := e_in.rc;
661 v.is_cmp := e_in.out_cr;
662 if e_in.out_cr = '0' then
663 v.cr_mask := num_to_fxm(1);
664 else
665 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
666 end if;
667 int_input := '0';
668 if e_in.op = OP_FPOP_I then
669 int_input := '1';
670 end if;
671 v.quieten_nan := '1';
672 v.tiny := '0';
673 v.denorm := '0';
674 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
675 v.is_subtract := '0';
676 v.is_multiply := '0';
677 v.is_sqrt := '0';
678 v.add_bsmall := '0';
679 v.doing_ftdiv := "00";
680
681 adec := decode_dp(e_in.fra, int_input);
682 bdec := decode_dp(e_in.frb, int_input);
683 cdec := decode_dp(e_in.frc, int_input);
684 v.a := adec;
685 v.b := bdec;
686 v.c := cdec;
687
688 v.exp_cmp := '0';
689 if adec.exponent > bdec.exponent then
690 v.exp_cmp := '1';
691 end if;
692 v.madd_cmp := '0';
693 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
694 v.madd_cmp := '1';
695 end if;
696 end if;
697
698 r_hi_nz <= or (r.r(55 downto 31));
699 r_lo_nz <= or (r.r(30 downto 2));
700 s_nz <= or (r.s);
701
-- Work out the legal exponent window for the current operation and test
-- the prospective exponent (result_exp - shift) against it.
-- Start from the single-precision limits and replace them when the
-- operation is double precision.
max_exp := to_signed(127, EXP_BITS);
min_exp := to_signed(-126, EXP_BITS);
bias_exp := to_signed(192, EXP_BITS);
if r.single_prec = '0' then
    -- doing_ftdiv(1) narrows the upper limit and doing_ftdiv(0) raises
    -- the lower limit; both bits are only set by the ftdiv states.
    max_exp := to_signed(1020, EXP_BITS);
    if r.doing_ftdiv(1) = '0' then
        max_exp := to_signed(1023, EXP_BITS);
    end if;
    min_exp := to_signed(-1021, EXP_BITS);
    if r.doing_ftdiv(0) = '0' then
        min_exp := to_signed(-1022, EXP_BITS);
    end if;
    bias_exp := to_signed(1536, EXP_BITS);
end if;

new_exp := r.result_exp - r.shift;
-- exp_tiny: new exponent is below the window
if new_exp < min_exp then
    exp_tiny := '1';
else
    exp_tiny := '0';
end if;
-- exp_huge: new exponent is above the window
if new_exp > max_exp then
    exp_huge := '1';
else
    exp_huge := '0';
end if;
728
-- Compare P with zero and with B.
-- px_nz reports any set bit in P(57 downto 4); the other two flags compare
-- P(59 downto 4) with the low 56 bits of B's mantissa, for equality and
-- for unsigned less-than respectively.
px_nz := or (r.p(57 downto 4));
if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
    pcmpb_eq := '1';
else
    pcmpb_eq := '0';
end if;
if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
    pcmpb_lt := '1';
else
    pcmpb_lt := '0';
end if;
739
740 v.writing_back := '0';
741 v.instr_done := '0';
742 v.update_fprf := '0';
743 v.shift := to_signed(0, EXP_BITS);
744 v.first := '0';
745 v.opsel_a := AIN_R;
746 opsel_ainv <= '0';
747 opsel_mask <= '0';
748 opsel_b <= BIN_ZERO;
749 opsel_binv <= '0';
750 opsel_r <= RES_SUM;
751 opsel_s <= S_ZERO;
752 carry_in <= '0';
753 misc_sel <= "0000";
754 fpscr_mask := (others => '1');
755 update_fx := '0';
756 arith_done := '0';
757 invalid := '0';
758 zero_divide := '0';
759 renormalize := '0';
760 set_x := '0';
761 qnan_result := '0';
762 set_a := '0';
763 set_b := '0';
764 set_c := '0';
765 set_s := '0';
766 f_to_multiply.is_32bit <= '0';
767 f_to_multiply.valid <= '0';
768 msel_1 <= MUL1_A;
769 msel_2 <= MUL2_C;
770 msel_add <= MULADD_ZERO;
771 msel_inv <= '0';
772 set_y := '0';
773 pshift := '0';
774 renorm_sqrt := '0';
775 shiftin := '0';
776 case r.state is
777 when IDLE =>
778 v.use_a := '0';
779 v.use_b := '0';
780 v.use_c := '0';
781 v.invalid := '0';
782 v.negate := '0';
783 if e_in.valid = '1' then
784 case e_in.insn(5 downto 1) is
785 when "00000" =>
786 if e_in.insn(8) = '1' then
787 if e_in.insn(6) = '0' then
788 v.state := DO_FTDIV;
789 else
790 v.state := DO_FTSQRT;
791 end if;
792 elsif e_in.insn(7) = '1' then
793 v.state := DO_MCRFS;
794 else
795 v.opsel_a := AIN_B;
796 v.state := DO_FCMP;
797 end if;
798 when "00110" =>
799 if e_in.insn(10) = '0' then
800 if e_in.insn(8) = '0' then
801 v.state := DO_MTFSB;
802 else
803 v.state := DO_MTFSFI;
804 end if;
805 else
806 v.state := DO_FMRG;
807 end if;
808 when "00111" =>
809 if e_in.insn(8) = '0' then
810 v.state := DO_MFFS;
811 else
812 v.state := DO_MTFSF;
813 end if;
814 when "01000" =>
815 v.opsel_a := AIN_B;
816 if e_in.insn(9 downto 8) /= "11" then
817 v.state := DO_FMR;
818 else
819 v.state := DO_FRI;
820 end if;
821 when "01100" =>
822 v.opsel_a := AIN_B;
823 v.state := DO_FRSP;
824 when "01110" =>
825 v.opsel_a := AIN_B;
826 if int_input = '1' then
827 -- fcfid[u][s]
828 v.state := DO_FCFID;
829 else
830 v.state := DO_FCTI;
831 end if;
832 when "01111" =>
833 v.round_mode := "001";
834 v.opsel_a := AIN_B;
835 v.state := DO_FCTI;
836 when "10010" =>
837 v.opsel_a := AIN_A;
838 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
839 v.opsel_a := AIN_B;
840 end if;
841 v.state := DO_FDIV;
842 when "10100" | "10101" =>
843 v.opsel_a := AIN_A;
844 v.state := DO_FADD;
845 when "10110" =>
846 v.is_sqrt := '1';
847 v.opsel_a := AIN_B;
848 v.state := DO_FSQRT;
849 when "10111" =>
850 v.state := DO_FSEL;
851 when "11000" =>
852 v.opsel_a := AIN_B;
853 v.state := DO_FRE;
854 when "11001" =>
855 v.is_multiply := '1';
856 v.opsel_a := AIN_A;
857 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
858 v.opsel_a := AIN_C;
859 end if;
860 v.state := DO_FMUL;
861 when "11010" =>
862 v.is_sqrt := '1';
863 v.opsel_a := AIN_B;
864 v.state := DO_FRSQRTE;
865 when "11100" | "11101" | "11110" | "11111" =>
866 if v.a.mantissa(54) = '0' then
867 v.opsel_a := AIN_A;
868 elsif v.c.mantissa(54) = '0' then
869 v.opsel_a := AIN_C;
870 else
871 v.opsel_a := AIN_B;
872 end if;
873 v.state := DO_FMADD;
874 when others =>
875 illegal := '1';
876 end case;
877 end if;
878 v.x := '0';
879 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
880 set_s := '1';
881
when DO_MCRFS =>
    -- mcrfs: copy the FPSCR field selected by BFA into cr_result and
    -- clear that field in the FPSCR.  Finishes in this state.
    v.instr_done := '1';
    v.state := IDLE;
    j := to_integer(unsigned(insn_bfa(r.insn)));
    -- Constant-bounded loop so the field slice has static bounds in
    -- each iteration; only the iteration matching BFA does anything.
    for fld in 0 to 7 loop
        if fld = j then
            -- field 0 is the most significant nibble of the FPSCR
            k := (7 - fld) * 4;
            v.cr_result := r.fpscr(k + 3 downto k);
            fpscr_mask(k + 3 downto k) := "0000";
        end if;
    end loop;
    -- Bits set in x"6007F8FF" are always preserved; only the remaining
    -- bits of the selected field are cleared.
    v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
894
when DO_FTDIV =>
    -- ftdiv: classify A and B for a software divide sequence.  The only
    -- result is a CR field: cr_result(2) is fg_flag and cr_result(1) is
    -- fe_flag.  Unless the else branch below overrides it, the
    -- instruction completes in this state.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- fg_flag: A is infinite, or B is zero, infinite or denormalized.
    -- Bit 54 is the unit bit of the 10.54 mantissa, so a finite B is a
    -- denorm exactly when that bit is clear.  (Bit 53 is the top
    -- fraction bit, which is also clear for ordinary normalized values
    -- such as 1.0, so it must not be used for this test.)
    if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- fe_flag cases that can be decided from the operand classes and
    -- A's exponent alone
    if r.a.class = NAN or r.a.class = INFINITY or
        r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
        (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
        v.cr_result(1) := '1';
    else
        -- Not decided yet: continue in FTDIV_1, which sets fe_flag from
        -- exp_tiny/exp_huge using the narrowed exponent limits selected
        -- by doing_ftdiv.
        -- NOTE(review): FTDIV_1 tests new_exp = r.result_exp - r.shift,
        -- and result_exp is not loaded in this arm -- confirm it holds
        -- the intended exponent at this point.
        v.doing_ftdiv := "11";
        v.first := '1';
        v.state := FTDIV_1;
        v.instr_done := '0';
    end if;
913
when DO_FTSQRT =>
    -- ftsqrt: classify B for a software square-root sequence.  The only
    -- result is a CR field: cr_result(2) is fg_flag and cr_result(1) is
    -- fe_flag.  Always completes in this state.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- fg_flag: B is zero, infinite or denormalized.  Bit 54 is the unit
    -- bit of the 10.54 mantissa, so a finite B is a denorm exactly when
    -- that bit is clear.  (Bit 53 is the top fraction bit, which is also
    -- clear for ordinary normalized values, so it must not be used for
    -- this test.)
    if r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- fe_flag: B is a NaN, infinite, zero or negative, or its exponent
    -- is at or below -970.  cr_result was cleared above, so the flag has
    -- to be SET here; assigning '0' would leave fe_flag permanently 0.
    if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
        or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
        v.cr_result(1) := '1';
    end if;
926
927 when DO_FCMP =>
928 -- fcmp[uo]
929 -- r.opsel_a = AIN_B
930 v.instr_done := '1';
931 v.state := IDLE;
932 update_fx := '1';
933 v.result_exp := r.b.exponent;
934 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
935 (r.b.class = NAN and r.b.mantissa(53) = '0') then
936 -- Signalling NAN
937 v.fpscr(FPSCR_VXSNAN) := '1';
938 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
939 v.fpscr(FPSCR_VXVC) := '1';
940 end if;
941 invalid := '1';
942 v.cr_result := "0001"; -- unordered
943 elsif r.a.class = NAN or r.b.class = NAN then
944 if r.insn(6) = '1' then
945 -- fcmpo
946 v.fpscr(FPSCR_VXVC) := '1';
947 invalid := '1';
948 end if;
949 v.cr_result := "0001"; -- unordered
950 elsif r.a.class = ZERO and r.b.class = ZERO then
951 v.cr_result := "0010"; -- equal
952 elsif r.a.negative /= r.b.negative then
953 v.cr_result := r.a.negative & r.b.negative & "00";
954 elsif r.a.class = ZERO then
955 -- A and B are the same sign from here down
956 v.cr_result := not r.b.negative & r.b.negative & "00";
957 elsif r.a.class = INFINITY then
958 if r.b.class = INFINITY then
959 v.cr_result := "0010";
960 else
961 v.cr_result := r.a.negative & not r.a.negative & "00";
962 end if;
963 elsif r.b.class = ZERO then
964 -- A is finite from here down
965 v.cr_result := r.a.negative & not r.a.negative & "00";
966 elsif r.b.class = INFINITY then
967 v.cr_result := not r.b.negative & r.b.negative & "00";
968 elsif r.exp_cmp = '1' then
969 -- A and B are both finite from here down
970 v.cr_result := r.a.negative & not r.a.negative & "00";
971 elsif r.a.exponent /= r.b.exponent then
972 -- A exponent is smaller than B
973 v.cr_result := not r.a.negative & r.a.negative & "00";
974 else
975 -- Prepare to subtract mantissas, put B in R
976 v.cr_result := "0000";
977 v.instr_done := '0';
978 v.opsel_a := AIN_A;
979 v.state := CMP_1;
980 end if;
981 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
982
when DO_MTFSB =>
    -- mtfsb0 / mtfsb1: write instruction bit 6 into the single FPSCR bit
    -- named by the BT field.  Finishes in this state.
    v.instr_done := '1';
    v.state := IDLE;
    j := to_integer(unsigned(insn_bt(r.insn)));
    -- BT counts from the most significant end, so BT = n addresses
    -- fpscr(31 - n).  The constant-bounded loop keeps the bit index
    -- static in each iteration.
    for bt in 0 to 31 loop
        if bt = j then
            v.fpscr(31 - bt) := r.insn(6);
        end if;
    end loop;
993
when DO_MTFSFI =>
    -- mtfsfi: load the immediate U field of the instruction into the
    -- FPSCR field selected by BF.  Nothing is written when instruction
    -- bit 16 is set (presumably the W field -- confirm against the ISA).
    -- Finishes in this state.
    v.instr_done := '1';
    v.state := IDLE;
    j := to_integer(unsigned(insn_bf(r.insn)));
    for fld in 0 to 7 loop
        if r.insn(16) = '0' and fld = j then
            -- field 0 is the most significant nibble of the FPSCR
            k := (7 - fld) * 4;
            v.fpscr(k + 3 downto k) := insn_u(r.insn);
        end if;
    end loop;
1007
1008 when DO_FMRG =>
1009 -- fmrgew, fmrgow
1010 opsel_r <= RES_MISC;
1011 misc_sel <= "01" & r.insn(8) & '0';
1012 v.int_result := '1';
1013 v.writing_back := '1';
1014 v.instr_done := '1';
1015 v.state := IDLE;
1016
1017 when DO_MFFS =>
1018 v.int_result := '1';
1019 v.writing_back := '1';
1020 opsel_r <= RES_MISC;
1021 case r.insn(20 downto 16) is
1022 when "00000" =>
1023 -- mffs
1024 when "00001" =>
1025 -- mffsce
1026 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1027 when "10100" | "10101" =>
1028 -- mffscdrn[i] (but we don't implement DRN)
1029 fpscr_mask := x"000000FF";
1030 when "10110" =>
1031 -- mffscrn
1032 fpscr_mask := x"000000FF";
1033 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1034 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1035 when "10111" =>
1036 -- mffscrni
1037 fpscr_mask := x"000000FF";
1038 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1039 when "11000" =>
1040 -- mffsl
1041 fpscr_mask := x"0007F0FF";
1042 when others =>
1043 illegal := '1';
1044 end case;
1045 v.instr_done := '1';
1046 v.state := IDLE;
1047
when DO_MTFSF =>
    -- mtfsf: copy selected 4-bit fields of B's mantissa into the FPSCR.
    -- Finishes in this state.
    v.instr_done := '1';
    v.state := IDLE;
    -- Field mask: normally instruction bits 24..17; instruction bit 25
    -- forces every field to be written, and otherwise instruction bit 16
    -- forces none.  (In the Power ISA encoding these are presumably the
    -- FLM, L and W fields -- confirm.)
    flm := r.insn(24 downto 17);
    if r.insn(25) = '1' then
        flm := x"FF";
    elsif r.insn(16) = '1' then
        flm := x"00";
    end if;
    -- flm(n) governs FPSCR bits 4n+3 downto 4n
    for nib in 0 to 7 loop
        k := nib * 4;
        if flm(nib) = '1' then
            v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
        end if;
    end loop;
1064
1065 when DO_FMR =>
1066 -- r.opsel_a = AIN_B
1067 v.result_class := r.b.class;
1068 v.result_exp := r.b.exponent;
1069 v.quieten_nan := '0';
1070 if r.insn(9) = '1' then
1071 v.result_sign := '0'; -- fabs
1072 elsif r.insn(8) = '1' then
1073 v.result_sign := '1'; -- fnabs
1074 elsif r.insn(7) = '1' then
1075 v.result_sign := r.b.negative; -- fmr
1076 elsif r.insn(6) = '1' then
1077 v.result_sign := not r.b.negative; -- fneg
1078 else
1079 v.result_sign := r.a.negative; -- fcpsgn
1080 end if;
1081 v.writing_back := '1';
1082 v.instr_done := '1';
1083 v.state := IDLE;
1084
1085 when DO_FRI => -- fri[nzpm]
1086 -- r.opsel_a = AIN_B
1087 v.result_class := r.b.class;
1088 v.result_sign := r.b.negative;
1089 v.result_exp := r.b.exponent;
1090 v.fpscr(FPSCR_FR) := '0';
1091 v.fpscr(FPSCR_FI) := '0';
1092 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1093 -- Signalling NAN
1094 v.fpscr(FPSCR_VXSNAN) := '1';
1095 invalid := '1';
1096 end if;
1097 if r.b.class = FINITE then
1098 if r.b.exponent >= to_signed(52, EXP_BITS) then
1099 -- integer already, no rounding required
1100 arith_done := '1';
1101 else
1102 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1103 v.state := FRI_1;
1104 v.round_mode := '1' & r.insn(7 downto 6);
1105 end if;
1106 else
1107 arith_done := '1';
1108 end if;
1109
1110 when DO_FRSP =>
1111 -- r.opsel_a = AIN_B, r.shift = 0
1112 v.result_class := r.b.class;
1113 v.result_sign := r.b.negative;
1114 v.result_exp := r.b.exponent;
1115 v.fpscr(FPSCR_FR) := '0';
1116 v.fpscr(FPSCR_FI) := '0';
1117 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1118 -- Signalling NAN
1119 v.fpscr(FPSCR_VXSNAN) := '1';
1120 invalid := '1';
1121 end if;
1122 set_x := '1';
1123 if r.b.class = FINITE then
1124 if r.b.exponent < to_signed(-126, EXP_BITS) then
1125 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1126 v.state := ROUND_UFLOW;
1127 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1128 v.state := ROUND_OFLOW;
1129 else
1130 v.state := ROUNDING;
1131 end if;
1132 else
1133 arith_done := '1';
1134 end if;
1135
1136 when DO_FCTI =>
1137 -- instr bit 9: 1=dword 0=word
1138 -- instr bit 8: 1=unsigned 0=signed
1139 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1140 -- r.opsel_a = AIN_B
1141 v.result_class := r.b.class;
1142 v.result_sign := r.b.negative;
1143 v.result_exp := r.b.exponent;
1144 v.fpscr(FPSCR_FR) := '0';
1145 v.fpscr(FPSCR_FI) := '0';
1146 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1147 -- Signalling NAN
1148 v.fpscr(FPSCR_VXSNAN) := '1';
1149 invalid := '1';
1150 end if;
1151
1152 v.int_result := '1';
1153 case r.b.class is
1154 when ZERO =>
1155 arith_done := '1';
1156 when FINITE =>
1157 if r.b.exponent >= to_signed(64, EXP_BITS) or
1158 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1159 v.state := INT_OFLOW;
1160 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1161 -- integer already, no rounding required,
1162 -- shift into final position
1163 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1164 if r.insn(8) = '1' and r.b.negative = '1' then
1165 v.state := INT_OFLOW;
1166 else
1167 v.state := INT_ISHIFT;
1168 end if;
1169 else
1170 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1171 v.state := INT_SHIFT;
1172 end if;
1173 when INFINITY | NAN =>
1174 v.state := INT_OFLOW;
1175 end case;
1176
1177 when DO_FCFID =>
1178 -- r.opsel_a = AIN_B
1179 v.result_sign := '0';
1180 if r.insn(8) = '0' and r.b.negative = '1' then
1181 -- fcfid[s] with negative operand, set R = -B
1182 opsel_ainv <= '1';
1183 carry_in <= '1';
1184 v.result_sign := '1';
1185 end if;
1186 v.result_class := r.b.class;
1187 v.result_exp := to_signed(54, EXP_BITS);
1188 v.fpscr(FPSCR_FR) := '0';
1189 v.fpscr(FPSCR_FI) := '0';
1190 if r.b.class = ZERO then
1191 arith_done := '1';
1192 else
1193 v.state := FINISH;
1194 end if;
1195
1196 when DO_FADD =>
1197 -- fadd[s] and fsub[s]
1198 -- r.opsel_a = AIN_A
1199 v.result_sign := r.a.negative;
1200 v.result_class := r.a.class;
1201 v.result_exp := r.a.exponent;
1202 v.fpscr(FPSCR_FR) := '0';
1203 v.fpscr(FPSCR_FI) := '0';
1204 v.use_a := '1';
1205 v.use_b := '1';
1206 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1207 if r.a.class = FINITE and r.b.class = FINITE then
1208 v.is_subtract := not is_add;
1209 v.add_bsmall := r.exp_cmp;
1210 v.opsel_a := AIN_B;
1211 if r.exp_cmp = '0' then
1212 v.shift := r.a.exponent - r.b.exponent;
1213 v.result_sign := r.b.negative xnor r.insn(1);
1214 if r.a.exponent = r.b.exponent then
1215 v.state := ADD_2;
1216 else
1217 v.longmask := '0';
1218 v.state := ADD_SHIFT;
1219 end if;
1220 else
1221 v.state := ADD_1;
1222 end if;
1223 else
1224 if r.a.class = NAN or r.b.class = NAN then
1225 v.state := NAN_RESULT;
1226 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1227 -- invalid operation, construct QNaN
1228 v.fpscr(FPSCR_VXISI) := '1';
1229 qnan_result := '1';
1230 arith_done := '1';
1231 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1232 -- return -0 for rounding to -infinity
1233 v.result_sign := r.round_mode(1) and r.round_mode(0);
1234 arith_done := '1';
1235 elsif r.a.class = INFINITY or r.b.class = ZERO then
1236 -- result is A
1237 v.opsel_a := AIN_A;
1238 v.state := EXC_RESULT;
1239 else
1240 -- result is +/- B
1241 v.opsel_a := AIN_B;
1242 v.negate := not r.insn(1);
1243 v.state := EXC_RESULT;
1244 end if;
1245 end if;
1246
1247 when DO_FMUL =>
1248 -- fmul[s]
1249 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1250 v.result_sign := r.a.negative xor r.c.negative;
1251 v.result_class := r.a.class;
1252 v.fpscr(FPSCR_FR) := '0';
1253 v.fpscr(FPSCR_FI) := '0';
1254 v.use_a := '1';
1255 v.use_c := '1';
1256 if r.a.class = FINITE and r.c.class = FINITE then
1257 v.result_exp := r.a.exponent + r.c.exponent;
1258 -- Renormalize denorm operands
1259 if r.a.mantissa(54) = '0' then
1260 v.state := RENORM_A;
1261 elsif r.c.mantissa(54) = '0' then
1262 v.state := RENORM_C;
1263 else
1264 f_to_multiply.valid <= '1';
1265 v.state := MULT_1;
1266 end if;
1267 else
1268 if r.a.class = NAN or r.c.class = NAN then
1269 v.state := NAN_RESULT;
1270 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1271 (r.a.class = ZERO and r.c.class = INFINITY) then
1272 -- invalid operation, construct QNaN
1273 v.fpscr(FPSCR_VXIMZ) := '1';
1274 qnan_result := '1';
1275 elsif r.a.class = ZERO or r.a.class = INFINITY then
1276 -- result is +/- A
1277 arith_done := '1';
1278 else
1279 -- r.c.class is ZERO or INFINITY
1280 v.opsel_a := AIN_C;
1281 v.negate := r.a.negative;
1282 v.state := EXC_RESULT;
1283 end if;
1284 end if;
1285
1286 when DO_FDIV =>
1287 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1288 v.result_class := r.a.class;
1289 v.fpscr(FPSCR_FR) := '0';
1290 v.fpscr(FPSCR_FI) := '0';
1291 v.use_a := '1';
1292 v.use_b := '1';
1293 v.result_sign := r.a.negative xor r.b.negative;
1294 v.result_exp := r.a.exponent - r.b.exponent;
1295 v.count := "00";
1296 if r.a.class = FINITE and r.b.class = FINITE then
1297 -- Renormalize denorm operands
1298 if r.a.mantissa(54) = '0' then
1299 v.state := RENORM_A;
1300 elsif r.b.mantissa(54) = '0' then
1301 v.state := RENORM_B;
1302 else
1303 v.first := '1';
1304 v.state := DIV_2;
1305 end if;
1306 else
1307 if r.a.class = NAN or r.b.class = NAN then
1308 v.state := NAN_RESULT;
1309 elsif r.b.class = INFINITY then
1310 if r.a.class = INFINITY then
1311 v.fpscr(FPSCR_VXIDI) := '1';
1312 qnan_result := '1';
1313 else
1314 v.result_class := ZERO;
1315 end if;
1316 arith_done := '1';
1317 elsif r.b.class = ZERO then
1318 if r.a.class = ZERO then
1319 v.fpscr(FPSCR_VXZDZ) := '1';
1320 qnan_result := '1';
1321 else
1322 if r.a.class = FINITE then
1323 zero_divide := '1';
1324 end if;
1325 v.result_class := INFINITY;
1326 end if;
1327 arith_done := '1';
1328 else -- r.b.class = FINITE, result_class = r.a.class
1329 arith_done := '1';
1330 end if;
1331 end if;
1332
1333 when DO_FSEL =>
1334 v.fpscr(FPSCR_FR) := '0';
1335 v.fpscr(FPSCR_FI) := '0';
1336 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1337 v.opsel_a := AIN_C;
1338 else
1339 v.opsel_a := AIN_B;
1340 end if;
1341 v.quieten_nan := '0';
1342 v.state := EXC_RESULT;
1343
1344 when DO_FSQRT =>
1345 -- r.opsel_a = AIN_B
1346 v.result_class := r.b.class;
1347 v.result_sign := r.b.negative;
1348 v.fpscr(FPSCR_FR) := '0';
1349 v.fpscr(FPSCR_FI) := '0';
1350 v.use_b := '1';
1351 case r.b.class is
1352 when FINITE =>
1353 v.result_exp := r.b.exponent;
1354 if r.b.negative = '1' then
1355 v.fpscr(FPSCR_VXSQRT) := '1';
1356 qnan_result := '1';
1357 elsif r.b.mantissa(54) = '0' then
1358 v.state := RENORM_B;
1359 elsif r.b.exponent(0) = '0' then
1360 v.state := SQRT_1;
1361 else
1362 v.shift := to_signed(1, EXP_BITS);
1363 v.state := RENORM_B2;
1364 end if;
1365 when NAN =>
1366 v.state := NAN_RESULT;
1367 when ZERO =>
1368 -- result is B
1369 arith_done := '1';
1370 when INFINITY =>
1371 if r.b.negative = '1' then
1372 v.fpscr(FPSCR_VXSQRT) := '1';
1373 qnan_result := '1';
1374 -- else result is B
1375 end if;
1376 arith_done := '1';
1377 end case;
1378
1379 when DO_FRE =>
1380 -- r.opsel_a = AIN_B
1381 v.result_class := r.b.class;
1382 v.result_sign := r.b.negative;
1383 v.fpscr(FPSCR_FR) := '0';
1384 v.fpscr(FPSCR_FI) := '0';
1385 v.use_b := '1';
1386 case r.b.class is
1387 when FINITE =>
1388 v.result_exp := - r.b.exponent;
1389 if r.b.mantissa(54) = '0' then
1390 v.state := RENORM_B;
1391 else
1392 v.state := FRE_1;
1393 end if;
1394 when NAN =>
1395 v.state := NAN_RESULT;
1396 when INFINITY =>
1397 v.result_class := ZERO;
1398 arith_done := '1';
1399 when ZERO =>
1400 v.result_class := INFINITY;
1401 zero_divide := '1';
1402 arith_done := '1';
1403 end case;
1404
1405 when DO_FRSQRTE =>
1406 -- r.opsel_a = AIN_B
1407 v.result_class := r.b.class;
1408 v.result_sign := r.b.negative;
1409 v.fpscr(FPSCR_FR) := '0';
1410 v.fpscr(FPSCR_FI) := '0';
1411 v.use_b := '1';
1412 v.shift := to_signed(1, EXP_BITS);
1413 case r.b.class is
1414 when FINITE =>
1415 v.result_exp := r.b.exponent;
1416 if r.b.negative = '1' then
1417 v.fpscr(FPSCR_VXSQRT) := '1';
1418 qnan_result := '1';
1419 elsif r.b.mantissa(54) = '0' then
1420 v.state := RENORM_B;
1421 elsif r.b.exponent(0) = '0' then
1422 v.state := RSQRT_1;
1423 else
1424 v.state := RENORM_B2;
1425 end if;
1426 when NAN =>
1427 v.state := NAN_RESULT;
1428 when INFINITY =>
1429 if r.b.negative = '1' then
1430 v.fpscr(FPSCR_VXSQRT) := '1';
1431 qnan_result := '1';
1432 else
1433 v.result_class := ZERO;
1434 end if;
1435 arith_done := '1';
1436 when ZERO =>
1437 v.result_class := INFINITY;
1438 zero_divide := '1';
1439 arith_done := '1';
1440 end case;
1441
1442 when DO_FMADD =>
1443 -- fmadd, fmsub, fnmadd, fnmsub
1444 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1445 -- else AIN_B
1446 v.result_sign := r.a.negative;
1447 v.result_class := r.a.class;
1448 v.result_exp := r.a.exponent;
1449 v.fpscr(FPSCR_FR) := '0';
1450 v.fpscr(FPSCR_FI) := '0';
1451 v.use_a := '1';
1452 v.use_b := '1';
1453 v.use_c := '1';
1454 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1455 if r.a.class = FINITE and r.c.class = FINITE and
1456 (r.b.class = FINITE or r.b.class = ZERO) then
1457 v.is_subtract := not is_add;
1458 mulexp := r.a.exponent + r.c.exponent;
1459 v.result_exp := mulexp;
1460 -- Make sure A and C are normalized
1461 if r.a.mantissa(54) = '0' then
1462 v.state := RENORM_A;
1463 elsif r.c.mantissa(54) = '0' then
1464 v.state := RENORM_C;
1465 elsif r.b.class = ZERO then
1466 -- no addend, degenerates to multiply
1467 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1468 f_to_multiply.valid <= '1';
1469 v.is_multiply := '1';
1470 v.state := MULT_1;
1471 elsif r.madd_cmp = '0' then
1472 -- addend is bigger, do multiply first
1473 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1474 f_to_multiply.valid <= '1';
1475 v.state := FMADD_1;
1476 else
1477 -- product is bigger, shift B right and use it as the
1478 -- addend to the multiplier
1479 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1480 -- for subtract, multiplier does B - A * C
1481 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1482 v.result_exp := r.b.exponent;
1483 v.state := FMADD_2;
1484 end if;
1485 else
1486 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1487 v.state := NAN_RESULT;
1488 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1489 (r.a.class = INFINITY and r.c.class = ZERO) then
1490 -- invalid operation, construct QNaN
1491 v.fpscr(FPSCR_VXIMZ) := '1';
1492 qnan_result := '1';
1493 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1494 if r.b.class = INFINITY and is_add = '0' then
1495 -- invalid operation, construct QNaN
1496 v.fpscr(FPSCR_VXISI) := '1';
1497 qnan_result := '1';
1498 else
1499 -- result is infinity
1500 v.result_class := INFINITY;
1501 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1502 arith_done := '1';
1503 end if;
1504 else
1505 -- Here A is zero, C is zero, or B is infinity
1506 -- Result is +/-B in all of those cases
1507 v.opsel_a := AIN_B;
1508 if r.b.class /= ZERO or is_add = '1' then
1509 v.negate := not (r.insn(1) xor r.insn(2));
1510 else
1511 -- have to be careful about rule for 0 - 0 result sign
1512 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1513 end if;
1514 v.state := EXC_RESULT;
1515 end if;
1516 end if;
1517
1518 when RENORM_A =>
1519 renormalize := '1';
1520 v.state := RENORM_A2;
1521 if r.insn(4) = '1' then
1522 v.opsel_a := AIN_C;
1523 else
1524 v.opsel_a := AIN_B;
1525 end if;
1526
1527 when RENORM_A2 =>
1528 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1529 set_a := '1';
1530 v.result_exp := new_exp;
1531 if r.insn(4) = '1' then
1532 if r.c.mantissa(54) = '1' then
1533 if r.insn(3) = '0' or r.b.class = ZERO then
1534 v.first := '1';
1535 v.state := MULT_1;
1536 else
1537 v.madd_cmp := '0';
1538 if new_exp + 1 >= r.b.exponent then
1539 v.madd_cmp := '1';
1540 end if;
1541 v.opsel_a := AIN_B;
1542 v.state := DO_FMADD;
1543 end if;
1544 else
1545 v.state := RENORM_C;
1546 end if;
1547 else
1548 if r.b.mantissa(54) = '1' then
1549 v.first := '1';
1550 v.state := DIV_2;
1551 else
1552 v.state := RENORM_B;
1553 end if;
1554 end if;
1555
1556 when RENORM_B =>
1557 renormalize := '1';
1558 renorm_sqrt := r.is_sqrt;
1559 v.state := RENORM_B2;
1560
1561 when RENORM_B2 =>
1562 set_b := '1';
1563 if r.is_sqrt = '0' then
1564 v.result_exp := r.result_exp + r.shift;
1565 else
1566 v.result_exp := new_exp;
1567 end if;
1568 v.opsel_a := AIN_B;
1569 v.state := LOOKUP;
1570
1571 when RENORM_C =>
1572 renormalize := '1';
1573 v.state := RENORM_C2;
1574
1575 when RENORM_C2 =>
1576 set_c := '1';
1577 v.result_exp := new_exp;
1578 if r.insn(3) = '0' or r.b.class = ZERO then
1579 v.first := '1';
1580 v.state := MULT_1;
1581 else
1582 v.madd_cmp := '0';
1583 if new_exp + 1 >= r.b.exponent then
1584 v.madd_cmp := '1';
1585 end if;
1586 v.opsel_a := AIN_B;
1587 v.state := DO_FMADD;
1588 end if;
1589
1590 when ADD_1 =>
1591 -- transferring B to R
1592 v.shift := r.b.exponent - r.a.exponent;
1593 v.result_exp := r.b.exponent;
1594 v.longmask := '0';
1595 v.state := ADD_SHIFT;
1596
1597 when ADD_SHIFT =>
1598 -- r.shift = - exponent difference, r.longmask = 0
1599 opsel_r <= RES_SHIFT;
1600 v.x := s_nz;
1601 set_x := '1';
1602 v.longmask := r.single_prec;
1603 if r.add_bsmall = '1' then
1604 v.opsel_a := AIN_A;
1605 else
1606 v.opsel_a := AIN_B;
1607 end if;
1608 v.state := ADD_2;
1609
1610 when ADD_2 =>
1611 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1612 opsel_b <= BIN_R;
1613 opsel_binv <= r.is_subtract;
1614 carry_in <= r.is_subtract and not r.x;
1615 v.shift := to_signed(-1, EXP_BITS);
1616 v.state := ADD_3;
1617
1618 when ADD_3 =>
1619 -- check for overflow or negative result (can't get both)
1620 -- r.shift = -1
1621 if r.r(63) = '1' then
1622 -- result is opposite sign to expected
1623 v.result_sign := not r.result_sign;
1624 opsel_ainv <= '1';
1625 carry_in <= '1';
1626 v.state := FINISH;
1627 elsif r.r(55) = '1' then
1628 -- sum overflowed, shift right
1629 opsel_r <= RES_SHIFT;
1630 set_x := '1';
1631 if exp_huge = '1' then
1632 v.state := ROUND_OFLOW;
1633 else
1634 v.state := ROUNDING;
1635 end if;
1636 elsif r.r(54) = '1' then
1637 set_x := '1';
1638 v.state := ROUNDING;
1639 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1640 -- r.x must be zero at this point
1641 v.result_class := ZERO;
1642 if r.is_subtract = '1' then
1643 -- set result sign depending on rounding mode
1644 v.result_sign := r.round_mode(1) and r.round_mode(0);
1645 end if;
1646 arith_done := '1';
1647 else
1648 renormalize := '1';
1649 v.state := NORMALIZE;
1650 end if;
1651
when CMP_1 =>
    -- Second step of fcmp for operands with equal sign and exponent.
    -- On entry r.opsel_a = AIN_A.  The adder's B input is R with
    -- inversion and a carry-in of 1; DO_FCMP arranged for B to be in R,
    -- so this presumably forms A - B for CMP_2 to inspect.
    v.state := CMP_2;
    carry_in <= '1';
    opsel_binv <= '1';
    opsel_b <= BIN_R;
1658
when CMP_2 =>
    -- Final step of fcmp: turn the mantissa difference held in R into
    -- the compare result, mirror it into the FPSCR FL..FU bits and finish.
    v.instr_done := '1';
    v.state := IDLE;
    -- Default: A is larger in magnitude
    v.cr_result := r.a.negative & not r.a.negative & "00";
    if r.r(63) = '1' then
        -- A is smaller in magnitude
        v.cr_result := not r.a.negative & r.a.negative & "00";
    elsif (r_hi_nz or r_lo_nz) = '0' then
        -- difference is zero: equal
        v.cr_result := "0010";
    end if;
    v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1671
1672 when MULT_1 =>
1673 f_to_multiply.valid <= r.first;
1674 opsel_r <= RES_MULT;
1675 if multiply_to_f.valid = '1' then
1676 v.state := FINISH;
1677 end if;
1678
1679 when FMADD_1 =>
1680 -- Addend is bigger here
1681 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1682 -- note v.shift is at most -2 here
1683 v.shift := r.result_exp - r.b.exponent;
1684 opsel_r <= RES_MULT;
1685 opsel_s <= S_MULT;
1686 set_s := '1';
1687 f_to_multiply.valid <= r.first;
1688 if multiply_to_f.valid = '1' then
1689 v.longmask := '0';
1690 v.state := ADD_SHIFT;
1691 end if;
1692
1693 when FMADD_2 =>
1694 -- Product is potentially bigger here
1695 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1696 set_s := '1';
1697 opsel_s <= S_SHIFT;
1698 v.shift := r.shift - to_signed(64, EXP_BITS);
1699 v.state := FMADD_3;
1700
1701 when FMADD_3 =>
1702 -- r.shift = addend exp - product exp
1703 opsel_r <= RES_SHIFT;
1704 v.first := '1';
1705 v.state := FMADD_4;
1706
1707 when FMADD_4 =>
1708 msel_add <= MULADD_RS;
1709 f_to_multiply.valid <= r.first;
1710 msel_inv <= r.is_subtract;
1711 opsel_r <= RES_MULT;
1712 opsel_s <= S_MULT;
1713 set_s := '1';
1714 if multiply_to_f.valid = '1' then
1715 v.state := FMADD_5;
1716 end if;
1717
1718 when FMADD_5 =>
1719 -- negate R:S:X if negative
1720 if r.r(63) = '1' then
1721 v.result_sign := not r.result_sign;
1722 opsel_ainv <= '1';
1723 carry_in <= not (s_nz or r.x);
1724 opsel_s <= S_NEG;
1725 set_s := '1';
1726 end if;
1727 v.shift := to_signed(56, EXP_BITS);
1728 v.state := FMADD_6;
1729
1730 when FMADD_6 =>
1731 -- r.shift = 56 (or 0, but only if r is now nonzero)
1732 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1733 if s_nz = '0' then
1734 -- must be a subtraction, and r.x must be zero
1735 v.result_class := ZERO;
1736 v.result_sign := r.round_mode(1) and r.round_mode(0);
1737 arith_done := '1';
1738 else
1739 -- R is all zeroes but there are non-zero bits in S
1740 -- so shift them into R and set S to 0
1741 opsel_r <= RES_SHIFT;
1742 set_s := '1';
1743 -- stay in state FMADD_6
1744 end if;
1745 elsif r.r(56 downto 54) = "001" then
1746 v.state := FINISH;
1747 else
1748 renormalize := '1';
1749 v.state := NORMALIZE;
1750 end if;
1751
1752 when LOOKUP =>
1753 -- r.opsel_a = AIN_B
1754 -- wait one cycle for inverse_table[B] lookup
1755 v.first := '1';
1756 if r.insn(4) = '0' then
1757 if r.insn(3) = '0' then
1758 v.state := DIV_2;
1759 else
1760 v.state := SQRT_1;
1761 end if;
1762 elsif r.insn(2) = '0' then
1763 v.state := FRE_1;
1764 else
1765 v.state := RSQRT_1;
1766 end if;
1767
1768 when DIV_2 =>
1769 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1770 msel_1 <= MUL1_B;
1771 msel_add <= MULADD_CONST;
1772 msel_inv <= '1';
1773 if r.count = 0 then
1774 msel_2 <= MUL2_LUT;
1775 else
1776 msel_2 <= MUL2_P;
1777 end if;
1778 set_y := r.first;
1779 pshift := '1';
1780 f_to_multiply.valid <= r.first;
1781 if multiply_to_f.valid = '1' then
1782 v.first := '1';
1783 v.count := r.count + 1;
1784 v.state := DIV_3;
1785 end if;
1786
1787 when DIV_3 =>
1788 -- compute Y = P = P * Y
1789 msel_1 <= MUL1_Y;
1790 msel_2 <= MUL2_P;
1791 f_to_multiply.valid <= r.first;
1792 pshift := '1';
1793 if multiply_to_f.valid = '1' then
1794 v.first := '1';
1795 if r.count = 3 then
1796 v.state := DIV_4;
1797 else
1798 v.state := DIV_2;
1799 end if;
1800 end if;
1801
1802 when DIV_4 =>
1803 -- compute R = P = A * Y (quotient)
1804 msel_1 <= MUL1_A;
1805 msel_2 <= MUL2_P;
1806 set_y := r.first;
1807 f_to_multiply.valid <= r.first;
1808 pshift := '1';
1809 if multiply_to_f.valid = '1' then
1810 opsel_r <= RES_MULT;
1811 v.first := '1';
1812 v.state := DIV_5;
1813 end if;
1814
1815 when DIV_5 =>
1816 -- compute P = A - B * R (remainder)
1817 msel_1 <= MUL1_B;
1818 msel_2 <= MUL2_R;
1819 msel_add <= MULADD_A;
1820 msel_inv <= '1';
1821 f_to_multiply.valid <= r.first;
1822 if multiply_to_f.valid = '1' then
1823 v.state := DIV_6;
1824 end if;
1825
1826 when DIV_6 =>
1827 -- test if remainder is 0 or >= B
1828 if pcmpb_lt = '1' then
1829 -- quotient is correct, set X if remainder non-zero
1830 v.x := r.p(58) or px_nz;
1831 else
1832 -- quotient needs to be incremented by 1
1833 carry_in <= '1';
1834 v.x := not pcmpb_eq;
1835 end if;
1836 v.state := FINISH;
1837
1838 when FRE_1 =>
1839 opsel_r <= RES_MISC;
1840 misc_sel <= "0111";
1841 v.shift := to_signed(1, EXP_BITS);
1842 v.state := NORMALIZE;
1843
1844 when FTDIV_1 =>
1845 v.cr_result(1) := exp_tiny or exp_huge;
1846 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1847 v.instr_done := '1';
1848 v.state := IDLE;
1849 else
1850 v.shift := r.a.exponent;
1851 v.doing_ftdiv := "10";
1852 end if;
1853
1854 when RSQRT_1 =>
1855 opsel_r <= RES_MISC;
1856 misc_sel <= "0111";
1857 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1858 v.result_exp := - sqrt_exp;
1859 v.shift := to_signed(1, EXP_BITS);
1860 v.state := NORMALIZE;
1861
1862 when SQRT_1 =>
1863 -- put invsqr[B] in R and compute P = invsqr[B] * B
1864 -- also transfer B (in R) to A
1865 set_a := '1';
1866 opsel_r <= RES_MISC;
1867 misc_sel <= "0111";
1868 msel_1 <= MUL1_B;
1869 msel_2 <= MUL2_LUT;
1870 f_to_multiply.valid <= '1';
1871 v.shift := to_signed(-1, EXP_BITS);
1872 v.count := "00";
1873 v.state := SQRT_2;
1874
when SQRT_2 =>
    -- r.shift = -1, so this moves R one place to the right.
    -- The multiplier result is not expected in this cycle.
    opsel_r <= RES_SHIFT;
    v.state := SQRT_3;
    v.first := '1';
1882
when SQRT_3 =>
    -- Copy R into Y (on the first cycle) and wait for the product
    set_y := r.first;
    msel_2 <= MUL2_R;
    pshift := '1';
    if multiply_to_f.valid = '1' then
        -- product has arrived: load it into R
        opsel_r <= RES_MULT;
        v.state := SQRT_4;
        v.first := '1';
    end if;
1894
when SQRT_4 =>
    -- Multiplier computes 1.5 - Y * P
    msel_inv <= '1';
    msel_add <= MULADD_CONST;
    msel_1 <= MUL1_Y;
    msel_2 <= MUL2_P;
    pshift := '1';
    f_to_multiply.valid <= r.first;
    if multiply_to_f.valid = '1' then
        v.state := SQRT_5;
    end if;
1906
when SQRT_5 =>
    -- Start the multiply Y = Y * P
    f_to_multiply.valid <= '1';
    msel_1 <= MUL1_Y;
    msel_2 <= MUL2_P;
    v.state := SQRT_6;
    v.first := '1';
1914
when SQRT_6 =>
    -- Queue the second multiply, R = R * P, behind the first one
    msel_1 <= MUL1_R;
    msel_2 <= MUL2_P;
    pshift := '1';
    f_to_multiply.valid <= r.first;
    if multiply_to_f.valid = '1' then
        v.state := SQRT_7;
        v.first := '1';
    end if;
1925
when SQRT_7 =>
    -- The first multiply has finished: capture its result in Y
    set_y := r.first;
    msel_2 <= MUL2_P;
    -- The second multiply should already have completed too
    pshift := '1';
    if multiply_to_f.valid = '1' then
        -- second product goes into R, and one more pass is counted
        opsel_r <= RES_MULT;
        v.count := r.count + 1;
        v.first := '1';
        if r.count < 2 then
            -- go round the SQRT_4 .. SQRT_7 loop again
            v.state := SQRT_4;
        else
            v.state := SQRT_8;
        end if;
    end if;
1944
when SQRT_8 =>
    -- P = A - R * R; the result may be positive or negative.
    -- A holds the original B operand (placed there earlier).
    msel_inv <= '1';
    msel_add <= MULADD_A;
    msel_1 <= MUL1_R;
    msel_2 <= MUL2_R;
    f_to_multiply.valid <= r.first;
    pshift := '1';
    if multiply_to_f.valid = '1' then
        v.state := SQRT_9;
        v.first := '1';
    end if;
1958
when SQRT_9 =>
    -- P = P * Y
    -- Y estimates 1/sqrt(B), so the product estimates the correction
    -- that R still needs.  The error may be negative while the
    -- multiplier is unsigned, so the high bits of the product can be
    -- wrong; the lowest 8 bits are nevertheless correct, and they are
    -- all that is needed after 3 passes through SQRT_4 .. SQRT_7.
    msel_1 <= MUL1_Y;
    msel_2 <= MUL2_P;
    f_to_multiply.valid <= r.first;
    pshift := '1';
    if multiply_to_f.valid = '1' then
        v.state := SQRT_10;
    end if;
1974
when SQRT_10 =>
    -- R := R + (low 8 bits of P, sign-extended, divided by 4).
    -- The factor of 4 accounts for R being in 10.54 format while
    -- P is in 8.56 format.
    opsel_b <= BIN_PS6;
    -- result exponent is half of B's exponent (arithmetic shift right)
    sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
    v.result_exp := sqrt_exp;
    v.first := '1';
    v.shift := to_signed(1, EXP_BITS);
    v.state := SQRT_11;
1986
when SQRT_11 =>
    -- Remainder: P = A - R * R
    msel_inv <= '1';
    msel_add <= MULADD_A;
    msel_1 <= MUL1_R;
    msel_2 <= MUL2_R;
    f_to_multiply.valid <= r.first;
    -- In parallel, load B with 2 * R + 1 so P can be compared with it
    set_b := r.first;
    shiftin := '1';
    if multiply_to_f.valid = '1' then
        v.state := SQRT_12;
    end if;
2000
when SQRT_12 =>
    -- Compare the remainder with B = 2*R + 1 to decide whether the
    -- root needs a final correction.
    if pcmpb_lt /= '1' then
        -- remainder is not below B: add 1 to the root;
        -- X is clear only when P equals B
        carry_in <= '1';
        v.x := not pcmpb_eq;
    else
        -- root is already right; X flags a non-zero remainder
        v.x := r.p(58) or px_nz;
    end if;
    v.state := FINISH;
2012
when INT_SHIFT =>
    -- On entry r.shift = b.exponent - 52
    set_x := '1';
    opsel_r <= RES_SHIFT;
    -- the rounding state that follows shifts by -2
    v.shift := to_signed(-2, EXP_BITS);
    v.state := INT_ROUND;
2019
when INT_ROUND =>
    -- On entry r.shift = -2
    opsel_r <= RES_SHIFT;
    round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
    v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
    v.state := INT_FINAL;
    -- fcti*u* with a negative input that does not round to zero
    -- goes to the overflow state instead
    if r.insn(8) = '1' and r.result_sign = '1' and
        (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
        v.state := INT_OFLOW;
    end if;
2032
when INT_ISHIFT =>
    -- On entry r.shift = b.exponent - 54
    v.state := INT_FINAL;
    opsel_r <= RES_SHIFT;
2037
when INT_FINAL =>
    -- Apply the sign (invert A when negative) and the rounding increment
    carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
    opsel_ainv <= r.result_sign;
    -- Work out whether the value might have overflowed the target type
    case r.insn(9 downto 8) is
        when "00" =>
            -- fctiw[z]
            need_check := r.r(31) or (r.r(30) and not r.result_sign);
        when "01" =>
            -- fctiwu[z]
            need_check := r.r(31);
        when "10" =>
            -- fctid[z]
            need_check := r.r(63) or (r.r(62) and not r.result_sign);
        when others =>
            -- fctidu[z]
            need_check := r.r(63);
    end case;
    if need_check /= '1' then
        -- no overflow possible: finish now, flagging inexact if FI is set
        if r.fpscr(FPSCR_FI) = '1' then
            v.fpscr(FPSCR_XX) := '1';
        end if;
        arith_done := '1';
    else
        v.state := INT_CHECK;
    end if;
2061
when INT_CHECK =>
    -- Pick the top bit of the result for the word or doubleword form
    if r.insn(9) = '0' then
        msb := r.r(31);
    else
        msb := r.r(63);
    end if;
    -- misc source holding the saturated value for this conversion
    misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
    if (r.insn(8) = '0' and msb /= r.result_sign) or
        (r.insn(8) = '1' and msb /= '1') then
        -- top bit is not what was expected: invalid conversion,
        -- the misc value becomes the result
        opsel_r <= RES_MISC;
        invalid := '1';
        v.fpscr(FPSCR_VXCVI) := '1';
    elsif r.fpscr(FPSCR_FI) = '1' then
        v.fpscr(FPSCR_XX) := '1';
    end if;
    arith_done := '1';
2080
when INT_OFLOW =>
    -- Result comes from the misc source selected by the conversion
    -- type; the low select bit is the sign, forced to '1' for a NaN.
    opsel_r <= RES_MISC;
    if r.b.class = NAN then
        misc_sel <= '1' & r.insn(9 downto 8) & '1';
    else
        misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
    end if;
    invalid := '1';
    v.fpscr(FPSCR_VXCVI) := '1';
    arith_done := '1';
2090
when FRI_1 =>
    -- On entry r.shift = b.exponent - 52
    set_x := '1';
    opsel_r <= RES_SHIFT;
    v.state := ROUNDING;
2096
when FINISH =>
    -- for multiplies, non-zero px_nz also sets X
    if r.is_multiply = '1' and px_nz = '1' then
        v.x := '1';
    end if;
    if r.r(63 downto 54) = "0000000001" then
        -- top ten bits already read 0000000001: go on to rounding,
        -- via the underflow/overflow states if the exponent is out of range
        set_x := '1';
        if exp_tiny = '1' then
            v.shift := new_exp - min_exp;
            v.state := ROUND_UFLOW;
        elsif exp_huge = '1' then
            v.state := ROUND_OFLOW;
        else
            v.state := ROUNDING;
        end if;
    else
        -- otherwise request a renormalize shift count and normalize
        renormalize := '1';
        v.state := NORMALIZE;
    end if;
2115
when NORMALIZE =>
    -- R is known to be non-zero; shift it so that it has 9 leading zeroes.
    -- On entry r.shift = clz(r.r) - 9
    set_x := '1';
    opsel_r <= RES_SHIFT;
    if exp_tiny = '1' then
        v.state := ROUND_UFLOW;
        v.shift := new_exp - min_exp;
    elsif exp_huge = '1' then
        v.state := ROUND_OFLOW;
    else
        v.state := ROUNDING;
    end if;
2129
when ROUND_UFLOW =>
    -- On entry r.shift = - (amount by which the exponent underflows)
    v.tiny := '1';
    if r.fpscr(FPSCR_UE) /= '0' then
        -- Underflow exception enabled:
        -- a denormalized value must be normalized before rounding
        v.fpscr(FPSCR_UX) := '1';
        v.result_exp := r.result_exp + bias_exp;
        v.state := ROUNDING;
        if r.r(54) = '0' then
            renormalize := '1';
            v.state := NORMALIZE;
        end if;
    else
        -- Underflow exception disabled:
        -- the value must be denormalized before rounding
        set_x := '1';
        opsel_r <= RES_SHIFT;
        v.state := ROUNDING;
    end if;
2151
when ROUND_OFLOW =>
    v.fpscr(FPSCR_OX) := '1';
    if r.fpscr(FPSCR_OE) = '0' then
        -- Overflow exception disabled: the result is chosen by the
        -- rounding mode and is always inexact
        v.fpscr(FPSCR_FI) := '1';
        v.fpscr(FPSCR_XX) := '1';
        if r.round_mode(1 downto 0) = "00" or
            (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
            v.fpscr(FPSCR_FR) := '1';
            v.result_class := INFINITY;
        else
            v.fpscr(FPSCR_FR) := '0';
        end if;
        -- R is loaded with the mantissa of the largest representable
        -- number (DP or SP variant), with the maximum exponent
        misc_sel <= "001" & r.single_prec;
        opsel_r <= RES_MISC;
        v.result_exp := max_exp;
        arith_done := '1';
    else
        -- Overflow exception enabled: adjust the exponent by the
        -- bias and round as usual
        v.result_exp := r.result_exp - bias_exp;
        v.state := ROUNDING;
    end if;
2176
when ROUNDING =>
    opsel_mask <= '1';
    round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
    v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
    if round(1) = '1' then
        -- add one at the LSB position for the precision in use
        opsel_b <= BIN_RND;
        v.shift := to_signed(-1, EXP_BITS);
        v.state := ROUNDING_2;
    elsif r.r(54) = '0' then
        -- after masking, the result may be zero, or a denormalized
        -- value that has to be renormalized
        renormalize := '1';
        v.state := ROUNDING_3;
    else
        arith_done := '1';
    end if;
    -- round(0) set: record inexact, and underflow too if tiny
    if round(0) = '1' then
        v.fpscr(FPSCR_XX) := '1';
        if r.tiny = '1' then
            v.fpscr(FPSCR_UX) := '1';
        end if;
    end if;
2202
when ROUNDING_2 =>
    -- Look for a carry out of the mantissa caused by rounding.
    -- On entry r.shift = -1
    v.x := '0';
    if r.r(55) = '1' then
        -- rounding carried into bit 55: shift back by one place
        opsel_r <= RES_SHIFT;
        if exp_huge = '1' then
            v.state := ROUND_OFLOW;
        else
            arith_done := '1';
        end if;
    elsif r.r(54) = '0' then
        -- request a CLZ-based shift count so the result can be renormalized
        renormalize := '1';
        v.state := ROUNDING_3;
    else
        arith_done := '1';
    end if;
2221
when ROUNDING_3 =>
    -- On entry r.shift = clz(r.r) - 9
    mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
    if mant_nz /= '0' then
        -- non-zero mantissa: renormalize the rounded result
        opsel_r <= RES_SHIFT;
        v.denorm := exp_tiny;
        v.shift := new_exp - to_signed(-1022, EXP_BITS);
        if new_exp < to_signed(-1022, EXP_BITS) then
            -- exponent is below -1022: one more shift in DENORM
            v.state := DENORM;
        else
            arith_done := '1';
        end if;
    else
        -- the result is zero
        v.result_class := ZERO;
        if r.is_subtract = '1' then
            -- sign of the zero is determined by the rounding mode
            v.result_sign := r.round_mode(1) and r.round_mode(0);
        end if;
        arith_done := '1';
    end if;
2243
when DENORM =>
    -- On entry r.shift = result_exp - -1022
    arith_done := '1';
    opsel_r <= RES_SHIFT;
2248
when NAN_RESULT =>
    -- A NaN operand in use with mantissa bit 53 clear is a signalling NaN
    if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
        (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
        (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
        invalid := '1';
        v.fpscr(FPSCR_VXSNAN) := '1';
    end if;
    -- Select the first NaN operand, in the order A, B, C, as the result
    if r.use_a = '1' and r.a.class = NAN then
        v.opsel_a := AIN_A;
    elsif r.use_b = '1' and r.b.class = NAN then
        v.opsel_a := AIN_B;
    elsif r.use_c = '1' and r.c.class = NAN then
        v.opsel_a := AIN_C;
    end if;
    v.state := EXC_RESULT;
2265
when EXC_RESULT =>
    -- r.opsel_a (AIN_A, AIN_B or AIN_C) names the operand that
    -- becomes the result; copy its sign (xor negate), exponent and class.
    if r.opsel_a = AIN_B then
        v.result_class := r.b.class;
        v.result_exp := r.b.exponent;
        v.result_sign := r.b.negative xor r.negate;
    elsif r.opsel_a = AIN_C then
        v.result_class := r.c.class;
        v.result_exp := r.c.exponent;
        v.result_sign := r.c.negative xor r.negate;
    else
        v.result_class := r.a.class;
        v.result_exp := r.a.exponent;
        v.result_sign := r.a.negative xor r.negate;
    end if;
    arith_done := '1';
2283
2284 end case;
2285
-- Exception flags and completion handling common to all states
if zero_divide = '1' then
    v.fpscr(FPSCR_ZX) := '1';
end if;
if qnan_result = '1' then
    -- the result is a generated quiet NaN (misc source "0001"),
    -- positive sign, and the operation is invalid
    misc_sel <= "0001";
    opsel_r <= RES_MISC;
    v.result_sign := '0';
    v.result_class := NAN;
    invalid := '1';
    arith_done := '1';
end if;
if invalid = '1' then
    v.invalid := '1';
end if;
if arith_done = '1' then
    -- The result and FPRF are not written when an invalid-operation
    -- exception or a zero-divide exception is raised while enabled
    if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
        (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
        v.update_fprf := '1';
        v.writing_back := '1';
    end if;
    update_fx := '1';
    v.state := IDLE;
    v.instr_done := '1';
end if;
2312
-- Data path for the multiplier and for divide/square root

-- first multiplier operand
case msel_1 is
    when MUL1_A =>
        f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
    when MUL1_B =>
        f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
    when MUL1_Y =>
        f_to_multiply.data1 <= r.y;
    when others =>
        f_to_multiply.data1 <= r.r(61 downto 0) & "00";
end case;

-- second multiplier operand
case msel_2 is
    when MUL2_C =>
        f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
    when MUL2_LUT =>
        f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
    when MUL2_P =>
        f_to_multiply.data2 <= r.p;
    when others =>
        f_to_multiply.data2 <= r.r(61 downto 0) & "00";
end case;

-- addend, zero unless one of the selections below applies
maddend := (others => '0');
case msel_add is
    when MULADD_CONST =>
        -- constant in 16.112 format: 1.5 for square root, 2.0 otherwise
        if r.is_sqrt = '0' then
            maddend(113) := '1';
        else
            maddend(112 downto 111) := "11";
        end if;
    when MULADD_A =>
        -- A operand in 16.112 format
        maddend(121 downto 58) := r.a.mantissa;
    when MULADD_RS =>
        -- R concatenated with S, in 16.112 format
        maddend := "000000" & r.r & r.s & "00";
    when others =>
end case;

-- msel_inv sends the complemented addend and sets not_result
f_to_multiply.not_result <= msel_inv;
if msel_inv = '1' then
    f_to_multiply.addend <= not maddend;
else
    f_to_multiply.addend <= maddend;
end if;

-- Y is loaded from the second multiplier operand on request
if set_y = '1' then
    v.y := f_to_multiply.data2;
end if;

-- P captures a 64-bit slice of the product when it is valid;
-- pshift chooses which slice
if multiply_to_f.valid = '1' then
    if pshift /= '0' then
        v.p := multiply_to_f.result(119 downto 56);
    else
        v.p := multiply_to_f.result(63 downto 0);
    end if;
end if;
2367
-- Main data path: input multiplexers for A and B, an adder, a shifter,
-- count-leading-zeroes logic and a multiplexer for the result.

-- Shift count used to build the mask; longmask adds -29 to it
mshift := r.shift;
if r.longmask = '1' then
    mshift := r.shift + to_signed(-29, EXP_BITS);
end if;
-- all zeroes for counts >= 0, all ones for counts below -64,
-- otherwise derived from the low 6 bits of the count
if mshift >= to_signed(0, EXP_BITS) then
    mask := (others => '0');
elsif mshift < to_signed(-64, EXP_BITS) then
    mask := (others => '1');
else
    mask := right_mask(unsigned(mshift(5 downto 0)));
end if;

-- A input multiplexer
case r.opsel_a is
    when AIN_R =>
        in_a0 := r.r;
    when AIN_A =>
        in_a0 := r.a.mantissa;
    when AIN_B =>
        in_a0 := r.b.mantissa;
    when others =>
        in_a0 := r.c.mantissa;
end case;
-- with set_x, any A bit under the mask sets X
if (or (mask and in_a0)) = '1' and set_x = '1' then
    v.x := '1';
end if;
-- optional inversion of A
if opsel_ainv = '1' then
    in_a0 := not in_a0;
end if;
in_a <= in_a0;

-- B input multiplexer
case opsel_b is
    when BIN_ZERO =>
        in_b0 := (others => '0');
    when BIN_R =>
        in_b0 := r.r;
    when BIN_RND =>
        -- rounding increment: bit 31 for single precision, bit 2 otherwise
        round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
        in_b0 := round_inc;
    when others =>
        -- BIN_PS6: the 6 low bits of P/4, sign-extended to 64 bits
        in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
end case;
-- optional inversion of B
if opsel_binv = '1' then
    in_b0 := not in_b0;
end if;
in_b <= in_b0;

-- Shifter: operates on R followed by S (top S bit or'ed with shiftin);
-- counts outside -64 .. 63 give an all-zero result
if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
    shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
                            std_ulogic_vector(r.shift(6 downto 0)));
else
    shift_res := (others => '0');
end if;

-- Adder, with optional clearing of the bits below the precision
sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
if opsel_mask = '1' then
    sum(1 downto 0) := "00";
    if r.single_prec = '1' then
        sum(30 downto 2) := (others => '0');
    end if;
end if;

-- Result multiplexer
case opsel_r is
    when RES_SUM =>
        result <= sum;
    when RES_SHIFT =>
        result <= shift_res;
    when RES_MULT =>
        result <= multiply_to_f.result(121 downto 58);
    when others =>
        -- miscellaneous sources, chosen by misc_sel
        case misc_sel is
            when "0000" =>
                -- FPSCR, masked, in the low 32 bits
                misc := x"00000000" & (r.fpscr and fpscr_mask);
            when "0001" =>
                -- mantissa of a generated QNaN
                misc := x"0020000000000000";
            when "0010" =>
                -- mantissa of the largest representable DP number
                misc := x"007ffffffffffffc";
            when "0011" =>
                -- mantissa of the largest representable SP number
                misc := x"007fffff80000000";
            when "0100" =>
                -- result for fmrgow
                misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
            when "0110" =>
                -- result for fmrgew
                misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
            when "0111" =>
                -- inverse_est placed below 10 zero bits, zero-filled below
                misc := 10x"000" & inverse_est & 35x"000000000";
            when "1000" =>
                -- fctiw[z]: max positive result
                misc := x"000000007fffffff";
            when "1001" =>
                -- fctiw[z]: max negative result
                misc := x"ffffffff80000000";
            when "1010" =>
                -- fctiwu[z]: max positive result
                misc := x"00000000ffffffff";
            when "1100" =>
                -- fctid[z]: max positive result
                misc := x"7fffffffffffffff";
            when "1101" =>
                -- fctid[z]: max negative result
                misc := x"8000000000000000";
            when "1110" =>
                -- fctidu[z]: max positive result
                misc := x"ffffffffffffffff";
            when "1011" | "1111" =>
                -- fctiwu[z] / fctidu[z]: max negative result
                misc := x"0000000000000000";
            when others =>
                misc := x"0000000000000000";
        end case;
        result <= misc;
end case;
v.r := result;
-- S register update, only when requested
if set_s = '1' then
    case opsel_s is
        when S_NEG =>
            -- complement of S plus the complement of X
            v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
        when S_MULT =>
            -- product bits 57..2
            v.s := multiply_to_f.result(57 downto 2);
        when S_SHIFT =>
            -- top 56 bits of the shifter output; any of the
            -- remaining low 8 bits being set raises X
            v.s := shift_res(63 downto 8);
            if shift_res(7 downto 0) /= x"00" then
                v.x := '1';
            end if;
        when others =>
            v.s := (others => '0');
    end case;
end if;
2502
-- Optional reload of the A, B and C operand registers: the mantissa
-- comes from the shifter output and the exponent from new_exp.
if set_a = '1' then
    v.a.mantissa := shift_res;
    v.a.exponent := new_exp;
end if;
if set_b = '1' then
    v.b.mantissa := shift_res;
    v.b.exponent := new_exp;
end if;
if set_c = '1' then
    v.c.mantissa := shift_res;
    v.c.exponent := new_exp;
end if;
2515
-- When the shifter output is selected, the result exponent follows new_exp
if opsel_r = RES_SHIFT then
    v.result_exp := new_exp;
end if;

-- On a renormalize request, the next shift count is clz(R) - 9
if renormalize = '1' then
    clz := count_left_zeroes(r.r);
    if renorm_sqrt = '1' then
        -- force an odd count so that a denormalized value
        -- ends up with an even exponent
        clz(0) := '1';
    end if;
    v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
end if;
2528
-- Result output: raw R for integer results, otherwise R packed
-- together with sign, class and exponent by pack_dp
if r.int_result /= '1' then
    fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
                         r.single_prec, r.quieten_nan);
else
    fp_result <= r.r;
end if;
-- FPSCR C..FU field is refreshed from the result when requested
if r.update_fprf = '1' then
    v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
                                                     r.r(54) and not r.denorm);
end if;
2539
-- VX is the OR of the bits in VXSNAN..VXVC and VXSOFT..VXCVI
v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
                     (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
-- FEX is set when any bit in VX..XX is set along with its enable in VE..XE
v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
                          v.fpscr(FPSCR_VE downto FPSCR_XE));
-- FX is set when a bit in VX..XX is set that was clear in r.old_exc
if update_fx = '1' then
    if (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
        v.fpscr(FPSCR_FX) := '1';
    end if;
end if;
-- Record forms return FPSCR FX..OX as the CR result
if r.rc = '1' then
    v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
end if;
2551
v.illegal := illegal;
if illegal /= '1' then
    -- interrupt when the instruction completes with FEX set and
    -- r.fe_mode is set
    v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
    -- busy while not returning to IDLE or while an interrupt is pending
    if v.state /= IDLE or v.do_intr = '1' then
        v.busy := '1';
    end if;
else
    -- illegal instruction: raise an interrupt, with no completion,
    -- no writeback, and a return to IDLE
    v.state := IDLE;
    v.busy := '0';
    v.writing_back := '0';
    v.do_intr := '1';
    v.instr_done := '0';
end if;
2565
2566 rin <= v;
2567 end process;
2568
2569 end architecture behaviour;