FPU: Relax timing around multiplier output
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Top-level interface of the floating-point unit.
entity fpu is
    port (
        -- Clock and synchronous reset (rst is sampled on the rising clock edge)
        clk   : in  std_ulogic;
        rst   : in  std_ulogic;

        -- Instruction/operand record in, status record out
        -- (record types come from work.common; names suggest the peer is execute1)
        e_in  : in  Execute1toFPUType;
        e_out : out FPUToExecute1Type;

        -- Result record driven towards writeback
        w_out : out FPUToWritebackType
    );
end entity fpu;
25
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
28
29 constant EXP_BITS : natural := 13;
30
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
36 end record;
37
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
41 DO_FCFID, DO_FCTI,
42 DO_FRSP, DO_FRI,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
44 DO_FRE, DO_FRSQRTE,
45 DO_FSEL,
46 FRI_1,
47 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
48 CMP_1, CMP_2,
49 MULT_1,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
52 LOOKUP,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
54 FRE_1,
55 RSQRT_1,
56 FTDIV_1,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
62 FINISH, NORMALIZE,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
65 DENORM,
66 RENORM_A, RENORM_A2,
67 RENORM_B, RENORM_B2,
68 RENORM_C, RENORM_C2,
69 NAN_RESULT, EXC_RESULT);
70
71 type reg_type is record
72 state : state_t;
73 busy : std_ulogic;
74 instr_done : std_ulogic;
75 do_intr : std_ulogic;
76 op : insn_type_t;
77 insn : std_ulogic_vector(31 downto 0);
78 dest_fpr : gspr_index_t;
79 fe_mode : std_ulogic;
80 rc : std_ulogic;
81 is_cmp : std_ulogic;
82 single_prec : std_ulogic;
83 fpscr : std_ulogic_vector(31 downto 0);
84 a : fpu_reg_type;
85 b : fpu_reg_type;
86 c : fpu_reg_type;
87 r : std_ulogic_vector(63 downto 0); -- 10.54 format
88 s : std_ulogic_vector(55 downto 0); -- extended fraction
89 x : std_ulogic;
90 p : std_ulogic_vector(63 downto 0); -- 8.56 format
91 y : std_ulogic_vector(63 downto 0); -- 8.56 format
92 result_sign : std_ulogic;
93 result_class : fp_number_class;
94 result_exp : signed(EXP_BITS-1 downto 0);
95 shift : signed(EXP_BITS-1 downto 0);
96 writing_back : std_ulogic;
97 int_result : std_ulogic;
98 cr_result : std_ulogic_vector(3 downto 0);
99 cr_mask : std_ulogic_vector(7 downto 0);
100 old_exc : std_ulogic_vector(4 downto 0);
101 update_fprf : std_ulogic;
102 quieten_nan : std_ulogic;
103 tiny : std_ulogic;
104 denorm : std_ulogic;
105 round_mode : std_ulogic_vector(2 downto 0);
106 is_subtract : std_ulogic;
107 exp_cmp : std_ulogic;
108 madd_cmp : std_ulogic;
109 add_bsmall : std_ulogic;
110 is_multiply : std_ulogic;
111 is_sqrt : std_ulogic;
112 first : std_ulogic;
113 count : unsigned(1 downto 0);
114 doing_ftdiv : std_ulogic_vector(1 downto 0);
115 opsel_a : std_ulogic_vector(1 downto 0);
116 use_a : std_ulogic;
117 use_b : std_ulogic;
118 use_c : std_ulogic;
119 invalid : std_ulogic;
120 negate : std_ulogic;
121 longmask : std_ulogic;
122 end record;
123
124 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
125
126 signal r, rin : reg_type;
127
128 signal fp_result : std_ulogic_vector(63 downto 0);
129 signal opsel_b : std_ulogic_vector(1 downto 0);
130 signal opsel_r : std_ulogic_vector(1 downto 0);
131 signal opsel_s : std_ulogic_vector(1 downto 0);
132 signal opsel_ainv : std_ulogic;
133 signal opsel_mask : std_ulogic;
134 signal opsel_binv : std_ulogic;
135 signal in_a : std_ulogic_vector(63 downto 0);
136 signal in_b : std_ulogic_vector(63 downto 0);
137 signal result : std_ulogic_vector(63 downto 0);
138 signal carry_in : std_ulogic;
139 signal lost_bits : std_ulogic;
140 signal r_hi_nz : std_ulogic;
141 signal r_lo_nz : std_ulogic;
142 signal s_nz : std_ulogic;
143 signal misc_sel : std_ulogic_vector(3 downto 0);
144 signal f_to_multiply : MultiplyInputType;
145 signal multiply_to_f : MultiplyOutputType;
146 signal msel_1 : std_ulogic_vector(1 downto 0);
147 signal msel_2 : std_ulogic_vector(1 downto 0);
148 signal msel_add : std_ulogic_vector(1 downto 0);
149 signal msel_inv : std_ulogic;
150 signal inverse_est : std_ulogic_vector(18 downto 0);
151
152 -- opsel values
153 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
154 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
155 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
156 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
157
158 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
159 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
160 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
161 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
162
163 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
164 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
165 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
166 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
167
168 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
169 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
170 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
171 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
172
173 -- msel values
174 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
175 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
176 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
177 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
178
179 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
180 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
181 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
182 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
183
184 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
185 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
186 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
187 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
188
189 -- Inverse lookup table, indexed by the top 8 fraction bits
190 -- The first 256 entries are the reciprocal (1/x) lookup table,
191 -- and the remaining 768 entries are the reciprocal square root table.
192 -- Output range is [0.5, 1) in 0.19 format, though the top
193 -- bit isn't stored since it is always 1.
194 -- Each output value is the inverse of the center of the input
195 -- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
196 -- entry 1 is 1 / (1 + 3/512), etc.
197 signal inverse_table : lookup_table := (
198 -- 1/x lookup table
199 -- Unit bit is assumed to be 1, so input range is [1, 2)
200 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
201 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
202 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
203 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
204 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
205 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
206 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
207 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
208 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
209 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
210 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
211 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
212 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
213 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
214 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
215 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
216 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
217 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
218 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
219 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
220 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
221 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
222 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
223 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
224 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
225 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
226 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
227 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
228 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
229 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
230 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
231 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
232 -- 1/sqrt(x) lookup table
233 -- Input is in the range [1, 4), i.e. two bits to the left of the
234 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
235 -- 1.0 ... 1.9999
236 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
237 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
238 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
239 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
240 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
241 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
242 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
243 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
244 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
245 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
246 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
247 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
248 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
249 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
250 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
251 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
252 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
253 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
254 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
255 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
256 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
257 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
258 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
259 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
260 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
261 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
262 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
263 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
264 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
265 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
266 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
267 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
268 -- 2.0 ... 2.9999
269 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
270 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
271 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
272 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
273 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
274 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
275 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
276 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
277 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
278 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
279 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
280 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
281 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
282 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
283 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
284 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
285 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
286 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
287 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
288 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
289 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
290 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
291 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
292 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
293 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
294 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
295 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
296 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
297 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
298 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
299 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
300 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
301 -- 3.0 ... 3.9999
302 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
303 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
304 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
305 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
306 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
307 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
308 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
309 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
310 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
311 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
312 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
313 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
314 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
315 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
316 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
317 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
318 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
319 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
320 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
321 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
322 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
323 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
324 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
325 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
326 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
327 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
328 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
329 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
330 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
331 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
332 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
333 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
334 );
335
-- Combined left/right shifter: 120-bit input, 64-bit output.
-- Conceptually shifts inp left by 'shift' bits and returns the upper
-- 64 bits of the shifted value.  'shift' is a 7-bit two's complement
-- count in the range -64..63; negative counts shift right, filling
-- with zeros from the left.
-- Implemented as three cascaded multiplexer stages:
--   stage 1 (shift(6 downto 5)): left by 0 or 32, or right by 64 or 32
--   stage 2 (shift(4 downto 3)): left by 0, 8, 16 or 24
--   stage 3 (shift(2 downto 0)): left by 0..7
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    constant ZEROS  : std_ulogic_vector(63 downto 0) := (others => '0');
    variable coarse : std_ulogic_vector(94 downto 0);
    variable medium : std_ulogic_vector(70 downto 0);
    variable fine   : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: steps of 32, including the two right-shift cases
    case shift(6 downto 5) is
        when "00"   => coarse := inp(119 downto 25);
        when "01"   => coarse := inp(87 downto 0) & "0000000";
        when "10"   => coarse := ZEROS & inp(119 downto 89);
        when others => coarse := ZEROS(31 downto 0) & inp(119 downto 57);
    end case;

    -- Stage 2: steps of 8
    case shift(4 downto 3) is
        when "00"   => medium := coarse(94 downto 24);
        when "01"   => medium := coarse(86 downto 16);
        when "10"   => medium := coarse(78 downto 8);
        when others => medium := coarse(70 downto 0);
    end case;

    -- Stage 3: single-bit steps
    case shift(2 downto 0) is
        when "000"  => fine := medium(70 downto 7);
        when "001"  => fine := medium(69 downto 6);
        when "010"  => fine := medium(68 downto 5);
        when "011"  => fine := medium(67 downto 4);
        when "100"  => fine := medium(66 downto 3);
        when "101"  => fine := medium(65 downto 2);
        when "110"  => fine := medium(64 downto 1);
        when others => fine := medium(63 downto 0);
    end case;

    return fine;
end;
388
-- Build a 64-bit mask with 0-bits on the left and 1-bits on the right
-- that selects the bits which will be lost by a right shift.
-- 'shift' is the bottom 6 bits of a negative (i.e. right) shift count.
-- Bit position p is set when (63 - p) >= shift, so shift = 0 yields an
-- all-ones mask and shift = 63 sets only bit 0.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0);
begin
    for pos in mask'range loop
        if (63 - pos) >= shift then
            mask(pos) := '1';
        else
            mask(pos) := '0';
        end if;
    end loop;
    return mask;
end;
404
-- Unpack a 64-bit register value into sign, exponent, mantissa and class.
-- When is_int = '0' the value is treated as a double-precision number:
--   * the exponent is unbiased (field - 1023); an all-zero exponent
--     field is given the exponent -1022
--   * the mantissa holds the implied unit bit (1 when the exponent field
--     is non-zero) at bit 54, the 52 fraction bits at 53..2, and zeros
--     elsewhere
-- Otherwise the value is treated as an integer: the mantissa is the raw
-- 64 bits, the exponent is zero and the class is ZERO or FINITE.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;      -- some exponent bit is 1
    variable exp_all  : std_ulogic;      -- every exponent bit is 1
    variable frac_any : std_ulogic;      -- some fraction bit is 1
    variable key      : std_ulogic_vector(2 downto 0);
begin
    dec.negative := fpr(63);
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));

    if is_int /= '0' then
        -- Integer operand: keep all 64 bits, only distinguish zero from non-zero
        dec.exponent := (others => '0');
        dec.mantissa := fpr;
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
    else
        -- Floating-point operand
        if exp_any = '0' then
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;

        dec.mantissa := (others => '0');
        dec.mantissa(54) := exp_any;
        dec.mantissa(53 downto 2) := fpr(51 downto 0);

        key := exp_all & exp_any & frac_any;
        case key is
            when "000" =>
                dec.class := ZERO;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                dec.class := FINITE;
            when "110" =>
                dec.class := INFINITY;
            when others =>
                dec.class := NAN;
        end case;
    end if;

    return dec;
end;
444
-- Assemble a 64-bit double-precision encoding from its components.
--   ZERO     : only the sign bit is set
--   FINITE   : the exponent field is exp + 1023 when mantissa(54) (the
--              unit bit) is 1, otherwise it stays zero; the fraction
--              comes from mantissa(53 downto 2)
--   INFINITY : exponent field all ones, fraction zero
--   NAN      : exponent field all ones; the top fraction bit is
--              quieten_nan or mantissa(53), the rest comes from the mantissa
-- When single_prec = '1' the low 29 fraction bits are left as zero.
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    constant EXP_ONES : std_ulogic_vector(10 downto 0) := "11111111111";
    variable packed   : std_ulogic_vector(63 downto 0);
begin
    packed := (others => '0');
    packed(63) := sign;

    -- Exponent field and upper fraction bits
    if class = FINITE then
        if mantissa(54) = '1' then
            -- normalized number
            packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
        end if;
        packed(51 downto 29) := mantissa(53 downto 31);
    elsif class = INFINITY then
        packed(62 downto 52) := EXP_ONES;
    elsif class = NAN then
        packed(62 downto 52) := EXP_ONES;
        packed(51) := quieten_nan or mantissa(53);
        packed(50 downto 29) := mantissa(52 downto 31);
    end if;

    -- Lower fraction bits exist only for double-precision finite/NaN results
    if (class = FINITE or class = NAN) and single_prec = '0' then
        packed(28 downto 0) := mantissa(30 downto 2);
    end if;

    return packed;
end;
476
-- Decide whether the mantissa must be incremented when rounding.
-- Returns a 2-bit vector: bit 1 = increment, bit 0 = inexact.
-- The guard and round bits are mantissa(1 downto 0) for double
-- precision and mantissa(30 downto 29) for single precision; x is the
-- sticky bit.  For single precision, x is assumed to already include
-- the bottom 29 bits of the mantissa (usually arranged by setting
-- set_x = 1 earlier).
-- rn(1 downto 0) selects the mode: "00" nearest, "01" towards zero,
-- otherwise towards +/- infinity (increment when rn(0) = sign and the
-- result is inexact).  In nearest mode an exact tie rounds to even
-- unless rn(2) is set, in which case the guard bit alone decides.
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable grs      : std_ulogic_vector(2 downto 0);  -- guard, round, sticky
    variable low_bit  : std_ulogic;                     -- LSB of the kept mantissa
    variable inexact  : std_ulogic;
    variable round_up : std_ulogic;
    variable verdict  : std_ulogic_vector(1 downto 0);
begin
    if single_prec = '0' then
        low_bit := mantissa(2);
        grs := mantissa(1 downto 0) & x;
    else
        low_bit := mantissa(31);
        grs := mantissa(30 downto 29) & x;
    end if;

    inexact  := or (grs);
    round_up := '0';

    if rn(1 downto 0) = "00" then
        -- round to nearest
        if grs = "100" and rn(2) = '0' then
            round_up := low_bit;        -- tie, round to even
        else
            round_up := grs(2);
        end if;
    elsif rn(1 downto 0) /= "01" then
        -- round towards +/- inf ("01", round towards zero, never increments)
        if rn(0) = sign then
            -- round towards greater magnitude
            round_up := inexact;
        end if;
    end if;

    verdict(1) := round_up;
    verdict(0) := inexact;
    return verdict;
end;
514
-- Compute the 5-bit result-flag value to write into the FPSCR for a
-- result of the given sign and class.  unitbit is the unit bit of a
-- FINITE result; when it is 0 the leading flag bit is set.
-- NOTE(review): presumably this is the FPSCR FPRF field -- confirm
-- against the place where the value is stored.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
begin
    if class = ZERO then
        return sign & "0010";
    elsif class = FINITE then
        return (not unitbit) & sign & (not sign) & "00";
    elsif class = INFINITY then
        return '0' & sign & (not sign) & "01";
    else
        -- NAN
        return "10001";
    end if;
end;
530
531 begin
532 fpu_multiply_0: entity work.multiply
533 port map (
534 clk => clk,
535 m_in => f_to_multiply,
536 m_out => multiply_to_f
537 );
538
-- State register for the FPU.
-- On each rising clock edge, either applies the synchronous reset to
-- the control fields (state, busy, instr_done, do_intr, fpscr,
-- writing_back) or loads the next-state value rin into r.
-- Fields not listed in the reset branch keep their previous value
-- through reset.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' then
            r.state <= IDLE;
            r.busy <= '0';
            r.instr_done <= '0';
            r.do_intr <= '0';
            r.fpscr <= (others => '0');
            r.writing_back <= '0';
        else
            -- A new instruction must only arrive while the state machine
            -- is idle; report what went wrong instead of aborting the
            -- simulation with the default assertion text.
            assert not (r.state /= IDLE and e_in.valid = '1')
                report "fpu: e_in.valid asserted while FPU state is not IDLE"
                severity failure;
            r <= rin;
        end if;
    end if;
end process;
555
-- Clocked read of the inverse estimate lookup table.
-- The 10-bit index is a 2-bit block select followed by
-- r.b.mantissa(53 downto 46).  The block select is
-- r.b.mantissa(55 downto 54) when r.is_sqrt = '1' and "00" otherwise.
-- The stored 18-bit entry is extended with a leading '1' to form the
-- 19-bit inverse_est.
lut_access: process(clk)
    variable block_sel : std_ulogic_vector(1 downto 0);
    variable lut_index : std_ulogic_vector(9 downto 0);
begin
    if rising_edge(clk) then
        block_sel := "00";
        if r.is_sqrt = '1' then
            block_sel := r.b.mantissa(55 downto 54);
        end if;
        lut_index := block_sel & r.b.mantissa(53 downto 46);
        inverse_est <= '1' & inverse_table(to_integer(unsigned(lut_index)));
    end if;
end process;
571
-- Writeback outputs, all driven directly from the state register
-- (write_data comes from the fp_result signal).
w_out.write_data      <= fp_result;
w_out.write_reg       <= r.dest_fpr;
w_out.write_enable    <= r.writing_back;
w_out.valid           <= r.instr_done and (not r.do_intr);

-- CR update: requested when the instruction completes with rc or is_cmp set.
-- The 4-bit CR result is replicated into all eight CR fields; the mask
-- picks the field(s) to write.
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_data   <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result & r.cr_result & r.cr_result;

-- Status outputs
e_out.busy      <= r.busy;
e_out.exception <= r.fpscr(FPSCR_FEX);
e_out.interrupt <= r.do_intr;
584
585 fpu_1: process(all)
586 variable v : reg_type;
587 variable adec : fpu_reg_type;
588 variable bdec : fpu_reg_type;
589 variable cdec : fpu_reg_type;
590 variable fpscr_mask : std_ulogic_vector(31 downto 0);
591 variable illegal : std_ulogic;
592 variable j, k : integer;
593 variable flm : std_ulogic_vector(7 downto 0);
594 variable int_input : std_ulogic;
595 variable mask : std_ulogic_vector(63 downto 0);
596 variable in_a0 : std_ulogic_vector(63 downto 0);
597 variable in_b0 : std_ulogic_vector(63 downto 0);
598 variable misc : std_ulogic_vector(63 downto 0);
599 variable shift_res : std_ulogic_vector(63 downto 0);
600 variable round : std_ulogic_vector(1 downto 0);
601 variable update_fx : std_ulogic;
602 variable arith_done : std_ulogic;
603 variable invalid : std_ulogic;
604 variable zero_divide : std_ulogic;
605 variable mant_nz : std_ulogic;
606 variable min_exp : signed(EXP_BITS-1 downto 0);
607 variable max_exp : signed(EXP_BITS-1 downto 0);
608 variable bias_exp : signed(EXP_BITS-1 downto 0);
609 variable new_exp : signed(EXP_BITS-1 downto 0);
610 variable exp_tiny : std_ulogic;
611 variable exp_huge : std_ulogic;
612 variable renormalize : std_ulogic;
613 variable clz : std_ulogic_vector(5 downto 0);
614 variable set_x : std_ulogic;
615 variable mshift : signed(EXP_BITS-1 downto 0);
616 variable need_check : std_ulogic;
617 variable msb : std_ulogic;
618 variable is_add : std_ulogic;
619 variable set_a : std_ulogic;
620 variable set_b : std_ulogic;
621 variable set_c : std_ulogic;
622 variable set_y : std_ulogic;
623 variable set_s : std_ulogic;
624 variable qnan_result : std_ulogic;
625 variable px_nz : std_ulogic;
626 variable pcmpb_eq : std_ulogic;
627 variable pcmpb_lt : std_ulogic;
628 variable pshift : std_ulogic;
629 variable renorm_sqrt : std_ulogic;
630 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
631 variable shiftin : std_ulogic;
632 variable mulexp : signed(EXP_BITS-1 downto 0);
633 variable maddend : std_ulogic_vector(127 downto 0);
634 variable sum : std_ulogic_vector(63 downto 0);
635 begin
636 v := r;
637 illegal := '0';
638 v.busy := '0';
639 int_input := '0';
640
641 -- capture incoming instruction
642 if e_in.valid = '1' then
643 v.insn := e_in.insn;
644 v.op := e_in.op;
645 v.fe_mode := or (e_in.fe_mode);
646 v.dest_fpr := e_in.frt;
647 v.single_prec := e_in.single;
648 v.longmask := e_in.single;
649 v.int_result := '0';
650 v.rc := e_in.rc;
651 v.is_cmp := e_in.out_cr;
652 if e_in.out_cr = '0' then
653 v.cr_mask := num_to_fxm(1);
654 else
655 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
656 end if;
657 int_input := '0';
658 if e_in.op = OP_FPOP_I then
659 int_input := '1';
660 end if;
661 v.quieten_nan := '1';
662 v.tiny := '0';
663 v.denorm := '0';
664 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
665 v.is_subtract := '0';
666 v.is_multiply := '0';
667 v.is_sqrt := '0';
668 v.add_bsmall := '0';
669 v.doing_ftdiv := "00";
670
671 adec := decode_dp(e_in.fra, int_input);
672 bdec := decode_dp(e_in.frb, int_input);
673 cdec := decode_dp(e_in.frc, int_input);
674 v.a := adec;
675 v.b := bdec;
676 v.c := cdec;
677
678 v.exp_cmp := '0';
679 if adec.exponent > bdec.exponent then
680 v.exp_cmp := '1';
681 end if;
682 v.madd_cmp := '0';
683 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
684 v.madd_cmp := '1';
685 end if;
686 end if;
687
688 r_hi_nz <= or (r.r(55 downto 31));
689 r_lo_nz <= or (r.r(30 downto 2));
690 s_nz <= or (r.s);
691
692 if r.single_prec = '0' then
693 if r.doing_ftdiv(1) = '0' then
694 max_exp := to_signed(1023, EXP_BITS);
695 else
696 max_exp := to_signed(1020, EXP_BITS);
697 end if;
698 if r.doing_ftdiv(0) = '0' then
699 min_exp := to_signed(-1022, EXP_BITS);
700 else
701 min_exp := to_signed(-1021, EXP_BITS);
702 end if;
703 bias_exp := to_signed(1536, EXP_BITS);
704 else
705 max_exp := to_signed(127, EXP_BITS);
706 min_exp := to_signed(-126, EXP_BITS);
707 bias_exp := to_signed(192, EXP_BITS);
708 end if;
709 new_exp := r.result_exp - r.shift;
710 exp_tiny := '0';
711 exp_huge := '0';
712 if new_exp < min_exp then
713 exp_tiny := '1';
714 end if;
715 if new_exp > max_exp then
716 exp_huge := '1';
717 end if;
718
719 -- Compare P with zero and with B
720 px_nz := or (r.p(57 downto 4));
721 pcmpb_eq := '0';
722 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
723 pcmpb_eq := '1';
724 end if;
725 pcmpb_lt := '0';
726 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
727 pcmpb_lt := '1';
728 end if;
729
730 v.writing_back := '0';
731 v.instr_done := '0';
732 v.update_fprf := '0';
733 v.shift := to_signed(0, EXP_BITS);
734 v.first := '0';
735 v.opsel_a := AIN_R;
736 opsel_ainv <= '0';
737 opsel_mask <= '0';
738 opsel_b <= BIN_ZERO;
739 opsel_binv <= '0';
740 opsel_r <= RES_SUM;
741 opsel_s <= S_ZERO;
742 carry_in <= '0';
743 misc_sel <= "0000";
744 fpscr_mask := (others => '1');
745 update_fx := '0';
746 arith_done := '0';
747 invalid := '0';
748 zero_divide := '0';
749 renormalize := '0';
750 set_x := '0';
751 qnan_result := '0';
752 set_a := '0';
753 set_b := '0';
754 set_c := '0';
755 set_s := '0';
756 f_to_multiply.is_32bit <= '0';
757 f_to_multiply.valid <= '0';
758 msel_1 <= MUL1_A;
759 msel_2 <= MUL2_C;
760 msel_add <= MULADD_ZERO;
761 msel_inv <= '0';
762 set_y := '0';
763 pshift := '0';
764 renorm_sqrt := '0';
765 shiftin := '0';
766 case r.state is
767 when IDLE =>
768 v.use_a := '0';
769 v.use_b := '0';
770 v.use_c := '0';
771 v.invalid := '0';
772 v.negate := '0';
773 if e_in.valid = '1' then
774 case e_in.insn(5 downto 1) is
775 when "00000" =>
776 if e_in.insn(8) = '1' then
777 if e_in.insn(6) = '0' then
778 v.state := DO_FTDIV;
779 else
780 v.state := DO_FTSQRT;
781 end if;
782 elsif e_in.insn(7) = '1' then
783 v.state := DO_MCRFS;
784 else
785 v.opsel_a := AIN_B;
786 v.state := DO_FCMP;
787 end if;
788 when "00110" =>
789 if e_in.insn(10) = '0' then
790 if e_in.insn(8) = '0' then
791 v.state := DO_MTFSB;
792 else
793 v.state := DO_MTFSFI;
794 end if;
795 else
796 v.state := DO_FMRG;
797 end if;
798 when "00111" =>
799 if e_in.insn(8) = '0' then
800 v.state := DO_MFFS;
801 else
802 v.state := DO_MTFSF;
803 end if;
804 when "01000" =>
805 v.opsel_a := AIN_B;
806 if e_in.insn(9 downto 8) /= "11" then
807 v.state := DO_FMR;
808 else
809 v.state := DO_FRI;
810 end if;
811 when "01100" =>
812 v.opsel_a := AIN_B;
813 v.state := DO_FRSP;
814 when "01110" =>
815 v.opsel_a := AIN_B;
816 if int_input = '1' then
817 -- fcfid[u][s]
818 v.state := DO_FCFID;
819 else
820 v.state := DO_FCTI;
821 end if;
822 when "01111" =>
823 v.round_mode := "001";
824 v.opsel_a := AIN_B;
825 v.state := DO_FCTI;
826 when "10010" =>
827 v.opsel_a := AIN_A;
828 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
829 v.opsel_a := AIN_B;
830 end if;
831 v.state := DO_FDIV;
832 when "10100" | "10101" =>
833 v.opsel_a := AIN_A;
834 v.state := DO_FADD;
835 when "10110" =>
836 v.is_sqrt := '1';
837 v.opsel_a := AIN_B;
838 v.state := DO_FSQRT;
839 when "10111" =>
840 v.state := DO_FSEL;
841 when "11000" =>
842 v.opsel_a := AIN_B;
843 v.state := DO_FRE;
844 when "11001" =>
845 v.is_multiply := '1';
846 v.opsel_a := AIN_A;
847 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
848 v.opsel_a := AIN_C;
849 end if;
850 v.state := DO_FMUL;
851 when "11010" =>
852 v.is_sqrt := '1';
853 v.opsel_a := AIN_B;
854 v.state := DO_FRSQRTE;
855 when "11100" | "11101" | "11110" | "11111" =>
856 if v.a.mantissa(54) = '0' then
857 v.opsel_a := AIN_A;
858 elsif v.c.mantissa(54) = '0' then
859 v.opsel_a := AIN_C;
860 else
861 v.opsel_a := AIN_B;
862 end if;
863 v.state := DO_FMADD;
864 when others =>
865 illegal := '1';
866 end case;
867 end if;
868 v.x := '0';
869 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
870 set_s := '1';
871
872 when DO_MCRFS =>
873 j := to_integer(unsigned(insn_bfa(r.insn)));
874 for i in 0 to 7 loop
875 if i = j then
876 k := (7 - i) * 4;
877 v.cr_result := r.fpscr(k + 3 downto k);
878 fpscr_mask(k + 3 downto k) := "0000";
879 end if;
880 end loop;
881 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
882 v.instr_done := '1';
883 v.state := IDLE;
884
when DO_FTDIV =>
    -- ftdiv: classify the A and B operands and report the outcome in
    -- v.cr_result.  By default the instruction completes in this cycle;
    -- the final else-branch below overrides that when the exponent
    -- range checks performed in state FTDIV_1 are still required.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- Bit 2 (fg_flag in the Power ISA description of ftdiv): A is
    -- infinite, or B is zero, infinite or denormalized.  A finite B is
    -- denormalized when the unit bit of its 10.54-format mantissa
    -- (bit 54) is clear.  Bit 53 is the most significant fraction bit
    -- (the one tested as the quiet bit for NaNs) and does not indicate
    -- whether the value is normalized.
    if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- Bit 1 (fe_flag): decided here when the operand classes or the
    -- exponent of A alone are sufficient.
    if r.a.class = NAN or r.a.class = INFINITY or
        r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
        (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
        v.cr_result(1) := '1';
    else
        -- Not decidable yet: FTDIV_1 derives bit 1 from exp_tiny and
        -- exp_huge, using the ftdiv-specific limits selected by
        -- doing_ftdiv.
        v.doing_ftdiv := "11";
        v.first := '1';
        v.state := FTDIV_1;
        v.instr_done := '0';
    end if;
903
when DO_FTSQRT =>
    -- ftsqrt: classify the B operand and report the outcome in
    -- v.cr_result.  Always completes in this cycle.
    v.instr_done := '1';
    v.state := IDLE;
    v.cr_result := "0000";
    -- Bit 2 (fg_flag in the Power ISA description of ftsqrt): B is
    -- zero, infinite or denormalized.  A finite B is denormalized when
    -- the unit bit of its 10.54-format mantissa (bit 54) is clear;
    -- bit 53 is only the most significant fraction bit.
    if r.b.class = ZERO or r.b.class = INFINITY or
        (r.b.class = FINITE and r.b.mantissa(54) = '0') then
        v.cr_result(2) := '1';
    end if;
    -- Bit 1 (fe_flag): B is a NaN, infinite, zero or negative, or its
    -- exponent is -970 or below.  The flag has to be set to '1' here:
    -- cr_result was cleared to "0000" just above, so writing '0'
    -- would leave the flag permanently clear.
    if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
        or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
        v.cr_result(1) := '1';
    end if;
916
917 when DO_FCMP =>
918 -- fcmp[uo]
919 -- r.opsel_a = AIN_B
920 v.instr_done := '1';
921 v.state := IDLE;
922 update_fx := '1';
923 v.result_exp := r.b.exponent;
924 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
925 (r.b.class = NAN and r.b.mantissa(53) = '0') then
926 -- Signalling NAN
927 v.fpscr(FPSCR_VXSNAN) := '1';
928 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
929 v.fpscr(FPSCR_VXVC) := '1';
930 end if;
931 invalid := '1';
932 v.cr_result := "0001"; -- unordered
933 elsif r.a.class = NAN or r.b.class = NAN then
934 if r.insn(6) = '1' then
935 -- fcmpo
936 v.fpscr(FPSCR_VXVC) := '1';
937 invalid := '1';
938 end if;
939 v.cr_result := "0001"; -- unordered
940 elsif r.a.class = ZERO and r.b.class = ZERO then
941 v.cr_result := "0010"; -- equal
942 elsif r.a.negative /= r.b.negative then
943 v.cr_result := r.a.negative & r.b.negative & "00";
944 elsif r.a.class = ZERO then
945 -- A and B are the same sign from here down
946 v.cr_result := not r.b.negative & r.b.negative & "00";
947 elsif r.a.class = INFINITY then
948 if r.b.class = INFINITY then
949 v.cr_result := "0010";
950 else
951 v.cr_result := r.a.negative & not r.a.negative & "00";
952 end if;
953 elsif r.b.class = ZERO then
954 -- A is finite from here down
955 v.cr_result := r.a.negative & not r.a.negative & "00";
956 elsif r.b.class = INFINITY then
957 v.cr_result := not r.b.negative & r.b.negative & "00";
958 elsif r.exp_cmp = '1' then
959 -- A and B are both finite from here down
960 v.cr_result := r.a.negative & not r.a.negative & "00";
961 elsif r.a.exponent /= r.b.exponent then
962 -- A exponent is smaller than B
963 v.cr_result := not r.a.negative & r.a.negative & "00";
964 else
965 -- Prepare to subtract mantissas, put B in R
966 v.cr_result := "0000";
967 v.instr_done := '0';
968 v.opsel_a := AIN_A;
969 v.state := CMP_1;
970 end if;
971 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
972
973 when DO_MTFSB =>
974 -- mtfsb{0,1}
975 j := to_integer(unsigned(insn_bt(r.insn)));
976 for i in 0 to 31 loop
977 if i = j then
978 v.fpscr(31 - i) := r.insn(6);
979 end if;
980 end loop;
981 v.instr_done := '1';
982 v.state := IDLE;
983
984 when DO_MTFSFI =>
985 -- mtfsfi
986 j := to_integer(unsigned(insn_bf(r.insn)));
987 if r.insn(16) = '0' then
988 for i in 0 to 7 loop
989 if i = j then
990 k := (7 - i) * 4;
991 v.fpscr(k + 3 downto k) := insn_u(r.insn);
992 end if;
993 end loop;
994 end if;
995 v.instr_done := '1';
996 v.state := IDLE;
997
998 when DO_FMRG =>
999 -- fmrgew, fmrgow
1000 opsel_r <= RES_MISC;
1001 misc_sel <= "01" & r.insn(8) & '0';
1002 v.int_result := '1';
1003 v.writing_back := '1';
1004 v.instr_done := '1';
1005 v.state := IDLE;
1006
1007 when DO_MFFS =>
1008 v.int_result := '1';
1009 v.writing_back := '1';
1010 opsel_r <= RES_MISC;
1011 case r.insn(20 downto 16) is
1012 when "00000" =>
1013 -- mffs
1014 when "00001" =>
1015 -- mffsce
1016 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1017 when "10100" | "10101" =>
1018 -- mffscdrn[i] (but we don't implement DRN)
1019 fpscr_mask := x"000000FF";
1020 when "10110" =>
1021 -- mffscrn
1022 fpscr_mask := x"000000FF";
1023 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1024 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1025 when "10111" =>
1026 -- mffscrni
1027 fpscr_mask := x"000000FF";
1028 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1029 when "11000" =>
1030 -- mffsl
1031 fpscr_mask := x"0007F0FF";
1032 when others =>
1033 illegal := '1';
1034 end case;
1035 v.instr_done := '1';
1036 v.state := IDLE;
1037
1038 when DO_MTFSF =>
1039 if r.insn(25) = '1' then
1040 flm := x"FF";
1041 elsif r.insn(16) = '1' then
1042 flm := x"00";
1043 else
1044 flm := r.insn(24 downto 17);
1045 end if;
1046 for i in 0 to 7 loop
1047 k := i * 4;
1048 if flm(i) = '1' then
1049 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1050 end if;
1051 end loop;
1052 v.instr_done := '1';
1053 v.state := IDLE;
1054
1055 when DO_FMR =>
1056 -- r.opsel_a = AIN_B
1057 v.result_class := r.b.class;
1058 v.result_exp := r.b.exponent;
1059 v.quieten_nan := '0';
1060 if r.insn(9) = '1' then
1061 v.result_sign := '0'; -- fabs
1062 elsif r.insn(8) = '1' then
1063 v.result_sign := '1'; -- fnabs
1064 elsif r.insn(7) = '1' then
1065 v.result_sign := r.b.negative; -- fmr
1066 elsif r.insn(6) = '1' then
1067 v.result_sign := not r.b.negative; -- fneg
1068 else
1069 v.result_sign := r.a.negative; -- fcpsgn
1070 end if;
1071 v.writing_back := '1';
1072 v.instr_done := '1';
1073 v.state := IDLE;
1074
1075 when DO_FRI => -- fri[nzpm]
1076 -- r.opsel_a = AIN_B
1077 v.result_class := r.b.class;
1078 v.result_sign := r.b.negative;
1079 v.result_exp := r.b.exponent;
1080 v.fpscr(FPSCR_FR) := '0';
1081 v.fpscr(FPSCR_FI) := '0';
1082 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1083 -- Signalling NAN
1084 v.fpscr(FPSCR_VXSNAN) := '1';
1085 invalid := '1';
1086 end if;
1087 if r.b.class = FINITE then
1088 if r.b.exponent >= to_signed(52, EXP_BITS) then
1089 -- integer already, no rounding required
1090 arith_done := '1';
1091 else
1092 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1093 v.state := FRI_1;
1094 v.round_mode := '1' & r.insn(7 downto 6);
1095 end if;
1096 else
1097 arith_done := '1';
1098 end if;
1099
1100 when DO_FRSP =>
1101 -- r.opsel_a = AIN_B, r.shift = 0
1102 v.result_class := r.b.class;
1103 v.result_sign := r.b.negative;
1104 v.result_exp := r.b.exponent;
1105 v.fpscr(FPSCR_FR) := '0';
1106 v.fpscr(FPSCR_FI) := '0';
1107 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1108 -- Signalling NAN
1109 v.fpscr(FPSCR_VXSNAN) := '1';
1110 invalid := '1';
1111 end if;
1112 set_x := '1';
1113 if r.b.class = FINITE then
1114 if r.b.exponent < to_signed(-126, EXP_BITS) then
1115 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1116 v.state := ROUND_UFLOW;
1117 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1118 v.state := ROUND_OFLOW;
1119 else
1120 v.shift := to_signed(-2, EXP_BITS);
1121 v.state := ROUNDING;
1122 end if;
1123 else
1124 arith_done := '1';
1125 end if;
1126
1127 when DO_FCTI =>
1128 -- instr bit 9: 1=dword 0=word
1129 -- instr bit 8: 1=unsigned 0=signed
1130 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1131 -- r.opsel_a = AIN_B
1132 v.result_class := r.b.class;
1133 v.result_sign := r.b.negative;
1134 v.result_exp := r.b.exponent;
1135 v.fpscr(FPSCR_FR) := '0';
1136 v.fpscr(FPSCR_FI) := '0';
1137 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1138 -- Signalling NAN
1139 v.fpscr(FPSCR_VXSNAN) := '1';
1140 invalid := '1';
1141 end if;
1142
1143 v.int_result := '1';
1144 case r.b.class is
1145 when ZERO =>
1146 arith_done := '1';
1147 when FINITE =>
1148 if r.b.exponent >= to_signed(64, EXP_BITS) or
1149 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1150 v.state := INT_OFLOW;
1151 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1152 -- integer already, no rounding required,
1153 -- shift into final position
1154 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1155 if r.insn(8) = '1' and r.b.negative = '1' then
1156 v.state := INT_OFLOW;
1157 else
1158 v.state := INT_ISHIFT;
1159 end if;
1160 else
1161 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1162 v.state := INT_SHIFT;
1163 end if;
1164 when INFINITY | NAN =>
1165 v.state := INT_OFLOW;
1166 end case;
1167
1168 when DO_FCFID =>
1169 -- r.opsel_a = AIN_B
1170 v.result_sign := '0';
1171 if r.insn(8) = '0' and r.b.negative = '1' then
1172 -- fcfid[s] with negative operand, set R = -B
1173 opsel_ainv <= '1';
1174 carry_in <= '1';
1175 v.result_sign := '1';
1176 end if;
1177 v.result_class := r.b.class;
1178 v.result_exp := to_signed(54, EXP_BITS);
1179 v.fpscr(FPSCR_FR) := '0';
1180 v.fpscr(FPSCR_FI) := '0';
1181 if r.b.class = ZERO then
1182 arith_done := '1';
1183 else
1184 v.state := FINISH;
1185 end if;
1186
1187 when DO_FADD =>
1188 -- fadd[s] and fsub[s]
1189 -- r.opsel_a = AIN_A
1190 v.result_sign := r.a.negative;
1191 v.result_class := r.a.class;
1192 v.result_exp := r.a.exponent;
1193 v.fpscr(FPSCR_FR) := '0';
1194 v.fpscr(FPSCR_FI) := '0';
1195 v.use_a := '1';
1196 v.use_b := '1';
1197 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1198 if r.a.class = FINITE and r.b.class = FINITE then
1199 v.is_subtract := not is_add;
1200 v.add_bsmall := r.exp_cmp;
1201 v.opsel_a := AIN_B;
1202 if r.exp_cmp = '0' then
1203 v.shift := r.a.exponent - r.b.exponent;
1204 v.result_sign := r.b.negative xnor r.insn(1);
1205 if r.a.exponent = r.b.exponent then
1206 v.state := ADD_2;
1207 else
1208 v.longmask := '0';
1209 v.state := ADD_SHIFT;
1210 end if;
1211 else
1212 v.state := ADD_1;
1213 end if;
1214 else
1215 if r.a.class = NAN or r.b.class = NAN then
1216 v.state := NAN_RESULT;
1217 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1218 -- invalid operation, construct QNaN
1219 v.fpscr(FPSCR_VXISI) := '1';
1220 qnan_result := '1';
1221 arith_done := '1';
1222 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1223 -- return -0 for rounding to -infinity
1224 v.result_sign := r.round_mode(1) and r.round_mode(0);
1225 arith_done := '1';
1226 elsif r.a.class = INFINITY or r.b.class = ZERO then
1227 -- result is A
1228 v.opsel_a := AIN_A;
1229 v.state := EXC_RESULT;
1230 else
1231 -- result is +/- B
1232 v.opsel_a := AIN_B;
1233 v.negate := not r.insn(1);
1234 v.state := EXC_RESULT;
1235 end if;
1236 end if;
1237
1238 when DO_FMUL =>
1239 -- fmul[s]
1240 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1241 v.result_sign := r.a.negative xor r.c.negative;
1242 v.result_class := r.a.class;
1243 v.fpscr(FPSCR_FR) := '0';
1244 v.fpscr(FPSCR_FI) := '0';
1245 v.use_a := '1';
1246 v.use_c := '1';
1247 if r.a.class = FINITE and r.c.class = FINITE then
1248 v.result_exp := r.a.exponent + r.c.exponent;
1249 -- Renormalize denorm operands
1250 if r.a.mantissa(54) = '0' then
1251 v.state := RENORM_A;
1252 elsif r.c.mantissa(54) = '0' then
1253 v.state := RENORM_C;
1254 else
1255 f_to_multiply.valid <= '1';
1256 v.state := MULT_1;
1257 end if;
1258 else
1259 if r.a.class = NAN or r.c.class = NAN then
1260 v.state := NAN_RESULT;
1261 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1262 (r.a.class = ZERO and r.c.class = INFINITY) then
1263 -- invalid operation, construct QNaN
1264 v.fpscr(FPSCR_VXIMZ) := '1';
1265 qnan_result := '1';
1266 elsif r.a.class = ZERO or r.a.class = INFINITY then
1267 -- result is +/- A
1268 arith_done := '1';
1269 else
1270 -- r.c.class is ZERO or INFINITY
1271 v.opsel_a := AIN_C;
1272 v.negate := r.a.negative;
1273 v.state := EXC_RESULT;
1274 end if;
1275 end if;
1276
1277 when DO_FDIV =>
1278 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1279 v.result_class := r.a.class;
1280 v.fpscr(FPSCR_FR) := '0';
1281 v.fpscr(FPSCR_FI) := '0';
1282 v.use_a := '1';
1283 v.use_b := '1';
1284 v.result_sign := r.a.negative xor r.b.negative;
1285 v.result_exp := r.a.exponent - r.b.exponent;
1286 v.count := "00";
1287 if r.a.class = FINITE and r.b.class = FINITE then
1288 -- Renormalize denorm operands
1289 if r.a.mantissa(54) = '0' then
1290 v.state := RENORM_A;
1291 elsif r.b.mantissa(54) = '0' then
1292 v.state := RENORM_B;
1293 else
1294 v.first := '1';
1295 v.state := DIV_2;
1296 end if;
1297 else
1298 if r.a.class = NAN or r.b.class = NAN then
1299 v.state := NAN_RESULT;
1300 elsif r.b.class = INFINITY then
1301 if r.a.class = INFINITY then
1302 v.fpscr(FPSCR_VXIDI) := '1';
1303 qnan_result := '1';
1304 else
1305 v.result_class := ZERO;
1306 end if;
1307 arith_done := '1';
1308 elsif r.b.class = ZERO then
1309 if r.a.class = ZERO then
1310 v.fpscr(FPSCR_VXZDZ) := '1';
1311 qnan_result := '1';
1312 else
1313 if r.a.class = FINITE then
1314 zero_divide := '1';
1315 end if;
1316 v.result_class := INFINITY;
1317 end if;
1318 arith_done := '1';
1319 else -- r.b.class = FINITE, result_class = r.a.class
1320 arith_done := '1';
1321 end if;
1322 end if;
1323
1324 when DO_FSEL =>
1325 v.fpscr(FPSCR_FR) := '0';
1326 v.fpscr(FPSCR_FI) := '0';
1327 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1328 v.opsel_a := AIN_C;
1329 else
1330 v.opsel_a := AIN_B;
1331 end if;
1332 v.quieten_nan := '0';
1333 v.state := EXC_RESULT;
1334
1335 when DO_FSQRT =>
1336 -- r.opsel_a = AIN_B
1337 v.result_class := r.b.class;
1338 v.result_sign := r.b.negative;
1339 v.fpscr(FPSCR_FR) := '0';
1340 v.fpscr(FPSCR_FI) := '0';
1341 v.use_b := '1';
1342 case r.b.class is
1343 when FINITE =>
1344 v.result_exp := r.b.exponent;
1345 if r.b.negative = '1' then
1346 v.fpscr(FPSCR_VXSQRT) := '1';
1347 qnan_result := '1';
1348 elsif r.b.mantissa(54) = '0' then
1349 v.state := RENORM_B;
1350 elsif r.b.exponent(0) = '0' then
1351 v.state := SQRT_1;
1352 else
1353 v.shift := to_signed(1, EXP_BITS);
1354 v.state := RENORM_B2;
1355 end if;
1356 when NAN =>
1357 v.state := NAN_RESULT;
1358 when ZERO =>
1359 -- result is B
1360 arith_done := '1';
1361 when INFINITY =>
1362 if r.b.negative = '1' then
1363 v.fpscr(FPSCR_VXSQRT) := '1';
1364 qnan_result := '1';
1365 -- else result is B
1366 end if;
1367 arith_done := '1';
1368 end case;
1369
1370 when DO_FRE =>
1371 -- r.opsel_a = AIN_B
1372 v.result_class := r.b.class;
1373 v.result_sign := r.b.negative;
1374 v.fpscr(FPSCR_FR) := '0';
1375 v.fpscr(FPSCR_FI) := '0';
1376 v.use_b := '1';
1377 case r.b.class is
1378 when FINITE =>
1379 v.result_exp := - r.b.exponent;
1380 if r.b.mantissa(54) = '0' then
1381 v.state := RENORM_B;
1382 else
1383 v.state := FRE_1;
1384 end if;
1385 when NAN =>
1386 v.state := NAN_RESULT;
1387 when INFINITY =>
1388 v.result_class := ZERO;
1389 arith_done := '1';
1390 when ZERO =>
1391 v.result_class := INFINITY;
1392 zero_divide := '1';
1393 arith_done := '1';
1394 end case;
1395
1396 when DO_FRSQRTE =>
1397 -- r.opsel_a = AIN_B
1398 v.result_class := r.b.class;
1399 v.result_sign := r.b.negative;
1400 v.fpscr(FPSCR_FR) := '0';
1401 v.fpscr(FPSCR_FI) := '0';
1402 v.use_b := '1';
1403 v.shift := to_signed(1, EXP_BITS);
1404 case r.b.class is
1405 when FINITE =>
1406 v.result_exp := r.b.exponent;
1407 if r.b.negative = '1' then
1408 v.fpscr(FPSCR_VXSQRT) := '1';
1409 qnan_result := '1';
1410 elsif r.b.mantissa(54) = '0' then
1411 v.state := RENORM_B;
1412 elsif r.b.exponent(0) = '0' then
1413 v.state := RSQRT_1;
1414 else
1415 v.state := RENORM_B2;
1416 end if;
1417 when NAN =>
1418 v.state := NAN_RESULT;
1419 when INFINITY =>
1420 if r.b.negative = '1' then
1421 v.fpscr(FPSCR_VXSQRT) := '1';
1422 qnan_result := '1';
1423 else
1424 v.result_class := ZERO;
1425 end if;
1426 arith_done := '1';
1427 when ZERO =>
1428 v.result_class := INFINITY;
1429 zero_divide := '1';
1430 arith_done := '1';
1431 end case;
1432
1433 when DO_FMADD =>
1434 -- fmadd, fmsub, fnmadd, fnmsub
1435 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1436 -- else AIN_B
1437 v.result_sign := r.a.negative;
1438 v.result_class := r.a.class;
1439 v.result_exp := r.a.exponent;
1440 v.fpscr(FPSCR_FR) := '0';
1441 v.fpscr(FPSCR_FI) := '0';
1442 v.use_a := '1';
1443 v.use_b := '1';
1444 v.use_c := '1';
1445 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1446 if r.a.class = FINITE and r.c.class = FINITE and
1447 (r.b.class = FINITE or r.b.class = ZERO) then
1448 v.is_subtract := not is_add;
1449 mulexp := r.a.exponent + r.c.exponent;
1450 v.result_exp := mulexp;
1451 -- Make sure A and C are normalized
1452 if r.a.mantissa(54) = '0' then
1453 v.state := RENORM_A;
1454 elsif r.c.mantissa(54) = '0' then
1455 v.state := RENORM_C;
1456 elsif r.b.class = ZERO then
1457 -- no addend, degenerates to multiply
1458 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1459 f_to_multiply.valid <= '1';
1460 v.is_multiply := '1';
1461 v.state := MULT_1;
1462 elsif r.madd_cmp = '0' then
1463 -- addend is bigger, do multiply first
1464 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1465 f_to_multiply.valid <= '1';
1466 v.state := FMADD_1;
1467 else
1468 -- product is bigger, shift B right and use it as the
1469 -- addend to the multiplier
1470 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1471 -- for subtract, multiplier does B - A * C
1472 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1473 v.result_exp := r.b.exponent;
1474 v.state := FMADD_2;
1475 end if;
1476 else
1477 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1478 v.state := NAN_RESULT;
1479 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1480 (r.a.class = INFINITY and r.c.class = ZERO) then
1481 -- invalid operation, construct QNaN
1482 v.fpscr(FPSCR_VXIMZ) := '1';
1483 qnan_result := '1';
1484 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1485 if r.b.class = INFINITY and is_add = '0' then
1486 -- invalid operation, construct QNaN
1487 v.fpscr(FPSCR_VXISI) := '1';
1488 qnan_result := '1';
1489 else
1490 -- result is infinity
1491 v.result_class := INFINITY;
1492 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1493 arith_done := '1';
1494 end if;
1495 else
1496 -- Here A is zero, C is zero, or B is infinity
1497 -- Result is +/-B in all of those cases
1498 v.opsel_a := AIN_B;
1499 if r.b.class /= ZERO or is_add = '1' then
1500 v.negate := not (r.insn(1) xor r.insn(2));
1501 else
1502 -- have to be careful about rule for 0 - 0 result sign
1503 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1504 end if;
1505 v.state := EXC_RESULT;
1506 end if;
1507 end if;
1508
1509 when RENORM_A =>
1510 renormalize := '1';
1511 v.state := RENORM_A2;
1512 if r.insn(4) = '1' then
1513 v.opsel_a := AIN_C;
1514 else
1515 v.opsel_a := AIN_B;
1516 end if;
1517
1518 when RENORM_A2 =>
1519 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1520 set_a := '1';
1521 v.result_exp := new_exp;
1522 if r.insn(4) = '1' then
1523 if r.c.mantissa(54) = '1' then
1524 if r.insn(3) = '0' or r.b.class = ZERO then
1525 v.first := '1';
1526 v.state := MULT_1;
1527 else
1528 v.madd_cmp := '0';
1529 if new_exp + 1 >= r.b.exponent then
1530 v.madd_cmp := '1';
1531 end if;
1532 v.opsel_a := AIN_B;
1533 v.state := DO_FMADD;
1534 end if;
1535 else
1536 v.state := RENORM_C;
1537 end if;
1538 else
1539 if r.b.mantissa(54) = '1' then
1540 v.first := '1';
1541 v.state := DIV_2;
1542 else
1543 v.state := RENORM_B;
1544 end if;
1545 end if;
1546
1547 when RENORM_B =>
1548 renormalize := '1';
1549 renorm_sqrt := r.is_sqrt;
1550 v.state := RENORM_B2;
1551
1552 when RENORM_B2 =>
1553 set_b := '1';
1554 if r.is_sqrt = '0' then
1555 v.result_exp := r.result_exp + r.shift;
1556 else
1557 v.result_exp := new_exp;
1558 end if;
1559 v.opsel_a := AIN_B;
1560 v.state := LOOKUP;
1561
1562 when RENORM_C =>
1563 renormalize := '1';
1564 v.state := RENORM_C2;
1565
1566 when RENORM_C2 =>
1567 set_c := '1';
1568 v.result_exp := new_exp;
1569 if r.insn(3) = '0' or r.b.class = ZERO then
1570 v.first := '1';
1571 v.state := MULT_1;
1572 else
1573 v.madd_cmp := '0';
1574 if new_exp + 1 >= r.b.exponent then
1575 v.madd_cmp := '1';
1576 end if;
1577 v.opsel_a := AIN_B;
1578 v.state := DO_FMADD;
1579 end if;
1580
1581 when ADD_1 =>
1582 -- transferring B to R
1583 v.shift := r.b.exponent - r.a.exponent;
1584 v.result_exp := r.b.exponent;
1585 v.longmask := '0';
1586 v.state := ADD_SHIFT;
1587
1588 when ADD_SHIFT =>
1589 -- r.shift = - exponent difference, r.longmask = 0
1590 opsel_r <= RES_SHIFT;
1591 v.x := s_nz;
1592 set_x := '1';
1593 v.longmask := r.single_prec;
1594 if r.add_bsmall = '1' then
1595 v.opsel_a := AIN_A;
1596 else
1597 v.opsel_a := AIN_B;
1598 end if;
1599 v.state := ADD_2;
1600
1601 when ADD_2 =>
1602 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1603 opsel_b <= BIN_R;
1604 opsel_binv <= r.is_subtract;
1605 carry_in <= r.is_subtract and not r.x;
1606 v.shift := to_signed(-1, EXP_BITS);
1607 v.state := ADD_3;
1608
1609 when ADD_3 =>
1610 -- check for overflow or negative result (can't get both)
1611 -- r.shift = -1
1612 if r.r(63) = '1' then
1613 -- result is opposite sign to expected
1614 v.result_sign := not r.result_sign;
1615 opsel_ainv <= '1';
1616 carry_in <= '1';
1617 v.state := FINISH;
1618 elsif r.r(55) = '1' then
1619 -- sum overflowed, shift right
1620 opsel_r <= RES_SHIFT;
1621 set_x := '1';
1622 v.shift := to_signed(-2, EXP_BITS);
1623 if exp_huge = '1' then
1624 v.state := ROUND_OFLOW;
1625 else
1626 v.state := ROUNDING;
1627 end if;
1628 elsif r.r(54) = '1' then
1629 set_x := '1';
1630 v.shift := to_signed(-2, EXP_BITS);
1631 v.state := ROUNDING;
1632 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1633 -- r.x must be zero at this point
1634 v.result_class := ZERO;
1635 if r.is_subtract = '1' then
1636 -- set result sign depending on rounding mode
1637 v.result_sign := r.round_mode(1) and r.round_mode(0);
1638 end if;
1639 arith_done := '1';
1640 else
1641 renormalize := '1';
1642 v.state := NORMALIZE;
1643 end if;
1644
1645 when CMP_1 =>
1646 -- r.opsel_a = AIN_A
1647 opsel_b <= BIN_R;
1648 opsel_binv <= '1';
1649 carry_in <= '1';
1650 v.state := CMP_2;
1651
1652 when CMP_2 =>
1653 if r.r(63) = '1' then
1654 -- A is smaller in magnitude
1655 v.cr_result := not r.a.negative & r.a.negative & "00";
1656 elsif (r_hi_nz or r_lo_nz) = '0' then
1657 v.cr_result := "0010";
1658 else
1659 v.cr_result := r.a.negative & not r.a.negative & "00";
1660 end if;
1661 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1662 v.instr_done := '1';
1663 v.state := IDLE;
1664
1665 when MULT_1 =>
1666 f_to_multiply.valid <= r.first;
1667 opsel_r <= RES_MULT;
1668 if multiply_to_f.valid = '1' then
1669 v.state := FINISH;
1670 end if;
1671
1672 when FMADD_1 =>
1673 -- Addend is bigger here
1674 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1675 -- note v.shift is at most -2 here
1676 v.shift := r.result_exp - r.b.exponent;
1677 opsel_r <= RES_MULT;
1678 opsel_s <= S_MULT;
1679 set_s := '1';
1680 f_to_multiply.valid <= r.first;
1681 if multiply_to_f.valid = '1' then
1682 v.longmask := '0';
1683 v.state := ADD_SHIFT;
1684 end if;
1685
1686 when FMADD_2 =>
1687 -- Product is potentially bigger here
1688 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1689 set_s := '1';
1690 opsel_s <= S_SHIFT;
1691 v.shift := r.shift - to_signed(64, EXP_BITS);
1692 v.state := FMADD_3;
1693
1694 when FMADD_3 =>
1695 -- r.shift = addend exp - product exp
1696 opsel_r <= RES_SHIFT;
1697 v.first := '1';
1698 v.state := FMADD_4;
1699
1700 when FMADD_4 =>
1701 msel_add <= MULADD_RS;
1702 f_to_multiply.valid <= r.first;
1703 msel_inv <= r.is_subtract;
1704 opsel_r <= RES_MULT;
1705 opsel_s <= S_MULT;
1706 set_s := '1';
1707 if multiply_to_f.valid = '1' then
1708 v.state := FMADD_5;
1709 end if;
1710
1711 when FMADD_5 =>
1712 -- negate R:S:X if negative
1713 if r.r(63) = '1' then
1714 v.result_sign := not r.result_sign;
1715 opsel_ainv <= '1';
1716 carry_in <= not (s_nz or r.x);
1717 opsel_s <= S_NEG;
1718 set_s := '1';
1719 end if;
1720 v.shift := to_signed(56, EXP_BITS);
1721 v.state := FMADD_6;
1722
1723 when FMADD_6 =>
1724 -- r.shift = 56 (or 0, but only if r is now nonzero)
1725 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1726 if s_nz = '0' then
1727 -- must be a subtraction, and r.x must be zero
1728 v.result_class := ZERO;
1729 v.result_sign := r.round_mode(1) and r.round_mode(0);
1730 arith_done := '1';
1731 else
1732 -- R is all zeroes but there are non-zero bits in S
1733 -- so shift them into R and set S to 0
1734 opsel_r <= RES_SHIFT;
1735 set_s := '1';
1736 -- stay in state FMADD_6
1737 end if;
1738 elsif r.r(56 downto 54) = "001" then
1739 v.state := FINISH;
1740 else
1741 renormalize := '1';
1742 v.state := NORMALIZE;
1743 end if;
1744
1745 when LOOKUP =>
1746 -- r.opsel_a = AIN_B
1747 -- wait one cycle for inverse_table[B] lookup
1748 v.first := '1';
1749 if r.insn(4) = '0' then
1750 if r.insn(3) = '0' then
1751 v.state := DIV_2;
1752 else
1753 v.state := SQRT_1;
1754 end if;
1755 elsif r.insn(2) = '0' then
1756 v.state := FRE_1;
1757 else
1758 v.state := RSQRT_1;
1759 end if;
1760
1761 when DIV_2 =>
1762 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1763 msel_1 <= MUL1_B;
1764 msel_add <= MULADD_CONST;
1765 msel_inv <= '1';
1766 if r.count = 0 then
1767 msel_2 <= MUL2_LUT;
1768 else
1769 msel_2 <= MUL2_P;
1770 end if;
1771 set_y := r.first;
1772 pshift := '1';
1773 f_to_multiply.valid <= r.first;
1774 if multiply_to_f.valid = '1' then
1775 v.first := '1';
1776 v.count := r.count + 1;
1777 v.state := DIV_3;
1778 end if;
1779
1780 when DIV_3 =>
1781 -- compute Y = P = P * Y
1782 msel_1 <= MUL1_Y;
1783 msel_2 <= MUL2_P;
1784 f_to_multiply.valid <= r.first;
1785 pshift := '1';
1786 if multiply_to_f.valid = '1' then
1787 v.first := '1';
1788 if r.count = 3 then
1789 v.state := DIV_4;
1790 else
1791 v.state := DIV_2;
1792 end if;
1793 end if;
1794
1795 when DIV_4 =>
1796 -- compute R = P = A * Y (quotient)
1797 msel_1 <= MUL1_A;
1798 msel_2 <= MUL2_P;
1799 set_y := r.first;
1800 f_to_multiply.valid <= r.first;
1801 pshift := '1';
1802 if multiply_to_f.valid = '1' then
1803 opsel_r <= RES_MULT;
1804 v.first := '1';
1805 v.state := DIV_5;
1806 end if;
1807
1808 when DIV_5 =>
1809 -- compute P = A - B * R (remainder)
1810 msel_1 <= MUL1_B;
1811 msel_2 <= MUL2_R;
1812 msel_add <= MULADD_A;
1813 msel_inv <= '1';
1814 f_to_multiply.valid <= r.first;
1815 if multiply_to_f.valid = '1' then
1816 v.state := DIV_6;
1817 end if;
1818
1819 when DIV_6 =>
1820 -- test if remainder is 0 or >= B
1821 if pcmpb_lt = '1' then
1822 -- quotient is correct, set X if remainder non-zero
1823 v.x := r.p(58) or px_nz;
1824 else
1825 -- quotient needs to be incremented by 1
1826 carry_in <= '1';
1827 v.x := not pcmpb_eq;
1828 end if;
1829 v.state := FINISH;
1830
1831 when FRE_1 =>
1832 opsel_r <= RES_MISC;
1833 misc_sel <= "0111";
1834 v.shift := to_signed(1, EXP_BITS);
1835 v.state := NORMALIZE;
1836
1837 when FTDIV_1 =>
1838 v.cr_result(1) := exp_tiny or exp_huge;
1839 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1840 v.instr_done := '1';
1841 v.state := IDLE;
1842 else
1843 v.shift := r.a.exponent;
1844 v.doing_ftdiv := "10";
1845 end if;
1846
1847 when RSQRT_1 =>
1848 opsel_r <= RES_MISC;
1849 misc_sel <= "0111";
1850 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1851 v.result_exp := - sqrt_exp;
1852 v.shift := to_signed(1, EXP_BITS);
1853 v.state := NORMALIZE;
1854
1855 when SQRT_1 =>
1856 -- put invsqr[B] in R and compute P = invsqr[B] * B
1857 -- also transfer B (in R) to A
1858 set_a := '1';
1859 opsel_r <= RES_MISC;
1860 misc_sel <= "0111";
1861 msel_1 <= MUL1_B;
1862 msel_2 <= MUL2_LUT;
1863 f_to_multiply.valid <= '1';
1864 v.shift := to_signed(-1, EXP_BITS);
1865 v.count := "00";
1866 v.state := SQRT_2;
1867
1868 when SQRT_2 =>
1869 -- shift R right one place
1870 -- not expecting multiplier result yet
1871 -- r.shift = -1
1872 opsel_r <= RES_SHIFT;
1873 v.first := '1';
1874 v.state := SQRT_3;
1875
1876 when SQRT_3 =>
     -- Waits in this state until the multiplier reports a valid result.
1877 -- put R into Y, wait for product from multiplier
     -- With msel_2 = MUL2_R the multiplier's data2 input is derived from
     -- R; set_y copies data2 into Y, gated by r.first.
1878 msel_2 <= MUL2_R;
1879 set_y := r.first;
     -- pshift selects result bits 119..56 (instead of 63..0) for P.
1880 pshift := '1';
1881 if multiply_to_f.valid = '1' then
1882 -- put result into R
1883 opsel_r <= RES_MULT;
1884 v.first := '1';
1885 v.state := SQRT_4;
1886 end if;
1887
1888 when SQRT_4 =>
     -- Start of the refinement loop (re-entered from SQRT_7).
1889 -- compute 1.5 - Y * P
1890 msel_1 <= MUL1_Y;
1891 msel_2 <= MUL2_P;
     -- MULADD_CONST yields 1.5 when r.is_sqrt is set (2.0 otherwise);
     -- msel_inv inverts the addend and sets not_result on the multiplier.
1892 msel_add <= MULADD_CONST;
1893 msel_inv <= '1';
     -- The multiply is only issued while r.first is set.
1894 f_to_multiply.valid <= r.first;
1895 pshift := '1';
1896 if multiply_to_f.valid = '1' then
1897 v.state := SQRT_5;
1898 end if;
1899
1900 when SQRT_5 =>
     -- Single-cycle state: issues the Y * P multiply unconditionally and
     -- moves on without waiting for the result.
1901 -- compute Y = Y * P
1902 msel_1 <= MUL1_Y;
1903 msel_2 <= MUL2_P;
1904 f_to_multiply.valid <= '1';
1905 v.first := '1';
1906 v.state := SQRT_6;
1907
1908 when SQRT_6 =>
     -- Issues a second multiply (R * P), gated by r.first, then waits for
     -- a valid multiplier result before moving to SQRT_7.
1909 -- pipeline in R = R * P
1910 msel_1 <= MUL1_R;
1911 msel_2 <= MUL2_P;
1912 f_to_multiply.valid <= r.first;
1913 pshift := '1';
1914 if multiply_to_f.valid = '1' then
1915 v.first := '1';
1916 v.state := SQRT_7;
1917 end if;
1918
1919 when SQRT_7 =>
     -- End of one refinement iteration: captures P into Y, loads the
     -- next product into R, and loops back to SQRT_4 until r.count
     -- reaches 2.
1920 -- first multiply is done, put result in Y
     -- With msel_2 = MUL2_P, data2 is r.p; set_y copies it into Y,
     -- gated by r.first.
1921 msel_2 <= MUL2_P;
1922 set_y := r.first;
1923 -- wait for second multiply (should be here already)
1924 pshift := '1';
1925 if multiply_to_f.valid = '1' then
1926 -- put result into R
1927 opsel_r <= RES_MULT;
1928 v.first := '1';
1929 v.count := r.count + 1;
     -- The test uses the pre-increment count, so the loop body runs for
     -- r.count = 0, 1 and 2 before falling through to SQRT_8.
1930 if r.count < 2 then
1931 v.state := SQRT_4;
1932 else
     -- NOTE(review): v.first was already set to '1' a few lines above;
     -- this second assignment is redundant.
1933 v.first := '1';
1934 v.state := SQRT_8;
1935 end if;
1936 end if;
1937
1938 when SQRT_8 =>
     -- Issues the multiply-add (gated by r.first) and waits for its
     -- result before moving to SQRT_9.
1939 -- compute P = A - R * R, which can be +ve or -ve
1940 -- we arranged for B to be put into A earlier
1941 msel_1 <= MUL1_R;
1942 msel_2 <= MUL2_R;
     -- Addend is A's mantissa; msel_inv inverts the addend and sets
     -- not_result on the multiplier.
1943 msel_add <= MULADD_A;
1944 msel_inv <= '1';
1945 pshift := '1';
1946 f_to_multiply.valid <= r.first;
1947 if multiply_to_f.valid = '1' then
1948 v.first := '1';
1949 v.state := SQRT_9;
1950 end if;
1951
1952 when SQRT_9 =>
     -- Issues the Y * P multiply (gated by r.first) and waits for its
     -- result before moving to SQRT_10.
1953 -- compute P = P * Y
1954 -- since Y is an estimate of 1/sqrt(B), this makes P an
1955 -- estimate of the adjustment needed to R. Since the error
1956 -- could be negative and we have an unsigned multiplier, the
1957 -- upper bits can be wrong, but it turns out the lowest 8 bits
1958 -- are correct and are all we need (given 3 iterations through
1959 -- SQRT_4 to SQRT_7).
1960 msel_1 <= MUL1_Y;
1961 msel_2 <= MUL2_P;
1962 pshift := '1';
1963 f_to_multiply.valid <= r.first;
1964 if multiply_to_f.valid = '1' then
1965 v.state := SQRT_10;
1966 end if;
1967
1968 when SQRT_10 =>
     -- Single-cycle state applying the small correction from P to R and
     -- setting the result exponent to half of B's exponent.
1969 -- Add the bottom 8 bits of P, sign-extended,
1970 -- divided by 4, onto R.
1971 -- The division by 4 is because R is 10.54 format
1972 -- whereas P is 8.56 format.
     -- BIN_PS6 selects r.p(7 downto 2) sign-extended to 64 bits as the
     -- adder's B input.  Presumably the defaults for opsel_r and the A
     -- input select the sum with R -- defaults are not visible here.
1973 opsel_b <= BIN_PS6;
     -- Arithmetic shift right by one of B's exponent.
1974 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1975 v.result_exp := sqrt_exp;
     -- Shift count of 1 is used by the shifter in SQRT_11.
1976 v.shift := to_signed(1, EXP_BITS);
1977 v.first := '1';
1978 v.state := SQRT_11;
1979
1980 when SQRT_11 =>
     -- Issues the remainder multiply-add (gated by r.first) and, in the
     -- same first cycle, captures the shifter output into B.
1981 -- compute P = A - R * R (remainder)
1982 -- also put 2 * R + 1 into B for comparison with P
1983 msel_1 <= MUL1_R;
1984 msel_2 <= MUL2_R;
1985 msel_add <= MULADD_A;
1986 msel_inv <= '1';
1987 f_to_multiply.valid <= r.first;
     -- shiftin is OR-ed into the bit just below R at the shifter input;
     -- with r.shift = 1 (set in SQRT_10) that supplies the "+ 1".
1988 shiftin := '1';
     -- set_b makes the B register take new_exp / shift_res.
1989 set_b := r.first;
1990 if multiply_to_f.valid = '1' then
1991 v.state := SQRT_12;
1992 end if;
1993
1994 when SQRT_12 =>
     -- Final correction step of the square-root sequence; same structure
     -- as DIV_6.  Always moves on to FINISH.
1995 -- test if remainder is 0 or >= B = 2*R + 1
1996 if pcmpb_lt = '1' then
1997 -- square root is correct, set X if remainder non-zero
1998 v.x := r.p(58) or px_nz;
1999 else
2000 -- square root needs to be incremented by 1
     -- carry_in is added in the data-path adder (in_a + in_b + carry_in).
2001 carry_in <= '1';
     -- X is set unless the comparison reported equality.
2002 v.x := not pcmpb_eq;
2003 end if;
2004 v.state := FINISH;
2005
2006 when INT_SHIFT =>
     -- Replaces R with the shifter output, accumulates the sticky bit,
     -- and sets up the shift count of -2 expected by INT_ROUND.
2007 -- r.shift = b.exponent - 52
2008 opsel_r <= RES_SHIFT;
     -- set_x makes v.x go to '1' if any bit of the A input under the
     -- shift mask is set (see the data path).
2009 set_x := '1';
2010 v.state := INT_ROUND;
2011 v.shift := to_signed(-2, EXP_BITS);
2012
2013 when INT_ROUND =>
     -- Shifts R again, computes the rounding decision for the integer
     -- result and stores it in FPSCR FR..FI, then routes negative
     -- non-zero inputs of the unsigned conversions to INT_OFLOW.
2014 -- r.shift = -2
2015 opsel_r <= RES_SHIFT;
     -- Single-precision argument is forced to '0' for integer conversion.
2016 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2017 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2018 -- Check for negative values that don't round to 0 for fcti*u*
     -- insn(8) = '1' marks the unsigned variants (see the opcode case in
     -- INT_FINAL).
2019 if r.insn(8) = '1' and r.result_sign = '1' and
2020 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2021 v.state := INT_OFLOW;
2022 else
2023 v.state := INT_FINAL;
2024 end if;
2025
2026 when INT_ISHIFT =>
     -- Replaces R with the shifter output and goes straight to INT_FINAL
     -- (no rounding step on this path).
2027 -- r.shift = b.exponent - 54;
2028 opsel_r <= RES_SHIFT;
2029 v.state := INT_FINAL;
2030
2031 when INT_FINAL =>
     -- Produces the final integer value and decides whether an explicit
     -- overflow check (INT_CHECK) is needed before completing.
2032 -- Negate if necessary, and increment for rounding if needed
     -- Inverting the A input and adding carry_in gives two's-complement
     -- negation; FR xor sign folds the rounding increment into the same
     -- carry (for a negative value that rounds up, the +1 of the negate
     -- and the rounding increment cancel).
2033 opsel_ainv <= r.result_sign;
2034 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2035 -- Check for possible overflows
     -- insn(9) selects word (0) vs doubleword (1); insn(8) selects
     -- signed (0) vs unsigned (1).
2036 case r.insn(9 downto 8) is
2037 when "00" => -- fctiw[z]
2038 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2039 when "01" => -- fctiwu[z]
2040 need_check := r.r(31);
2041 when "10" => -- fctid[z]
2042 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2043 when others => -- fctidu[z]
2044 need_check := r.r(63);
2045 end case;
2046 if need_check = '1' then
2047 v.state := INT_CHECK;
2048 else
     -- No overflow possible: an inexact result (FI) raises XX and the
     -- instruction completes.
2049 if r.fpscr(FPSCR_FI) = '1' then
2050 v.fpscr(FPSCR_XX) := '1';
2051 end if;
2052 arith_done := '1';
2053 end if;
2054
2055 when INT_CHECK =>
     -- Overflow check on the value produced by INT_FINAL; on overflow the
     -- result is replaced by a saturation constant and VXCVI is raised.
     -- Always completes the instruction.
     -- Pick the sign-position bit for word (bit 31) or doubleword
     -- (bit 63) results.
2056 if r.insn(9) = '0' then
2057 msb := r.r(31);
2058 else
2059 msb := r.r(63);
2060 end if;
     -- misc_sel "1sus" indexes the max positive/negative constants in the
     -- result mux (s = insn(9), u = insn(8), last bit = result sign).
2061 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
     -- Signed: overflow if the top bit disagrees with the expected sign.
     -- Unsigned: overflow if the top bit is no longer set.
2062 if (r.insn(8) = '0' and msb /= r.result_sign) or
2063 (r.insn(8) = '1' and msb /= '1') then
2064 opsel_r <= RES_MISC;
2065 v.fpscr(FPSCR_VXCVI) := '1';
2066 invalid := '1';
2067 else
2068 if r.fpscr(FPSCR_FI) = '1' then
2069 v.fpscr(FPSCR_XX) := '1';
2070 end if;
2071 end if;
2072 arith_done := '1';
2073
2074 when INT_OFLOW =>
     -- Unconditional conversion overflow: loads the saturation constant
     -- into R, raises VXCVI and completes the instruction.
2075 opsel_r <= RES_MISC;
2076 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
     -- For a NaN operand the low select bit is forced to '1', which picks
     -- the "max negative" constant regardless of the sign.
2077 if r.b.class = NAN then
2078 misc_sel(0) <= '1';
2079 end if;
2080 v.fpscr(FPSCR_VXCVI) := '1';
2081 invalid := '1';
2082 arith_done := '1';
2083
2084 when FRI_1 =>
     -- Replaces R with the shifter output, accumulates the sticky bit and
     -- goes to ROUNDING with a shift count of -2.
2085 -- r.shift = b.exponent - 52
2086 opsel_r <= RES_SHIFT;
2087 set_x := '1';
2088 v.shift := to_signed(-2, EXP_BITS);
2089 v.state := ROUNDING;
2090
2091 when FINISH =>
     -- Common exit point of the arithmetic sequences: decides whether R
     -- must be normalized first, or can go directly to the underflow,
     -- overflow or rounding states.
     -- For multiplies, non-zero low product bits (px_nz) make the result
     -- inexact via the sticky bit.
2092 if r.is_multiply = '1' and px_nz = '1' then
2093 v.x := '1';
2094 end if;
     -- R is in normalized position only when bit 54 is the highest set
     -- bit (bits 63..55 zero); anything else needs a clz-based shift.
2095 if r.r(63 downto 54) /= "0000000001" then
2096 renormalize := '1';
2097 v.state := NORMALIZE;
2098 else
2099 set_x := '1';
2100 if exp_tiny = '1' then
     -- Shift count for ROUND_UFLOW: how far the exponent is below
     -- min_exp (a negative number).
2101 v.shift := new_exp - min_exp;
2102 v.state := ROUND_UFLOW;
2103 elsif exp_huge = '1' then
2104 v.state := ROUND_OFLOW;
2105 else
2106 v.shift := to_signed(-2, EXP_BITS);
2107 v.state := ROUNDING;
2108 end if;
2109 end if;
2110
2111 when NORMALIZE =>
     -- Applies the normalizing shift to R, then dispatches on the
     -- resulting exponent exactly like the already-normalized branch of
     -- FINISH.
2112 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2113 -- r.shift = clz(r.r) - 9
     -- Selecting RES_SHIFT also makes result_exp take new_exp (see the
     -- end of the process).
2114 opsel_r <= RES_SHIFT;
2115 set_x := '1';
2116 if exp_tiny = '1' then
2117 v.shift := new_exp - min_exp;
2118 v.state := ROUND_UFLOW;
2119 elsif exp_huge = '1' then
2120 v.state := ROUND_OFLOW;
2121 else
2122 v.shift := to_signed(-2, EXP_BITS);
2123 v.state := ROUNDING;
2124 end if;
2125
2126 when ROUND_UFLOW =>
     -- Underflow handling before rounding; behaviour depends on whether
     -- the underflow exception is enabled (FPSCR UE).
2127 -- r.shift = - amount by which exponent underflows
     -- v.tiny is consulted in ROUNDING to raise UX on an inexact result.
2128 v.tiny := '1';
2129 if r.fpscr(FPSCR_UE) = '0' then
2130 -- disabled underflow exception case
2131 -- have to denormalize before rounding
2132 opsel_r <= RES_SHIFT;
2133 set_x := '1';
2134 v.shift := to_signed(-2, EXP_BITS);
2135 v.state := ROUNDING;
2136 else
2137 -- enabled underflow exception case
2138 -- if denormalized, have to normalize before rounding
     -- UX is raised immediately and the exponent is adjusted upward by
     -- bias_exp.
2139 v.fpscr(FPSCR_UX) := '1';
2140 v.result_exp := r.result_exp + bias_exp;
2141 if r.r(54) = '0' then
2142 renormalize := '1';
2143 v.state := NORMALIZE;
2144 else
2145 v.shift := to_signed(-2, EXP_BITS);
2146 v.state := ROUNDING;
2147 end if;
2148 end if;
2149
2150 when ROUND_OFLOW =>
     -- Overflow handling; OX is always raised, the rest depends on
     -- whether the overflow exception is enabled (FPSCR OE).
2151 v.fpscr(FPSCR_OX) := '1';
2152 if r.fpscr(FPSCR_OE) = '0' then
2153 -- disabled overflow exception
2154 -- result depends on rounding mode
2155 v.fpscr(FPSCR_XX) := '1';
2156 v.fpscr(FPSCR_FI) := '1';
     -- Infinity is produced for round_mode "00", or when round_mode(1)
     -- is set and round_mode(0) matches the result sign; otherwise the
     -- largest finite number constructed below is the result.
2157 if r.round_mode(1 downto 0) = "00" or
2158 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2159 v.result_class := INFINITY;
2160 v.fpscr(FPSCR_FR) := '1';
2161 else
2162 v.fpscr(FPSCR_FR) := '0';
2163 end if;
2164 -- construct largest representable number
2165 v.result_exp := max_exp;
2166 opsel_r <= RES_MISC;
     -- misc_sel "0010" / "0011": max DP / SP mantissa constant.
2167 misc_sel <= "001" & r.single_prec;
2168 arith_done := '1';
2169 else
2170 -- enabled overflow exception
     -- Exponent is adjusted downward by bias_exp and the value is then
     -- rounded normally.
2171 v.result_exp := r.result_exp - bias_exp;
2172 v.shift := to_signed(-2, EXP_BITS);
2173 v.state := ROUNDING;
2174 end if;
2175
2176 when ROUNDING =>
     -- Computes the rounding decision, records it in FPSCR FR..FI, and
     -- either starts the increment (ROUNDING_2), renormalizes
     -- (ROUNDING_3) or completes.
     -- opsel_mask clears the bits of the adder output that lie under the
     -- shift mask.
2177 opsel_mask <= '1';
2178 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2179 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
     -- round(1) requests an increment; round(0) marks the result inexact.
2180 if round(1) = '1' then
2181 -- set mask to increment the LSB for the precision
2182 opsel_b <= BIN_MASK;
2183 carry_in <= '1';
     -- Shift count of -1 is used by ROUNDING_2 if the increment carries
     -- out of the mantissa.
2184 v.shift := to_signed(-1, EXP_BITS);
2185 v.state := ROUNDING_2;
2186 else
2187 if r.r(54) = '0' then
2188 -- result after masking could be zero, or could be a
2189 -- denormalized result that needs to be renormalized
2190 renormalize := '1';
2191 v.state := ROUNDING_3;
2192 else
2193 arith_done := '1';
2194 end if;
2195 end if;
2196 if round(0) = '1' then
2197 v.fpscr(FPSCR_XX) := '1';
     -- Inexact and tiny (flag set in ROUND_UFLOW) also raises UX.
2198 if r.tiny = '1' then
2199 v.fpscr(FPSCR_UX) := '1';
2200 end if;
2201 end if;
2202
2203 when ROUNDING_2 =>
     -- Runs after the rounding increment: handles a carry out of the
     -- mantissa, a still-denormalized result, or normal completion.
2204 -- Check for overflow during rounding
2205 -- r.shift = -1
2206 v.x := '0';
     -- Bit 55 set means the increment carried past the normalized
     -- position: take the shifter output (r.shift = -1) as the new R.
2207 if r.r(55) = '1' then
2208 opsel_r <= RES_SHIFT;
2209 if exp_huge = '1' then
2210 v.state := ROUND_OFLOW;
2211 else
2212 arith_done := '1';
2213 end if;
2214 elsif r.r(54) = '0' then
2215 -- Do CLZ so we can renormalize the result
2216 renormalize := '1';
2217 v.state := ROUNDING_3;
2218 else
2219 arith_done := '1';
2220 end if;
2221
2222 when ROUNDING_3 =>
     -- Handles a rounded result whose bit 54 is clear: either it is zero,
     -- or it must be shifted back into normalized position.
2223 -- r.shift = clz(r.r) - 9
     -- For single precision only the high part of R counts as mantissa.
2224 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2225 if mant_nz = '0' then
2226 v.result_class := ZERO;
2227 if r.is_subtract = '1' then
2228 -- set result sign depending on rounding mode
     -- Negative zero only when both round_mode bits are set.
2229 v.result_sign := r.round_mode(1) and r.round_mode(0);
2230 end if;
2231 arith_done := '1';
2232 else
2233 -- Renormalize result after rounding
2234 opsel_r <= RES_SHIFT;
2235 v.denorm := exp_tiny;
     -- NOTE(review): the limit here is the literal -1022 rather than
     -- min_exp as used in FINISH/NORMALIZE -- presumably intentional
     -- because results are packed in double-precision format; confirm.
2236 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2237 if new_exp < to_signed(-1022, EXP_BITS) then
2238 v.state := DENORM;
2239 else
2240 arith_done := '1';
2241 end if;
2242 end if;
2243
2244 when DENORM =>
     -- Applies the denormalizing shift computed in ROUNDING_3 and
     -- completes the instruction.
2245 -- r.shift = result_exp - -1022
2246 opsel_r <= RES_SHIFT;
2247 arith_done := '1';
2248
2249 when NAN_RESULT =>
     -- Flags signalling NaNs among the operands in use and selects which
     -- NaN operand becomes the result (priority A, then B, then C).
     -- A NaN whose mantissa bit 53 is clear is treated as signalling.
2250 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2251 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2252 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2253 -- Signalling NAN
2254 v.fpscr(FPSCR_VXSNAN) := '1';
2255 invalid := '1';
2256 end if;
     -- v.opsel_a is left unchanged if none of the used operands is a NaN.
2257 if r.use_a = '1' and r.a.class = NAN then
2258 v.opsel_a := AIN_A;
2259 elsif r.use_b = '1' and r.b.class = NAN then
2260 v.opsel_a := AIN_B;
2261 elsif r.use_c = '1' and r.c.class = NAN then
2262 v.opsel_a := AIN_C;
2263 end if;
2264 v.state := EXC_RESULT;
2265
2266 when EXC_RESULT =>
     -- Copies sign (xor r.negate), exponent and class of the selected
     -- operand into the result fields and completes the instruction.
2267 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2268 case r.opsel_a is
2269 when AIN_B =>
2270 v.result_sign := r.b.negative xor r.negate;
2271 v.result_exp := r.b.exponent;
2272 v.result_class := r.b.class;
2273 when AIN_C =>
2274 v.result_sign := r.c.negative xor r.negate;
2275 v.result_exp := r.c.exponent;
2276 v.result_class := r.c.class;
     -- Any other selection (including AIN_A) uses operand A.
2277 when others =>
2278 v.result_sign := r.a.negative xor r.negate;
2279 v.result_exp := r.a.exponent;
2280 v.result_class := r.a.class;
2281 end case;
2282 arith_done := '1';
2283
2284 end case;
2285
     -- Common post-state handling: folds the zero_divide / qnan_result /
     -- invalid / arith_done flags raised by the state machine into FPSCR
     -- and the completion controls.
2286 if zero_divide = '1' then
2287 v.fpscr(FPSCR_ZX) := '1';
2288 end if;
     -- A generated quiet NaN overrides whatever the state arm selected:
     -- positive NaN with the QNaN mantissa constant (misc_sel "0001").
2289 if qnan_result = '1' then
2290 invalid := '1';
2291 v.result_class := NAN;
2292 v.result_sign := '0';
2293 misc_sel <= "0001";
2294 opsel_r <= RES_MISC;
2295 arith_done := '1';
2296 end if;
2297 if invalid = '1' then
2298 v.invalid := '1';
2299 end if;
2300 if arith_done = '1' then
2301 -- Enabled invalid exception doesn't write result or FPRF
2302 -- Neither does enabled zero-divide exception
2303 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2304 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2305 v.writing_back := '1';
2306 v.update_fprf := '1';
2307 end if;
     -- The instruction completes and the FSM returns to IDLE whether or
     -- not a result is written; update_fx enables the FX update in the
     -- FPSCR summary logic further down.
2308 v.instr_done := '1';
2309 v.state := IDLE;
2310 update_fx := '1';
2311 end if;
2312
2313 -- Multiplier and divide/square root data path
     -- Drives the multiplier request (data1, data2, addend, not_result)
     -- from the msel_* selects chosen by the state machine, and captures
     -- multiplier results into Y and P.
     -- Operand 1 mux; register-sourced operands (A, B, R) are shifted
     -- left two bits, Y is passed unchanged.
2314 case msel_1 is
2315 when MUL1_A =>
2316 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2317 when MUL1_B =>
2318 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2319 when MUL1_Y =>
2320 f_to_multiply.data1 <= r.y;
2321 when others =>
2322 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2323 end case;
     -- Operand 2 mux: C, the inverse_est lookup value, P, or R.
2324 case msel_2 is
2325 when MUL2_C =>
2326 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2327 when MUL2_LUT =>
2328 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2329 when MUL2_P =>
2330 f_to_multiply.data2 <= r.p;
2331 when others =>
2332 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2333 end case;
     -- Addend mux; defaults to zero when no addend is selected.
2334 maddend := (others => '0');
2335 case msel_add is
2336 when MULADD_CONST =>
2337 -- addend is 2.0 or 1.5 in 16.112 format
2338 if r.is_sqrt = '0' then
2339 maddend(113) := '1'; -- 2.0
2340 else
2341 maddend(112 downto 111) := "11"; -- 1.5
2342 end if;
2343 when MULADD_A =>
2344 -- addend is A in 16.112 format
2345 maddend(121 downto 58) := r.a.mantissa;
2346 when MULADD_RS =>
2347 -- addend is concatenation of R and S in 16.112 format
2348 maddend := "000000" & r.r & r.s & "00";
2349 when others =>
2350 end case;
     -- msel_inv sends the bitwise-inverted addend and also asks the
     -- multiplier to invert its result (not_result).
2351 if msel_inv = '1' then
2352 f_to_multiply.addend <= not maddend;
2353 else
2354 f_to_multiply.addend <= maddend;
2355 end if;
2356 f_to_multiply.not_result <= msel_inv;
     -- Y is loaded from whatever is currently selected as operand 2.
2357 if set_y = '1' then
2358 v.y := f_to_multiply.data2;
2359 end if;
     -- P captures every valid multiplier result: the low 64 bits by
     -- default, or bits 119..56 when pshift is set.
2360 if multiply_to_f.valid = '1' then
2361 if pshift = '0' then
2362 v.p := multiply_to_f.result(63 downto 0);
2363 else
2364 v.p := multiply_to_f.result(119 downto 56);
2365 end if;
2366 end if;
2367
2368 -- Data path.
2369 -- This has A and B input multiplexers, an adder, a shifter,
2370 -- count-leading-zeroes logic, and a result mux.
     -- Mask generation: the mask shift is r.shift, offset by -29 when
     -- r.longmask is set.
2371 if r.longmask = '1' then
2372 mshift := r.shift + to_signed(-29, EXP_BITS);
2373 else
2374 mshift := r.shift;
2375 end if;
     -- Below -64 the mask is all ones, at zero or above it is all zeroes;
     -- in between it comes from right_mask on the low 6 bits of mshift.
2376 if mshift < to_signed(-64, EXP_BITS) then
2377 mask := (others => '1');
2378 elsif mshift >= to_signed(0, EXP_BITS) then
2379 mask := (others => '0');
2380 else
2381 mask := right_mask(unsigned(mshift(5 downto 0)));
2382 end if;
     -- A input mux, selected by the registered r.opsel_a.
2383 case r.opsel_a is
2384 when AIN_R =>
2385 in_a0 := r.r;
2386 when AIN_A =>
2387 in_a0 := r.a.mantissa;
2388 when AIN_B =>
2389 in_a0 := r.b.mantissa;
2390 when others =>
2391 in_a0 := r.c.mantissa;
2392 end case;
     -- Sticky bit: with set_x asserted, any A-input bit under the mask
     -- sets X (tested before the optional inversion below).
2393 if (or (mask and in_a0)) = '1' and set_x = '1' then
2394 v.x := '1';
2395 end if;
2396 if opsel_ainv = '1' then
2397 in_a0 := not in_a0;
2398 end if;
2399 in_a <= in_a0;
     -- B input mux: zero, R, the mask, or the sign-extended low bits of P.
2400 case opsel_b is
2401 when BIN_ZERO =>
2402 in_b0 := (others => '0');
2403 when BIN_R =>
2404 in_b0 := r.r;
2405 when BIN_MASK =>
2406 in_b0 := mask;
2407 when others =>
2408 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2409 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2410 end case;
2411 if opsel_binv = '1' then
2412 in_b0 := not in_b0;
2413 end if;
2414 in_b <= in_b0;
     -- Shifter: operates on R concatenated with S (shiftin is OR-ed into
     -- the top bit of S); shift counts outside -64..63 yield zero.
2415 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2416 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2417 std_ulogic_vector(r.shift(6 downto 0)));
2418 else
2419 shift_res := (others => '0');
2420 end if;
     -- Adder, with optional clearing of the bits under the mask.
2421 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2422 if opsel_mask = '1' then
2423 sum := sum and not mask;
2424 end if;
     -- Result mux: selects the next value of R from the adder, the
     -- shifter, the multiplier output (bits 121..58) or a miscellaneous
     -- constant/value chosen by misc_sel.
2425 case opsel_r is
2426 when RES_SUM =>
2427 result <= sum;
2428 when RES_SHIFT =>
2429 result <= shift_res;
2430 when RES_MULT =>
2431 result <= multiply_to_f.result(121 downto 58);
2432 when others =>
     -- RES_MISC.  misc_sel codes "1xxx" are the integer-conversion
     -- saturation values; code "0101" has no entry and falls into
     -- "others" (zero).
2433 case misc_sel is
2434 when "0000" =>
     -- FPSCR contents (masked) in the low word
2435 misc := x"00000000" & (r.fpscr and fpscr_mask);
2436 when "0001" =>
2437 -- generated QNaN mantissa
2438 misc := x"0020000000000000";
2439 when "0010" =>
2440 -- mantissa of max representable DP number
2441 misc := x"007ffffffffffffc";
2442 when "0011" =>
2443 -- mantissa of max representable SP number
2444 misc := x"007fffff80000000";
2445 when "0100" =>
2446 -- fmrgow result
2447 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2448 when "0110" =>
2449 -- fmrgew result
2450 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2451 when "0111" =>
     -- inverse_est lookup value placed below 10 leading zero bits
2452 misc := 10x"000" & inverse_est & 35x"000000000";
2453 when "1000" =>
2454 -- max positive result for fctiw[z]
2455 misc := x"000000007fffffff";
2456 when "1001" =>
2457 -- max negative result for fctiw[z]
2458 misc := x"ffffffff80000000";
2459 when "1010" =>
2460 -- max positive result for fctiwu[z]
2461 misc := x"00000000ffffffff";
2462 when "1011" =>
2463 -- max negative result for fctiwu[z]
2464 misc := x"0000000000000000";
2465 when "1100" =>
2466 -- max positive result for fctid[z]
2467 misc := x"7fffffffffffffff";
2468 when "1101" =>
2469 -- max negative result for fctid[z]
2470 misc := x"8000000000000000";
2471 when "1110" =>
2472 -- max positive result for fctidu[z]
2473 misc := x"ffffffffffffffff";
2474 when "1111" =>
2475 -- max negative result for fctidu[z]
2476 misc := x"0000000000000000";
2477 when others =>
2478 misc := x"0000000000000000";
2479 end case;
2480 result <= misc;
2481 end case;
     -- R is reloaded from the result signal on every pass.
2482 v.r := result;
     -- S register update, performed only when set_s is asserted; the
     -- source is chosen by opsel_s.
2483 if set_s = '1' then
2484 case opsel_s is
2485 when S_NEG =>
     -- inverted S plus the inverse of X as carry-in
2486 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2487 when S_MULT =>
     -- multiplier result bits just below those taken for R by RES_MULT
2488 v.s := multiply_to_f.result(57 downto 2);
2489 when S_SHIFT =>
     -- top 56 bits of the shifter output; any of the 8 bits dropped
     -- at the bottom set the sticky bit
2490 v.s := shift_res(63 downto 8);
2491 if shift_res(7 downto 0) /= x"00" then
2492 v.x := '1';
2493 end if;
2494 when others =>
2495 v.s := (others => '0');
2496 end case;
2497 end if;
2498
     -- Operand register updates: when the matching set_* flag is
     -- asserted, the operand takes new_exp as its exponent and the
     -- shifter output as its mantissa.
2499 if set_a = '1' then
2500 v.a.exponent := new_exp;
2501 v.a.mantissa := shift_res;
2502 end if;
2503 if set_b = '1' then
2504 v.b.exponent := new_exp;
2505 v.b.mantissa := shift_res;
2506 end if;
2507 if set_c = '1' then
2508 v.c.exponent := new_exp;
2509 v.c.mantissa := shift_res;
2510 end if;
2511
     -- Whenever R takes the shifter output, the result exponent tracks it
     -- by taking new_exp.  This runs after the state machine, so it
     -- overrides any v.result_exp assignment made in a state arm that
     -- also selects RES_SHIFT.
2512 if opsel_r = RES_SHIFT then
2513 v.result_exp := new_exp;
2514 end if;
2515
     -- Renormalization request: the next shift count is set to
     -- clz(R) - 9, overriding any v.shift assigned earlier in this pass.
2516 if renormalize = '1' then
2517 clz := count_left_zeroes(r.r);
2518 if renorm_sqrt = '1' then
2519 -- make denormalized value end up with even exponent
     -- (forces the low bit of the leading-zero count to 1)
2520 clz(0) := '1';
2521 end if;
2522 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2523 end if;
2524
     -- Result output: raw R for integer results, otherwise R packed with
     -- the registered sign, class and exponent by pack_dp.
2525 if r.int_result = '1' then
2526 fp_result <= r.r;
2527 else
2528 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2529 r.single_prec, r.quieten_nan);
2530 end if;
     -- FPSCR C..FU field update (uses the registered update_fprf, i.e.
     -- the request made on an earlier pass).  The last argument is true
     -- when bit 54 of R is set and the result was not marked denormal.
2531 if r.update_fprf = '1' then
2532 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2533 r.r(54) and not r.denorm);
2534 end if;
2535
     -- FPSCR summary bits, recomputed from the updated FPSCR value.
     -- VX is the OR of the individual invalid-operation cause bits.
2536 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2537 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
     -- FEX is set when any exception bit in VX..XX has its corresponding
     -- enable bit in VE..XE set.
2538 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2539 v.fpscr(FPSCR_VE downto FPSCR_XE));
     -- FX is set (never cleared here) when an exception bit is set that
     -- is not already recorded in r.old_exc, and only when update_fx was
     -- requested.
2540 if update_fx = '1' and
2541 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2542 v.fpscr(FPSCR_FX) := '1';
2543 end if;
     -- For record-form instructions (r.rc) the CR result is the FPSCR
     -- field FX..OX.
2544 if r.rc = '1' then
2545 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2546 end if;
2547
     -- Final control outputs.  An illegal instruction cancels completion,
     -- interrupt, writeback and busy, and forces the FSM back to IDLE.
2548 if illegal = '1' then
2549 v.instr_done := '0';
2550 v.do_intr := '0';
2551 v.writing_back := '0';
2552 v.busy := '0';
2553 v.state := IDLE;
2554 else
     -- An interrupt is requested when the instruction completes with FEX
     -- set and r.fe_mode is set.
2555 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
     -- Busy is asserted while the FSM is away from IDLE or an interrupt
     -- is being raised; it is not cleared in this branch.
2556 if v.state /= IDLE or v.do_intr = '1' then
2557 v.busy := '1';
2558 end if;
2559 end if;
2560
2561 rin <= v;
2562 e_out.illegal <= illegal;
2563 end process;
2564
2565 end architecture behaviour;