core_debug: Stop logging 256 cycles after trigger
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Floating-point unit: external interface.
entity fpu is
    port (
        -- Clock; the registers in this unit are updated on its rising edge
        clk   : in  std_ulogic;
        -- Reset, sampled on the rising edge of clk
        rst   : in  std_ulogic;

        -- Incoming instruction and operand values
        e_in  : in  Execute1toFPUType;
        -- Busy / exception / interrupt status driven by this unit
        e_out : out FPUToExecute1Type;

        -- Register and CR write-back data driven by this unit
        w_out : out FPUToWritebackType
        );
end entity fpu;
25
26 architecture behaviour of fpu is
-- Classification of a floating-point operand or result
type fp_number_class is (ZERO, FINITE, INFINITY, NAN);

-- Width in bits of the internal signed exponent
constant EXP_BITS : natural := 13;

-- A floating-point value split into its components (as produced by decode_dp)
type fpu_reg_type is record
    class    : fp_number_class;
    negative : std_ulogic;                         -- sign bit of the value
    exponent : signed(EXP_BITS-1 downto 0);        -- unbiased
    mantissa : std_ulogic_vector(63 downto 0);     -- 10.54 format
end record;
37
-- States of the FPU state machine.
-- The order of the literals is kept exactly as declared originally.
type state_t is (
    IDLE,
    -- DO_* states: the ones visible here are entered directly from IDLE
    -- when an instruction is accepted (presumably all of them are)
    DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
    DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
    DO_FCFID, DO_FCTI,
    DO_FRSP, DO_FRI,
    DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
    DO_FRE, DO_FRSQRTE,
    DO_FSEL,
    -- Follow-on states of the individual operations
    FRI_1,
    ADD_1, ADD_SHIFT, ADD_2, ADD_3,
    CMP_1, CMP_2,
    MULT_1,
    FMADD_1, FMADD_2, FMADD_3,
    FMADD_4, FMADD_5, FMADD_6,
    LOOKUP,
    DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
    FRE_1,
    RSQRT_1,
    FTDIV_1,
    SQRT_1, SQRT_2, SQRT_3, SQRT_4,
    SQRT_5, SQRT_6, SQRT_7, SQRT_8,
    SQRT_9, SQRT_10, SQRT_11, SQRT_12,
    INT_SHIFT, INT_ROUND, INT_ISHIFT,
    INT_FINAL, INT_CHECK, INT_OFLOW,
    -- Result completion states
    FINISH, NORMALIZE,
    ROUND_UFLOW, ROUND_OFLOW,
    ROUNDING, ROUNDING_2, ROUNDING_3,
    DENORM,
    RENORM_A, RENORM_A2,
    RENORM_B, RENORM_B2,
    RENORM_C, RENORM_C2,
    NAN_RESULT, EXC_RESULT
    );
70
-- Complete registered state of the FPU (signals r and rin).
-- Field order is unchanged from the original declaration.
type reg_type is record
    -- Control and handshake
    state        : state_t;
    busy         : std_ulogic;                      -- drives e_out.busy
    instr_done   : std_ulogic;                      -- gates w_out.valid
    do_intr      : std_ulogic;                      -- drives e_out.interrupt
    -- Fields captured from e_in when an instruction arrives
    op           : insn_type_t;
    insn         : std_ulogic_vector(31 downto 0);
    dest_fpr     : gspr_index_t;
    fe_mode      : std_ulogic;                      -- OR of e_in.fe_mode
    rc           : std_ulogic;
    is_cmp       : std_ulogic;                      -- from e_in.out_cr
    single_prec  : std_ulogic;                      -- from e_in.single
    fpscr        : std_ulogic_vector(31 downto 0);
    -- Decoded operands
    a            : fpu_reg_type;
    b            : fpu_reg_type;
    c            : fpu_reg_type;
    -- Working registers
    r            : std_ulogic_vector(63 downto 0);  -- 10.54 format
    s            : std_ulogic_vector(55 downto 0);  -- extended fraction
    x            : std_ulogic;
    p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
    y            : std_ulogic_vector(63 downto 0);  -- 8.56 format
    -- Result being assembled
    result_sign  : std_ulogic;
    result_class : fp_number_class;
    result_exp   : signed(EXP_BITS-1 downto 0);
    shift        : signed(EXP_BITS-1 downto 0);
    writing_back : std_ulogic;                      -- drives w_out.write_enable
    int_result   : std_ulogic;
    cr_result    : std_ulogic_vector(3 downto 0);   -- replicated into w_out.write_cr_data
    cr_mask      : std_ulogic_vector(7 downto 0);   -- drives w_out.write_cr_mask
    old_exc      : std_ulogic_vector(4 downto 0);
    update_fprf  : std_ulogic;
    quieten_nan  : std_ulogic;
    tiny         : std_ulogic;
    denorm       : std_ulogic;
    round_mode   : std_ulogic_vector(2 downto 0);   -- '0' & FPSCR RN bits at capture
    -- Per-operation flags
    is_subtract  : std_ulogic;
    exp_cmp      : std_ulogic;                      -- A exponent > B exponent
    madd_cmp     : std_ulogic;                      -- A exp + C exp + 1 >= B exp
    add_bsmall   : std_ulogic;
    is_multiply  : std_ulogic;
    is_sqrt      : std_ulogic;                      -- selects the 1/sqrt(x) table blocks
    first        : std_ulogic;
    count        : unsigned(1 downto 0);
    doing_ftdiv  : std_ulogic_vector(1 downto 0);   -- adjusts max_exp / min_exp limits
    opsel_a      : std_ulogic_vector(1 downto 0);   -- one of the AIN_* values
    use_a        : std_ulogic;
    use_b        : std_ulogic;
    use_c        : std_ulogic;
    invalid      : std_ulogic;
    negate       : std_ulogic;
    longmask     : std_ulogic;                      -- loaded from e_in.single
end record;
123
-- Storage type for the inverse estimate table: 1024 entries of 18 bits
type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);

-- Current state (r) and next state (rin) of the FPU registers
signal r, rin : reg_type;

-- Value driven onto w_out.write_data
signal fp_result     : std_ulogic_vector(63 downto 0);

-- Datapath operand / result selectors (see the opsel constants)
signal opsel_b       : std_ulogic_vector(1 downto 0);
signal opsel_r       : std_ulogic_vector(1 downto 0);
signal opsel_s       : std_ulogic_vector(1 downto 0);
signal opsel_ainv    : std_ulogic;
signal opsel_mask    : std_ulogic;
signal opsel_binv    : std_ulogic;

-- Datapath values
signal in_a          : std_ulogic_vector(63 downto 0);
signal in_b          : std_ulogic_vector(63 downto 0);
signal result        : std_ulogic_vector(63 downto 0);
signal carry_in      : std_ulogic;
signal lost_bits     : std_ulogic;
signal r_hi_nz       : std_ulogic;     -- OR of r.r(55 downto 31)
signal r_lo_nz       : std_ulogic;     -- OR of r.r(30 downto 2)
signal s_nz          : std_ulogic;     -- OR of r.s
signal misc_sel      : std_ulogic_vector(3 downto 0);

-- Multiplier interface and its operand selectors (see the msel constants)
signal f_to_multiply : MultiplyInputType;
signal multiply_to_f : MultiplyOutputType;
signal msel_1        : std_ulogic_vector(1 downto 0);
signal msel_2        : std_ulogic_vector(1 downto 0);
signal msel_add      : std_ulogic_vector(1 downto 0);
signal msel_inv      : std_ulogic;

-- Registered lookup table output with the implied leading 1 prepended
signal inverse_est   : std_ulogic_vector(18 downto 0);
151
-- opsel values

-- A-input selection (r.opsel_a)
constant AIN_R        : std_ulogic_vector(1 downto 0) := "00";
constant AIN_A        : std_ulogic_vector(1 downto 0) := "01";
constant AIN_B        : std_ulogic_vector(1 downto 0) := "10";
constant AIN_C        : std_ulogic_vector(1 downto 0) := "11";

-- B-input selection (opsel_b)
constant BIN_ZERO     : std_ulogic_vector(1 downto 0) := "00";
constant BIN_R        : std_ulogic_vector(1 downto 0) := "01";
constant BIN_RND      : std_ulogic_vector(1 downto 0) := "10";
constant BIN_PS6      : std_ulogic_vector(1 downto 0) := "11";

-- Result selection (opsel_r)
constant RES_SUM      : std_ulogic_vector(1 downto 0) := "00";
constant RES_SHIFT    : std_ulogic_vector(1 downto 0) := "01";
constant RES_MULT     : std_ulogic_vector(1 downto 0) := "10";
constant RES_MISC     : std_ulogic_vector(1 downto 0) := "11";

-- S register selection (opsel_s)
constant S_ZERO       : std_ulogic_vector(1 downto 0) := "00";
constant S_NEG        : std_ulogic_vector(1 downto 0) := "01";
constant S_SHIFT      : std_ulogic_vector(1 downto 0) := "10";
constant S_MULT       : std_ulogic_vector(1 downto 0) := "11";

-- msel values

-- First multiplier operand (msel_1)
constant MUL1_A       : std_ulogic_vector(1 downto 0) := "00";
constant MUL1_B       : std_ulogic_vector(1 downto 0) := "01";
constant MUL1_Y       : std_ulogic_vector(1 downto 0) := "10";
constant MUL1_R       : std_ulogic_vector(1 downto 0) := "11";

-- Second multiplier operand (msel_2)
constant MUL2_C       : std_ulogic_vector(1 downto 0) := "00";
constant MUL2_LUT     : std_ulogic_vector(1 downto 0) := "01";
constant MUL2_P       : std_ulogic_vector(1 downto 0) := "10";
constant MUL2_R       : std_ulogic_vector(1 downto 0) := "11";

-- Multiplier addend (msel_add)
constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";
constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
188
-- Inverse lookup table, addressed by a 10-bit index: two block-select
-- bits followed by the top 8 fraction bits of the operand.
-- The first 256 entries (block 0) are the reciprocal (1/x) lookup table,
-- and the remaining 768 entries (blocks 1 to 3) are the reciprocal
-- square root table.
-- Output range is [0.5, 1) in 0.19 format, though the top
-- bit isn't stored since it is always 1.
-- Each output value is the inverse of the center of the input
-- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
-- entry 1 is 1 / (1 + 3/512), etc.
197 signal inverse_table : lookup_table := (
198 -- 1/x lookup table
199 -- Unit bit is assumed to be 1, so input range is [1, 2)
200 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
201 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
202 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
203 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
204 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
205 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
206 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
207 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
208 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
209 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
210 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
211 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
212 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
213 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
214 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
215 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
216 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
217 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
218 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
219 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
220 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
221 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
222 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
223 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
224 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
225 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
226 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
227 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
228 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
229 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
230 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
231 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
232 -- 1/sqrt(x) lookup table
233 -- Input is in the range [1, 4), i.e. two bits to the left of the
234 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
235 -- 1.0 ... 1.9999
236 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
237 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
238 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
239 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
240 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
241 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
242 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
243 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
244 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
245 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
246 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
247 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
248 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
249 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
250 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
251 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
252 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
253 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
254 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
255 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
256 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
257 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
258 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
259 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
260 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
261 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
262 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
263 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
264 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
265 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
266 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
267 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
268 -- 2.0 ... 2.9999
269 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
270 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
271 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
272 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
273 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
274 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
275 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
276 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
277 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
278 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
279 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
280 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
281 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
282 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
283 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
284 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
285 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
286 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
287 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
288 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
289 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
290 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
291 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
292 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
293 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
294 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
295 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
296 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
297 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
298 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
299 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
300 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
301 -- 3.0 ... 3.9999
302 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
303 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
304 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
305 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
306 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
307 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
308 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
309 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
310 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
311 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
312 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
313 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
314 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
315 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
316 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
317 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
318 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
319 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
320 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
321 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
322 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
323 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
324 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
325 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
326 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
327 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
328 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
329 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
330 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
331 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
332 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
333 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
334 );
335
-- Combined left/right shifter: 120-bit input, 64-bit output.
-- Conceptually shifts inp left by "shift" bits and returns the upper
-- 64 bits of the result.  "shift" is a 7-bit two's complement number
-- in the range -64..63; negative values shift right, filling with
-- zeroes from the left.
-- The shift is done in three stages, each narrowing the window:
-- 32-bit steps, then 8-bit steps, then single-bit steps.
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    variable coarse : std_ulogic_vector(94 downto 0);
    variable medium : std_ulogic_vector(70 downto 0);
    variable fine   : std_ulogic_vector(63 downto 0);
begin
    -- Stage 1: shift(6 downto 5) picks a 95-bit window in steps of 32.
    -- Anything other than "00", "01" or "10" takes the final branch.
    if shift(6 downto 5) = "00" then
        coarse := inp(119 downto 25);
    elsif shift(6 downto 5) = "01" then
        coarse := inp(87 downto 0) & "0000000";
    elsif shift(6 downto 5) = "10" then
        -- right shift by 33..64: 64 zero bits above the top 31 input bits
        coarse := (others => '0');
        coarse(30 downto 0) := inp(119 downto 89);
    else
        -- right shift by 1..32: 32 zero bits above the top 63 input bits
        coarse := (others => '0');
        coarse(62 downto 0) := inp(119 downto 57);
    end if;

    -- Stage 2: shift(4 downto 3) picks a 71-bit window in steps of 8.
    -- The default covers "11" and any non-matching value.
    medium := coarse(70 downto 0);
    for step in 0 to 2 loop
        if shift(4 downto 3) = std_ulogic_vector(to_unsigned(step, 2)) then
            medium := coarse(94 - 8 * step downto 24 - 8 * step);
        end if;
    end loop;

    -- Stage 3: shift(2 downto 0) picks the final 64 bits in steps of 1.
    -- The default covers "111" and any non-matching value.
    fine := medium(63 downto 0);
    for step in 0 to 6 loop
        if shift(2 downto 0) = std_ulogic_vector(to_unsigned(step, 3)) then
            fine := medium(70 - step downto 7 - step);
        end if;
    end loop;

    return fine;
end;
388
-- Generate a mask with 0-bits on the left and 1-bits on the right which
-- selects the bits that will be lost in doing a right shift.  The shift
-- parameter is the bottom 6 bits of a negative shift count,
-- indicating a right shift.
-- Bit b of the mask is set when 63 - b >= shift, so shift = 0 gives
-- all ones and shift = 63 sets only bit 0.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    for bitpos in mask'range loop
        if (63 - bitpos) >= shift then
            mask(bitpos) := '1';
        end if;
    end loop;
    return mask;
end;
404
-- Split a DP floating-point number into components and work out its class.
-- If is_int = 1, the input is considered an integer: the raw 64 bits
-- become the mantissa, the exponent is zero, and the class is ZERO only
-- when every bit of the input is zero.
-- For floating-point inputs the exponent is unbiased (denormals and
-- zeroes get -1022) and the mantissa is placed in 10.54 format with the
-- implied unit bit made explicit.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable dec          : fpu_reg_type;
    variable exp_nonzero  : std_ulogic;
    variable exp_all_ones : std_ulogic;
    variable frac_nonzero : std_ulogic;
    variable kind         : std_ulogic_vector(2 downto 0);
begin
    exp_nonzero  := or (fpr(62 downto 52));
    exp_all_ones := and (fpr(62 downto 52));
    frac_nonzero := or (fpr(51 downto 0));

    dec.negative := fpr(63);

    if is_int = '0' then
        if exp_nonzero = '0' then
            -- zero or denormalized: use the minimum exponent
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;
        -- the unit bit is 1 only when the exponent field is non-zero
        dec.mantissa := "000000000" & exp_nonzero & fpr(51 downto 0) & "00";

        kind := exp_all_ones & exp_nonzero & frac_nonzero;
        if kind = "000" then
            dec.class := ZERO;
        elsif kind = "110" then
            dec.class := INFINITY;
        elsif kind = "001" or kind = "010" or kind = "011" then
            -- "001" is a denormalized number
            dec.class := FINITE;
        else
            dec.class := NAN;
        end if;
    else
        dec.mantissa := fpr;
        dec.exponent := (others => '0');
        if (fpr(63) or exp_nonzero or frac_nonzero) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
    end if;

    return dec;
end;
444
-- Construct a DP floating-point result from components.
--   sign         sign bit of the result
--   class        ZERO, FINITE, INFINITY or NAN
--   exp          unbiased exponent; the bias of 1023 is added here
--   mantissa     bit 54 is tested as the unit bit, bits 53..2 supply the fraction
--   single_prec  when not '0', the low 29 fraction bits are left as zero
--   quieten_nan  for NaNs, ORed into the top fraction bit
-- A FINITE value whose unit bit is not set is packed with a zero
-- exponent field, i.e. as a denormalized number.
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    constant EXP_ALL_ONES : std_ulogic_vector(10 downto 0) := "11111111111";
    variable packed       : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    packed(63) := sign;

    -- Exponent field and upper fraction bits
    if class = FINITE then
        if mantissa(54) = '1' then
            -- normalized number
            packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
        end if;
        packed(51 downto 29) := mantissa(53 downto 31);
    elsif class = INFINITY then
        packed(62 downto 52) := EXP_ALL_ONES;
    elsif class = NAN then
        packed(62 downto 52) := EXP_ALL_ONES;
        packed(51) := quieten_nan or mantissa(53);
        packed(50 downto 29) := mantissa(52 downto 31);
    end if;

    -- Lower fraction bits, only for double-precision FINITE and NAN results
    if (class = FINITE or class = NAN) and single_prec = '0' then
        packed(28 downto 0) := mantissa(30 downto 2);
    end if;

    return packed;
end;
476
-- Determine whether to increment when rounding.
-- Returns rounding_inc & inexact (bit 1 = increment, bit 0 = inexact).
-- Assumes x includes the bottom 29 bits of the mantissa already
-- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
-- rn(1 downto 0) selects the mode: "00" round to nearest, "01" round
-- towards zero, anything else round towards +/- infinity (incrementing
-- when rn(0) equals the sign).  A '0' in rn(2) makes an exact tie round
-- to even; otherwise a tie takes the guard bit.
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable below_lsb : std_ulogic_vector(2 downto 0);   -- guard, round, sticky
    variable unit_lsb  : std_ulogic;
    variable verdict   : std_ulogic_vector(1 downto 0) := "00";
begin
    -- Pick the bits just below the result's least significant bit
    if single_prec = '0' then
        unit_lsb  := mantissa(2);
        below_lsb := mantissa(1 downto 0) & x;
    else
        unit_lsb  := mantissa(31);
        below_lsb := mantissa(30 downto 29) & x;
    end if;

    -- Inexact whenever any discarded bit is set
    verdict(0) := below_lsb(2) or below_lsb(1) or below_lsb(0);

    if rn(1 downto 0) = "00" then
        -- round to nearest
        if below_lsb = "100" and rn(2) = '0' then
            verdict(1) := unit_lsb;        -- tie, round to even
        else
            verdict(1) := below_lsb(2);
        end if;
    elsif rn(1 downto 0) /= "01" then
        -- round towards +/- inf ("01", round towards zero, never increments)
        if rn(0) = sign then
            -- round towards greater magnitude
            verdict(1) := verdict(0);
        end if;
    end if;

    return verdict;
end;
514
-- Determine result flags to write into the FPSCR.
-- Returns a 5-bit vector built from the result class:
--   ZERO      sign & "0010"
--   FINITE    (not unitbit) & sign & (not sign) & "00"
--   INFINITY  '0' & sign & (not sign) & "01"
--   NAN       "10001"
-- NOTE(review): presumably this is the FPSCR FPRF field -- confirm
-- against the code that consumes the return value.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
    -- Ascending range, matching the bounds of the concatenation results
    -- and string literal that were returned directly before
    variable flags : std_ulogic_vector(0 to 4);
begin
    if class = ZERO then
        flags := sign & "0010";
    elsif class = FINITE then
        flags := (not unitbit) & sign & (not sign) & "00";
    elsif class = INFINITY then
        flags := '0' & sign & (not sign) & "01";
    else
        -- NAN
        flags := "10001";
    end if;
    return flags;
end;
530
531 begin
-- Multiplier instance, fed from f_to_multiply and returning its
-- output on multiply_to_f
fpu_multiply_0: entity work.multiply
    port map (
        clk   => clk,
        m_in  => f_to_multiply,
        m_out => multiply_to_f
        );
538
-- State register.  On each rising clock edge, either load the next
-- state computed combinationally (rin) or, under reset, clear the
-- control fields.  Only the fields listed are reset; the rest of r
-- keeps its value.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            -- A new instruction may only be presented while in IDLE
            assert r.state = IDLE or e_in.valid /= '1' severity failure;
            r <= rin;
        else
            r.writing_back <= '0';
            r.fpscr        <= (others => '0');
            r.do_intr      <= '0';
            r.instr_done   <= '0';
            r.busy         <= '0';
            r.state        <= IDLE;
        end if;
    end if;
end process;
555
-- Synchronous read of the inverse lookup table.
-- The low 8 index bits are B's mantissa bits 53..46.  The top 2 index
-- bits are B's mantissa bits 55..54 for square-root operations, and
-- "00" otherwise (selecting the 1/x block).  The stored 18 bits are
-- extended with the implied leading 1 to form inverse_est.
lut_access: process(clk)
    variable block_sel : std_ulogic_vector(1 downto 0);
    variable index     : std_ulogic_vector(9 downto 0);
begin
    if rising_edge(clk) then
        block_sel := "00";
        if r.is_sqrt = '1' then
            block_sel := r.b.mantissa(55 downto 54);
        end if;
        index(9 downto 8) := block_sel;
        index(7 downto 0) := r.b.mantissa(53 downto 46);
        inverse_est <= '1' & inverse_table(to_integer(unsigned(index)));
    end if;
end process;
571
-- Status outputs to execute1
e_out.interrupt <= r.do_intr;
e_out.exception <= r.fpscr(FPSCR_FEX);
e_out.busy      <= r.busy;

-- Write-back outputs: completion is not signalled as valid when an
-- interrupt is being raised
w_out.valid        <= r.instr_done and not r.do_intr;
w_out.write_reg    <= r.dest_fpr;
w_out.write_data   <= fp_result;
w_out.write_enable <= r.writing_back;

-- CR write-back: the same 4-bit result is replicated into all eight
-- fields, and cr_mask selects which of them is written
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_data   <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result & r.cr_result & r.cr_result;
584
585 fpu_1: process(all)
586 variable v : reg_type;
587 variable adec : fpu_reg_type;
588 variable bdec : fpu_reg_type;
589 variable cdec : fpu_reg_type;
590 variable fpscr_mask : std_ulogic_vector(31 downto 0);
591 variable illegal : std_ulogic;
592 variable j, k : integer;
593 variable flm : std_ulogic_vector(7 downto 0);
594 variable int_input : std_ulogic;
595 variable mask : std_ulogic_vector(63 downto 0);
596 variable in_a0 : std_ulogic_vector(63 downto 0);
597 variable in_b0 : std_ulogic_vector(63 downto 0);
598 variable misc : std_ulogic_vector(63 downto 0);
599 variable shift_res : std_ulogic_vector(63 downto 0);
600 variable round : std_ulogic_vector(1 downto 0);
601 variable update_fx : std_ulogic;
602 variable arith_done : std_ulogic;
603 variable invalid : std_ulogic;
604 variable zero_divide : std_ulogic;
605 variable mant_nz : std_ulogic;
606 variable min_exp : signed(EXP_BITS-1 downto 0);
607 variable max_exp : signed(EXP_BITS-1 downto 0);
608 variable bias_exp : signed(EXP_BITS-1 downto 0);
609 variable new_exp : signed(EXP_BITS-1 downto 0);
610 variable exp_tiny : std_ulogic;
611 variable exp_huge : std_ulogic;
612 variable renormalize : std_ulogic;
613 variable clz : std_ulogic_vector(5 downto 0);
614 variable set_x : std_ulogic;
615 variable mshift : signed(EXP_BITS-1 downto 0);
616 variable need_check : std_ulogic;
617 variable msb : std_ulogic;
618 variable is_add : std_ulogic;
619 variable set_a : std_ulogic;
620 variable set_b : std_ulogic;
621 variable set_c : std_ulogic;
622 variable set_y : std_ulogic;
623 variable set_s : std_ulogic;
624 variable qnan_result : std_ulogic;
625 variable px_nz : std_ulogic;
626 variable pcmpb_eq : std_ulogic;
627 variable pcmpb_lt : std_ulogic;
628 variable pshift : std_ulogic;
629 variable renorm_sqrt : std_ulogic;
630 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
631 variable shiftin : std_ulogic;
632 variable mulexp : signed(EXP_BITS-1 downto 0);
633 variable maddend : std_ulogic_vector(127 downto 0);
634 variable sum : std_ulogic_vector(63 downto 0);
635 variable round_inc : std_ulogic_vector(63 downto 0);
636 begin
637 v := r;
638 illegal := '0';
639 v.busy := '0';
640 int_input := '0';
641
642 -- capture incoming instruction
643 if e_in.valid = '1' then
644 v.insn := e_in.insn;
645 v.op := e_in.op;
646 v.fe_mode := or (e_in.fe_mode);
647 v.dest_fpr := e_in.frt;
648 v.single_prec := e_in.single;
649 v.longmask := e_in.single;
650 v.int_result := '0';
651 v.rc := e_in.rc;
652 v.is_cmp := e_in.out_cr;
653 if e_in.out_cr = '0' then
654 v.cr_mask := num_to_fxm(1);
655 else
656 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
657 end if;
658 int_input := '0';
659 if e_in.op = OP_FPOP_I then
660 int_input := '1';
661 end if;
662 v.quieten_nan := '1';
663 v.tiny := '0';
664 v.denorm := '0';
665 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
666 v.is_subtract := '0';
667 v.is_multiply := '0';
668 v.is_sqrt := '0';
669 v.add_bsmall := '0';
670 v.doing_ftdiv := "00";
671
672 adec := decode_dp(e_in.fra, int_input);
673 bdec := decode_dp(e_in.frb, int_input);
674 cdec := decode_dp(e_in.frc, int_input);
675 v.a := adec;
676 v.b := bdec;
677 v.c := cdec;
678
679 v.exp_cmp := '0';
680 if adec.exponent > bdec.exponent then
681 v.exp_cmp := '1';
682 end if;
683 v.madd_cmp := '0';
684 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
685 v.madd_cmp := '1';
686 end if;
687 end if;
688
689 r_hi_nz <= or (r.r(55 downto 31));
690 r_lo_nz <= or (r.r(30 downto 2));
691 s_nz <= or (r.s);
692
693 if r.single_prec = '0' then
694 if r.doing_ftdiv(1) = '0' then
695 max_exp := to_signed(1023, EXP_BITS);
696 else
697 max_exp := to_signed(1020, EXP_BITS);
698 end if;
699 if r.doing_ftdiv(0) = '0' then
700 min_exp := to_signed(-1022, EXP_BITS);
701 else
702 min_exp := to_signed(-1021, EXP_BITS);
703 end if;
704 bias_exp := to_signed(1536, EXP_BITS);
705 else
706 max_exp := to_signed(127, EXP_BITS);
707 min_exp := to_signed(-126, EXP_BITS);
708 bias_exp := to_signed(192, EXP_BITS);
709 end if;
710 new_exp := r.result_exp - r.shift;
711 exp_tiny := '0';
712 exp_huge := '0';
713 if new_exp < min_exp then
714 exp_tiny := '1';
715 end if;
716 if new_exp > max_exp then
717 exp_huge := '1';
718 end if;
719
720 -- Compare P with zero and with B
721 px_nz := or (r.p(57 downto 4));
722 pcmpb_eq := '0';
723 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
724 pcmpb_eq := '1';
725 end if;
726 pcmpb_lt := '0';
727 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
728 pcmpb_lt := '1';
729 end if;
730
731 v.writing_back := '0';
732 v.instr_done := '0';
733 v.update_fprf := '0';
734 v.shift := to_signed(0, EXP_BITS);
735 v.first := '0';
736 v.opsel_a := AIN_R;
737 opsel_ainv <= '0';
738 opsel_mask <= '0';
739 opsel_b <= BIN_ZERO;
740 opsel_binv <= '0';
741 opsel_r <= RES_SUM;
742 opsel_s <= S_ZERO;
743 carry_in <= '0';
744 misc_sel <= "0000";
745 fpscr_mask := (others => '1');
746 update_fx := '0';
747 arith_done := '0';
748 invalid := '0';
749 zero_divide := '0';
750 renormalize := '0';
751 set_x := '0';
752 qnan_result := '0';
753 set_a := '0';
754 set_b := '0';
755 set_c := '0';
756 set_s := '0';
757 f_to_multiply.is_32bit <= '0';
758 f_to_multiply.valid <= '0';
759 msel_1 <= MUL1_A;
760 msel_2 <= MUL2_C;
761 msel_add <= MULADD_ZERO;
762 msel_inv <= '0';
763 set_y := '0';
764 pshift := '0';
765 renorm_sqrt := '0';
766 shiftin := '0';
767 case r.state is
768 when IDLE =>
769 v.use_a := '0';
770 v.use_b := '0';
771 v.use_c := '0';
772 v.invalid := '0';
773 v.negate := '0';
774 if e_in.valid = '1' then
775 case e_in.insn(5 downto 1) is
776 when "00000" =>
777 if e_in.insn(8) = '1' then
778 if e_in.insn(6) = '0' then
779 v.state := DO_FTDIV;
780 else
781 v.state := DO_FTSQRT;
782 end if;
783 elsif e_in.insn(7) = '1' then
784 v.state := DO_MCRFS;
785 else
786 v.opsel_a := AIN_B;
787 v.state := DO_FCMP;
788 end if;
789 when "00110" =>
790 if e_in.insn(10) = '0' then
791 if e_in.insn(8) = '0' then
792 v.state := DO_MTFSB;
793 else
794 v.state := DO_MTFSFI;
795 end if;
796 else
797 v.state := DO_FMRG;
798 end if;
799 when "00111" =>
800 if e_in.insn(8) = '0' then
801 v.state := DO_MFFS;
802 else
803 v.state := DO_MTFSF;
804 end if;
805 when "01000" =>
806 v.opsel_a := AIN_B;
807 if e_in.insn(9 downto 8) /= "11" then
808 v.state := DO_FMR;
809 else
810 v.state := DO_FRI;
811 end if;
812 when "01100" =>
813 v.opsel_a := AIN_B;
814 v.state := DO_FRSP;
815 when "01110" =>
816 v.opsel_a := AIN_B;
817 if int_input = '1' then
818 -- fcfid[u][s]
819 v.state := DO_FCFID;
820 else
821 v.state := DO_FCTI;
822 end if;
823 when "01111" =>
824 v.round_mode := "001";
825 v.opsel_a := AIN_B;
826 v.state := DO_FCTI;
827 when "10010" =>
828 v.opsel_a := AIN_A;
829 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
830 v.opsel_a := AIN_B;
831 end if;
832 v.state := DO_FDIV;
833 when "10100" | "10101" =>
834 v.opsel_a := AIN_A;
835 v.state := DO_FADD;
836 when "10110" =>
837 v.is_sqrt := '1';
838 v.opsel_a := AIN_B;
839 v.state := DO_FSQRT;
840 when "10111" =>
841 v.state := DO_FSEL;
842 when "11000" =>
843 v.opsel_a := AIN_B;
844 v.state := DO_FRE;
845 when "11001" =>
846 v.is_multiply := '1';
847 v.opsel_a := AIN_A;
848 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
849 v.opsel_a := AIN_C;
850 end if;
851 v.state := DO_FMUL;
852 when "11010" =>
853 v.is_sqrt := '1';
854 v.opsel_a := AIN_B;
855 v.state := DO_FRSQRTE;
856 when "11100" | "11101" | "11110" | "11111" =>
857 if v.a.mantissa(54) = '0' then
858 v.opsel_a := AIN_A;
859 elsif v.c.mantissa(54) = '0' then
860 v.opsel_a := AIN_C;
861 else
862 v.opsel_a := AIN_B;
863 end if;
864 v.state := DO_FMADD;
865 when others =>
866 illegal := '1';
867 end case;
868 end if;
869 v.x := '0';
870 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
871 set_s := '1';
872
873 when DO_MCRFS => -- mcrfs: copy one 4-bit FPSCR field to the CR result and clear the copied bits
874 j := to_integer(unsigned(insn_bfa(r.insn))); -- source field number taken from the BFA field of the instruction
875 for i in 0 to 7 loop -- the loop plus "i = j" test selects the field with constant slice bounds
876 if i = j then
877 k := (7 - i) * 4; -- field 0 is the most significant nibble of the 32-bit FPSCR
878 v.cr_result := r.fpscr(k + 3 downto k);
879 fpscr_mask(k + 3 downto k) := "0000"; -- request clearing of the bits just copied
880 end if;
881 end loop;
882 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF"); -- bits set in the constant are never cleared, whatever field was selected
883 v.instr_done := '1'; -- single-cycle instruction
884 v.state := IDLE;
885
886 when DO_FTDIV =>
885
886 when DO_FTDIV =>
887 v.instr_done := '1'; -- default: finish in this cycle (overridden in the else branch below)
888 v.state := IDLE;
889 v.cr_result := "0000";
890 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
891 (r.b.class = FINITE and r.b.mantissa(53) = '0') then
892 v.cr_result(2) := '1'; -- flag in CR bit 2: A infinite, or B zero/infinite/finite with mantissa bit 53 clear
893 end if;
894 if r.a.class = NAN or r.a.class = INFINITY or
895 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
896 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
897 v.cr_result(1) := '1'; -- flag in CR bit 1 can be decided from the operand classes / A exponent alone
898 else
899 v.doing_ftdiv := "11"; -- selects the ftdiv-specific max_exp (1020) and min_exp (-1021) limits
900 v.first := '1';
901 v.state := FTDIV_1; -- CR bit 1 is resolved in FTDIV_1 from exp_tiny / exp_huge
902 v.instr_done := '0'; -- not finished yet, cancel the default above
903 end if;
904
905 when DO_FTSQRT => -- ftsqrt: single-cycle test of operand B, result goes to v.cr_result only
906 v.instr_done := '1';
907 v.state := IDLE;
908 v.cr_result := "0000"; -- both flags start clear and are only ever set below
909 if r.b.class = ZERO or r.b.class = INFINITY or
910 (r.b.class = FINITE and r.b.mantissa(53) = '0') then -- NOTE(review): other arms test mantissa(54) for denormals; confirm bit 53 is intended
911 v.cr_result(2) := '1'; -- fg_flag: B is zero, infinite or (presumably) denormalized
912 end if;
913 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
914 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
915 v.cr_result(1) := '1'; -- fe_flag: must be SET here (was '0', a no-op after the "0000" reset); mirrors DO_FTDIV
916 end if;
917
918 when DO_FCMP =>
919 -- fcmp[uo]
920 -- r.opsel_a = AIN_B
921 v.instr_done := '1';
922 v.state := IDLE;
923 update_fx := '1';
924 v.result_exp := r.b.exponent;
925 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
926 (r.b.class = NAN and r.b.mantissa(53) = '0') then
927 -- Signalling NAN
928 v.fpscr(FPSCR_VXSNAN) := '1';
929 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
930 v.fpscr(FPSCR_VXVC) := '1';
931 end if;
932 invalid := '1';
933 v.cr_result := "0001"; -- unordered
934 elsif r.a.class = NAN or r.b.class = NAN then
935 if r.insn(6) = '1' then
936 -- fcmpo
937 v.fpscr(FPSCR_VXVC) := '1';
938 invalid := '1';
939 end if;
940 v.cr_result := "0001"; -- unordered
941 elsif r.a.class = ZERO and r.b.class = ZERO then
942 v.cr_result := "0010"; -- equal
943 elsif r.a.negative /= r.b.negative then
944 v.cr_result := r.a.negative & r.b.negative & "00";
945 elsif r.a.class = ZERO then
946 -- A and B are the same sign from here down
947 v.cr_result := not r.b.negative & r.b.negative & "00";
948 elsif r.a.class = INFINITY then
949 if r.b.class = INFINITY then
950 v.cr_result := "0010";
951 else
952 v.cr_result := r.a.negative & not r.a.negative & "00";
953 end if;
954 elsif r.b.class = ZERO then
955 -- A is finite from here down
956 v.cr_result := r.a.negative & not r.a.negative & "00";
957 elsif r.b.class = INFINITY then
958 v.cr_result := not r.b.negative & r.b.negative & "00";
959 elsif r.exp_cmp = '1' then
960 -- A and B are both finite from here down
961 v.cr_result := r.a.negative & not r.a.negative & "00";
962 elsif r.a.exponent /= r.b.exponent then
963 -- A exponent is smaller than B
964 v.cr_result := not r.a.negative & r.a.negative & "00";
965 else
966 -- Prepare to subtract mantissas, put B in R
967 v.cr_result := "0000";
968 v.instr_done := '0';
969 v.opsel_a := AIN_A;
970 v.state := CMP_1;
971 end if;
972 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
973
974 when DO_MTFSB =>
975 -- mtfsb{0,1}
976 j := to_integer(unsigned(insn_bt(r.insn)));
977 for i in 0 to 31 loop
978 if i = j then
979 v.fpscr(31 - i) := r.insn(6);
980 end if;
981 end loop;
982 v.instr_done := '1';
983 v.state := IDLE;
984
985 when DO_MTFSFI =>
986 -- mtfsfi
987 j := to_integer(unsigned(insn_bf(r.insn)));
988 if r.insn(16) = '0' then
989 for i in 0 to 7 loop
990 if i = j then
991 k := (7 - i) * 4;
992 v.fpscr(k + 3 downto k) := insn_u(r.insn);
993 end if;
994 end loop;
995 end if;
996 v.instr_done := '1';
997 v.state := IDLE;
998
999 when DO_FMRG =>
1000 -- fmrgew, fmrgow
1001 opsel_r <= RES_MISC;
1002 misc_sel <= "01" & r.insn(8) & '0';
1003 v.int_result := '1';
1004 v.writing_back := '1';
1005 v.instr_done := '1';
1006 v.state := IDLE;
1007
1008 when DO_MFFS =>
1009 v.int_result := '1';
1010 v.writing_back := '1';
1011 opsel_r <= RES_MISC;
1012 case r.insn(20 downto 16) is
1013 when "00000" =>
1014 -- mffs
1015 when "00001" =>
1016 -- mffsce
1017 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
1018 when "10100" | "10101" =>
1019 -- mffscdrn[i] (but we don't implement DRN)
1020 fpscr_mask := x"000000FF";
1021 when "10110" =>
1022 -- mffscrn
1023 fpscr_mask := x"000000FF";
1024 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1025 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
1026 when "10111" =>
1027 -- mffscrni
1028 fpscr_mask := x"000000FF";
1029 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
1030 when "11000" =>
1031 -- mffsl
1032 fpscr_mask := x"0007F0FF";
1033 when others =>
1034 illegal := '1';
1035 end case;
1036 v.instr_done := '1';
1037 v.state := IDLE;
1038
1039 when DO_MTFSF =>
1040 if r.insn(25) = '1' then
1041 flm := x"FF";
1042 elsif r.insn(16) = '1' then
1043 flm := x"00";
1044 else
1045 flm := r.insn(24 downto 17);
1046 end if;
1047 for i in 0 to 7 loop
1048 k := i * 4;
1049 if flm(i) = '1' then
1050 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1051 end if;
1052 end loop;
1053 v.instr_done := '1';
1054 v.state := IDLE;
1055
1056 when DO_FMR =>
1057 -- r.opsel_a = AIN_B
1058 v.result_class := r.b.class;
1059 v.result_exp := r.b.exponent;
1060 v.quieten_nan := '0';
1061 if r.insn(9) = '1' then
1062 v.result_sign := '0'; -- fabs
1063 elsif r.insn(8) = '1' then
1064 v.result_sign := '1'; -- fnabs
1065 elsif r.insn(7) = '1' then
1066 v.result_sign := r.b.negative; -- fmr
1067 elsif r.insn(6) = '1' then
1068 v.result_sign := not r.b.negative; -- fneg
1069 else
1070 v.result_sign := r.a.negative; -- fcpsgn
1071 end if;
1072 v.writing_back := '1';
1073 v.instr_done := '1';
1074 v.state := IDLE;
1075
1076 when DO_FRI => -- fri[nzpm]
1077 -- r.opsel_a = AIN_B
1078 v.result_class := r.b.class;
1079 v.result_sign := r.b.negative;
1080 v.result_exp := r.b.exponent;
1081 v.fpscr(FPSCR_FR) := '0';
1082 v.fpscr(FPSCR_FI) := '0';
1083 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1084 -- Signalling NAN
1085 v.fpscr(FPSCR_VXSNAN) := '1';
1086 invalid := '1';
1087 end if;
1088 if r.b.class = FINITE then
1089 if r.b.exponent >= to_signed(52, EXP_BITS) then
1090 -- integer already, no rounding required
1091 arith_done := '1';
1092 else
1093 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1094 v.state := FRI_1;
1095 v.round_mode := '1' & r.insn(7 downto 6);
1096 end if;
1097 else
1098 arith_done := '1';
1099 end if;
1100
1101 when DO_FRSP =>
1102 -- r.opsel_a = AIN_B, r.shift = 0
1103 v.result_class := r.b.class;
1104 v.result_sign := r.b.negative;
1105 v.result_exp := r.b.exponent;
1106 v.fpscr(FPSCR_FR) := '0';
1107 v.fpscr(FPSCR_FI) := '0';
1108 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1109 -- Signalling NAN
1110 v.fpscr(FPSCR_VXSNAN) := '1';
1111 invalid := '1';
1112 end if;
1113 set_x := '1';
1114 if r.b.class = FINITE then
1115 if r.b.exponent < to_signed(-126, EXP_BITS) then
1116 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1117 v.state := ROUND_UFLOW;
1118 elsif r.b.exponent > to_signed(127, EXP_BITS) then
1119 v.state := ROUND_OFLOW;
1120 else
1121 v.state := ROUNDING;
1122 end if;
1123 else
1124 arith_done := '1';
1125 end if;
1126
1127 when DO_FCTI =>
1128 -- instr bit 9: 1=dword 0=word
1129 -- instr bit 8: 1=unsigned 0=signed
1130 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1131 -- r.opsel_a = AIN_B
1132 v.result_class := r.b.class;
1133 v.result_sign := r.b.negative;
1134 v.result_exp := r.b.exponent;
1135 v.fpscr(FPSCR_FR) := '0';
1136 v.fpscr(FPSCR_FI) := '0';
1137 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1138 -- Signalling NAN
1139 v.fpscr(FPSCR_VXSNAN) := '1';
1140 invalid := '1';
1141 end if;
1142
1143 v.int_result := '1';
1144 case r.b.class is
1145 when ZERO =>
1146 arith_done := '1';
1147 when FINITE =>
1148 if r.b.exponent >= to_signed(64, EXP_BITS) or
1149 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
1150 v.state := INT_OFLOW;
1151 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1152 -- integer already, no rounding required,
1153 -- shift into final position
1154 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1155 if r.insn(8) = '1' and r.b.negative = '1' then
1156 v.state := INT_OFLOW;
1157 else
1158 v.state := INT_ISHIFT;
1159 end if;
1160 else
1161 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1162 v.state := INT_SHIFT;
1163 end if;
1164 when INFINITY | NAN =>
1165 v.state := INT_OFLOW;
1166 end case;
1167
1168 when DO_FCFID =>
1169 -- r.opsel_a = AIN_B
1170 v.result_sign := '0';
1171 if r.insn(8) = '0' and r.b.negative = '1' then
1172 -- fcfid[s] with negative operand, set R = -B
1173 opsel_ainv <= '1';
1174 carry_in <= '1';
1175 v.result_sign := '1';
1176 end if;
1177 v.result_class := r.b.class;
1178 v.result_exp := to_signed(54, EXP_BITS);
1179 v.fpscr(FPSCR_FR) := '0';
1180 v.fpscr(FPSCR_FI) := '0';
1181 if r.b.class = ZERO then
1182 arith_done := '1';
1183 else
1184 v.state := FINISH;
1185 end if;
1186
1187 when DO_FADD =>
1188 -- fadd[s] and fsub[s]
1189 -- r.opsel_a = AIN_A
1190 v.result_sign := r.a.negative;
1191 v.result_class := r.a.class;
1192 v.result_exp := r.a.exponent;
1193 v.fpscr(FPSCR_FR) := '0';
1194 v.fpscr(FPSCR_FI) := '0';
1195 v.use_a := '1';
1196 v.use_b := '1';
1197 is_add := r.a.negative xor r.b.negative xor r.insn(1);
1198 if r.a.class = FINITE and r.b.class = FINITE then
1199 v.is_subtract := not is_add;
1200 v.add_bsmall := r.exp_cmp;
1201 v.opsel_a := AIN_B;
1202 if r.exp_cmp = '0' then
1203 v.shift := r.a.exponent - r.b.exponent;
1204 v.result_sign := r.b.negative xnor r.insn(1);
1205 if r.a.exponent = r.b.exponent then
1206 v.state := ADD_2;
1207 else
1208 v.longmask := '0';
1209 v.state := ADD_SHIFT;
1210 end if;
1211 else
1212 v.state := ADD_1;
1213 end if;
1214 else
1215 if r.a.class = NAN or r.b.class = NAN then
1216 v.state := NAN_RESULT;
1217 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1218 -- invalid operation, construct QNaN
1219 v.fpscr(FPSCR_VXISI) := '1';
1220 qnan_result := '1';
1221 arith_done := '1';
1222 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1223 -- return -0 for rounding to -infinity
1224 v.result_sign := r.round_mode(1) and r.round_mode(0);
1225 arith_done := '1';
1226 elsif r.a.class = INFINITY or r.b.class = ZERO then
1227 -- result is A
1228 v.opsel_a := AIN_A;
1229 v.state := EXC_RESULT;
1230 else
1231 -- result is +/- B
1232 v.opsel_a := AIN_B;
1233 v.negate := not r.insn(1);
1234 v.state := EXC_RESULT;
1235 end if;
1236 end if;
1237
1238 when DO_FMUL =>
1239 -- fmul[s]
1240 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1241 v.result_sign := r.a.negative xor r.c.negative;
1242 v.result_class := r.a.class;
1243 v.fpscr(FPSCR_FR) := '0';
1244 v.fpscr(FPSCR_FI) := '0';
1245 v.use_a := '1';
1246 v.use_c := '1';
1247 if r.a.class = FINITE and r.c.class = FINITE then
1248 v.result_exp := r.a.exponent + r.c.exponent;
1249 -- Renormalize denorm operands
1250 if r.a.mantissa(54) = '0' then
1251 v.state := RENORM_A;
1252 elsif r.c.mantissa(54) = '0' then
1253 v.state := RENORM_C;
1254 else
1255 f_to_multiply.valid <= '1';
1256 v.state := MULT_1;
1257 end if;
1258 else
1259 if r.a.class = NAN or r.c.class = NAN then
1260 v.state := NAN_RESULT;
1261 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1262 (r.a.class = ZERO and r.c.class = INFINITY) then
1263 -- invalid operation, construct QNaN
1264 v.fpscr(FPSCR_VXIMZ) := '1';
1265 qnan_result := '1';
1266 elsif r.a.class = ZERO or r.a.class = INFINITY then
1267 -- result is +/- A
1268 arith_done := '1';
1269 else
1270 -- r.c.class is ZERO or INFINITY
1271 v.opsel_a := AIN_C;
1272 v.negate := r.a.negative;
1273 v.state := EXC_RESULT;
1274 end if;
1275 end if;
1276
1277 when DO_FDIV =>
1278 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1279 v.result_class := r.a.class;
1280 v.fpscr(FPSCR_FR) := '0';
1281 v.fpscr(FPSCR_FI) := '0';
1282 v.use_a := '1';
1283 v.use_b := '1';
1284 v.result_sign := r.a.negative xor r.b.negative;
1285 v.result_exp := r.a.exponent - r.b.exponent;
1286 v.count := "00";
1287 if r.a.class = FINITE and r.b.class = FINITE then
1288 -- Renormalize denorm operands
1289 if r.a.mantissa(54) = '0' then
1290 v.state := RENORM_A;
1291 elsif r.b.mantissa(54) = '0' then
1292 v.state := RENORM_B;
1293 else
1294 v.first := '1';
1295 v.state := DIV_2;
1296 end if;
1297 else
1298 if r.a.class = NAN or r.b.class = NAN then
1299 v.state := NAN_RESULT;
1300 elsif r.b.class = INFINITY then
1301 if r.a.class = INFINITY then
1302 v.fpscr(FPSCR_VXIDI) := '1';
1303 qnan_result := '1';
1304 else
1305 v.result_class := ZERO;
1306 end if;
1307 arith_done := '1';
1308 elsif r.b.class = ZERO then
1309 if r.a.class = ZERO then
1310 v.fpscr(FPSCR_VXZDZ) := '1';
1311 qnan_result := '1';
1312 else
1313 if r.a.class = FINITE then
1314 zero_divide := '1';
1315 end if;
1316 v.result_class := INFINITY;
1317 end if;
1318 arith_done := '1';
1319 else -- r.b.class = FINITE, result_class = r.a.class
1320 arith_done := '1';
1321 end if;
1322 end if;
1323
1324 when DO_FSEL =>
1325 v.fpscr(FPSCR_FR) := '0';
1326 v.fpscr(FPSCR_FI) := '0';
1327 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1328 v.opsel_a := AIN_C;
1329 else
1330 v.opsel_a := AIN_B;
1331 end if;
1332 v.quieten_nan := '0';
1333 v.state := EXC_RESULT;
1334
1335 when DO_FSQRT =>
1336 -- r.opsel_a = AIN_B
1337 v.result_class := r.b.class;
1338 v.result_sign := r.b.negative;
1339 v.fpscr(FPSCR_FR) := '0';
1340 v.fpscr(FPSCR_FI) := '0';
1341 v.use_b := '1';
1342 case r.b.class is
1343 when FINITE =>
1344 v.result_exp := r.b.exponent;
1345 if r.b.negative = '1' then
1346 v.fpscr(FPSCR_VXSQRT) := '1';
1347 qnan_result := '1';
1348 elsif r.b.mantissa(54) = '0' then
1349 v.state := RENORM_B;
1350 elsif r.b.exponent(0) = '0' then
1351 v.state := SQRT_1;
1352 else
1353 v.shift := to_signed(1, EXP_BITS);
1354 v.state := RENORM_B2;
1355 end if;
1356 when NAN =>
1357 v.state := NAN_RESULT;
1358 when ZERO =>
1359 -- result is B
1360 arith_done := '1';
1361 when INFINITY =>
1362 if r.b.negative = '1' then
1363 v.fpscr(FPSCR_VXSQRT) := '1';
1364 qnan_result := '1';
1365 -- else result is B
1366 end if;
1367 arith_done := '1';
1368 end case;
1369
1370 when DO_FRE =>
1371 -- r.opsel_a = AIN_B
1372 v.result_class := r.b.class;
1373 v.result_sign := r.b.negative;
1374 v.fpscr(FPSCR_FR) := '0';
1375 v.fpscr(FPSCR_FI) := '0';
1376 v.use_b := '1';
1377 case r.b.class is
1378 when FINITE =>
1379 v.result_exp := - r.b.exponent;
1380 if r.b.mantissa(54) = '0' then
1381 v.state := RENORM_B;
1382 else
1383 v.state := FRE_1;
1384 end if;
1385 when NAN =>
1386 v.state := NAN_RESULT;
1387 when INFINITY =>
1388 v.result_class := ZERO;
1389 arith_done := '1';
1390 when ZERO =>
1391 v.result_class := INFINITY;
1392 zero_divide := '1';
1393 arith_done := '1';
1394 end case;
1395
1396 when DO_FRSQRTE =>
1397 -- r.opsel_a = AIN_B
1398 v.result_class := r.b.class;
1399 v.result_sign := r.b.negative;
1400 v.fpscr(FPSCR_FR) := '0';
1401 v.fpscr(FPSCR_FI) := '0';
1402 v.use_b := '1';
1403 v.shift := to_signed(1, EXP_BITS);
1404 case r.b.class is
1405 when FINITE =>
1406 v.result_exp := r.b.exponent;
1407 if r.b.negative = '1' then
1408 v.fpscr(FPSCR_VXSQRT) := '1';
1409 qnan_result := '1';
1410 elsif r.b.mantissa(54) = '0' then
1411 v.state := RENORM_B;
1412 elsif r.b.exponent(0) = '0' then
1413 v.state := RSQRT_1;
1414 else
1415 v.state := RENORM_B2;
1416 end if;
1417 when NAN =>
1418 v.state := NAN_RESULT;
1419 when INFINITY =>
1420 if r.b.negative = '1' then
1421 v.fpscr(FPSCR_VXSQRT) := '1';
1422 qnan_result := '1';
1423 else
1424 v.result_class := ZERO;
1425 end if;
1426 arith_done := '1';
1427 when ZERO =>
1428 v.result_class := INFINITY;
1429 zero_divide := '1';
1430 arith_done := '1';
1431 end case;
1432
1433 when DO_FMADD =>
1434 -- fmadd, fmsub, fnmadd, fnmsub
1435 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1436 -- else AIN_B
1437 v.result_sign := r.a.negative;
1438 v.result_class := r.a.class;
1439 v.result_exp := r.a.exponent;
1440 v.fpscr(FPSCR_FR) := '0';
1441 v.fpscr(FPSCR_FI) := '0';
1442 v.use_a := '1';
1443 v.use_b := '1';
1444 v.use_c := '1';
1445 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
1446 if r.a.class = FINITE and r.c.class = FINITE and
1447 (r.b.class = FINITE or r.b.class = ZERO) then
1448 v.is_subtract := not is_add;
1449 mulexp := r.a.exponent + r.c.exponent;
1450 v.result_exp := mulexp;
1451 -- Make sure A and C are normalized
1452 if r.a.mantissa(54) = '0' then
1453 v.state := RENORM_A;
1454 elsif r.c.mantissa(54) = '0' then
1455 v.state := RENORM_C;
1456 elsif r.b.class = ZERO then
1457 -- no addend, degenerates to multiply
1458 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1459 f_to_multiply.valid <= '1';
1460 v.is_multiply := '1';
1461 v.state := MULT_1;
1462 elsif r.madd_cmp = '0' then
1463 -- addend is bigger, do multiply first
1464 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1465 f_to_multiply.valid <= '1';
1466 v.state := FMADD_1;
1467 else
1468 -- product is bigger, shift B right and use it as the
1469 -- addend to the multiplier
1470 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1471 -- for subtract, multiplier does B - A * C
1472 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1473 v.result_exp := r.b.exponent;
1474 v.state := FMADD_2;
1475 end if;
1476 else
1477 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1478 v.state := NAN_RESULT;
1479 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1480 (r.a.class = INFINITY and r.c.class = ZERO) then
1481 -- invalid operation, construct QNaN
1482 v.fpscr(FPSCR_VXIMZ) := '1';
1483 qnan_result := '1';
1484 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1485 if r.b.class = INFINITY and is_add = '0' then
1486 -- invalid operation, construct QNaN
1487 v.fpscr(FPSCR_VXISI) := '1';
1488 qnan_result := '1';
1489 else
1490 -- result is infinity
1491 v.result_class := INFINITY;
1492 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1493 arith_done := '1';
1494 end if;
1495 else
1496 -- Here A is zero, C is zero, or B is infinity
1497 -- Result is +/-B in all of those cases
1498 v.opsel_a := AIN_B;
1499 if r.b.class /= ZERO or is_add = '1' then
1500 v.negate := not (r.insn(1) xor r.insn(2));
1501 else
1502 -- have to be careful about rule for 0 - 0 result sign
1503 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1504 end if;
1505 v.state := EXC_RESULT;
1506 end if;
1507 end if;
1508
1509 when RENORM_A =>
1510 renormalize := '1';
1511 v.state := RENORM_A2;
1512 if r.insn(4) = '1' then
1513 v.opsel_a := AIN_C;
1514 else
1515 v.opsel_a := AIN_B;
1516 end if;
1517
1518 when RENORM_A2 =>
1519 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1520 set_a := '1';
1521 v.result_exp := new_exp;
1522 if r.insn(4) = '1' then
1523 if r.c.mantissa(54) = '1' then
1524 if r.insn(3) = '0' or r.b.class = ZERO then
1525 v.first := '1';
1526 v.state := MULT_1;
1527 else
1528 v.madd_cmp := '0';
1529 if new_exp + 1 >= r.b.exponent then
1530 v.madd_cmp := '1';
1531 end if;
1532 v.opsel_a := AIN_B;
1533 v.state := DO_FMADD;
1534 end if;
1535 else
1536 v.state := RENORM_C;
1537 end if;
1538 else
1539 if r.b.mantissa(54) = '1' then
1540 v.first := '1';
1541 v.state := DIV_2;
1542 else
1543 v.state := RENORM_B;
1544 end if;
1545 end if;
1546
1547 when RENORM_B =>
1548 renormalize := '1';
1549 renorm_sqrt := r.is_sqrt;
1550 v.state := RENORM_B2;
1551
1552 when RENORM_B2 =>
1553 set_b := '1';
1554 if r.is_sqrt = '0' then
1555 v.result_exp := r.result_exp + r.shift;
1556 else
1557 v.result_exp := new_exp;
1558 end if;
1559 v.opsel_a := AIN_B;
1560 v.state := LOOKUP;
1561
1562 when RENORM_C =>
1563 renormalize := '1';
1564 v.state := RENORM_C2;
1565
1566 when RENORM_C2 =>
1567 set_c := '1';
1568 v.result_exp := new_exp;
1569 if r.insn(3) = '0' or r.b.class = ZERO then
1570 v.first := '1';
1571 v.state := MULT_1;
1572 else
1573 v.madd_cmp := '0';
1574 if new_exp + 1 >= r.b.exponent then
1575 v.madd_cmp := '1';
1576 end if;
1577 v.opsel_a := AIN_B;
1578 v.state := DO_FMADD;
1579 end if;
1580
1581 when ADD_1 =>
1582 -- transferring B to R
1583 v.shift := r.b.exponent - r.a.exponent;
1584 v.result_exp := r.b.exponent;
1585 v.longmask := '0';
1586 v.state := ADD_SHIFT;
1587
1588 when ADD_SHIFT =>
1589 -- r.shift = - exponent difference, r.longmask = 0
1590 opsel_r <= RES_SHIFT;
1591 v.x := s_nz;
1592 set_x := '1';
1593 v.longmask := r.single_prec;
1594 if r.add_bsmall = '1' then
1595 v.opsel_a := AIN_A;
1596 else
1597 v.opsel_a := AIN_B;
1598 end if;
1599 v.state := ADD_2;
1600
1601 when ADD_2 =>
1602 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1603 opsel_b <= BIN_R;
1604 opsel_binv <= r.is_subtract;
1605 carry_in <= r.is_subtract and not r.x;
1606 v.shift := to_signed(-1, EXP_BITS);
1607 v.state := ADD_3;
1608
1609 when ADD_3 =>
1610 -- check for overflow or negative result (can't get both)
1611 -- r.shift = -1
1612 if r.r(63) = '1' then
1613 -- result is opposite sign to expected
1614 v.result_sign := not r.result_sign;
1615 opsel_ainv <= '1';
1616 carry_in <= '1';
1617 v.state := FINISH;
1618 elsif r.r(55) = '1' then
1619 -- sum overflowed, shift right
1620 opsel_r <= RES_SHIFT;
1621 set_x := '1';
1622 if exp_huge = '1' then
1623 v.state := ROUND_OFLOW;
1624 else
1625 v.state := ROUNDING;
1626 end if;
1627 elsif r.r(54) = '1' then
1628 set_x := '1';
1629 v.state := ROUNDING;
1630 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1631 -- r.x must be zero at this point
1632 v.result_class := ZERO;
1633 if r.is_subtract = '1' then
1634 -- set result sign depending on rounding mode
1635 v.result_sign := r.round_mode(1) and r.round_mode(0);
1636 end if;
1637 arith_done := '1';
1638 else
1639 renormalize := '1';
1640 v.state := NORMALIZE;
1641 end if;
1642
1643 when CMP_1 =>
1644 -- r.opsel_a = AIN_A
1645 opsel_b <= BIN_R;
1646 opsel_binv <= '1';
1647 carry_in <= '1';
1648 v.state := CMP_2;
1649
1650 when CMP_2 =>
1651 if r.r(63) = '1' then
1652 -- A is smaller in magnitude
1653 v.cr_result := not r.a.negative & r.a.negative & "00";
1654 elsif (r_hi_nz or r_lo_nz) = '0' then
1655 v.cr_result := "0010";
1656 else
1657 v.cr_result := r.a.negative & not r.a.negative & "00";
1658 end if;
1659 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
1660 v.instr_done := '1';
1661 v.state := IDLE;
1662
1663 when MULT_1 =>
1664 f_to_multiply.valid <= r.first;
1665 opsel_r <= RES_MULT;
1666 if multiply_to_f.valid = '1' then
1667 v.state := FINISH;
1668 end if;
1669
1670 when FMADD_1 =>
1671 -- Addend is bigger here
1672 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1673 -- note v.shift is at most -2 here
1674 v.shift := r.result_exp - r.b.exponent;
1675 opsel_r <= RES_MULT;
1676 opsel_s <= S_MULT;
1677 set_s := '1';
1678 f_to_multiply.valid <= r.first;
1679 if multiply_to_f.valid = '1' then
1680 v.longmask := '0';
1681 v.state := ADD_SHIFT;
1682 end if;
1683
1684 when FMADD_2 =>
1685 -- Product is potentially bigger here
1686 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1687 set_s := '1';
1688 opsel_s <= S_SHIFT;
1689 v.shift := r.shift - to_signed(64, EXP_BITS);
1690 v.state := FMADD_3;
1691
1692 when FMADD_3 =>
1693 -- r.shift = addend exp - product exp
1694 opsel_r <= RES_SHIFT;
1695 v.first := '1';
1696 v.state := FMADD_4;
1697
1698 when FMADD_4 =>
1699 msel_add <= MULADD_RS;
1700 f_to_multiply.valid <= r.first;
1701 msel_inv <= r.is_subtract;
1702 opsel_r <= RES_MULT;
1703 opsel_s <= S_MULT;
1704 set_s := '1';
1705 if multiply_to_f.valid = '1' then
1706 v.state := FMADD_5;
1707 end if;
1708
1709 when FMADD_5 =>
1710 -- negate R:S:X if negative
1711 if r.r(63) = '1' then
1712 v.result_sign := not r.result_sign;
1713 opsel_ainv <= '1';
1714 carry_in <= not (s_nz or r.x);
1715 opsel_s <= S_NEG;
1716 set_s := '1';
1717 end if;
1718 v.shift := to_signed(56, EXP_BITS);
1719 v.state := FMADD_6;
1720
1721 when FMADD_6 =>
1722 -- r.shift = 56 (or 0, but only if r is now nonzero)
1723 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
1724 if s_nz = '0' then
1725 -- must be a subtraction, and r.x must be zero
1726 v.result_class := ZERO;
1727 v.result_sign := r.round_mode(1) and r.round_mode(0);
1728 arith_done := '1';
1729 else
1730 -- R is all zeroes but there are non-zero bits in S
1731 -- so shift them into R and set S to 0
1732 opsel_r <= RES_SHIFT;
1733 set_s := '1';
1734 -- stay in state FMADD_6
1735 end if;
1736 elsif r.r(56 downto 54) = "001" then
1737 v.state := FINISH;
1738 else
1739 renormalize := '1';
1740 v.state := NORMALIZE;
1741 end if;
1742
1743 when LOOKUP =>
1744 -- r.opsel_a = AIN_B
1745 -- wait one cycle for inverse_table[B] lookup
1746 v.first := '1';
1747 if r.insn(4) = '0' then
1748 if r.insn(3) = '0' then
1749 v.state := DIV_2;
1750 else
1751 v.state := SQRT_1;
1752 end if;
1753 elsif r.insn(2) = '0' then
1754 v.state := FRE_1;
1755 else
1756 v.state := RSQRT_1;
1757 end if;
1758
1759 when DIV_2 =>
1760 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1761 msel_1 <= MUL1_B;
1762 msel_add <= MULADD_CONST;
1763 msel_inv <= '1';
1764 if r.count = 0 then
1765 msel_2 <= MUL2_LUT;
1766 else
1767 msel_2 <= MUL2_P;
1768 end if;
1769 set_y := r.first;
1770 pshift := '1';
1771 f_to_multiply.valid <= r.first;
1772 if multiply_to_f.valid = '1' then
1773 v.first := '1';
1774 v.count := r.count + 1;
1775 v.state := DIV_3;
1776 end if;
1777
1778 when DIV_3 =>
1779 -- compute Y = P = P * Y
1780 msel_1 <= MUL1_Y;
1781 msel_2 <= MUL2_P;
1782 f_to_multiply.valid <= r.first;
1783 pshift := '1';
1784 if multiply_to_f.valid = '1' then
1785 v.first := '1';
1786 if r.count = 3 then
1787 v.state := DIV_4;
1788 else
1789 v.state := DIV_2;
1790 end if;
1791 end if;
1792
1793 when DIV_4 =>
1794 -- compute R = P = A * Y (quotient)
1795 msel_1 <= MUL1_A;
1796 msel_2 <= MUL2_P;
1797 set_y := r.first;
1798 f_to_multiply.valid <= r.first;
1799 pshift := '1';
1800 if multiply_to_f.valid = '1' then
1801 opsel_r <= RES_MULT;
1802 v.first := '1';
1803 v.state := DIV_5;
1804 end if;
1805
1806 when DIV_5 =>
1807 -- compute P = A - B * R (remainder)
1808 msel_1 <= MUL1_B;
1809 msel_2 <= MUL2_R;
1810 msel_add <= MULADD_A;
1811 msel_inv <= '1';
1812 f_to_multiply.valid <= r.first;
1813 if multiply_to_f.valid = '1' then
1814 v.state := DIV_6;
1815 end if;
1816
1817 when DIV_6 =>
1818 -- test if remainder is 0 or >= B
1819 if pcmpb_lt = '1' then
1820 -- quotient is correct, set X if remainder non-zero
1821 v.x := r.p(58) or px_nz;
1822 else
1823 -- quotient needs to be incremented by 1
1824 carry_in <= '1';
1825 v.x := not pcmpb_eq;
1826 end if;
1827 v.state := FINISH;
1828
1829 when FRE_1 =>
1830 opsel_r <= RES_MISC;
1831 misc_sel <= "0111";
1832 v.shift := to_signed(1, EXP_BITS);
1833 v.state := NORMALIZE;
1834
1835 when FTDIV_1 =>
1836 v.cr_result(1) := exp_tiny or exp_huge;
1837 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
1838 v.instr_done := '1';
1839 v.state := IDLE;
1840 else
1841 v.shift := r.a.exponent;
1842 v.doing_ftdiv := "10";
1843 end if;
1844
1845 when RSQRT_1 =>
1846 opsel_r <= RES_MISC;
1847 misc_sel <= "0111";
1848 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1849 v.result_exp := - sqrt_exp;
1850 v.shift := to_signed(1, EXP_BITS);
1851 v.state := NORMALIZE;
1852
1853 when SQRT_1 =>
1854 -- put invsqr[B] in R and compute P = invsqr[B] * B
1855 -- also transfer B (in R) to A
1856 set_a := '1';
1857 opsel_r <= RES_MISC;
1858 misc_sel <= "0111";
1859 msel_1 <= MUL1_B;
1860 msel_2 <= MUL2_LUT;
1861 f_to_multiply.valid <= '1';
1862 v.shift := to_signed(-1, EXP_BITS);
1863 v.count := "00";
1864 v.state := SQRT_2;
1865
1866 when SQRT_2 =>
1867 -- shift R right one place
1868 -- not expecting multiplier result yet
1869 -- r.shift = -1
1870 opsel_r <= RES_SHIFT;
1871 v.first := '1';
1872 v.state := SQRT_3;
1873
1874 when SQRT_3 =>
1875 -- put R into Y, wait for product from multiplier
1876 msel_2 <= MUL2_R;
1877 set_y := r.first;
1878 pshift := '1';
1879 if multiply_to_f.valid = '1' then
1880 -- put result into R
1881 opsel_r <= RES_MULT;
1882 v.first := '1';
1883 v.state := SQRT_4;
1884 end if;
1885
1886 when SQRT_4 =>
1887 -- compute 1.5 - Y * P
1888 msel_1 <= MUL1_Y;
1889 msel_2 <= MUL2_P;
1890 msel_add <= MULADD_CONST;
1891 msel_inv <= '1';
1892 f_to_multiply.valid <= r.first;
1893 pshift := '1';
1894 if multiply_to_f.valid = '1' then
1895 v.state := SQRT_5;
1896 end if;
1897
1898 when SQRT_5 =>
1899 -- compute Y = Y * P
1900 msel_1 <= MUL1_Y;
1901 msel_2 <= MUL2_P;
1902 f_to_multiply.valid <= '1';
1903 v.first := '1';
1904 v.state := SQRT_6;
1905
1906 when SQRT_6 =>
1907 -- pipeline in R = R * P
1908 msel_1 <= MUL1_R;
1909 msel_2 <= MUL2_P;
1910 f_to_multiply.valid <= r.first;
1911 pshift := '1';
1912 if multiply_to_f.valid = '1' then
1913 v.first := '1';
1914 v.state := SQRT_7;
1915 end if;
1916
1917 when SQRT_7 =>
1918 -- first multiply is done, put result in Y
1919 msel_2 <= MUL2_P;
1920 set_y := r.first;
1921 -- wait for second multiply (should be here already)
1922 pshift := '1';
1923 if multiply_to_f.valid = '1' then
1924 -- put result into R
1925 opsel_r <= RES_MULT;
1926 v.first := '1';
1927 v.count := r.count + 1;
1928 if r.count < 2 then
1929 v.state := SQRT_4;
1930 else
1931 v.first := '1';
1932 v.state := SQRT_8;
1933 end if;
1934 end if;
1935
1936 when SQRT_8 =>
1937 -- compute P = A - R * R, which can be +ve or -ve
1938 -- we arranged for B to be put into A earlier
1939 msel_1 <= MUL1_R;
1940 msel_2 <= MUL2_R;
1941 msel_add <= MULADD_A;
1942 msel_inv <= '1';
1943 pshift := '1';
1944 f_to_multiply.valid <= r.first;
1945 if multiply_to_f.valid = '1' then
1946 v.first := '1';
1947 v.state := SQRT_9;
1948 end if;
1949
1950 when SQRT_9 =>
1951 -- compute P = P * Y
1952 -- since Y is an estimate of 1/sqrt(B), this makes P an
1953 -- estimate of the adjustment needed to R. Since the error
1954 -- could be negative and we have an unsigned multiplier, the
1955 -- upper bits can be wrong, but it turns out the lowest 8 bits
1956 -- are correct and are all we need (given 3 iterations through
1957 -- SQRT_4 to SQRT_7).
1958 msel_1 <= MUL1_Y;
1959 msel_2 <= MUL2_P;
1960 pshift := '1';
1961 f_to_multiply.valid <= r.first;
1962 if multiply_to_f.valid = '1' then
1963 v.state := SQRT_10;
1964 end if;
1965
1966 when SQRT_10 =>
1967 -- Add the bottom 8 bits of P, sign-extended,
1968 -- divided by 4, onto R.
1969 -- The division by 4 is because R is 10.54 format
1970 -- whereas P is 8.56 format.
1971 opsel_b <= BIN_PS6;
1972 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1973 v.result_exp := sqrt_exp;
1974 v.shift := to_signed(1, EXP_BITS);
1975 v.first := '1';
1976 v.state := SQRT_11;
1977
1978 when SQRT_11 =>
1979 -- compute P = A - R * R (remainder)
1980 -- also put 2 * R + 1 into B for comparison with P
1981 msel_1 <= MUL1_R;
1982 msel_2 <= MUL2_R;
1983 msel_add <= MULADD_A;
1984 msel_inv <= '1';
1985 f_to_multiply.valid <= r.first;
1986 shiftin := '1';
1987 set_b := r.first;
1988 if multiply_to_f.valid = '1' then
1989 v.state := SQRT_12;
1990 end if;
1991
1992 when SQRT_12 =>
1993 -- test if remainder is 0 or >= B = 2*R + 1
1994 if pcmpb_lt = '1' then
1995 -- square root is correct, set X if remainder non-zero
1996 v.x := r.p(58) or px_nz;
1997 else
1998 -- square root needs to be incremented by 1
1999 carry_in <= '1';
2000 v.x := not pcmpb_eq;
2001 end if;
2002 v.state := FINISH;
2003
2004 when INT_SHIFT =>
2005 -- r.shift = b.exponent - 52
2006 opsel_r <= RES_SHIFT;
2007 set_x := '1';
2008 v.state := INT_ROUND;
2009 v.shift := to_signed(-2, EXP_BITS);
2010
2011 when INT_ROUND =>
2012 -- r.shift = -2
2013 opsel_r <= RES_SHIFT;
2014 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2015 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2016 -- Check for negative values that don't round to 0 for fcti*u*
2017 if r.insn(8) = '1' and r.result_sign = '1' and
2018 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2019 v.state := INT_OFLOW;
2020 else
2021 v.state := INT_FINAL;
2022 end if;
2023
2024 when INT_ISHIFT =>
2025 -- r.shift = b.exponent - 54;
2026 opsel_r <= RES_SHIFT;
2027 v.state := INT_FINAL;
2028
2029 when INT_FINAL =>
2030 -- Negate if necessary, and increment for rounding if needed
2031 opsel_ainv <= r.result_sign;
2032 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2033 -- Check for possible overflows
2034 case r.insn(9 downto 8) is
2035 when "00" => -- fctiw[z]
2036 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2037 when "01" => -- fctiwu[z]
2038 need_check := r.r(31);
2039 when "10" => -- fctid[z]
2040 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2041 when others => -- fctidu[z]
2042 need_check := r.r(63);
2043 end case;
2044 if need_check = '1' then
2045 v.state := INT_CHECK;
2046 else
2047 if r.fpscr(FPSCR_FI) = '1' then
2048 v.fpscr(FPSCR_XX) := '1';
2049 end if;
2050 arith_done := '1';
2051 end if;
2052
2053 when INT_CHECK =>
2054 if r.insn(9) = '0' then
2055 msb := r.r(31);
2056 else
2057 msb := r.r(63);
2058 end if;
2059 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2060 if (r.insn(8) = '0' and msb /= r.result_sign) or
2061 (r.insn(8) = '1' and msb /= '1') then
2062 opsel_r <= RES_MISC;
2063 v.fpscr(FPSCR_VXCVI) := '1';
2064 invalid := '1';
2065 else
2066 if r.fpscr(FPSCR_FI) = '1' then
2067 v.fpscr(FPSCR_XX) := '1';
2068 end if;
2069 end if;
2070 arith_done := '1';
2071
2072 when INT_OFLOW =>
2073 opsel_r <= RES_MISC;
2074 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2075 if r.b.class = NAN then
2076 misc_sel(0) <= '1';
2077 end if;
2078 v.fpscr(FPSCR_VXCVI) := '1';
2079 invalid := '1';
2080 arith_done := '1';
2081
2082 when FRI_1 =>
2083 -- r.shift = b.exponent - 52
2084 opsel_r <= RES_SHIFT;
2085 set_x := '1';
2086 v.state := ROUNDING;
2087
2088 when FINISH =>
2089 if r.is_multiply = '1' and px_nz = '1' then
2090 v.x := '1';
2091 end if;
2092 if r.r(63 downto 54) /= "0000000001" then
2093 renormalize := '1';
2094 v.state := NORMALIZE;
2095 else
2096 set_x := '1';
2097 if exp_tiny = '1' then
2098 v.shift := new_exp - min_exp;
2099 v.state := ROUND_UFLOW;
2100 elsif exp_huge = '1' then
2101 v.state := ROUND_OFLOW;
2102 else
2103 v.state := ROUNDING;
2104 end if;
2105 end if;
2106
2107 when NORMALIZE =>
2108 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2109 -- r.shift = clz(r.r) - 9
2110 opsel_r <= RES_SHIFT;
2111 set_x := '1';
2112 if exp_tiny = '1' then
2113 v.shift := new_exp - min_exp;
2114 v.state := ROUND_UFLOW;
2115 elsif exp_huge = '1' then
2116 v.state := ROUND_OFLOW;
2117 else
2118 v.state := ROUNDING;
2119 end if;
2120
2121 when ROUND_UFLOW =>
2122 -- r.shift = - amount by which exponent underflows
2123 v.tiny := '1';
2124 if r.fpscr(FPSCR_UE) = '0' then
2125 -- disabled underflow exception case
2126 -- have to denormalize before rounding
2127 opsel_r <= RES_SHIFT;
2128 set_x := '1';
2129 v.state := ROUNDING;
2130 else
2131 -- enabled underflow exception case
2132 -- if denormalized, have to normalize before rounding
2133 v.fpscr(FPSCR_UX) := '1';
2134 v.result_exp := r.result_exp + bias_exp;
2135 if r.r(54) = '0' then
2136 renormalize := '1';
2137 v.state := NORMALIZE;
2138 else
2139 v.state := ROUNDING;
2140 end if;
2141 end if;
2142
2143 when ROUND_OFLOW =>
2144 v.fpscr(FPSCR_OX) := '1';
2145 if r.fpscr(FPSCR_OE) = '0' then
2146 -- disabled overflow exception
2147 -- result depends on rounding mode
2148 v.fpscr(FPSCR_XX) := '1';
2149 v.fpscr(FPSCR_FI) := '1';
2150 if r.round_mode(1 downto 0) = "00" or
2151 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2152 v.result_class := INFINITY;
2153 v.fpscr(FPSCR_FR) := '1';
2154 else
2155 v.fpscr(FPSCR_FR) := '0';
2156 end if;
2157 -- construct largest representable number
2158 v.result_exp := max_exp;
2159 opsel_r <= RES_MISC;
2160 misc_sel <= "001" & r.single_prec;
2161 arith_done := '1';
2162 else
2163 -- enabled overflow exception
2164 v.result_exp := r.result_exp - bias_exp;
2165 v.state := ROUNDING;
2166 end if;
2167
2168 when ROUNDING =>
2169 opsel_mask <= '1';
2170 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2171 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2172 if round(1) = '1' then
2173 -- increment the LSB for the precision
2174 opsel_b <= BIN_RND;
2175 v.shift := to_signed(-1, EXP_BITS);
2176 v.state := ROUNDING_2;
2177 else
2178 if r.r(54) = '0' then
2179 -- result after masking could be zero, or could be a
2180 -- denormalized result that needs to be renormalized
2181 renormalize := '1';
2182 v.state := ROUNDING_3;
2183 else
2184 arith_done := '1';
2185 end if;
2186 end if;
2187 if round(0) = '1' then
2188 v.fpscr(FPSCR_XX) := '1';
2189 if r.tiny = '1' then
2190 v.fpscr(FPSCR_UX) := '1';
2191 end if;
2192 end if;
2193
2194 when ROUNDING_2 =>
2195 -- Check for overflow during rounding
2196 -- r.shift = -1
2197 v.x := '0';
2198 if r.r(55) = '1' then
2199 opsel_r <= RES_SHIFT;
2200 if exp_huge = '1' then
2201 v.state := ROUND_OFLOW;
2202 else
2203 arith_done := '1';
2204 end if;
2205 elsif r.r(54) = '0' then
2206 -- Do CLZ so we can renormalize the result
2207 renormalize := '1';
2208 v.state := ROUNDING_3;
2209 else
2210 arith_done := '1';
2211 end if;
2212
2213 when ROUNDING_3 =>
2214 -- r.shift = clz(r.r) - 9
2215 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2216 if mant_nz = '0' then
2217 v.result_class := ZERO;
2218 if r.is_subtract = '1' then
2219 -- set result sign depending on rounding mode
2220 v.result_sign := r.round_mode(1) and r.round_mode(0);
2221 end if;
2222 arith_done := '1';
2223 else
2224 -- Renormalize result after rounding
2225 opsel_r <= RES_SHIFT;
2226 v.denorm := exp_tiny;
2227 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2228 if new_exp < to_signed(-1022, EXP_BITS) then
2229 v.state := DENORM;
2230 else
2231 arith_done := '1';
2232 end if;
2233 end if;
2234
2235 when DENORM =>
2236 -- r.shift = result_exp - -1022
2237 opsel_r <= RES_SHIFT;
2238 arith_done := '1';
2239
2240 when NAN_RESULT =>
2241 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2242 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2243 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2244 -- Signalling NAN
2245 v.fpscr(FPSCR_VXSNAN) := '1';
2246 invalid := '1';
2247 end if;
2248 if r.use_a = '1' and r.a.class = NAN then
2249 v.opsel_a := AIN_A;
2250 elsif r.use_b = '1' and r.b.class = NAN then
2251 v.opsel_a := AIN_B;
2252 elsif r.use_c = '1' and r.c.class = NAN then
2253 v.opsel_a := AIN_C;
2254 end if;
2255 v.state := EXC_RESULT;
2256
2257 when EXC_RESULT =>
2258 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2259 case r.opsel_a is
2260 when AIN_B =>
2261 v.result_sign := r.b.negative xor r.negate;
2262 v.result_exp := r.b.exponent;
2263 v.result_class := r.b.class;
2264 when AIN_C =>
2265 v.result_sign := r.c.negative xor r.negate;
2266 v.result_exp := r.c.exponent;
2267 v.result_class := r.c.class;
2268 when others =>
2269 v.result_sign := r.a.negative xor r.negate;
2270 v.result_exp := r.a.exponent;
2271 v.result_class := r.a.class;
2272 end case;
2273 arith_done := '1';
2274
2275 end case;
2276
2277 if zero_divide = '1' then
2278 v.fpscr(FPSCR_ZX) := '1';
2279 end if;
2280 if qnan_result = '1' then
2281 invalid := '1';
2282 v.result_class := NAN;
2283 v.result_sign := '0';
2284 misc_sel <= "0001";
2285 opsel_r <= RES_MISC;
2286 arith_done := '1';
2287 end if;
2288 if invalid = '1' then
2289 v.invalid := '1';
2290 end if;
2291 if arith_done = '1' then
2292 -- Enabled invalid exception doesn't write result or FPRF
2293 -- Neither does enabled zero-divide exception
2294 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2295 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2296 v.writing_back := '1';
2297 v.update_fprf := '1';
2298 end if;
2299 v.instr_done := '1';
2300 v.state := IDLE;
2301 update_fx := '1';
2302 end if;
2303
2304 -- Multiplier and divide/square root data path
2305 case msel_1 is
2306 when MUL1_A =>
2307 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2308 when MUL1_B =>
2309 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2310 when MUL1_Y =>
2311 f_to_multiply.data1 <= r.y;
2312 when others =>
2313 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2314 end case;
2315 case msel_2 is
2316 when MUL2_C =>
2317 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2318 when MUL2_LUT =>
2319 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2320 when MUL2_P =>
2321 f_to_multiply.data2 <= r.p;
2322 when others =>
2323 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2324 end case;
2325 maddend := (others => '0');
2326 case msel_add is
2327 when MULADD_CONST =>
2328 -- addend is 2.0 or 1.5 in 16.112 format
2329 if r.is_sqrt = '0' then
2330 maddend(113) := '1'; -- 2.0
2331 else
2332 maddend(112 downto 111) := "11"; -- 1.5
2333 end if;
2334 when MULADD_A =>
2335 -- addend is A in 16.112 format
2336 maddend(121 downto 58) := r.a.mantissa;
2337 when MULADD_RS =>
2338 -- addend is concatenation of R and S in 16.112 format
2339 maddend := "000000" & r.r & r.s & "00";
2340 when others =>
2341 end case;
2342 if msel_inv = '1' then
2343 f_to_multiply.addend <= not maddend;
2344 else
2345 f_to_multiply.addend <= maddend;
2346 end if;
2347 f_to_multiply.not_result <= msel_inv;
2348 if set_y = '1' then
2349 v.y := f_to_multiply.data2;
2350 end if;
2351 if multiply_to_f.valid = '1' then
2352 if pshift = '0' then
2353 v.p := multiply_to_f.result(63 downto 0);
2354 else
2355 v.p := multiply_to_f.result(119 downto 56);
2356 end if;
2357 end if;
2358
2359 -- Data path.
2360 -- This has A and B input multiplexers, an adder, a shifter,
2361 -- count-leading-zeroes logic, and a result mux.
2362 if r.longmask = '1' then
2363 mshift := r.shift + to_signed(-29, EXP_BITS);
2364 else
2365 mshift := r.shift;
2366 end if;
2367 if mshift < to_signed(-64, EXP_BITS) then
2368 mask := (others => '1');
2369 elsif mshift >= to_signed(0, EXP_BITS) then
2370 mask := (others => '0');
2371 else
2372 mask := right_mask(unsigned(mshift(5 downto 0)));
2373 end if;
2374 case r.opsel_a is
2375 when AIN_R =>
2376 in_a0 := r.r;
2377 when AIN_A =>
2378 in_a0 := r.a.mantissa;
2379 when AIN_B =>
2380 in_a0 := r.b.mantissa;
2381 when others =>
2382 in_a0 := r.c.mantissa;
2383 end case;
2384 if (or (mask and in_a0)) = '1' and set_x = '1' then
2385 v.x := '1';
2386 end if;
2387 if opsel_ainv = '1' then
2388 in_a0 := not in_a0;
2389 end if;
2390 in_a <= in_a0;
2391 case opsel_b is
2392 when BIN_ZERO =>
2393 in_b0 := (others => '0');
2394 when BIN_R =>
2395 in_b0 := r.r;
2396 when BIN_RND =>
2397 round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
2398 in_b0 := round_inc;
2399 when others =>
2400 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2401 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2402 end case;
2403 if opsel_binv = '1' then
2404 in_b0 := not in_b0;
2405 end if;
2406 in_b <= in_b0;
2407 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2408 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2409 std_ulogic_vector(r.shift(6 downto 0)));
2410 else
2411 shift_res := (others => '0');
2412 end if;
2413 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2414 if opsel_mask = '1' then
2415 sum(1 downto 0) := "00";
2416 if r.single_prec = '1' then
2417 sum(30 downto 2) := (others => '0');
2418 end if;
2419 end if;
2420 case opsel_r is
2421 when RES_SUM =>
2422 result <= sum;
2423 when RES_SHIFT =>
2424 result <= shift_res;
2425 when RES_MULT =>
2426 result <= multiply_to_f.result(121 downto 58);
2427 when others =>
2428 case misc_sel is
2429 when "0000" =>
2430 misc := x"00000000" & (r.fpscr and fpscr_mask);
2431 when "0001" =>
2432 -- generated QNaN mantissa
2433 misc := x"0020000000000000";
2434 when "0010" =>
2435 -- mantissa of max representable DP number
2436 misc := x"007ffffffffffffc";
2437 when "0011" =>
2438 -- mantissa of max representable SP number
2439 misc := x"007fffff80000000";
2440 when "0100" =>
2441 -- fmrgow result
2442 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2443 when "0110" =>
2444 -- fmrgew result
2445 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2446 when "0111" =>
2447 misc := 10x"000" & inverse_est & 35x"000000000";
2448 when "1000" =>
2449 -- max positive result for fctiw[z]
2450 misc := x"000000007fffffff";
2451 when "1001" =>
2452 -- max negative result for fctiw[z]
2453 misc := x"ffffffff80000000";
2454 when "1010" =>
2455 -- max positive result for fctiwu[z]
2456 misc := x"00000000ffffffff";
2457 when "1011" =>
2458 -- max negative result for fctiwu[z]
2459 misc := x"0000000000000000";
2460 when "1100" =>
2461 -- max positive result for fctid[z]
2462 misc := x"7fffffffffffffff";
2463 when "1101" =>
2464 -- max negative result for fctid[z]
2465 misc := x"8000000000000000";
2466 when "1110" =>
2467 -- max positive result for fctidu[z]
2468 misc := x"ffffffffffffffff";
2469 when "1111" =>
2470 -- max negative result for fctidu[z]
2471 misc := x"0000000000000000";
2472 when others =>
2473 misc := x"0000000000000000";
2474 end case;
2475 result <= misc;
2476 end case;
2477 v.r := result;
2478 if set_s = '1' then
2479 case opsel_s is
2480 when S_NEG =>
2481 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2482 when S_MULT =>
2483 v.s := multiply_to_f.result(57 downto 2);
2484 when S_SHIFT =>
2485 v.s := shift_res(63 downto 8);
2486 if shift_res(7 downto 0) /= x"00" then
2487 v.x := '1';
2488 end if;
2489 when others =>
2490 v.s := (others => '0');
2491 end case;
2492 end if;
2493
2494 if set_a = '1' then
2495 v.a.exponent := new_exp;
2496 v.a.mantissa := shift_res;
2497 end if;
2498 if set_b = '1' then
2499 v.b.exponent := new_exp;
2500 v.b.mantissa := shift_res;
2501 end if;
2502 if set_c = '1' then
2503 v.c.exponent := new_exp;
2504 v.c.mantissa := shift_res;
2505 end if;
2506
2507 if opsel_r = RES_SHIFT then
2508 v.result_exp := new_exp;
2509 end if;
2510
2511 if renormalize = '1' then
2512 clz := count_left_zeroes(r.r);
2513 if renorm_sqrt = '1' then
2514 -- make denormalized value end up with even exponent
2515 clz(0) := '1';
2516 end if;
2517 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2518 end if;
2519
2520 if r.int_result = '1' then
2521 fp_result <= r.r;
2522 else
2523 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2524 r.single_prec, r.quieten_nan);
2525 end if;
2526 if r.update_fprf = '1' then
2527 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2528 r.r(54) and not r.denorm);
2529 end if;
2530
2531 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2532 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2533 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2534 v.fpscr(FPSCR_VE downto FPSCR_XE));
2535 if update_fx = '1' and
2536 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2537 v.fpscr(FPSCR_FX) := '1';
2538 end if;
2539 if r.rc = '1' then
2540 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2541 end if;
2542
2543 if illegal = '1' then
2544 v.instr_done := '0';
2545 v.do_intr := '0';
2546 v.writing_back := '0';
2547 v.busy := '0';
2548 v.state := IDLE;
2549 else
2550 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2551 if v.state /= IDLE or v.do_intr = '1' then
2552 v.busy := '1';
2553 end if;
2554 end if;
2555
2556 rin <= v;
2557 e_out.illegal <= illegal;
2558 end process;
2559
2560 end architecture behaviour;